aboutsummaryrefslogtreecommitdiffstats
path: root/include/net
diff options
context:
space:
mode:
Diffstat (limited to 'include/net')
-rw-r--r--include/net/6lowpan.h2
-rw-r--r--include/net/9p/9p.h17
-rw-r--r--include/net/9p/client.h84
-rw-r--r--include/net/9p/transport.h35
-rw-r--r--include/net/Space.h10
-rw-r--r--include/net/act_api.h101
-rw-r--r--include/net/addrconf.h15
-rw-r--r--include/net/af_rxrpc.h20
-rw-r--r--include/net/af_unix.h29
-rw-r--r--include/net/af_vsock.h14
-rw-r--r--include/net/amt.h408
-rw-r--r--include/net/arp.h9
-rw-r--r--include/net/ax25.h42
-rw-r--r--include/net/ax88796.h9
-rw-r--r--include/net/bareudp.h16
-rw-r--r--include/net/bluetooth/bluetooth.h230
-rw-r--r--include/net/bluetooth/hci.h639
-rw-r--r--include/net/bluetooth/hci_core.h669
-rw-r--r--include/net/bluetooth/hci_sock.h10
-rw-r--r--include/net/bluetooth/hci_sync.h131
-rw-r--r--include/net/bluetooth/iso.h32
-rw-r--r--include/net/bluetooth/l2cap.h62
-rw-r--r--include/net/bluetooth/mgmt.h330
-rw-r--r--include/net/bluetooth/rfcomm.h3
-rw-r--r--include/net/bluetooth/sco.h2
-rw-r--r--include/net/bond_3ad.h7
-rw-r--r--include/net/bond_alb.h6
-rw-r--r--include/net/bond_options.h51
-rw-r--r--include/net/bonding.h131
-rw-r--r--include/net/bpf_sk_storage.h40
-rw-r--r--include/net/busy_poll.h51
-rw-r--r--include/net/caif/caif_dev.h2
-rw-r--r--include/net/caif/caif_hsi.h200
-rw-r--r--include/net/caif/caif_layer.h4
-rw-r--r--include/net/caif/caif_spi.h155
-rw-r--r--include/net/caif/cfcnfg.h2
-rw-r--r--include/net/caif/cfserl.h1
-rw-r--r--include/net/cfg80211.h1882
-rw-r--r--include/net/cfg802154.h20
-rw-r--r--include/net/checksum.h98
-rw-r--r--include/net/cipso_ipv4.h12
-rw-r--r--include/net/cls_cgroup.h7
-rw-r--r--include/net/codel.h7
-rw-r--r--include/net/codel_impl.h20
-rw-r--r--include/net/codel_qdisc.h3
-rw-r--r--include/net/compat.h71
-rw-r--r--include/net/datalink.h9
-rw-r--r--include/net/dcbevent.h2
-rw-r--r--include/net/dcbnl.h2
-rw-r--r--include/net/devlink.h1176
-rw-r--r--include/net/dn.h231
-rw-r--r--include/net/dn_dev.h199
-rw-r--r--include/net/dn_fib.h167
-rw-r--r--include/net/dn_neigh.h30
-rw-r--r--include/net/dn_nsp.h195
-rw-r--r--include/net/dn_route.h115
-rw-r--r--include/net/drop_monitor.h33
-rw-r--r--include/net/dropreason.h323
-rw-r--r--include/net/dsa.h984
-rw-r--r--include/net/dst.h64
-rw-r--r--include/net/dst_cache.h11
-rw-r--r--include/net/dst_metadata.h59
-rw-r--r--include/net/dst_ops.h4
-rw-r--r--include/net/erspan.h22
-rw-r--r--include/net/esp.h17
-rw-r--r--include/net/espintcp.h1
-rw-r--r--include/net/ethoc.h3
-rw-r--r--include/net/failover.h1
-rw-r--r--include/net/fib_rules.h43
-rw-r--r--include/net/firewire.h5
-rw-r--r--include/net/flow.h56
-rw-r--r--include/net/flow_dissector.h86
-rw-r--r--include/net/flow_offload.h303
-rw-r--r--include/net/fq.h16
-rw-r--r--include/net/fq_impl.h173
-rw-r--r--include/net/garp.h2
-rw-r--r--include/net/gen_stats.h59
-rw-r--r--include/net/genetlink.h110
-rw-r--r--include/net/gre.h19
-rw-r--r--include/net/gro.h450
-rw-r--r--include/net/gtp.h46
-rw-r--r--include/net/gue.h5
-rw-r--r--include/net/hwbm.h2
-rw-r--r--include/net/icmp.h7
-rw-r--r--include/net/ieee80211_radiotap.h11
-rw-r--r--include/net/ieee802154_netdev.h43
-rw-r--r--include/net/if_inet6.h56
-rw-r--r--include/net/ila.h2
-rw-r--r--include/net/inet6_connection_sock.h2
-rw-r--r--include/net/inet6_hashtables.h30
-rw-r--r--include/net/inet_common.h16
-rw-r--r--include/net/inet_connection_sock.h77
-rw-r--r--include/net/inet_dscp.h57
-rw-r--r--include/net/inet_ecn.h101
-rw-r--r--include/net/inet_frag.h17
-rw-r--r--include/net/inet_hashtables.h183
-rw-r--r--include/net/inet_sock.h53
-rw-r--r--include/net/inet_timewait_sock.h5
-rw-r--r--include/net/ioam6.h68
-rw-r--r--include/net/ip.h113
-rw-r--r--include/net/ip6_checksum.h30
-rw-r--r--include/net/ip6_fib.h82
-rw-r--r--include/net/ip6_route.h56
-rw-r--r--include/net/ip6_tunnel.h3
-rw-r--r--include/net/ip_fib.h83
-rw-r--r--include/net/ip_tunnels.h61
-rw-r--r--include/net/ip_vs.h69
-rw-r--r--include/net/ipcomp.h4
-rw-r--r--include/net/ipconfig.h2
-rw-r--r--include/net/ipv6.h212
-rw-r--r--include/net/ipv6_frag.h34
-rw-r--r--include/net/ipv6_stubs.h20
-rw-r--r--include/net/ipx.h171
-rw-r--r--include/net/iucv/af_iucv.h21
-rw-r--r--include/net/l3mdev.h39
-rw-r--r--include/net/lapb.h2
-rw-r--r--include/net/llc.h6
-rw-r--r--include/net/llc_c_ac.h7
-rw-r--r--include/net/llc_c_st.h4
-rw-r--r--include/net/llc_conn.h1
-rw-r--r--include/net/llc_if.h3
-rw-r--r--include/net/llc_pdu.h31
-rw-r--r--include/net/llc_s_ac.h4
-rw-r--r--include/net/llc_s_ev.h1
-rw-r--r--include/net/llc_s_st.h6
-rw-r--r--include/net/lwtunnel.h11
-rw-r--r--include/net/mac80211.h1159
-rw-r--r--include/net/mac802154.h31
-rw-r--r--include/net/macsec.h105
-rw-r--r--include/net/mctp.h298
-rw-r--r--include/net/mctpdevice.h56
-rw-r--r--include/net/mip6.h2
-rw-r--r--include/net/mld.h9
-rw-r--r--include/net/mpls.h17
-rw-r--r--include/net/mpls_iptunnel.h5
-rw-r--r--include/net/mptcp.h157
-rw-r--r--include/net/mrp.h4
-rw-r--r--include/net/ncsi.h2
-rw-r--r--include/net/ndisc.h36
-rw-r--r--include/net/neighbour.h94
-rw-r--r--include/net/net_debug.h157
-rw-r--r--include/net/net_namespace.h114
-rw-r--r--include/net/net_trackers.h18
-rw-r--r--include/net/netevent.h1
-rw-r--r--include/net/netfilter/ipv4/nf_defrag_ipv4.h3
-rw-r--r--include/net/netfilter/ipv4/nf_reject.h14
-rw-r--r--include/net/netfilter/ipv6/nf_conntrack_ipv6.h3
-rw-r--r--include/net/netfilter/ipv6/nf_defrag_ipv6.h8
-rw-r--r--include/net/netfilter/ipv6/nf_reject.h14
-rw-r--r--include/net/netfilter/nf_conntrack.h69
-rw-r--r--include/net/netfilter/nf_conntrack_acct.h12
-rw-r--r--include/net/netfilter/nf_conntrack_act_ct.h50
-rw-r--r--include/net/netfilter/nf_conntrack_bpf.h46
-rw-r--r--include/net/netfilter/nf_conntrack_core.h23
-rw-r--r--include/net/netfilter/nf_conntrack_count.h1
-rw-r--r--include/net/netfilter/nf_conntrack_ecache.h129
-rw-r--r--include/net/netfilter/nf_conntrack_extend.h49
-rw-r--r--include/net/netfilter/nf_conntrack_helper.h1
-rw-r--r--include/net/netfilter/nf_conntrack_l4proto.h42
-rw-r--r--include/net/netfilter/nf_conntrack_labels.h13
-rw-r--r--include/net/netfilter/nf_conntrack_seqadj.h3
-rw-r--r--include/net/netfilter/nf_conntrack_timeout.h30
-rw-r--r--include/net/netfilter/nf_conntrack_timestamp.h13
-rw-r--r--include/net/netfilter/nf_flow_table.h203
-rw-r--r--include/net/netfilter/nf_hooks_lwtunnel.h7
-rw-r--r--include/net/netfilter/nf_log.h24
-rw-r--r--include/net/netfilter/nf_nat.h4
-rw-r--r--include/net/netfilter/nf_nat_helper.h1
-rw-r--r--include/net/netfilter/nf_queue.h15
-rw-r--r--include/net/netfilter/nf_reject.h21
-rw-r--r--include/net/netfilter/nf_tables.h413
-rw-r--r--include/net/netfilter/nf_tables_core.h89
-rw-r--r--include/net/netfilter/nf_tables_ipv4.h64
-rw-r--r--include/net/netfilter/nf_tables_ipv6.h78
-rw-r--r--include/net/netfilter/nf_tables_offload.h22
-rw-r--r--include/net/netfilter/nft_fib.h5
-rw-r--r--include/net/netfilter/nft_meta.h7
-rw-r--r--include/net/netfilter/xt_rateest.h2
-rw-r--r--include/net/netlink.h251
-rw-r--r--include/net/netns/bpf.h28
-rw-r--r--include/net/netns/can.h2
-rw-r--r--include/net/netns/conntrack.h42
-rw-r--r--include/net/netns/core.h4
-rw-r--r--include/net/netns/dccp.h12
-rw-r--r--include/net/netns/flow_table.h14
-rw-r--r--include/net/netns/generic.h3
-rw-r--r--include/net/netns/ipv4.h157
-rw-r--r--include/net/netns/ipv6.h53
-rw-r--r--include/net/netns/mctp.h37
-rw-r--r--include/net/netns/mib.h33
-rw-r--r--include/net/netns/mpls.h2
-rw-r--r--include/net/netns/netfilter.h8
-rw-r--r--include/net/netns/nexthop.h2
-rw-r--r--include/net/netns/nftables.h6
-rw-r--r--include/net/netns/sctp.h14
-rw-r--r--include/net/netns/smc.h26
-rw-r--r--include/net/netns/unix.h8
-rw-r--r--include/net/netns/x_tables.h21
-rw-r--r--include/net/netns/xfrm.h9
-rw-r--r--include/net/netrom.h1
-rw-r--r--include/net/nexthop.h314
-rw-r--r--include/net/nfc/digital.h4
-rw-r--r--include/net/nfc/hci.h6
-rw-r--r--include/net/nfc/nci.h48
-rw-r--r--include/net/nfc/nci_core.h34
-rw-r--r--include/net/nfc/nfc.h18
-rw-r--r--include/net/nl802154.h13
-rw-r--r--include/net/p8022.h5
-rw-r--r--include/net/page_pool.h226
-rw-r--r--include/net/phonet/pep.h3
-rw-r--r--include/net/phonet/phonet.h4
-rw-r--r--include/net/phonet/pn_dev.h5
-rw-r--r--include/net/pie.h31
-rw-r--r--include/net/ping.h4
-rw-r--r--include/net/pkt_cls.h231
-rw-r--r--include/net/pkt_sched.h116
-rw-r--r--include/net/pptp.h3
-rw-r--r--include/net/protocol.h5
-rw-r--r--include/net/psample.h23
-rw-r--r--include/net/psnap.h5
-rw-r--r--include/net/raw.h20
-rw-r--r--include/net/rawv6.h7
-rw-r--r--include/net/red.h59
-rw-r--r--include/net/regulatory.h12
-rw-r--r--include/net/request_sock.h13
-rw-r--r--include/net/rose.h12
-rw-r--r--include/net/route.h60
-rw-r--r--include/net/rpl.h40
-rw-r--r--include/net/rtnetlink.h33
-rw-r--r--include/net/sch_generic.h273
-rw-r--r--include/net/sctp/command.h2
-rw-r--r--include/net/sctp/constants.h34
-rw-r--r--include/net/sctp/sctp.h89
-rw-r--r--include/net/sctp/sm.h16
-rw-r--r--include/net/sctp/structs.h93
-rw-r--r--include/net/sctp/ulpevent.h2
-rw-r--r--include/net/secure_seq.h6
-rw-r--r--include/net/seg6.h23
-rw-r--r--include/net/selftests.h31
-rw-r--r--include/net/smc.h13
-rw-r--r--include/net/sock.h796
-rw-r--r--include/net/sock_reuseport.h22
-rw-r--r--include/net/stp.h2
-rw-r--r--include/net/strparser.h27
-rw-r--r--include/net/switchdev.h286
-rw-r--r--include/net/tc_act/tc_ct.h34
-rw-r--r--include/net/tc_act/tc_gact.h15
-rw-r--r--include/net/tc_act/tc_gate.h141
-rw-r--r--include/net/tc_act/tc_mirred.h1
-rw-r--r--include/net/tc_act/tc_pedit.h1
-rw-r--r--include/net/tc_act/tc_police.h130
-rw-r--r--include/net/tc_act/tc_skbedit.h54
-rw-r--r--include/net/tc_act/tc_tunnel_key.h15
-rw-r--r--include/net/tc_act/tc_vlan.h13
-rw-r--r--include/net/tcp.h427
-rw-r--r--include/net/tls.h397
-rw-r--r--include/net/transp_v6.h5
-rw-r--r--include/net/tso.h23
-rw-r--r--include/net/tun_proto.h3
-rw-r--r--include/net/udp.h80
-rw-r--r--include/net/udp_tunnel.h210
-rw-r--r--include/net/udplite.h52
-rw-r--r--include/net/vxlan.h83
-rw-r--r--include/net/wimax.h503
-rw-r--r--include/net/xdp.h286
-rw-r--r--include/net/xdp_priv.h2
-rw-r--r--include/net/xdp_sock.h334
-rw-r--r--include/net/xdp_sock_drv.h289
-rw-r--r--include/net/xfrm.h246
-rw-r--r--include/net/xsk_buff_pool.h229
270 files changed, 17241 insertions, 6393 deletions
diff --git a/include/net/6lowpan.h b/include/net/6lowpan.h
index a71378007e61..c80539be1542 100644
--- a/include/net/6lowpan.h
+++ b/include/net/6lowpan.h
@@ -138,7 +138,7 @@ struct lowpan_dev {
struct lowpan_iphc_ctx_table ctx;
/* must be last */
- u8 priv[0] __aligned(sizeof(void *));
+ u8 priv[] __aligned(sizeof(void *));
};
struct lowpan_802154_neigh {
diff --git a/include/net/9p/9p.h b/include/net/9p/9p.h
index 03614de86942..13abe013af21 100644
--- a/include/net/9p/9p.h
+++ b/include/net/9p/9p.h
@@ -1,7 +1,5 @@
/* SPDX-License-Identifier: GPL-2.0-only */
/*
- * include/net/9p/9p.h
- *
* 9P protocol definitions.
*
* Copyright (C) 2005 by Latchesar Ionkov <lucho@ionkov.net>
@@ -32,13 +30,13 @@
*/
enum p9_debug_flags {
- P9_DEBUG_ERROR = (1<<0),
- P9_DEBUG_9P = (1<<2),
+ P9_DEBUG_ERROR = (1<<0),
+ P9_DEBUG_9P = (1<<2),
P9_DEBUG_VFS = (1<<3),
P9_DEBUG_CONV = (1<<4),
P9_DEBUG_MUX = (1<<5),
P9_DEBUG_TRANS = (1<<6),
- P9_DEBUG_SLABS = (1<<7),
+ P9_DEBUG_SLABS = (1<<7),
P9_DEBUG_FCALL = (1<<8),
P9_DEBUG_FID = (1<<9),
P9_DEBUG_PKT = (1<<10),
@@ -317,8 +315,8 @@ enum p9_qid_t {
};
/* 9P Magic Numbers */
-#define P9_NOTAG (u16)(~0)
-#define P9_NOFID (u32)(~0)
+#define P9_NOTAG ((u16)(~0))
+#define P9_NOFID ((u32)(~0))
#define P9_MAXWELEM 16
/* Minimal header size: size[4] type[1] tag[2] */
@@ -333,6 +331,9 @@ enum p9_qid_t {
/* size of header for zero copy read/write */
#define P9_ZC_HDR_SZ 4096
+/* maximum length of an error string */
+#define P9_ERRMAX 128
+
/**
* struct p9_qid - file system entity information
* @type: 8-bit type &p9_qid_t
@@ -553,6 +554,4 @@ struct p9_fcall {
int p9_errstr2errno(char *errstr, int len);
int p9_error_init(void);
-int p9_trans_fd_init(void);
-void p9_trans_fd_exit(void);
#endif /* NET_9P_H */
diff --git a/include/net/9p/client.h b/include/net/9p/client.h
index acc60d8a3b3b..78ebcf782ce5 100644
--- a/include/net/9p/client.h
+++ b/include/net/9p/client.h
@@ -1,7 +1,5 @@
/* SPDX-License-Identifier: GPL-2.0-only */
/*
- * include/net/9p/client.h
- *
* 9P Client Definitions
*
* Copyright (C) 2008 by Eric Van Hensbergen <ericvh@gmail.com>
@@ -13,6 +11,7 @@
#include <linux/utsname.h>
#include <linux/idr.h>
+#include <linux/tracepoint-defs.h>
/* Number of requests per row */
#define P9_ROW_MAXTAG 255
@@ -23,7 +22,7 @@
* @p9_proto_2000L: 9P2000.L extension
*/
-enum p9_proto_versions{
+enum p9_proto_versions {
p9_proto_legacy,
p9_proto_2000u,
p9_proto_2000L,
@@ -73,17 +72,15 @@ enum p9_req_status_t {
* @wq: wait_queue for the client to block on for this request
* @tc: the request fcall structure
* @rc: the response fcall structure
- * @aux: transport specific data (provided for trans_fd migration)
* @req_list: link for higher level objects to chain requests
*/
struct p9_req_t {
int status;
int t_err;
- struct kref refcount;
+ refcount_t refcount;
wait_queue_head_t wq;
struct p9_fcall tc;
struct p9_fcall rc;
- void *aux;
struct list_head req_list;
};
@@ -142,10 +139,16 @@ struct p9_client {
*
* TODO: This needs lots of explanation.
*/
+enum fid_source {
+ FID_FROM_OTHER,
+ FID_FROM_INODE,
+ FID_FROM_DENTRY,
+};
struct p9_fid {
struct p9_client *clnt;
u32 fid;
+ refcount_t count;
int mode;
struct p9_qid qid;
u32 iounit;
@@ -154,6 +157,7 @@ struct p9_fid {
void *rdir;
struct hlist_node dlist; /* list of all fids attached to a dentry */
+ struct hlist_node ilist;
};
/**
@@ -200,6 +204,8 @@ int p9_client_fsync(struct p9_fid *fid, int datasync);
int p9_client_remove(struct p9_fid *fid);
int p9_client_unlinkat(struct p9_fid *dfid, const char *name, int flags);
int p9_client_read(struct p9_fid *fid, u64 offset, struct iov_iter *to, int *err);
+int p9_client_read_once(struct p9_fid *fid, u64 offset, struct iov_iter *to,
+ int *err);
int p9_client_write(struct p9_fid *fid, u64 offset, struct iov_iter *from, int *err);
int p9_client_readdir(struct p9_fid *fid, char *data, u32 count, u64 offset);
int p9dirent_read(struct p9_client *clnt, char *buf, int len,
@@ -212,36 +218,80 @@ struct p9_stat_dotl *p9_client_getattr_dotl(struct p9_fid *fid,
u64 request_mask);
int p9_client_mknod_dotl(struct p9_fid *oldfid, const char *name, int mode,
- dev_t rdev, kgid_t gid, struct p9_qid *);
+ dev_t rdev, kgid_t gid, struct p9_qid *qid);
int p9_client_mkdir_dotl(struct p9_fid *fid, const char *name, int mode,
- kgid_t gid, struct p9_qid *);
+ kgid_t gid, struct p9_qid *qid);
int p9_client_lock_dotl(struct p9_fid *fid, struct p9_flock *flock, u8 *status);
int p9_client_getlock_dotl(struct p9_fid *fid, struct p9_getlock *fl);
void p9_fcall_fini(struct p9_fcall *fc);
-struct p9_req_t *p9_tag_lookup(struct p9_client *, u16);
+struct p9_req_t *p9_tag_lookup(struct p9_client *c, u16 tag);
static inline void p9_req_get(struct p9_req_t *r)
{
- kref_get(&r->refcount);
+ refcount_inc(&r->refcount);
}
static inline int p9_req_try_get(struct p9_req_t *r)
{
- return kref_get_unless_zero(&r->refcount);
+ return refcount_inc_not_zero(&r->refcount);
}
-int p9_req_put(struct p9_req_t *r);
+int p9_req_put(struct p9_client *c, struct p9_req_t *r);
+
+/* We cannot have the real tracepoints in header files,
+ * use a wrapper function */
+DECLARE_TRACEPOINT(9p_fid_ref);
+void do_trace_9p_fid_get(struct p9_fid *fid);
+void do_trace_9p_fid_put(struct p9_fid *fid);
+
+/* fid reference counting helpers:
+ * - fids used for any length of time should always be referenced through
+ * p9_fid_get(), and released with p9_fid_put()
+ * - v9fs_fid_lookup() or similar will automatically call get for you
+ * and also require a put
+ * - the *_fid_add() helpers will stash the fid in the inode,
+ * at which point it is the responsibility of evict_inode()
+ * to call the put
+ * - the last put will automatically send a clunk to the server
+ */
+static inline struct p9_fid *p9_fid_get(struct p9_fid *fid)
+{
+ if (tracepoint_enabled(9p_fid_ref))
+ do_trace_9p_fid_get(fid);
+
+ refcount_inc(&fid->count);
+
+ return fid;
+}
+
+static inline int p9_fid_put(struct p9_fid *fid)
+{
+ if (!fid || IS_ERR(fid))
+ return 0;
+
+ if (tracepoint_enabled(9p_fid_ref))
+ do_trace_9p_fid_put(fid);
+
+ if (!refcount_dec_and_test(&fid->count))
+ return 0;
+
+ return p9_client_clunk(fid);
+}
void p9_client_cb(struct p9_client *c, struct p9_req_t *req, int status);
-int p9_parse_header(struct p9_fcall *, int32_t *, int8_t *, int16_t *, int);
-int p9stat_read(struct p9_client *, char *, int, struct p9_wstat *);
-void p9stat_free(struct p9_wstat *);
+int p9_parse_header(struct p9_fcall *pdu, int32_t *size, int8_t *type,
+ int16_t *tag, int rewind);
+int p9stat_read(struct p9_client *clnt, char *buf, int len,
+ struct p9_wstat *st);
+void p9stat_free(struct p9_wstat *stbuf);
int p9_is_proto_dotu(struct p9_client *clnt);
int p9_is_proto_dotl(struct p9_client *clnt);
-struct p9_fid *p9_client_xattrwalk(struct p9_fid *, const char *, u64 *);
-int p9_client_xattrcreate(struct p9_fid *, const char *, u64, int);
+struct p9_fid *p9_client_xattrwalk(struct p9_fid *file_fid,
+ const char *attr_name, u64 *attr_size);
+int p9_client_xattrcreate(struct p9_fid *fid, const char *name,
+ u64 attr_size, int flags);
int p9_client_readlink(struct p9_fid *fid, char **target);
int p9_client_init(void);
diff --git a/include/net/9p/transport.h b/include/net/9p/transport.h
index 98a2be2de04a..766ec07c9599 100644
--- a/include/net/9p/transport.h
+++ b/include/net/9p/transport.h
@@ -1,7 +1,5 @@
/* SPDX-License-Identifier: GPL-2.0-only */
/*
- * include/net/9p/transport.h
- *
* Transport Definition
*
* Copyright (C) 2005 by Latchesar Ionkov <lucho@ionkov.net>
@@ -11,6 +9,8 @@
#ifndef NET_9P_TRANSPORT_H
#define NET_9P_TRANSPORT_H
+#include <linux/module.h>
+
#define P9_DEF_MIN_RESVPORT (665U)
#define P9_DEF_MAX_RESVPORT (1023U)
@@ -19,13 +19,17 @@
* @list: used to maintain a list of currently available transports
* @name: the human-readable name of the transport
* @maxsize: transport provided maximum packet size
+ * @pooled_rbuffers: currently only set for RDMA transport which pulls the
+ * response buffers from a shared pool, and accordingly
+ * we're less flexible when choosing the response message
+ * size in this case
* @def: set if this transport should be considered the default
* @create: member function to create a new connection on this transport
* @close: member function to discard a connection on this transport
* @request: member function to issue a request to the transport
* @cancel: member function to cancel a request (if it hasn't been sent)
* @cancelled: member function to notify that a cancelled request will not
- * not receive a reply
+ * receive a reply
*
* This is the basic API for a transport module which is registered by the
* transport module with the 9P core network module and used by the client
@@ -38,21 +42,28 @@ struct p9_trans_module {
struct list_head list;
char *name; /* name of transport */
int maxsize; /* max message size of transport */
+ bool pooled_rbuffers;
int def; /* this transport should be default */
struct module *owner;
- int (*create)(struct p9_client *, const char *, char *);
- void (*close) (struct p9_client *);
- int (*request) (struct p9_client *, struct p9_req_t *req);
- int (*cancel) (struct p9_client *, struct p9_req_t *req);
- int (*cancelled)(struct p9_client *, struct p9_req_t *req);
- int (*zc_request)(struct p9_client *, struct p9_req_t *,
- struct iov_iter *, struct iov_iter *, int , int, int);
- int (*show_options)(struct seq_file *, struct p9_client *);
+ int (*create)(struct p9_client *client,
+ const char *devname, char *args);
+ void (*close)(struct p9_client *client);
+ int (*request)(struct p9_client *client, struct p9_req_t *req);
+ int (*cancel)(struct p9_client *client, struct p9_req_t *req);
+ int (*cancelled)(struct p9_client *client, struct p9_req_t *req);
+ int (*zc_request)(struct p9_client *client, struct p9_req_t *req,
+ struct iov_iter *uidata, struct iov_iter *uodata,
+ int inlen, int outlen, int in_hdr_len);
+ int (*show_options)(struct seq_file *m, struct p9_client *client);
};
void v9fs_register_trans(struct p9_trans_module *m);
void v9fs_unregister_trans(struct p9_trans_module *m);
-struct p9_trans_module *v9fs_get_trans_by_name(char *s);
+struct p9_trans_module *v9fs_get_trans_by_name(const char *s);
struct p9_trans_module *v9fs_get_default_trans(void);
void v9fs_put_trans(struct p9_trans_module *m);
+
+#define MODULE_ALIAS_9P(transport) \
+ MODULE_ALIAS("9p-" transport)
+
#endif /* NET_9P_TRANSPORT_H */
diff --git a/include/net/Space.h b/include/net/Space.h
index 9cce0d80d37a..08ca9cef0213 100644
--- a/include/net/Space.h
+++ b/include/net/Space.h
@@ -8,23 +8,13 @@ struct net_device *ultra_probe(int unit);
struct net_device *wd_probe(int unit);
struct net_device *ne_probe(int unit);
struct net_device *fmv18x_probe(int unit);
-struct net_device *i82596_probe(int unit);
struct net_device *ni65_probe(int unit);
struct net_device *sonic_probe(int unit);
struct net_device *smc_init(int unit);
-struct net_device *atarilance_probe(int unit);
-struct net_device *sun3lance_probe(int unit);
-struct net_device *sun3_82586_probe(int unit);
-struct net_device *apne_probe(int unit);
struct net_device *cs89x0_probe(int unit);
-struct net_device *mvme147lance_probe(int unit);
struct net_device *tc515_probe(int unit);
struct net_device *lance_probe(int unit);
struct net_device *cops_probe(int unit);
-struct net_device *ltpc_probe(void);
/* Fibre Channel adapters */
int iph5526_probe(struct net_device *dev);
-
-/* SBNI adapters */
-int sbni_probe(int unit);
diff --git a/include/net/act_api.h b/include/net/act_api.h
index 71347a90a9d1..61f2ceb3939e 100644
--- a/include/net/act_api.h
+++ b/include/net/act_api.h
@@ -7,6 +7,7 @@
*/
#include <linux/refcount.h>
+#include <net/flow_offload.h>
#include <net/sch_generic.h>
#include <net/pkt_sched.h>
#include <net/net_namespace.h>
@@ -30,17 +31,21 @@ struct tc_action {
atomic_t tcfa_bindcnt;
int tcfa_action;
struct tcf_t tcfa_tm;
- struct gnet_stats_basic_packed tcfa_bstats;
- struct gnet_stats_basic_packed tcfa_bstats_hw;
+ struct gnet_stats_basic_sync tcfa_bstats;
+ struct gnet_stats_basic_sync tcfa_bstats_hw;
struct gnet_stats_queue tcfa_qstats;
struct net_rate_estimator __rcu *tcfa_rate_est;
spinlock_t tcfa_lock;
- struct gnet_stats_basic_cpu __percpu *cpu_bstats;
- struct gnet_stats_basic_cpu __percpu *cpu_bstats_hw;
+ struct gnet_stats_basic_sync __percpu *cpu_bstats;
+ struct gnet_stats_basic_sync __percpu *cpu_bstats_hw;
struct gnet_stats_queue __percpu *cpu_qstats;
struct tc_cookie __rcu *act_cookie;
struct tcf_chain __rcu *goto_chain;
u32 tcfa_flags;
+ u8 hw_stats;
+ u8 used_hw_stats;
+ bool used_hw_stats_valid;
+ u32 in_hw_count;
};
#define tcf_index common.tcfa_index
#define tcf_refcnt common.tcfa_refcnt
@@ -52,6 +57,17 @@ struct tc_action {
#define tcf_rate_est common.tcfa_rate_est
#define tcf_lock common.tcfa_lock
+#define TCA_ACT_HW_STATS_ANY (TCA_ACT_HW_STATS_IMMEDIATE | \
+ TCA_ACT_HW_STATS_DELAYED)
+
+/* Reserve 16 bits for user-space. See TCA_ACT_FLAGS_NO_PERCPU_STATS. */
+#define TCA_ACT_FLAGS_USER_BITS 16
+#define TCA_ACT_FLAGS_USER_MASK 0xffff
+#define TCA_ACT_FLAGS_POLICE (1U << TCA_ACT_FLAGS_USER_BITS)
+#define TCA_ACT_FLAGS_BIND (1U << (TCA_ACT_FLAGS_USER_BITS + 1))
+#define TCA_ACT_FLAGS_REPLACE (1U << (TCA_ACT_FLAGS_USER_BITS + 2))
+#define TCA_ACT_FLAGS_NO_RTNL (1U << (TCA_ACT_FLAGS_USER_BITS + 3))
+
/* Update lastuse only if needed, to avoid dirtying a cache line.
* We use a temp variable to avoid fetching jiffies twice.
*/
@@ -69,10 +85,21 @@ static inline void tcf_tm_dump(struct tcf_t *dtm, const struct tcf_t *stm)
{
dtm->install = jiffies_to_clock_t(jiffies - stm->install);
dtm->lastuse = jiffies_to_clock_t(jiffies - stm->lastuse);
- dtm->firstuse = jiffies_to_clock_t(jiffies - stm->firstuse);
+ dtm->firstuse = stm->firstuse ?
+ jiffies_to_clock_t(jiffies - stm->firstuse) : 0;
dtm->expires = jiffies_to_clock_t(stm->expires);
}
+static inline enum flow_action_hw_stats tc_act_hw_stats(u8 hw_stats)
+{
+ if (WARN_ON_ONCE(hw_stats > TCA_ACT_HW_STATS_ANY))
+ return FLOW_ACTION_HW_STATS_DONT_CARE;
+ else if (!hw_stats)
+ return FLOW_ACTION_HW_STATS_DISABLED;
+
+ return hw_stats;
+}
+
#ifdef CONFIG_NET_CLS_ACT
#define ACT_P_CREATED 1
@@ -84,6 +111,7 @@ struct tc_action_ops {
struct list_head head;
char kind[IFNAMSIZ];
enum tca_id id; /* identifier should match kind */
+ unsigned int net_id;
size_t size;
struct module *owner;
int (*act)(struct sk_buff *, const struct tc_action *,
@@ -92,20 +120,23 @@ struct tc_action_ops {
void (*cleanup)(struct tc_action *);
int (*lookup)(struct net *net, struct tc_action **a, u32 index);
int (*init)(struct net *net, struct nlattr *nla,
- struct nlattr *est, struct tc_action **act, int ovr,
- int bind, bool rtnl_held, struct tcf_proto *tp,
+ struct nlattr *est, struct tc_action **act,
+ struct tcf_proto *tp,
u32 flags, struct netlink_ext_ack *extack);
int (*walk)(struct net *, struct sk_buff *,
struct netlink_callback *, int,
const struct tc_action_ops *,
struct netlink_ext_ack *);
- void (*stats_update)(struct tc_action *, u64, u32, u64, bool);
+ void (*stats_update)(struct tc_action *, u64, u64, u64, u64, bool);
size_t (*get_fill_size)(const struct tc_action *act);
struct net_device *(*get_dev)(const struct tc_action *a,
tc_action_priv_destructor *destructor);
struct psample_group *
(*get_psample_group)(const struct tc_action *a,
tc_action_priv_destructor *destructor);
+ int (*offload_act_setup)(struct tc_action *act, void *entry_data,
+ u32 *index_inc, bool bind,
+ struct netlink_ext_ack *extack);
};
struct tc_action_net {
@@ -159,17 +190,11 @@ int tcf_idr_create_from_flags(struct tc_action_net *tn, u32 index,
struct nlattr *est, struct tc_action **a,
const struct tc_action_ops *ops, int bind,
u32 flags);
-void tcf_idr_insert(struct tc_action_net *tn, struct tc_action *a);
-
+void tcf_idr_insert_many(struct tc_action *actions[]);
void tcf_idr_cleanup(struct tc_action_net *tn, u32 index);
int tcf_idr_check_alloc(struct tc_action_net *tn, u32 *index,
struct tc_action **a, int bind);
-int __tcf_idr_release(struct tc_action *a, bool bind, bool strict);
-
-static inline int tcf_idr_release(struct tc_action *a, bool bind)
-{
- return __tcf_idr_release(a, bind, false);
-}
+int tcf_idr_release(struct tc_action *a, bool bind);
int tcf_register_action(struct tc_action_ops *a, struct pernet_operations *ops);
int tcf_unregister_action(struct tc_action_ops *a,
@@ -178,16 +203,18 @@ int tcf_action_destroy(struct tc_action *actions[], int bind);
int tcf_action_exec(struct sk_buff *skb, struct tc_action **actions,
int nr_actions, struct tcf_result *res);
int tcf_action_init(struct net *net, struct tcf_proto *tp, struct nlattr *nla,
- struct nlattr *est, char *name, int ovr, int bind,
- struct tc_action *actions[], size_t *attr_size,
- bool rtnl_held, struct netlink_ext_ack *extack);
+ struct nlattr *est,
+ struct tc_action *actions[], int init_res[], size_t *attr_size,
+ u32 flags, u32 fl_flags, struct netlink_ext_ack *extack);
+struct tc_action_ops *tc_action_load_ops(struct nlattr *nla, bool police,
+ bool rtnl_held,
+ struct netlink_ext_ack *extack);
struct tc_action *tcf_action_init_1(struct net *net, struct tcf_proto *tp,
struct nlattr *nla, struct nlattr *est,
- char *name, int ovr, int bind,
- bool rtnl_held,
- struct netlink_ext_ack *extack);
+ struct tc_action_ops *a_o, int *init_res,
+ u32 flags, struct netlink_ext_ack *extack);
int tcf_action_dump(struct sk_buff *skb, struct tc_action *actions[], int bind,
- int ref);
+ int ref, bool terse);
int tcf_action_dump_old(struct sk_buff *skb, struct tc_action *a, int, int);
int tcf_action_dump_1(struct sk_buff *skb, struct tc_action *a, int, int);
@@ -195,7 +222,7 @@ static inline void tcf_action_update_bstats(struct tc_action *a,
struct sk_buff *skb)
{
if (likely(a->cpu_bstats)) {
- bstats_cpu_update(this_cpu_ptr(a->cpu_bstats), skb);
+ bstats_update(this_cpu_ptr(a->cpu_bstats), skb);
return;
}
spin_lock(&a->tcfa_lock);
@@ -225,25 +252,43 @@ static inline void tcf_action_inc_overlimit_qstats(struct tc_action *a)
spin_unlock(&a->tcfa_lock);
}
-void tcf_action_update_stats(struct tc_action *a, u64 bytes, u32 packets,
- bool drop, bool hw);
+void tcf_action_update_stats(struct tc_action *a, u64 bytes, u64 packets,
+ u64 drops, bool hw);
int tcf_action_copy_stats(struct sk_buff *, struct tc_action *, int);
+int tcf_action_update_hw_stats(struct tc_action *action);
+int tcf_action_reoffload_cb(flow_indr_block_bind_cb_t *cb,
+ void *cb_priv, bool add);
int tcf_action_check_ctrlact(int action, struct tcf_proto *tp,
struct tcf_chain **handle,
struct netlink_ext_ack *newchain);
struct tcf_chain *tcf_action_set_ctrlact(struct tc_action *a, int action,
struct tcf_chain *newchain);
+
+#ifdef CONFIG_INET
+DECLARE_STATIC_KEY_FALSE(tcf_frag_xmit_count);
+#endif
+
+int tcf_dev_queue_xmit(struct sk_buff *skb, int (*xmit)(struct sk_buff *skb));
+
+#else /* !CONFIG_NET_CLS_ACT */
+
+static inline int tcf_action_reoffload_cb(flow_indr_block_bind_cb_t *cb,
+ void *cb_priv, bool add) {
+ return 0;
+}
+
#endif /* CONFIG_NET_CLS_ACT */
static inline void tcf_action_stats_update(struct tc_action *a, u64 bytes,
- u64 packets, u64 lastuse, bool hw)
+ u64 packets, u64 drops,
+ u64 lastuse, bool hw)
{
#ifdef CONFIG_NET_CLS_ACT
if (!a->ops->stats_update)
return;
- a->ops->stats_update(a, bytes, packets, lastuse, hw);
+ a->ops->stats_update(a, bytes, packets, drops, lastuse, hw);
#endif
}
diff --git a/include/net/addrconf.h b/include/net/addrconf.h
index a088349dd94f..c04f359655b8 100644
--- a/include/net/addrconf.h
+++ b/include/net/addrconf.h
@@ -64,6 +64,8 @@ struct ifa6_config {
const struct in6_addr *pfx;
unsigned int plen;
+ u8 ifa_proto;
+
const struct in6_addr *peer_pfx;
u32 rt_priority;
@@ -90,12 +92,18 @@ int ipv6_chk_addr_and_flags(struct net *net, const struct in6_addr *addr,
int ipv6_chk_home_addr(struct net *net, const struct in6_addr *addr);
#endif
+int ipv6_chk_rpl_srh_loop(struct net *net, const struct in6_addr *segs,
+ unsigned char nsegs);
+
bool ipv6_chk_custom_prefix(const struct in6_addr *addr,
const unsigned int prefix_len,
struct net_device *dev);
int ipv6_chk_prefix(const struct in6_addr *addr, struct net_device *dev);
+struct net_device *ipv6_dev_find(struct net *net, const struct in6_addr *addr,
+ struct net_device *dev);
+
struct inet6_ifaddr *ipv6_get_ifaddr(struct net *net,
const struct in6_addr *addr,
struct net_device *dev, int strict);
@@ -103,8 +111,6 @@ struct inet6_ifaddr *ipv6_get_ifaddr(struct net *net,
int ipv6_dev_get_saddr(struct net *net, const struct net_device *dev,
const struct in6_addr *daddr, unsigned int srcprefs,
struct in6_addr *saddr);
-int __ipv6_get_lladdr(struct inet6_dev *idev, struct in6_addr *addr,
- u32 banned_flags);
int ipv6_get_lladdr(struct net_device *dev, struct in6_addr *addr,
u32 banned_flags);
bool inet_rcv_saddr_equal(const struct sock *sk, const struct sock *sk2,
@@ -229,7 +235,6 @@ void ipv6_mc_unmap(struct inet6_dev *idev);
void ipv6_mc_remap(struct inet6_dev *idev);
void ipv6_mc_init_dev(struct inet6_dev *idev);
void ipv6_mc_destroy_dev(struct inet6_dev *idev);
-int ipv6_mc_check_icmpv6(struct sk_buff *skb);
int ipv6_mc_check_mld(struct sk_buff *skb);
void addrconf_dad_failure(struct sk_buff *skb, struct inet6_ifaddr *ifp);
@@ -273,6 +278,7 @@ int ipv6_sock_ac_join(struct sock *sk, int ifindex,
const struct in6_addr *addr);
int ipv6_sock_ac_drop(struct sock *sk, int ifindex,
const struct in6_addr *addr);
+void __ipv6_sock_ac_close(struct sock *sk);
void ipv6_sock_ac_close(struct sock *sk);
int __ipv6_dev_ac_inc(struct inet6_dev *idev, const struct in6_addr *addr);
@@ -399,6 +405,9 @@ static inline bool ip6_ignore_linkdown(const struct net_device *dev)
{
const struct inet6_dev *idev = __in6_dev_get(dev);
+ if (unlikely(!idev))
+ return true;
+
return !!idev->cnf.ignore_routes_with_linkdown;
}
diff --git a/include/net/af_rxrpc.h b/include/net/af_rxrpc.h
index 1abae3c340a5..b69ca695935c 100644
--- a/include/net/af_rxrpc.h
+++ b/include/net/af_rxrpc.h
@@ -16,6 +16,12 @@ struct sock;
struct socket;
struct rxrpc_call;
+enum rxrpc_interruptibility {
+ RXRPC_INTERRUPTIBLE, /* Call is interruptible */
+ RXRPC_PREINTERRUPTIBLE, /* Call can be cancelled whilst waiting for a slot */
+ RXRPC_UNINTERRUPTIBLE, /* Call should not be interruptible at all */
+};
+
/*
* Debug ID counter for tracing.
*/
@@ -41,31 +47,29 @@ struct rxrpc_call *rxrpc_kernel_begin_call(struct socket *,
gfp_t,
rxrpc_notify_rx_t,
bool,
- bool,
+ enum rxrpc_interruptibility,
unsigned int);
int rxrpc_kernel_send_data(struct socket *, struct rxrpc_call *,
struct msghdr *, size_t,
rxrpc_notify_end_tx_t);
int rxrpc_kernel_recv_data(struct socket *, struct rxrpc_call *,
- struct iov_iter *, bool, u32 *, u16 *);
+ struct iov_iter *, size_t *, bool, u32 *, u16 *);
bool rxrpc_kernel_abort_call(struct socket *, struct rxrpc_call *,
u32, int, const char *);
void rxrpc_kernel_end_call(struct socket *, struct rxrpc_call *);
void rxrpc_kernel_get_peer(struct socket *, struct rxrpc_call *,
struct sockaddr_rxrpc *);
-u64 rxrpc_kernel_get_rtt(struct socket *, struct rxrpc_call *);
+bool rxrpc_kernel_get_srtt(struct socket *, struct rxrpc_call *, u32 *);
int rxrpc_kernel_charge_accept(struct socket *, rxrpc_notify_rx_t,
rxrpc_user_attach_call_t, unsigned long, gfp_t,
unsigned int);
void rxrpc_kernel_set_tx_length(struct socket *, struct rxrpc_call *, s64);
-bool rxrpc_kernel_check_life(const struct socket *, const struct rxrpc_call *,
- u32 *);
-void rxrpc_kernel_probe_life(struct socket *, struct rxrpc_call *);
+bool rxrpc_kernel_check_life(const struct socket *, const struct rxrpc_call *);
u32 rxrpc_kernel_get_epoch(struct socket *, struct rxrpc_call *);
-bool rxrpc_kernel_get_reply_time(struct socket *, struct rxrpc_call *,
- ktime_t *);
bool rxrpc_kernel_call_is_complete(struct rxrpc_call *);
void rxrpc_kernel_set_max_life(struct socket *, struct rxrpc_call *,
unsigned long);
+int rxrpc_sock_set_min_security_level(struct sock *sk, unsigned int val);
+
#endif /* _NET_RXRPC_H */
diff --git a/include/net/af_unix.h b/include/net/af_unix.h
index 17e10fba2152..480fa579787e 100644
--- a/include/net/af_unix.h
+++ b/include/net/af_unix.h
@@ -16,18 +16,16 @@ void wait_for_unix_gc(void);
struct sock *unix_get_socket(struct file *filp);
struct sock *unix_peer_get(struct sock *sk);
-#define UNIX_HASH_SIZE 256
+#define UNIX_HASH_MOD (256 - 1)
+#define UNIX_HASH_SIZE (256 * 2)
#define UNIX_HASH_BITS 8
extern unsigned int unix_tot_inflight;
-extern spinlock_t unix_table_lock;
-extern struct hlist_head unix_socket_table[2 * UNIX_HASH_SIZE];
struct unix_address {
refcount_t refcnt;
int len;
- unsigned int hash;
- struct sockaddr_un name[0];
+ struct sockaddr_un name[];
};
struct unix_skb_parms {
@@ -42,7 +40,7 @@ struct unix_skb_parms {
} __randomize_layout;
struct scm_stat {
- u32 nr_fds;
+ atomic_t nr_fds;
};
#define UNIXCB(skb) (*(struct unix_skb_parms *)&((skb)->cb))
@@ -70,6 +68,9 @@ struct unix_sock {
struct socket_wq peer_wq;
wait_queue_entry_t peer_wake;
struct scm_stat scm_stat;
+#if IS_ENABLED(CONFIG_AF_UNIX_OOB)
+ struct sk_buff *oob_skb;
+#endif
};
static inline struct unix_sock *unix_sk(const struct sock *sk)
@@ -82,6 +83,10 @@ static inline struct unix_sock *unix_sk(const struct sock *sk)
long unix_inq_len(struct sock *sk);
long unix_outq_len(struct sock *sk);
+int __unix_dgram_recvmsg(struct sock *sk, struct msghdr *msg, size_t size,
+ int flags);
+int __unix_stream_recvmsg(struct sock *sk, struct msghdr *msg, size_t size,
+ int flags);
#ifdef CONFIG_SYSCTL
int unix_sysctl_register(struct net *net);
void unix_sysctl_unregister(struct net *net);
@@ -89,4 +94,16 @@ void unix_sysctl_unregister(struct net *net);
static inline int unix_sysctl_register(struct net *net) { return 0; }
static inline void unix_sysctl_unregister(struct net *net) {}
#endif
+
+#ifdef CONFIG_BPF_SYSCALL
+extern struct proto unix_dgram_proto;
+extern struct proto unix_stream_proto;
+
+int unix_dgram_bpf_update_proto(struct sock *sk, struct sk_psock *psock, bool restore);
+int unix_stream_bpf_update_proto(struct sock *sk, struct sk_psock *psock, bool restore);
+void __init unix_bpf_build_proto(void);
+#else
+static inline void __init unix_bpf_build_proto(void)
+{}
+#endif
#endif
diff --git a/include/net/af_vsock.h b/include/net/af_vsock.h
index b1c717286993..568a87c5e0d0 100644
--- a/include/net/af_vsock.h
+++ b/include/net/af_vsock.h
@@ -10,6 +10,7 @@
#include <linux/kernel.h>
#include <linux/workqueue.h>
+#include <net/sock.h>
#include <uapi/linux/vm_sockets.h>
#include "vsock_addr.h"
@@ -77,6 +78,7 @@ struct vsock_sock {
s64 vsock_stream_has_data(struct vsock_sock *vsk);
s64 vsock_stream_has_space(struct vsock_sock *vsk);
struct sock *vsock_create_connected(struct sock *parent);
+void vsock_data_ready(struct sock *sk);
/**** TRANSPORT ****/
@@ -134,6 +136,15 @@ struct vsock_transport {
u64 (*stream_rcvhiwat)(struct vsock_sock *);
bool (*stream_is_active)(struct vsock_sock *);
bool (*stream_allow)(u32 cid, u32 port);
+ int (*set_rcvlowat)(struct vsock_sock *vsk, int val);
+
+ /* SEQ_PACKET. */
+ ssize_t (*seqpacket_dequeue)(struct vsock_sock *vsk, struct msghdr *msg,
+ int flags);
+ int (*seqpacket_enqueue)(struct vsock_sock *vsk, struct msghdr *msg,
+ size_t len);
+ bool (*seqpacket_allow)(u32 remote_cid);
+ u32 (*seqpacket_has_data)(struct vsock_sock *vsk);
/* Notification. */
int (*notify_poll_in)(struct vsock_sock *, size_t, bool *);
@@ -197,7 +208,8 @@ struct sock *vsock_find_bound_socket(struct sockaddr_vm *addr);
struct sock *vsock_find_connected_socket(struct sockaddr_vm *src,
struct sockaddr_vm *dst);
void vsock_remove_sock(struct vsock_sock *vsk);
-void vsock_for_each_connected_socket(void (*fn)(struct sock *sk));
+void vsock_for_each_connected_socket(struct vsock_transport *transport,
+ void (*fn)(struct sock *sk));
int vsock_assign_transport(struct vsock_sock *vsk, struct vsock_sock *psk);
bool vsock_find_cid(unsigned int cid);
diff --git a/include/net/amt.h b/include/net/amt.h
new file mode 100644
index 000000000000..c881bc8b673b
--- /dev/null
+++ b/include/net/amt.h
@@ -0,0 +1,408 @@
+/* SPDX-License-Identifier: GPL-2.0+ */
+/*
+ * Copyright (c) 2021 Taehee Yoo <ap420073@gmail.com>
+ */
+#ifndef _NET_AMT_H_
+#define _NET_AMT_H_
+
+#include <linux/siphash.h>
+#include <linux/jhash.h>
+#include <linux/netdevice.h>
+#include <net/gro_cells.h>
+#include <net/rtnetlink.h>
+
+enum amt_msg_type {
+ AMT_MSG_DISCOVERY = 1,
+ AMT_MSG_ADVERTISEMENT,
+ AMT_MSG_REQUEST,
+ AMT_MSG_MEMBERSHIP_QUERY,
+ AMT_MSG_MEMBERSHIP_UPDATE,
+ AMT_MSG_MULTICAST_DATA,
+ AMT_MSG_TEARDOWN,
+ __AMT_MSG_MAX,
+};
+
+#define AMT_MSG_MAX (__AMT_MSG_MAX - 1)
+
+enum amt_ops {
+ /* A*B */
+ AMT_OPS_INT,
+ /* A+B */
+ AMT_OPS_UNI,
+ /* A-B */
+ AMT_OPS_SUB,
+ /* B-A */
+ AMT_OPS_SUB_REV,
+ __AMT_OPS_MAX,
+};
+
+#define AMT_OPS_MAX (__AMT_OPS_MAX - 1)
+
+enum amt_filter {
+ AMT_FILTER_FWD,
+ AMT_FILTER_D_FWD,
+ AMT_FILTER_FWD_NEW,
+ AMT_FILTER_D_FWD_NEW,
+ AMT_FILTER_ALL,
+ AMT_FILTER_NONE_NEW,
+ AMT_FILTER_BOTH,
+ AMT_FILTER_BOTH_NEW,
+ __AMT_FILTER_MAX,
+};
+
+#define AMT_FILTER_MAX (__AMT_FILTER_MAX - 1)
+
+enum amt_act {
+ AMT_ACT_GMI,
+ AMT_ACT_GMI_ZERO,
+ AMT_ACT_GT,
+ AMT_ACT_STATUS_FWD_NEW,
+ AMT_ACT_STATUS_D_FWD_NEW,
+ AMT_ACT_STATUS_NONE_NEW,
+ __AMT_ACT_MAX,
+};
+
+#define AMT_ACT_MAX (__AMT_ACT_MAX - 1)
+
+enum amt_status {
+ AMT_STATUS_INIT,
+ AMT_STATUS_SENT_DISCOVERY,
+ AMT_STATUS_RECEIVED_DISCOVERY,
+ AMT_STATUS_SENT_ADVERTISEMENT,
+ AMT_STATUS_RECEIVED_ADVERTISEMENT,
+ AMT_STATUS_SENT_REQUEST,
+ AMT_STATUS_RECEIVED_REQUEST,
+ AMT_STATUS_SENT_QUERY,
+ AMT_STATUS_RECEIVED_QUERY,
+ AMT_STATUS_SENT_UPDATE,
+ AMT_STATUS_RECEIVED_UPDATE,
+ __AMT_STATUS_MAX,
+};
+
+#define AMT_STATUS_MAX (__AMT_STATUS_MAX - 1)
+
+/* Gateway events only */
+enum amt_event {
+ AMT_EVENT_NONE,
+ AMT_EVENT_RECEIVE,
+ AMT_EVENT_SEND_DISCOVERY,
+ AMT_EVENT_SEND_REQUEST,
+ __AMT_EVENT_MAX,
+};
+
+struct amt_header {
+#if defined(__LITTLE_ENDIAN_BITFIELD)
+ u8 type:4,
+ version:4;
+#elif defined(__BIG_ENDIAN_BITFIELD)
+ u8 version:4,
+ type:4;
+#else
+#error "Please fix <asm/byteorder.h>"
+#endif
+} __packed;
+
+struct amt_header_discovery {
+#if defined(__LITTLE_ENDIAN_BITFIELD)
+ u32 type:4,
+ version:4,
+ reserved:24;
+#elif defined(__BIG_ENDIAN_BITFIELD)
+ u32 version:4,
+ type:4,
+ reserved:24;
+#else
+#error "Please fix <asm/byteorder.h>"
+#endif
+ __be32 nonce;
+} __packed;
+
+struct amt_header_advertisement {
+#if defined(__LITTLE_ENDIAN_BITFIELD)
+ u32 type:4,
+ version:4,
+ reserved:24;
+#elif defined(__BIG_ENDIAN_BITFIELD)
+ u32 version:4,
+ type:4,
+ reserved:24;
+#else
+#error "Please fix <asm/byteorder.h>"
+#endif
+ __be32 nonce;
+ __be32 ip4;
+} __packed;
+
+struct amt_header_request {
+#if defined(__LITTLE_ENDIAN_BITFIELD)
+ u32 type:4,
+ version:4,
+ reserved1:7,
+ p:1,
+ reserved2:16;
+#elif defined(__BIG_ENDIAN_BITFIELD)
+ u32 version:4,
+ type:4,
+ p:1,
+ reserved1:7,
+ reserved2:16;
+#else
+#error "Please fix <asm/byteorder.h>"
+#endif
+ __be32 nonce;
+} __packed;
+
+struct amt_header_membership_query {
+#if defined(__LITTLE_ENDIAN_BITFIELD)
+ u64 type:4,
+ version:4,
+ reserved:6,
+ l:1,
+ g:1,
+ response_mac:48;
+#elif defined(__BIG_ENDIAN_BITFIELD)
+ u64 version:4,
+ type:4,
+ g:1,
+ l:1,
+ reserved:6,
+ response_mac:48;
+#else
+#error "Please fix <asm/byteorder.h>"
+#endif
+ __be32 nonce;
+} __packed;
+
+struct amt_header_membership_update {
+#if defined(__LITTLE_ENDIAN_BITFIELD)
+ u64 type:4,
+ version:4,
+ reserved:8,
+ response_mac:48;
+#elif defined(__BIG_ENDIAN_BITFIELD)
+ u64 version:4,
+ type:4,
+ reserved:8,
+ response_mac:48;
+#else
+#error "Please fix <asm/byteorder.h>"
+#endif
+ __be32 nonce;
+} __packed;
+
+struct amt_header_mcast_data {
+#if defined(__LITTLE_ENDIAN_BITFIELD)
+ u16 type:4,
+ version:4,
+ reserved:8;
+#elif defined(__BIG_ENDIAN_BITFIELD)
+ u16 version:4,
+ type:4,
+ reserved:8;
+#else
+#error "Please fix <asm/byteorder.h>"
+#endif
+} __packed;
+
+struct amt_headers {
+ union {
+ struct amt_header_discovery discovery;
+ struct amt_header_advertisement advertisement;
+ struct amt_header_request request;
+ struct amt_header_membership_query query;
+ struct amt_header_membership_update update;
+ struct amt_header_mcast_data data;
+ };
+} __packed;
+
+struct amt_gw_headers {
+ union {
+ struct amt_header_discovery discovery;
+ struct amt_header_request request;
+ struct amt_header_membership_update update;
+ };
+} __packed;
+
+struct amt_relay_headers {
+ union {
+ struct amt_header_advertisement advertisement;
+ struct amt_header_membership_query query;
+ struct amt_header_mcast_data data;
+ };
+} __packed;
+
+struct amt_skb_cb {
+ struct amt_tunnel_list *tunnel;
+};
+
+struct amt_tunnel_list {
+ struct list_head list;
+ /* Protect All resources under an amt_tunne_list */
+ spinlock_t lock;
+ struct amt_dev *amt;
+ u32 nr_groups;
+ u32 nr_sources;
+ enum amt_status status;
+ struct delayed_work gc_wq;
+ __be16 source_port;
+ __be32 ip4;
+ __be32 nonce;
+ siphash_key_t key;
+ u64 mac:48,
+ reserved:16;
+ struct rcu_head rcu;
+ struct hlist_head groups[];
+};
+
+union amt_addr {
+ __be32 ip4;
+#if IS_ENABLED(CONFIG_IPV6)
+ struct in6_addr ip6;
+#endif
+};
+
+/* RFC 3810
+ *
+ * When the router is in EXCLUDE mode, the router state is represented
+ * by the notation EXCLUDE (X,Y), where X is called the "Requested List"
+ * and Y is called the "Exclude List". All sources, except those from
+ * the Exclude List, will be forwarded by the router
+ */
+enum amt_source_status {
+ AMT_SOURCE_STATUS_NONE,
+ /* Node of Requested List */
+ AMT_SOURCE_STATUS_FWD,
+ /* Node of Exclude List */
+ AMT_SOURCE_STATUS_D_FWD,
+};
+
+/* protected by gnode->lock */
+struct amt_source_node {
+ struct hlist_node node;
+ struct amt_group_node *gnode;
+ struct delayed_work source_timer;
+ union amt_addr source_addr;
+ enum amt_source_status status;
+#define AMT_SOURCE_OLD 0
+#define AMT_SOURCE_NEW 1
+ u8 flags;
+ struct rcu_head rcu;
+};
+
+/* Protected by amt_tunnel_list->lock */
+struct amt_group_node {
+ struct amt_dev *amt;
+ union amt_addr group_addr;
+ union amt_addr host_addr;
+ bool v6;
+ u8 filter_mode;
+ u32 nr_sources;
+ struct amt_tunnel_list *tunnel_list;
+ struct hlist_node node;
+ struct delayed_work group_timer;
+ struct rcu_head rcu;
+ struct hlist_head sources[];
+};
+
+#define AMT_MAX_EVENTS 16
+struct amt_events {
+ enum amt_event event;
+ struct sk_buff *skb;
+};
+
+struct amt_dev {
+ struct net_device *dev;
+ struct net_device *stream_dev;
+ struct net *net;
+ /* Global lock for amt device */
+ spinlock_t lock;
+ /* Used only in relay mode */
+ struct list_head tunnel_list;
+ struct gro_cells gro_cells;
+
+ /* Protected by RTNL */
+ struct delayed_work discovery_wq;
+ /* Protected by RTNL */
+ struct delayed_work req_wq;
+ /* Protected by RTNL */
+ struct delayed_work secret_wq;
+ struct work_struct event_wq;
+ /* AMT status */
+ enum amt_status status;
+ /* Generated key */
+ siphash_key_t key;
+ struct socket __rcu *sock;
+ u32 max_groups;
+ u32 max_sources;
+ u32 hash_buckets;
+ u32 hash_seed;
+ /* Default 128 */
+ u32 max_tunnels;
+ /* Default 128 */
+ u32 nr_tunnels;
+ /* Gateway or Relay mode */
+ u32 mode;
+ /* Default 2268 */
+ __be16 relay_port;
+ /* Default 2268 */
+ __be16 gw_port;
+ /* Outer local ip */
+ __be32 local_ip;
+ /* Outer remote ip */
+ __be32 remote_ip;
+ /* Outer discovery ip */
+ __be32 discovery_ip;
+ /* Only used in gateway mode */
+ __be32 nonce;
+ /* Gateway sent request and received query */
+ bool ready4;
+ bool ready6;
+ u8 req_cnt;
+ u8 qi;
+ u64 qrv;
+ u64 qri;
+ /* Used only in gateway mode */
+ u64 mac:48,
+ reserved:16;
+ /* AMT gateway side message handler queue */
+ struct amt_events events[AMT_MAX_EVENTS];
+ u8 event_idx;
+ u8 nr_events;
+};
+
+#define AMT_TOS 0xc0
+#define AMT_IPHDR_OPTS 4
+#define AMT_IP6HDR_OPTS 8
+#define AMT_GC_INTERVAL (30 * 1000)
+#define AMT_MAX_GROUP 32
+#define AMT_MAX_SOURCE 128
+#define AMT_HSIZE_SHIFT 8
+#define AMT_HSIZE (1 << AMT_HSIZE_SHIFT)
+
+#define AMT_DISCOVERY_TIMEOUT 5000
+#define AMT_INIT_REQ_TIMEOUT 1
+#define AMT_INIT_QUERY_INTERVAL 125
+#define AMT_MAX_REQ_TIMEOUT 120
+#define AMT_MAX_REQ_COUNT 3
+#define AMT_SECRET_TIMEOUT 60000
+#define IANA_AMT_UDP_PORT 2268
+#define AMT_MAX_TUNNELS 128
+#define AMT_MAX_REQS 128
+#define AMT_GW_HLEN (sizeof(struct iphdr) + \
+ sizeof(struct udphdr) + \
+ sizeof(struct amt_gw_headers))
+#define AMT_RELAY_HLEN (sizeof(struct iphdr) + \
+ sizeof(struct udphdr) + \
+ sizeof(struct amt_relay_headers))
+
+static inline bool netif_is_amt(const struct net_device *dev)
+{
+ return dev->rtnl_link_ops && !strcmp(dev->rtnl_link_ops->kind, "amt");
+}
+
+static inline u64 amt_gmi(const struct amt_dev *amt)
+{
+ return ((amt->qrv * amt->qi) + amt->qri) * 1000;
+}
+
+#endif /* _NET_AMT_H_ */
diff --git a/include/net/arp.h b/include/net/arp.h
index 4950191f6b2b..d7ef4ec71dfe 100644
--- a/include/net/arp.h
+++ b/include/net/arp.h
@@ -53,13 +53,7 @@ static inline void __ipv4_confirm_neigh(struct net_device *dev, u32 key)
rcu_read_lock_bh();
n = __ipv4_neigh_lookup_noref(dev, key);
- if (n) {
- unsigned long now = jiffies;
-
- /* avoid dirtying neighbour */
- if (READ_ONCE(n->confirmed) != now)
- WRITE_ONCE(n->confirmed, now);
- }
+ neigh_confirm(n);
rcu_read_unlock_bh();
}
@@ -71,6 +65,7 @@ void arp_send(int type, int ptype, __be32 dest_ip,
const unsigned char *src_hw, const unsigned char *th);
int arp_mc_map(__be32 addr, u8 *haddr, struct net_device *dev, int dir);
void arp_ifdown(struct net_device *dev);
+int arp_invalidate(struct net_device *dev, __be32 ip, bool force);
struct sk_buff *arp_create(int type, int ptype, __be32 dest_ip,
struct net_device *dev, __be32 src_ip,
diff --git a/include/net/ax25.h b/include/net/ax25.h
index 8b7eb46ad72d..f8cf3629a419 100644
--- a/include/net/ax25.h
+++ b/include/net/ax25.h
@@ -187,18 +187,12 @@ typedef struct {
typedef struct ax25_route {
struct ax25_route *next;
- refcount_t refcount;
ax25_address callsign;
struct net_device *dev;
ax25_digi *digipeat;
char ip_mode;
} ax25_route;
-static inline void ax25_hold_route(ax25_route *ax25_rt)
-{
- refcount_inc(&ax25_rt->refcount);
-}
-
void __ax25_put_route(ax25_route *ax25_rt);
extern rwlock_t ax25_route_lock;
@@ -213,12 +207,6 @@ static inline void ax25_route_lock_unuse(void)
read_unlock(&ax25_route_lock);
}
-static inline void ax25_put_route(ax25_route *ax25_rt)
-{
- if (refcount_dec_and_test(&ax25_rt->refcount))
- __ax25_put_route(ax25_rt);
-}
-
typedef struct {
char slave; /* slave_mode? */
struct timer_list slave_timer; /* timeout timer */
@@ -229,13 +217,18 @@ struct ctl_table;
typedef struct ax25_dev {
struct ax25_dev *next;
+
struct net_device *dev;
+ netdevice_tracker dev_tracker;
+
struct net_device *forward;
struct ctl_table_header *sysheader;
int values[AX25_MAX_VALUES];
#if defined(CONFIG_AX25_DAMA_SLAVE) || defined(CONFIG_AX25_DAMA_MASTER)
ax25_dama_info dama;
#endif
+ refcount_t refcount;
+ bool device_up;
} ax25_dev;
typedef struct ax25_cb {
@@ -243,6 +236,7 @@ typedef struct ax25_cb {
ax25_address source_addr, dest_addr;
ax25_digi *digipeat;
ax25_dev *ax25_dev;
+ netdevice_tracker dev_tracker;
unsigned char iamdigi;
unsigned char state, modulus, pidincl;
unsigned short vs, vr, va;
@@ -290,6 +284,17 @@ static __inline__ void ax25_cb_put(ax25_cb *ax25)
}
}
+static inline void ax25_dev_hold(ax25_dev *ax25_dev)
+{
+ refcount_inc(&ax25_dev->refcount);
+}
+
+static inline void ax25_dev_put(ax25_dev *ax25_dev)
+{
+ if (refcount_dec_and_test(&ax25_dev->refcount)) {
+ kfree(ax25_dev);
+ }
+}
static inline __be16 ax25_type_trans(struct sk_buff *skb, struct net_device *dev)
{
skb->dev = dev;
@@ -304,7 +309,7 @@ extern spinlock_t ax25_list_lock;
void ax25_cb_add(ax25_cb *);
struct sock *ax25_find_listener(ax25_address *, int, struct net_device *, int);
struct sock *ax25_get_socket(ax25_address *, ax25_address *, int);
-ax25_cb *ax25_find_cb(ax25_address *, ax25_address *, ax25_digi *,
+ax25_cb *ax25_find_cb(const ax25_address *, ax25_address *, ax25_digi *,
struct net_device *);
void ax25_send_to_raw(ax25_address *, struct sk_buff *, int);
void ax25_destroy_socket(ax25_cb *);
@@ -384,10 +389,11 @@ struct ax25_linkfail {
void ax25_linkfail_register(struct ax25_linkfail *lf);
void ax25_linkfail_release(struct ax25_linkfail *lf);
-int __must_check ax25_listen_register(ax25_address *, struct net_device *);
-void ax25_listen_release(ax25_address *, struct net_device *);
+int __must_check ax25_listen_register(const ax25_address *,
+ struct net_device *);
+void ax25_listen_release(const ax25_address *, struct net_device *);
int(*ax25_protocol_function(unsigned int))(struct sk_buff *, ax25_cb *);
-int ax25_listen_mine(ax25_address *, struct net_device *);
+int ax25_listen_mine(const ax25_address *, struct net_device *);
void ax25_link_failed(ax25_cb *, int);
int ax25_protocol_is_registered(unsigned int);
@@ -401,8 +407,8 @@ netdev_tx_t ax25_ip_xmit(struct sk_buff *skb);
extern const struct header_ops ax25_header_ops;
/* ax25_out.c */
-ax25_cb *ax25_send_frame(struct sk_buff *, int, ax25_address *, ax25_address *,
- ax25_digi *, struct net_device *);
+ax25_cb *ax25_send_frame(struct sk_buff *, int, const ax25_address *,
+ ax25_address *, ax25_digi *, struct net_device *);
void ax25_output(ax25_cb *, int, struct sk_buff *);
void ax25_kick(ax25_cb *);
void ax25_transmit_buffer(ax25_cb *, struct sk_buff *, int);
diff --git a/include/net/ax88796.h b/include/net/ax88796.h
index aa52b2e8ff7b..303100f08ab8 100644
--- a/include/net/ax88796.h
+++ b/include/net/ax88796.h
@@ -8,6 +8,8 @@
#ifndef __NET_AX88796_PLAT_H
#define __NET_AX88796_PLAT_H
+#include <linux/types.h>
+
struct sk_buff;
struct net_device;
struct platform_device;
@@ -32,10 +34,13 @@ struct ax_plat_data {
const unsigned char *buf, int star_page);
void (*block_input)(struct net_device *dev, int count,
struct sk_buff *skb, int ring_offset);
- /* returns nonzero if a pending interrupt request might by caused by
- * the ax88786. Handles all interrupts if set to NULL
+ /* returns nonzero if a pending interrupt request might be caused by
+ * the ax88796. Handles all interrupts if set to NULL
*/
int (*check_irq)(struct platform_device *pdev);
};
+/* exported from ax88796.c for xsurf100.c */
+extern void ax_NS8390_reinit(struct net_device *dev);
+
#endif /* __NET_AX88796_PLAT_H */
diff --git a/include/net/bareudp.h b/include/net/bareudp.h
new file mode 100644
index 000000000000..17610c8d6361
--- /dev/null
+++ b/include/net/bareudp.h
@@ -0,0 +1,16 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+
+#ifndef __NET_BAREUDP_H
+#define __NET_BAREUDP_H
+
+#include <linux/netdevice.h>
+#include <linux/types.h>
+#include <net/rtnetlink.h>
+
+static inline bool netif_is_bareudp(const struct net_device *dev)
+{
+ return dev->rtnl_link_ops &&
+ !strcmp(dev->rtnl_link_ops->kind, "bareudp");
+}
+
+#endif
diff --git a/include/net/bluetooth/bluetooth.h b/include/net/bluetooth/bluetooth.h
index e42bb8e03c09..bcc5a4cd2c17 100644
--- a/include/net/bluetooth/bluetooth.h
+++ b/include/net/bluetooth/bluetooth.h
@@ -41,6 +41,8 @@
#define BLUETOOTH_VER_1_1 1
#define BLUETOOTH_VER_1_2 2
#define BLUETOOTH_VER_2_0 3
+#define BLUETOOTH_VER_2_1 4
+#define BLUETOOTH_VER_4_0 6
/* Reserv for core and drivers use */
#define BT_SKB_RESERVE 8
@@ -53,6 +55,8 @@
#define BTPROTO_CMTP 5
#define BTPROTO_HIDP 6
#define BTPROTO_AVDTP 7
+#define BTPROTO_ISO 8
+#define BTPROTO_LAST BTPROTO_ISO
#define SOL_HCI 0
#define SOL_L2CAP 6
@@ -121,6 +125,102 @@ struct bt_voice {
#define BT_SNDMTU 12
#define BT_RCVMTU 13
+#define BT_PHY 14
+
+#define BT_PHY_BR_1M_1SLOT 0x00000001
+#define BT_PHY_BR_1M_3SLOT 0x00000002
+#define BT_PHY_BR_1M_5SLOT 0x00000004
+#define BT_PHY_EDR_2M_1SLOT 0x00000008
+#define BT_PHY_EDR_2M_3SLOT 0x00000010
+#define BT_PHY_EDR_2M_5SLOT 0x00000020
+#define BT_PHY_EDR_3M_1SLOT 0x00000040
+#define BT_PHY_EDR_3M_3SLOT 0x00000080
+#define BT_PHY_EDR_3M_5SLOT 0x00000100
+#define BT_PHY_LE_1M_TX 0x00000200
+#define BT_PHY_LE_1M_RX 0x00000400
+#define BT_PHY_LE_2M_TX 0x00000800
+#define BT_PHY_LE_2M_RX 0x00001000
+#define BT_PHY_LE_CODED_TX 0x00002000
+#define BT_PHY_LE_CODED_RX 0x00004000
+
+#define BT_MODE 15
+
+#define BT_MODE_BASIC 0x00
+#define BT_MODE_ERTM 0x01
+#define BT_MODE_STREAMING 0x02
+#define BT_MODE_LE_FLOWCTL 0x03
+#define BT_MODE_EXT_FLOWCTL 0x04
+
+#define BT_PKT_STATUS 16
+
+#define BT_SCM_PKT_STATUS 0x03
+
+#define BT_ISO_QOS 17
+
+#define BT_ISO_QOS_CIG_UNSET 0xff
+#define BT_ISO_QOS_CIS_UNSET 0xff
+
+#define BT_ISO_QOS_BIG_UNSET 0xff
+#define BT_ISO_QOS_BIS_UNSET 0xff
+
+struct bt_iso_io_qos {
+ __u32 interval;
+ __u16 latency;
+ __u16 sdu;
+ __u8 phy;
+ __u8 rtn;
+};
+
+struct bt_iso_qos {
+ union {
+ __u8 cig;
+ __u8 big;
+ };
+ union {
+ __u8 cis;
+ __u8 bis;
+ };
+ union {
+ __u8 sca;
+ __u8 sync_interval;
+ };
+ __u8 packing;
+ __u8 framing;
+ struct bt_iso_io_qos in;
+ struct bt_iso_io_qos out;
+};
+
+#define BT_ISO_PHY_1M 0x01
+#define BT_ISO_PHY_2M 0x02
+#define BT_ISO_PHY_CODED 0x04
+#define BT_ISO_PHY_ANY (BT_ISO_PHY_1M | BT_ISO_PHY_2M | \
+ BT_ISO_PHY_CODED)
+
+#define BT_CODEC 19
+
+struct bt_codec_caps {
+ __u8 len;
+ __u8 data[];
+} __packed;
+
+struct bt_codec {
+ __u8 id;
+ __u16 cid;
+ __u16 vid;
+ __u8 data_path;
+ __u8 num_caps;
+} __packed;
+
+struct bt_codecs {
+ __u8 num_codecs;
+ struct bt_codec codecs[];
+} __packed;
+
+#define BT_CODEC_CVSD 0x02
+#define BT_CODEC_TRANSPARENT 0x03
+#define BT_CODEC_MSBC 0x05
+
+#define BT_ISO_BASE 20
__printf(1, 2)
void bt_info(const char *fmt, ...);
@@ -128,6 +228,12 @@ __printf(1, 2)
void bt_warn(const char *fmt, ...);
__printf(1, 2)
void bt_err(const char *fmt, ...);
+#if IS_ENABLED(CONFIG_BT_FEATURE_DEBUG)
+void bt_dbg_set(bool enable);
+bool bt_dbg_get(void);
+__printf(1, 2)
+void bt_dbg(const char *fmt, ...);
+#endif
__printf(1, 2)
void bt_warn_ratelimited(const char *fmt, ...);
__printf(1, 2)
@@ -136,21 +242,28 @@ void bt_err_ratelimited(const char *fmt, ...);
#define BT_INFO(fmt, ...) bt_info(fmt "\n", ##__VA_ARGS__)
#define BT_WARN(fmt, ...) bt_warn(fmt "\n", ##__VA_ARGS__)
#define BT_ERR(fmt, ...) bt_err(fmt "\n", ##__VA_ARGS__)
+
+#if IS_ENABLED(CONFIG_BT_FEATURE_DEBUG)
+#define BT_DBG(fmt, ...) bt_dbg(fmt "\n", ##__VA_ARGS__)
+#else
#define BT_DBG(fmt, ...) pr_debug(fmt "\n", ##__VA_ARGS__)
+#endif
+
+#define bt_dev_name(hdev) ((hdev) ? (hdev)->name : "null")
#define bt_dev_info(hdev, fmt, ...) \
- BT_INFO("%s: " fmt, (hdev)->name, ##__VA_ARGS__)
+ BT_INFO("%s: " fmt, bt_dev_name(hdev), ##__VA_ARGS__)
#define bt_dev_warn(hdev, fmt, ...) \
- BT_WARN("%s: " fmt, (hdev)->name, ##__VA_ARGS__)
+ BT_WARN("%s: " fmt, bt_dev_name(hdev), ##__VA_ARGS__)
#define bt_dev_err(hdev, fmt, ...) \
- BT_ERR("%s: " fmt, (hdev)->name, ##__VA_ARGS__)
+ BT_ERR("%s: " fmt, bt_dev_name(hdev), ##__VA_ARGS__)
#define bt_dev_dbg(hdev, fmt, ...) \
- BT_DBG("%s: " fmt, (hdev)->name, ##__VA_ARGS__)
+ BT_DBG("%s: " fmt, bt_dev_name(hdev), ##__VA_ARGS__)
#define bt_dev_warn_ratelimited(hdev, fmt, ...) \
- bt_warn_ratelimited("%s: " fmt, (hdev)->name, ##__VA_ARGS__)
+ bt_warn_ratelimited("%s: " fmt, bt_dev_name(hdev), ##__VA_ARGS__)
#define bt_dev_err_ratelimited(hdev, fmt, ...) \
- bt_err_ratelimited("%s: " fmt, (hdev)->name, ##__VA_ARGS__)
+ bt_err_ratelimited("%s: " fmt, bt_dev_name(hdev), ##__VA_ARGS__)
/* Connection and socket states */
enum {
@@ -250,6 +363,7 @@ struct bt_sock {
struct sock *parent;
unsigned long flags;
void (*skb_msg_name)(struct sk_buff *, void *, int *);
+ void (*skb_put_cmsg)(struct sk_buff *, struct msghdr *, struct sock *);
};
enum {
@@ -276,7 +390,7 @@ int bt_sock_stream_recvmsg(struct socket *sock, struct msghdr *msg,
__poll_t bt_sock_poll(struct file *file, struct socket *sock, poll_table *wait);
int bt_sock_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg);
int bt_sock_wait_state(struct sock *sk, int state, unsigned long timeo);
-int bt_sock_wait_ready(struct sock *sk, unsigned long flags);
+int bt_sock_wait_ready(struct sock *sk, unsigned int msg_flags);
void bt_accept_enqueue(struct sock *parent, struct sock *sk, bool bh);
void bt_accept_unlink(struct sock *sk);
@@ -299,6 +413,10 @@ struct l2cap_ctrl {
struct l2cap_chan *chan;
};
+struct sco_ctrl {
+ u8 pkt_status;
+};
+
struct hci_dev;
typedef void (*hci_req_complete_t)(struct hci_dev *hdev, u8 status, u16 opcode);
@@ -309,6 +427,7 @@ typedef void (*hci_req_complete_skb_t)(struct hci_dev *hdev, u8 status,
#define HCI_REQ_SKB BIT(1)
struct hci_ctrl {
+ struct sock *sk;
u16 opcode;
u8 req_flags;
u8 req_event;
@@ -318,6 +437,11 @@ struct hci_ctrl {
};
};
+struct mgmt_ctrl {
+ struct hci_dev *hdev;
+ u16 opcode;
+};
+
struct bt_skb_cb {
u8 pkt_type;
u8 force_active;
@@ -325,7 +449,9 @@ struct bt_skb_cb {
u8 incoming:1;
union {
struct l2cap_ctrl l2cap;
+ struct sco_ctrl sco;
struct hci_ctrl hci;
+ struct mgmt_ctrl mgmt;
};
};
#define bt_cb(skb) ((struct bt_skb_cb *)((skb)->cb))
@@ -333,6 +459,8 @@ struct bt_skb_cb {
#define hci_skb_pkt_type(skb) bt_cb((skb))->pkt_type
#define hci_skb_expect(skb) bt_cb((skb))->expect
#define hci_skb_opcode(skb) bt_cb((skb))->hci.opcode
+#define hci_skb_event(skb) bt_cb((skb))->hci.req_event
+#define hci_skb_sk(skb) bt_cb((skb))->hci.sk
static inline struct sk_buff *bt_skb_alloc(unsigned int len, gfp_t how)
{
@@ -372,7 +500,73 @@ out:
return NULL;
}
+/* Shall not be called with lock_sock held */
+static inline struct sk_buff *bt_skb_sendmsg(struct sock *sk,
+ struct msghdr *msg,
+ size_t len, size_t mtu,
+ size_t headroom, size_t tailroom)
+{
+ struct sk_buff *skb;
+ size_t size = min_t(size_t, len, mtu);
+ int err;
+
+ skb = bt_skb_send_alloc(sk, size + headroom + tailroom,
+ msg->msg_flags & MSG_DONTWAIT, &err);
+ if (!skb)
+ return ERR_PTR(err);
+
+ skb_reserve(skb, headroom);
+ skb_tailroom_reserve(skb, mtu, tailroom);
+
+ if (!copy_from_iter_full(skb_put(skb, size), size, &msg->msg_iter)) {
+ kfree_skb(skb);
+ return ERR_PTR(-EFAULT);
+ }
+
+ skb->priority = sk->sk_priority;
+
+ return skb;
+}
+
+/* Similar to bt_skb_sendmsg but can split the msg into multiple fragments
+ * accourding to the MTU.
+ */
+static inline struct sk_buff *bt_skb_sendmmsg(struct sock *sk,
+ struct msghdr *msg,
+ size_t len, size_t mtu,
+ size_t headroom, size_t tailroom)
+{
+ struct sk_buff *skb, **frag;
+
+ skb = bt_skb_sendmsg(sk, msg, len, mtu, headroom, tailroom);
+ if (IS_ERR(skb))
+ return skb;
+
+ len -= skb->len;
+ if (!len)
+ return skb;
+
+ /* Add remaining data over MTU as continuation fragments */
+ frag = &skb_shinfo(skb)->frag_list;
+ while (len) {
+ struct sk_buff *tmp;
+
+ tmp = bt_skb_sendmsg(sk, msg, len, mtu, headroom, tailroom);
+ if (IS_ERR(tmp)) {
+ return skb;
+ }
+
+ len -= tmp->len;
+
+ *frag = tmp;
+ frag = &(*frag)->next;
+ }
+
+ return skb;
+}
+
int bt_to_errno(u16 code);
+__u8 bt_status(int err);
void hci_sock_set_flag(struct sock *sk, int nr);
void hci_sock_clear_flag(struct sock *sk, int nr);
@@ -410,8 +604,30 @@ static inline void sco_exit(void)
}
#endif
+#if IS_ENABLED(CONFIG_BT_LE)
+int iso_init(void);
+int iso_exit(void);
+bool iso_enabled(void);
+#else
+static inline int iso_init(void)
+{
+ return 0;
+}
+
+static inline int iso_exit(void)
+{
+ return 0;
+}
+
+static inline bool iso_enabled(void)
+{
+ return false;
+}
+#endif
+
int mgmt_init(void);
void mgmt_exit(void);
+void mgmt_cleanup(struct sock *sk);
void bt_sock_reclassify_lock(struct sock *sk, int proto);
diff --git a/include/net/bluetooth/hci.h b/include/net/bluetooth/hci.h
index 6293bdd7d862..e004ba04a9ae 100644
--- a/include/net/bluetooth/hci.h
+++ b/include/net/bluetooth/hci.h
@@ -36,7 +36,7 @@
#define HCI_MAX_AMP_ASSOC_SIZE 672
-#define HCI_MAX_CSB_DATA_SIZE 252
+#define HCI_MAX_CPB_DATA_SIZE 252
/* HCI dev events */
#define HCI_DEV_REG 1
@@ -53,6 +53,9 @@
#define HCI_NOTIFY_CONN_ADD 1
#define HCI_NOTIFY_CONN_DEL 2
#define HCI_NOTIFY_VOICE_SETTING 3
+#define HCI_NOTIFY_ENABLE_SCO_CVSD 4
+#define HCI_NOTIFY_ENABLE_SCO_TRANSP 5
+#define HCI_NOTIFY_DISABLE_SCO 6
/* HCI bus types */
#define HCI_VIRTUAL 0
@@ -65,6 +68,7 @@
#define HCI_SPI 7
#define HCI_I2C 8
#define HCI_SMD 9
+#define HCI_VIRTIO 10
/* HCI controller types */
#define HCI_PRIMARY 0x00
@@ -115,7 +119,7 @@ enum {
* wrongly configured local features that will require forcing
* them to enable this mode. Getting RSSI information with the
* inquiry responses is preferred since it allows for a better
- * user expierence.
+ * user experience.
*
* This quirk must be set before hci_register_dev is called.
*/
@@ -142,7 +146,7 @@ enum {
/* When this quirk is set, an external configuration step
* is required and will be indicated with the controller
- * configuation.
+ * configuration.
*
* This quirk can be set before hci_register_dev is called or
* during the hdev->setup vendor callback.
@@ -205,6 +209,60 @@ enum {
*
*/
HCI_QUIRK_NON_PERSISTENT_SETUP,
+
+ /* When this quirk is set, wide band speech is supported by
+ * the driver since no reliable mechanism exist to report
+ * this from the hardware, a driver flag is use to convey
+ * this support
+ *
+ * This quirk must be set before hci_register_dev is called.
+ */
+ HCI_QUIRK_WIDEBAND_SPEECH_SUPPORTED,
+
+ /* When this quirk is set, the controller has validated that
+ * LE states reported through the HCI_LE_READ_SUPPORTED_STATES are
+ * valid. This mechanism is necessary as many controllers have
+ * been seen has having trouble initiating a connectable
+ * advertisement despite the state combination being reported as
+ * supported.
+ */
+ HCI_QUIRK_VALID_LE_STATES,
+
+ /*
+ * When this quirk is set, then the hci_suspend_notifier is not
+ * registered. This is intended for devices which drop completely
+ * from the bus on system-suspend and which will show up as a new
+ * HCI after resume.
+ */
+ HCI_QUIRK_NO_SUSPEND_NOTIFIER,
+
+ /*
+ * When this quirk is set, LE tx power is not queried on startup
+ * and the min/max tx power values default to HCI_TX_POWER_INVALID.
+ *
+ * This quirk can be set before hci_register_dev is called or
+ * during the hdev->setup vendor callback.
+ */
+ HCI_QUIRK_BROKEN_READ_TRANSMIT_POWER,
+
+ /* When this quirk is set, HCI_OP_SET_EVENT_FLT requests with
+ * HCI_FLT_CLEAR_ALL are ignored and event filtering is
+ * completely avoided. A subset of the CSR controller
+ * clones struggle with this and instantly lock up.
+ *
+ * Note that devices using this must (separately) disable
+ * runtime suspend, because event filtering takes place there.
+ */
+ HCI_QUIRK_BROKEN_FILTER_CLEAR_ALL,
+
+ /*
+ * When this quirk is set, disables the use of
+ * HCI_OP_ENHANCED_SETUP_SYNC_CONN command to setup SCO connections.
+ *
+ * This quirk can be set before hci_register_dev is called or
+ * during the hdev->setup vendor callback.
+ */
+ HCI_QUIRK_BROKEN_ENHANCED_SETUP_SYNC_CONN,
};
/* HCI device flags */
@@ -236,6 +294,7 @@ enum {
HCI_MGMT_DEV_CLASS_EVENTS,
HCI_MGMT_LOCAL_NAME_EVENTS,
HCI_MGMT_OOB_DATA_EVENTS,
+ HCI_MGMT_EXP_FEATURE_EVENTS,
};
/*
@@ -257,6 +316,7 @@ enum {
HCI_USER_CHANNEL,
HCI_EXT_CONFIGURED,
HCI_LE_ADV,
+ HCI_LE_PER_ADV,
HCI_LE_SCAN,
HCI_SSP_ENABLED,
HCI_SC_ENABLED,
@@ -277,13 +337,26 @@ enum {
HCI_FAST_CONNECTABLE,
HCI_BREDR_ENABLED,
HCI_LE_SCAN_INTERRUPTED,
+ HCI_WIDEBAND_SPEECH_ENABLED,
+ HCI_EVENT_FILTER_CONFIGURED,
+ HCI_PA_SYNC,
HCI_DUT_MODE,
HCI_VENDOR_DIAG,
HCI_FORCE_BREDR_SMP,
HCI_FORCE_STATIC_ADDR,
HCI_LL_RPA_RESOLUTION,
+ HCI_ENABLE_LL_PRIVACY,
HCI_CMD_PENDING,
+ HCI_FORCE_NO_MITM,
+ HCI_QUALITY_REPORT,
+ HCI_OFFLOAD_CODECS_ENABLED,
+ HCI_LE_SIMULTANEOUS_ROLES,
+ HCI_CMD_DRAIN_WORKQUEUE,
+
+ HCI_MESH_EXPERIMENTAL,
+ HCI_MESH,
+ HCI_MESH_SENDING,
__HCI_NUM_FLAGS,
};
@@ -293,6 +366,7 @@ enum {
#define HCI_PAIRING_TIMEOUT msecs_to_jiffies(60000) /* 60 seconds */
#define HCI_INIT_TIMEOUT msecs_to_jiffies(10000) /* 10 seconds */
#define HCI_CMD_TIMEOUT msecs_to_jiffies(2000) /* 2 seconds */
+#define HCI_NCMD_TIMEOUT msecs_to_jiffies(4000) /* 4 seconds */
#define HCI_ACL_TX_TIMEOUT msecs_to_jiffies(45000) /* 45 seconds */
#define HCI_AUTO_OFF_TIMEOUT msecs_to_jiffies(2000) /* 2 seconds */
#define HCI_POWER_OFF_TIMEOUT msecs_to_jiffies(5000) /* 5 seconds */
@@ -418,6 +492,7 @@ enum {
#define LMP_EXT_INQ 0x01
#define LMP_SIMUL_LE_BR 0x02
#define LMP_SIMPLE_PAIR 0x08
+#define LMP_ERR_DATA_REPORTING 0x20
#define LMP_NO_FLUSH 0x40
#define LMP_LSTO 0x01
@@ -425,10 +500,10 @@ enum {
#define LMP_EXTFEATURES 0x80
/* Extended LMP features */
-#define LMP_CSB_MASTER 0x01
-#define LMP_CSB_SLAVE 0x02
-#define LMP_SYNC_TRAIN 0x04
-#define LMP_SYNC_SCAN 0x08
+#define LMP_CPB_CENTRAL 0x01
+#define LMP_CPB_PERIPHERAL 0x02
+#define LMP_SYNC_TRAIN 0x04
+#define LMP_SYNC_SCAN 0x08
#define LMP_SC 0x01
#define LMP_PING 0x02
@@ -442,18 +517,19 @@ enum {
/* LE features */
#define HCI_LE_ENCRYPTION 0x01
#define HCI_LE_CONN_PARAM_REQ_PROC 0x02
-#define HCI_LE_SLAVE_FEATURES 0x08
+#define HCI_LE_PERIPHERAL_FEATURES 0x08
#define HCI_LE_PING 0x10
#define HCI_LE_DATA_LEN_EXT 0x20
-#define HCI_LE_PHY_2M 0x01
-#define HCI_LE_PHY_CODED 0x08
-#define HCI_LE_EXT_ADV 0x10
+#define HCI_LE_LL_PRIVACY 0x40
#define HCI_LE_EXT_SCAN_POLICY 0x80
#define HCI_LE_PHY_2M 0x01
#define HCI_LE_PHY_CODED 0x08
+#define HCI_LE_EXT_ADV 0x10
+#define HCI_LE_PERIODIC_ADV 0x20
#define HCI_LE_CHAN_SEL_ALG2 0x40
-#define HCI_LE_CIS_MASTER 0x10
-#define HCI_LE_CIS_SLAVE 0x20
+#define HCI_LE_CIS_CENTRAL 0x10
+#define HCI_LE_CIS_PERIPHERAL 0x20
+#define HCI_LE_ISO_BROADCASTER 0x40
/* Connection modes */
#define HCI_CM_ACTIVE 0x0000
@@ -510,6 +586,7 @@ enum {
#define HCI_ERROR_CONNECTION_TIMEOUT 0x08
#define HCI_ERROR_REJ_LIMITED_RESOURCES 0x0d
#define HCI_ERROR_REJ_BAD_ADDR 0x0f
+#define HCI_ERROR_INVALID_PARAMETERS 0x12
#define HCI_ERROR_REMOTE_USER_TERM 0x13
#define HCI_ERROR_REMOTE_LOW_RESOURCES 0x14
#define HCI_ERROR_REMOTE_POWER_OFF 0x15
@@ -518,6 +595,7 @@ enum {
#define HCI_ERROR_INVALID_LL_PARAMS 0x1e
#define HCI_ERROR_UNSPECIFIED 0x1f
#define HCI_ERROR_ADVERTISING_TIMEOUT 0x3c
+#define HCI_ERROR_CANCELLED_BY_HOST 0x44
/* Flow control modes */
#define HCI_FLOW_CTL_MODE_PACKET_BASED 0x00
@@ -546,6 +624,7 @@ enum {
#define EIR_SSP_RAND_R192 0x0F /* Simple Pairing Randomizer R-192 */
#define EIR_DEVICE_ID 0x10 /* device ID */
#define EIR_APPEARANCE 0x19 /* Device appearance */
+#define EIR_SERVICE_DATA 0x16 /* Service Data */
#define EIR_LE_BDADDR 0x1B /* LE Bluetooth device address */
#define EIR_LE_ROLE 0x1C /* LE role */
#define EIR_SSP_HASH_C256 0x1D /* Simple Pairing Hash C-256 */
@@ -825,23 +904,57 @@ struct hci_cp_logical_link_cancel {
__u8 flow_spec_id;
} __packed;
+#define HCI_OP_ENHANCED_SETUP_SYNC_CONN 0x043d
+struct hci_coding_format {
+ __u8 id;
+ __le16 cid;
+ __le16 vid;
+} __packed;
+
+struct hci_cp_enhanced_setup_sync_conn {
+ __le16 handle;
+ __le32 tx_bandwidth;
+ __le32 rx_bandwidth;
+ struct hci_coding_format tx_coding_format;
+ struct hci_coding_format rx_coding_format;
+ __le16 tx_codec_frame_size;
+ __le16 rx_codec_frame_size;
+ __le32 in_bandwidth;
+ __le32 out_bandwidth;
+ struct hci_coding_format in_coding_format;
+ struct hci_coding_format out_coding_format;
+ __le16 in_coded_data_size;
+ __le16 out_coded_data_size;
+ __u8 in_pcm_data_format;
+ __u8 out_pcm_data_format;
+ __u8 in_pcm_sample_payload_msb_pos;
+ __u8 out_pcm_sample_payload_msb_pos;
+ __u8 in_data_path;
+ __u8 out_data_path;
+ __u8 in_transport_unit_size;
+ __u8 out_transport_unit_size;
+ __le16 max_latency;
+ __le16 pkt_type;
+ __u8 retrans_effort;
+} __packed;
+
struct hci_rp_logical_link_cancel {
__u8 status;
__u8 phy_handle;
__u8 flow_spec_id;
} __packed;
-#define HCI_OP_SET_CSB 0x0441
-struct hci_cp_set_csb {
+#define HCI_OP_SET_CPB 0x0441
+struct hci_cp_set_cpb {
__u8 enable;
__u8 lt_addr;
__u8 lpo_allowed;
__le16 packet_type;
__le16 interval_min;
__le16 interval_max;
- __le16 csb_sv_tout;
+ __le16 cpb_sv_tout;
} __packed;
-struct hci_rp_set_csb {
+struct hci_rp_set_cpb {
__u8 status;
__u8 lt_addr;
__le16 interval;
@@ -932,10 +1045,14 @@ struct hci_cp_sniff_subrate {
#define HCI_OP_RESET 0x0c03
#define HCI_OP_SET_EVENT_FLT 0x0c05
-struct hci_cp_set_event_flt {
- __u8 flt_type;
- __u8 cond_type;
- __u8 condition[0];
+#define HCI_SET_EVENT_FLT_SIZE 9
+struct hci_cp_set_event_filter {
+ __u8 flt_type;
+ __u8 cond_type;
+ struct {
+ bdaddr_t bdaddr;
+ __u8 auto_accept;
+ } __packed addr_conn_flt;
} __packed;
/* Filter types */
@@ -949,8 +1066,9 @@ struct hci_cp_set_event_flt {
#define HCI_CONN_SETUP_ALLOW_BDADDR 0x02
/* CONN_SETUP Conditions */
-#define HCI_CONN_SETUP_AUTO_OFF 0x01
-#define HCI_CONN_SETUP_AUTO_ON 0x02
+#define HCI_CONN_SETUP_AUTO_OFF 0x01
+#define HCI_CONN_SETUP_AUTO_ON 0x02
+#define HCI_CONN_SETUP_AUTO_ON_WITH_RS 0x03
#define HCI_OP_READ_STORED_LINK_KEY 0x0c0d
struct hci_cp_read_stored_link_key {
@@ -959,8 +1077,8 @@ struct hci_cp_read_stored_link_key {
} __packed;
struct hci_rp_read_stored_link_key {
__u8 status;
- __u8 max_keys;
- __u8 num_keys;
+ __le16 max_keys;
+ __le16 num_keys;
} __packed;
#define HCI_OP_DELETE_STORED_LINK_KEY 0x0c12
@@ -970,7 +1088,7 @@ struct hci_cp_delete_stored_link_key {
} __packed;
struct hci_rp_delete_stored_link_key {
__u8 status;
- __u8 num_keys;
+ __le16 num_keys;
} __packed;
#define HCI_MAX_NAME_LENGTH 248
@@ -1086,6 +1204,19 @@ struct hci_rp_read_inq_rsp_tx_power {
__s8 tx_power;
} __packed;
+#define HCI_OP_READ_DEF_ERR_DATA_REPORTING 0x0c5a
+ #define ERR_DATA_REPORTING_DISABLED 0x00
+ #define ERR_DATA_REPORTING_ENABLED 0x01
+struct hci_rp_read_def_err_data_reporting {
+ __u8 status;
+ __u8 err_data_reporting;
+} __packed;
+
+#define HCI_OP_WRITE_DEF_ERR_DATA_REPORTING 0x0c5b
+struct hci_cp_write_def_err_data_reporting {
+ __u8 err_data_reporting;
+} __packed;
+
#define HCI_OP_SET_EVENT_MASK_PAGE_2 0x0c63
#define HCI_OP_READ_LOCATION_DATA 0x0c64
@@ -1120,14 +1251,14 @@ struct hci_rp_delete_reserved_lt_addr {
__u8 lt_addr;
} __packed;
-#define HCI_OP_SET_CSB_DATA 0x0c76
-struct hci_cp_set_csb_data {
+#define HCI_OP_SET_CPB_DATA 0x0c76
+struct hci_cp_set_cpb_data {
__u8 lt_addr;
__u8 fragment;
__u8 data_length;
- __u8 data[HCI_MAX_CSB_DATA_SIZE];
+ __u8 data[HCI_MAX_CPB_DATA_SIZE];
} __packed;
-struct hci_rp_set_csb_data {
+struct hci_rp_set_cpb_data {
__u8 status;
__u8 lt_addr;
} __packed;
@@ -1186,6 +1317,14 @@ struct hci_rp_read_local_oob_ext_data {
__u8 rand256[16];
} __packed;
+#define HCI_CONFIGURE_DATA_PATH 0x0c83
+struct hci_op_configure_data_path {
+ __u8 direction;
+ __u8 data_path_id;
+ __u8 vnd_len;
+ __u8 vnd_data[];
+} __packed;
+
#define HCI_OP_READ_LOCAL_VERSION 0x1001
struct hci_rp_read_local_version {
__u8 status;
@@ -1243,6 +1382,83 @@ struct hci_rp_read_data_block_size {
} __packed;
#define HCI_OP_READ_LOCAL_CODECS 0x100b
+struct hci_std_codecs {
+ __u8 num;
+ __u8 codec[];
+} __packed;
+
+struct hci_vnd_codec {
+ /* company id */
+ __le16 cid;
+ /* vendor codec id */
+ __le16 vid;
+} __packed;
+
+struct hci_vnd_codecs {
+ __u8 num;
+ struct hci_vnd_codec codec[];
+} __packed;
+
+struct hci_rp_read_local_supported_codecs {
+ __u8 status;
+ struct hci_std_codecs std_codecs;
+ struct hci_vnd_codecs vnd_codecs;
+} __packed;
+
+#define HCI_OP_READ_LOCAL_PAIRING_OPTS 0x100c
+struct hci_rp_read_local_pairing_opts {
+ __u8 status;
+ __u8 pairing_opts;
+ __u8 max_key_size;
+} __packed;
+
+#define HCI_OP_READ_LOCAL_CODECS_V2 0x100d
+struct hci_std_codec_v2 {
+ __u8 id;
+ __u8 transport;
+} __packed;
+
+struct hci_std_codecs_v2 {
+ __u8 num;
+ struct hci_std_codec_v2 codec[];
+} __packed;
+
+struct hci_vnd_codec_v2 {
+ __u8 id;
+ __le16 cid;
+ __le16 vid;
+ __u8 transport;
+} __packed;
+
+struct hci_vnd_codecs_v2 {
+ __u8 num;
+ struct hci_vnd_codec_v2 codec[];
+} __packed;
+
+struct hci_rp_read_local_supported_codecs_v2 {
+ __u8 status;
+ struct hci_std_codecs_v2 std_codecs;
+ struct hci_vnd_codecs_v2 vendor_codecs;
+} __packed;
+
+#define HCI_OP_READ_LOCAL_CODEC_CAPS 0x100e
+struct hci_op_read_local_codec_caps {
+ __u8 id;
+ __le16 cid;
+ __le16 vid;
+ __u8 transport;
+ __u8 direction;
+} __packed;
+
+struct hci_codec_caps {
+ __u8 len;
+ __u8 data[];
+} __packed;
+
+struct hci_rp_read_local_codec_caps {
+ __u8 status;
+ __u8 num_caps;
+} __packed;
#define HCI_OP_READ_PAGE_SCAN_ACTIVITY 0x0c1b
struct hci_rp_read_page_scan_activity {
@@ -1335,7 +1551,7 @@ struct hci_rp_read_local_amp_assoc {
__u8 status;
__u8 phy_handle;
__le16 rem_len;
- __u8 frag[0];
+ __u8 frag[];
} __packed;
#define HCI_OP_WRITE_REMOTE_AMP_ASSOC 0x140b
@@ -1343,7 +1559,7 @@ struct hci_cp_write_remote_amp_assoc {
__u8 phy_handle;
__le16 len_so_far;
__le16 rem_len;
- __u8 frag[0];
+ __u8 frag[];
} __packed;
struct hci_rp_write_remote_amp_assoc {
__u8 status;
@@ -1434,7 +1650,7 @@ struct hci_cp_le_set_scan_enable {
} __packed;
#define HCI_LE_USE_PEER_ADDR 0x00
-#define HCI_LE_USE_WHITELIST 0x01
+#define HCI_LE_USE_ACCEPT_LIST 0x01
#define HCI_OP_LE_CREATE_CONN 0x200d
struct hci_cp_le_create_conn {
@@ -1454,22 +1670,22 @@ struct hci_cp_le_create_conn {
#define HCI_OP_LE_CREATE_CONN_CANCEL 0x200e
-#define HCI_OP_LE_READ_WHITE_LIST_SIZE 0x200f
-struct hci_rp_le_read_white_list_size {
+#define HCI_OP_LE_READ_ACCEPT_LIST_SIZE 0x200f
+struct hci_rp_le_read_accept_list_size {
__u8 status;
__u8 size;
} __packed;
-#define HCI_OP_LE_CLEAR_WHITE_LIST 0x2010
+#define HCI_OP_LE_CLEAR_ACCEPT_LIST 0x2010
-#define HCI_OP_LE_ADD_TO_WHITE_LIST 0x2011
-struct hci_cp_le_add_to_white_list {
+#define HCI_OP_LE_ADD_TO_ACCEPT_LIST 0x2011
+struct hci_cp_le_add_to_accept_list {
__u8 bdaddr_type;
bdaddr_t bdaddr;
} __packed;
-#define HCI_OP_LE_DEL_FROM_WHITE_LIST 0x2012
-struct hci_cp_le_del_from_white_list {
+#define HCI_OP_LE_DEL_FROM_ACCEPT_LIST 0x2012
+struct hci_cp_le_del_from_accept_list {
__u8 bdaddr_type;
bdaddr_t bdaddr;
} __packed;
@@ -1588,6 +1804,8 @@ struct hci_rp_le_read_resolv_list_size {
#define HCI_OP_LE_SET_ADDR_RESOLV_ENABLE 0x202d
+#define HCI_OP_LE_SET_RPA_TIMEOUT 0x202e
+
#define HCI_OP_LE_READ_MAX_DATA_LEN 0x202f
struct hci_rp_le_read_max_data_len {
__u8 status;
@@ -1613,7 +1831,7 @@ struct hci_cp_le_set_ext_scan_params {
__u8 own_addr_type;
__u8 filter_policy;
__u8 scanning_phys;
- __u8 data[0];
+ __u8 data[];
} __packed;
#define LE_SCAN_PHY_1M 0x01
@@ -1641,7 +1859,7 @@ struct hci_cp_le_ext_create_conn {
__u8 peer_addr_type;
bdaddr_t peer_addr;
__u8 phys;
- __u8 data[0];
+ __u8 data[];
} __packed;
struct hci_cp_le_ext_conn_param {
@@ -1655,6 +1873,22 @@ struct hci_cp_le_ext_conn_param {
__le16 max_ce_len;
} __packed;
+#define HCI_OP_LE_PA_CREATE_SYNC 0x2044
+struct hci_cp_le_pa_create_sync {
+ __u8 options;
+ __u8 sid;
+ __u8 addr_type;
+ bdaddr_t addr;
+ __le16 skip;
+ __le16 sync_timeout;
+ __u8 sync_cte_type;
+} __packed;
+
+#define HCI_OP_LE_PA_TERM_SYNC 0x2046
+struct hci_cp_le_pa_term_sync {
+ __le16 handle;
+} __packed;
+
#define HCI_OP_LE_READ_NUM_SUPPORTED_ADV_SETS 0x203b
struct hci_rp_le_read_num_supported_adv_sets {
__u8 status;
@@ -1689,26 +1923,21 @@ struct hci_rp_le_set_ext_adv_params {
__u8 tx_power;
} __packed;
-#define HCI_OP_LE_SET_EXT_ADV_ENABLE 0x2039
-struct hci_cp_le_set_ext_adv_enable {
- __u8 enable;
- __u8 num_of_sets;
- __u8 data[0];
-} __packed;
-
struct hci_cp_ext_adv_set {
__u8 handle;
__le16 duration;
__u8 max_events;
} __packed;
+#define HCI_MAX_EXT_AD_LENGTH 251
+
#define HCI_OP_LE_SET_EXT_ADV_DATA 0x2037
struct hci_cp_le_set_ext_adv_data {
__u8 handle;
__u8 operation;
__u8 frag_pref;
__u8 length;
- __u8 data[HCI_MAX_AD_LENGTH];
+ __u8 data[];
} __packed;
#define HCI_OP_LE_SET_EXT_SCAN_RSP_DATA 0x2038
@@ -1717,13 +1946,46 @@ struct hci_cp_le_set_ext_scan_rsp_data {
__u8 operation;
__u8 frag_pref;
__u8 length;
- __u8 data[HCI_MAX_AD_LENGTH];
+ __u8 data[];
+} __packed;
+
+#define HCI_OP_LE_SET_EXT_ADV_ENABLE 0x2039
+struct hci_cp_le_set_ext_adv_enable {
+ __u8 enable;
+ __u8 num_of_sets;
+ __u8 data[];
+} __packed;
+
+#define HCI_OP_LE_SET_PER_ADV_PARAMS 0x203e
+struct hci_cp_le_set_per_adv_params {
+ __u8 handle;
+ __le16 min_interval;
+ __le16 max_interval;
+ __le16 periodic_properties;
+} __packed;
+
+#define HCI_MAX_PER_AD_LENGTH 252
+
+#define HCI_OP_LE_SET_PER_ADV_DATA 0x203f
+struct hci_cp_le_set_per_adv_data {
+ __u8 handle;
+ __u8 operation;
+ __u8 length;
+ __u8 data[];
+} __packed;
+
+#define HCI_OP_LE_SET_PER_ADV_ENABLE 0x2040
+struct hci_cp_le_set_per_adv_enable {
+ __u8 enable;
+ __u8 handle;
} __packed;
#define LE_SET_ADV_DATA_OP_COMPLETE 0x03
#define LE_SET_ADV_DATA_NO_FRAG 0x01
+#define HCI_OP_LE_REMOVE_ADV_SET 0x203c
+
#define HCI_OP_LE_CLEAR_ADV_SETS 0x203d
#define HCI_OP_LE_SET_ADV_SET_RAND_ADDR 0x2035
@@ -1732,6 +1994,23 @@ struct hci_cp_le_set_adv_set_rand_addr {
bdaddr_t bdaddr;
} __packed;
+#define HCI_OP_LE_READ_TRANSMIT_POWER 0x204b
+struct hci_rp_le_read_transmit_power {
+ __u8 status;
+ __s8 min_le_tx_power;
+ __s8 max_le_tx_power;
+} __packed;
+
+#define HCI_NETWORK_PRIVACY 0x00
+#define HCI_DEVICE_PRIVACY 0x01
+
+#define HCI_OP_LE_SET_PRIVACY_MODE 0x204e
+struct hci_cp_le_set_privacy_mode {
+ __u8 bdaddr_type;
+ bdaddr_t bdaddr;
+ __u8 mode;
+} __packed;
+
#define HCI_OP_LE_READ_BUFFER_SIZE_V2 0x2060
struct hci_rp_le_read_buffer_size_v2 {
__u8 status;
@@ -1757,32 +2036,32 @@ struct hci_rp_le_read_iso_tx_sync {
#define HCI_OP_LE_SET_CIG_PARAMS 0x2062
struct hci_cis_params {
__u8 cis_id;
- __le16 m_sdu;
- __le16 s_sdu;
- __u8 m_phy;
- __u8 s_phy;
- __u8 m_rtn;
- __u8 s_rtn;
+ __le16 c_sdu;
+ __le16 p_sdu;
+ __u8 c_phy;
+ __u8 p_phy;
+ __u8 c_rtn;
+ __u8 p_rtn;
} __packed;
struct hci_cp_le_set_cig_params {
__u8 cig_id;
- __u8 m_interval[3];
- __u8 s_interval[3];
+ __u8 c_interval[3];
+ __u8 p_interval[3];
__u8 sca;
__u8 packing;
__u8 framing;
- __le16 m_latency;
- __le16 s_latency;
+ __le16 c_latency;
+ __le16 p_latency;
__u8 num_cis;
- struct hci_cis_params cis[0];
+ struct hci_cis_params cis[];
} __packed;
struct hci_rp_le_set_cig_params {
__u8 status;
__u8 cig_id;
__u8 num_handles;
- __le16 handle[0];
+ __le16 handle[];
} __packed;
#define HCI_OP_LE_CREATE_CIS 0x2064
@@ -1793,7 +2072,7 @@ struct hci_cis {
struct hci_cp_le_create_cis {
__u8 num_cis;
- struct hci_cis cis[0];
+ struct hci_cis cis[];
} __packed;
#define HCI_OP_LE_REMOVE_CIG 0x2065
@@ -1812,7 +2091,78 @@ struct hci_cp_le_reject_cis {
__u8 reason;
} __packed;
+#define HCI_OP_LE_CREATE_BIG 0x2068
+struct hci_bis {
+ __u8 sdu_interval[3];
+ __le16 sdu;
+ __le16 latency;
+ __u8 rtn;
+ __u8 phy;
+ __u8 packing;
+ __u8 framing;
+ __u8 encryption;
+ __u8 bcode[16];
+} __packed;
+
+struct hci_cp_le_create_big {
+ __u8 handle;
+ __u8 adv_handle;
+ __u8 num_bis;
+ struct hci_bis bis;
+} __packed;
+
+#define HCI_OP_LE_TERM_BIG 0x206a
+struct hci_cp_le_term_big {
+ __u8 handle;
+ __u8 reason;
+} __packed;
+
+#define HCI_OP_LE_BIG_CREATE_SYNC 0x206b
+struct hci_cp_le_big_create_sync {
+ __u8 handle;
+ __le16 sync_handle;
+ __u8 encryption;
+ __u8 bcode[16];
+ __u8 mse;
+ __le16 timeout;
+ __u8 num_bis;
+ __u8 bis[0];
+} __packed;
+
+#define HCI_OP_LE_BIG_TERM_SYNC 0x206c
+struct hci_cp_le_big_term_sync {
+ __u8 handle;
+} __packed;
+
+#define HCI_OP_LE_SETUP_ISO_PATH 0x206e
+struct hci_cp_le_setup_iso_path {
+ __le16 handle;
+ __u8 direction;
+ __u8 path;
+ __u8 codec;
+ __le16 codec_cid;
+ __le16 codec_vid;
+ __u8 delay[3];
+ __u8 codec_cfg_len;
+ __u8 codec_cfg[0];
+} __packed;
+
+struct hci_rp_le_setup_iso_path {
+ __u8 status;
+ __le16 handle;
+} __packed;
+
+#define HCI_OP_LE_SET_HOST_FEATURE 0x2074
+struct hci_cp_le_set_host_feature {
+ __u8 bit_number;
+ __u8 bit_value;
+} __packed;
+
/* ---- HCI Events ---- */
+struct hci_ev_status {
+ __u8 status;
+} __packed;
+
#define HCI_EV_INQUIRY_COMPLETE 0x01
#define HCI_EV_INQUIRY_RESULT 0x02
@@ -1825,6 +2175,11 @@ struct inquiry_info {
__le16 clock_offset;
} __packed;
+struct hci_ev_inquiry_result {
+ __u8 num;
+ struct inquiry_info info[];
+};
+
#define HCI_EV_CONN_COMPLETE 0x03
struct hci_ev_conn_complete {
__u8 status;
@@ -1936,8 +2291,8 @@ struct hci_comp_pkts_info {
} __packed;
struct hci_ev_num_comp_pkts {
- __u8 num_hndl;
- struct hci_comp_pkts_info handles[0];
+ __u8 num;
+ struct hci_comp_pkts_info handles[];
} __packed;
#define HCI_EV_MODE_CHANGE 0x14
@@ -1986,7 +2341,7 @@ struct hci_ev_pscan_rep_mode {
} __packed;
#define HCI_EV_INQUIRY_RESULT_WITH_RSSI 0x22
-struct inquiry_info_with_rssi {
+struct inquiry_info_rssi {
bdaddr_t bdaddr;
__u8 pscan_rep_mode;
__u8 pscan_period_mode;
@@ -1994,7 +2349,7 @@ struct inquiry_info_with_rssi {
__le16 clock_offset;
__s8 rssi;
} __packed;
-struct inquiry_info_with_rssi_and_pscan_mode {
+struct inquiry_info_rssi_pscan {
bdaddr_t bdaddr;
__u8 pscan_rep_mode;
__u8 pscan_period_mode;
@@ -2003,6 +2358,10 @@ struct inquiry_info_with_rssi_and_pscan_mode {
__le16 clock_offset;
__s8 rssi;
} __packed;
+struct hci_ev_inquiry_result_rssi {
+ __u8 num;
+ __u8 data[];
+} __packed;
#define HCI_EV_REMOTE_EXT_FEATURES 0x23
struct hci_ev_remote_ext_features {
@@ -2057,6 +2416,11 @@ struct extended_inquiry_info {
__u8 data[240];
} __packed;
+struct hci_ev_ext_inquiry_result {
+ __u8 num;
+ struct extended_inquiry_info info[];
+} __packed;
+
#define HCI_EV_KEY_REFRESH_COMPLETE 0x30
struct hci_ev_key_refresh_complete {
__u8 status;
@@ -2170,7 +2534,7 @@ struct hci_comp_blocks_info {
struct hci_ev_num_comp_blocks {
__le16 num_blocks;
__u8 num_hndl;
- struct hci_comp_blocks_info handles[0];
+ struct hci_comp_blocks_info handles[];
} __packed;
#define HCI_EV_SYNC_TRAIN_COMPLETE 0x4F
@@ -2178,7 +2542,7 @@ struct hci_ev_sync_train_complete {
__u8 status;
} __packed;
-#define HCI_EV_SLAVE_PAGE_RESP_TIMEOUT 0x54
+#define HCI_EV_PERIPHERAL_PAGE_RESP_TIMEOUT 0x54
#define HCI_EV_LE_CONN_COMPLETE 0x01
struct hci_ev_le_conn_complete {
@@ -2217,16 +2581,23 @@ struct hci_ev_le_conn_complete {
#define LE_EXT_ADV_SCAN_RSP 0x0008
#define LE_EXT_ADV_LEGACY_PDU 0x0010
-#define ADDR_LE_DEV_PUBLIC 0x00
-#define ADDR_LE_DEV_RANDOM 0x01
+#define ADDR_LE_DEV_PUBLIC 0x00
+#define ADDR_LE_DEV_RANDOM 0x01
+#define ADDR_LE_DEV_PUBLIC_RESOLVED 0x02
+#define ADDR_LE_DEV_RANDOM_RESOLVED 0x03
#define HCI_EV_LE_ADVERTISING_REPORT 0x02
struct hci_ev_le_advertising_info {
- __u8 evt_type;
+ __u8 type;
__u8 bdaddr_type;
bdaddr_t bdaddr;
__u8 length;
- __u8 data[0];
+ __u8 data[];
+} __packed;
+
+struct hci_ev_le_advertising_report {
+ __u8 num;
+ struct hci_ev_le_advertising_info info[];
} __packed;
#define HCI_EV_LE_CONN_UPDATE_COMPLETE 0x03
@@ -2272,7 +2643,7 @@ struct hci_ev_le_data_len_change {
#define HCI_EV_LE_DIRECT_ADV_REPORT 0x0B
struct hci_ev_le_direct_adv_info {
- __u8 evt_type;
+ __u8 type;
__u8 bdaddr_type;
bdaddr_t bdaddr;
__u8 direct_addr_type;
@@ -2280,6 +2651,11 @@ struct hci_ev_le_direct_adv_info {
__s8 rssi;
} __packed;
+struct hci_ev_le_direct_adv_report {
+ __u8 num;
+ struct hci_ev_le_direct_adv_info info[];
+} __packed;
+
#define HCI_EV_LE_PHY_UPDATE_COMPLETE 0x0c
struct hci_ev_le_phy_update_complete {
__u8 status;
@@ -2289,8 +2665,8 @@ struct hci_ev_le_phy_update_complete {
} __packed;
#define HCI_EV_LE_EXT_ADV_REPORT 0x0d
-struct hci_ev_le_ext_adv_report {
- __le16 evt_type;
+struct hci_ev_le_ext_adv_info {
+ __le16 type;
__u8 bdaddr_type;
bdaddr_t bdaddr;
__u8 primary_phy;
@@ -2298,11 +2674,28 @@ struct hci_ev_le_ext_adv_report {
__u8 sid;
__u8 tx_power;
__s8 rssi;
- __le16 interval;
- __u8 direct_addr_type;
+ __le16 interval;
+ __u8 direct_addr_type;
bdaddr_t direct_addr;
- __u8 length;
- __u8 data[0];
+ __u8 length;
+ __u8 data[];
+} __packed;
+
+struct hci_ev_le_ext_adv_report {
+ __u8 num;
+ struct hci_ev_le_ext_adv_info info[];
+} __packed;
+
+#define HCI_EV_LE_PA_SYNC_ESTABLISHED 0x0e
+struct hci_ev_le_pa_sync_established {
+ __u8 status;
+ __le16 handle;
+ __u8 sid;
+ __u8 bdaddr_type;
+ bdaddr_t bdaddr;
+ __u8 phy;
+ __le16 interval;
+ __u8 clock_accuracy;
} __packed;
#define HCI_EV_LE_ENHANCED_CONN_COMPLETE 0x0a
@@ -2334,17 +2727,17 @@ struct hci_evt_le_cis_established {
__le16 handle;
__u8 cig_sync_delay[3];
__u8 cis_sync_delay[3];
- __u8 m_latency[3];
- __u8 s_latency[3];
- __u8 m_phy;
- __u8 s_phy;
+ __u8 c_latency[3];
+ __u8 p_latency[3];
+ __u8 c_phy;
+ __u8 p_phy;
__u8 nse;
- __u8 m_bn;
- __u8 s_bn;
- __u8 m_ft;
- __u8 s_ft;
- __le16 m_mtu;
- __le16 s_mtu;
+ __u8 c_bn;
+ __u8 p_bn;
+ __u8 c_ft;
+ __u8 p_ft;
+ __le16 c_mtu;
+ __le16 p_mtu;
__le16 interval;
} __packed;
@@ -2356,13 +2749,62 @@ struct hci_evt_le_cis_req {
__u8 cis_id;
} __packed;
+#define HCI_EVT_LE_CREATE_BIG_COMPLETE 0x1b
+struct hci_evt_le_create_big_complete {
+ __u8 status;
+ __u8 handle;
+ __u8 sync_delay[3];
+ __u8 transport_delay[3];
+ __u8 phy;
+ __u8 nse;
+ __u8 bn;
+ __u8 pto;
+ __u8 irc;
+ __le16 max_pdu;
+ __le16 interval;
+ __u8 num_bis;
+ __le16 bis_handle[];
+} __packed;
+
+#define HCI_EVT_LE_BIG_SYNC_ESTABILISHED 0x1d
+struct hci_evt_le_big_sync_estabilished {
+ __u8 status;
+ __u8 handle;
+ __u8 latency[3];
+ __u8 nse;
+ __u8 bn;
+ __u8 pto;
+ __u8 irc;
+ __le16 max_pdu;
+ __le16 interval;
+ __u8 num_bis;
+ __le16 bis[];
+} __packed;
+
+#define HCI_EVT_LE_BIG_INFO_ADV_REPORT 0x22
+struct hci_evt_le_big_info_adv_report {
+ __le16 sync_handle;
+ __u8 num_bis;
+ __u8 nse;
+ __le16 iso_interval;
+ __u8 bn;
+ __u8 pto;
+ __u8 irc;
+ __le16 max_pdu;
+ __u8 sdu_interval[3];
+ __le16 max_sdu;
+ __u8 phy;
+ __u8 framing;
+ __u8 encryption;
+} __packed;
+
#define HCI_EV_VENDOR 0xff
/* Internal events generated by Bluetooth stack */
#define HCI_EV_STACK_INTERNAL 0xfd
struct hci_ev_stack_internal {
__u16 type;
- __u8 data[0];
+ __u8 data[];
} __packed;
#define HCI_EV_SI_DEVICE 0x01
@@ -2409,7 +2851,7 @@ struct hci_sco_hdr {
struct hci_iso_hdr {
__le16 handle;
__le16 dlen;
- __u8 data[0];
+ __u8 data[];
} __packed;
/* ISO data packet status flags */
@@ -2465,4 +2907,15 @@ static inline struct hci_sco_hdr *hci_sco_hdr(const struct sk_buff *skb)
#define hci_iso_data_len(h) ((h) & 0x3fff)
#define hci_iso_data_flags(h) ((h) >> 14)
+/* codec transport types */
+#define HCI_TRANSPORT_SCO_ESCO 0x01
+
+/* le24 support */
+static inline void hci_cpu_to_le24(__u32 val, __u8 dst[3])
+{
+ dst[0] = val & 0xff;
+ dst[1] = (val & 0xff00) >> 8;
+ dst[2] = (val & 0xff0000) >> 16;
+}
+
#endif /* __HCI_H */
diff --git a/include/net/bluetooth/hci_core.h b/include/net/bluetooth/hci_core.h
index 89ecf0a80aa1..c54bc71254af 100644
--- a/include/net/bluetooth/hci_core.h
+++ b/include/net/bluetooth/hci_core.h
@@ -25,15 +25,20 @@
#ifndef __HCI_CORE_H
#define __HCI_CORE_H
+#include <linux/idr.h>
#include <linux/leds.h>
#include <linux/rculist.h>
#include <net/bluetooth/hci.h>
+#include <net/bluetooth/hci_sync.h>
#include <net/bluetooth/hci_sock.h>
/* HCI priority */
#define HCI_PRIO_MAX 7
+/* HCI maximum id value */
+#define HCI_MAX_ID 10000
+
/* HCI Core structures */
struct inquiry_data {
bdaddr_t bdaddr;
@@ -86,6 +91,34 @@ struct discovery_state {
u8 (*uuids)[16];
unsigned long scan_start;
unsigned long scan_duration;
+ unsigned long name_resolve_timeout;
+};
+
+#define SUSPEND_NOTIFIER_TIMEOUT msecs_to_jiffies(2000) /* 2 seconds */
+
+enum suspend_tasks {
+ SUSPEND_PAUSE_DISCOVERY,
+ SUSPEND_UNPAUSE_DISCOVERY,
+
+ SUSPEND_PAUSE_ADVERTISING,
+ SUSPEND_UNPAUSE_ADVERTISING,
+
+ SUSPEND_SCAN_DISABLE,
+ SUSPEND_SCAN_ENABLE,
+ SUSPEND_DISCONNECTING,
+
+ SUSPEND_POWERING_DOWN,
+
+ SUSPEND_PREPARE_NOTIFIER,
+
+ SUSPEND_SET_ADV_FILTER,
+ __SUSPEND_NUM_TASKS
+};
+
+enum suspended_state {
+ BT_RUNNING = 0,
+ BT_SUSPEND_DISCONNECT,
+ BT_SUSPEND_CONFIGURE_WAKE,
};
struct hci_conn_hash {
@@ -93,8 +126,9 @@ struct hci_conn_hash {
unsigned int acl_num;
unsigned int amp_num;
unsigned int sco_num;
+ unsigned int iso_num;
unsigned int le_num;
- unsigned int le_num_slave;
+ unsigned int le_num_peripheral;
};
struct bdaddr_list {
@@ -103,6 +137,17 @@ struct bdaddr_list {
u8 bdaddr_type;
};
+struct codec_list {
+ struct list_head list;
+ u8 id;
+ __u16 cid;
+ __u16 vid;
+ u8 transport;
+ u8 num_caps;
+ u32 len;
+ struct hci_codec_caps caps[];
+};
+
struct bdaddr_list_with_irk {
struct list_head list;
bdaddr_t bdaddr;
@@ -111,6 +156,20 @@ struct bdaddr_list_with_irk {
u8 local_irk[16];
};
+/* Bitmask of connection flags */
+enum hci_conn_flags {
+ HCI_CONN_FLAG_REMOTE_WAKEUP = 1,
+ HCI_CONN_FLAG_DEVICE_PRIVACY = 2,
+};
+typedef u8 hci_conn_flags_t;
+
+struct bdaddr_list_with_flags {
+ struct list_head list;
+ bdaddr_t bdaddr;
+ u8 bdaddr_type;
+ hci_conn_flags_t flags;
+};
+
struct bt_uuid {
struct list_head list;
u8 uuid[16];
@@ -176,17 +235,26 @@ struct oob_data {
struct adv_info {
struct list_head list;
- bool pending;
+ bool enabled;
+ bool pending;
+ bool periodic;
+ __u8 mesh;
__u8 instance;
__u32 flags;
__u16 timeout;
__u16 remaining_time;
__u16 duration;
__u16 adv_data_len;
- __u8 adv_data[HCI_MAX_AD_LENGTH];
+ __u8 adv_data[HCI_MAX_EXT_AD_LENGTH];
+ bool adv_data_changed;
__u16 scan_rsp_len;
- __u8 scan_rsp_data[HCI_MAX_AD_LENGTH];
+ __u8 scan_rsp_data[HCI_MAX_EXT_AD_LENGTH];
+ bool scan_rsp_changed;
+ __u16 per_adv_data_len;
+ __u8 per_adv_data[HCI_MAX_PER_AD_LENGTH];
__s8 tx_power;
+ __u32 min_interval;
+ __u32 max_interval;
bdaddr_t random_addr;
bool rpa_expired;
struct delayed_work rpa_expired_cb;
@@ -195,8 +263,65 @@ struct adv_info {
#define HCI_MAX_ADV_INSTANCES 5
#define HCI_DEFAULT_ADV_DURATION 2
+#define HCI_ADV_TX_POWER_NO_PREFERENCE 0x7F
+
+#define DATA_CMP(_d1, _l1, _d2, _l2) \
+ (_l1 == _l2 ? memcmp(_d1, _d2, _l1) : _l1 - _l2)
+
+#define ADV_DATA_CMP(_adv, _data, _len) \
+ DATA_CMP((_adv)->adv_data, (_adv)->adv_data_len, _data, _len)
+
+#define SCAN_RSP_CMP(_adv, _data, _len) \
+ DATA_CMP((_adv)->scan_rsp_data, (_adv)->scan_rsp_len, _data, _len)
+
+struct monitored_device {
+ struct list_head list;
+
+ bdaddr_t bdaddr;
+ __u8 addr_type;
+ __u16 handle;
+ bool notified;
+};
+
+struct adv_pattern {
+ struct list_head list;
+ __u8 ad_type;
+ __u8 offset;
+ __u8 length;
+ __u8 value[HCI_MAX_AD_LENGTH];
+};
+
+struct adv_rssi_thresholds {
+ __s8 low_threshold;
+ __s8 high_threshold;
+ __u16 low_threshold_timeout;
+ __u16 high_threshold_timeout;
+ __u8 sampling_period;
+};
+
+struct adv_monitor {
+ struct list_head patterns;
+ struct adv_rssi_thresholds rssi;
+ __u16 handle;
+
+ enum {
+ ADV_MONITOR_STATE_NOT_REGISTERED,
+ ADV_MONITOR_STATE_REGISTERED,
+ ADV_MONITOR_STATE_OFFLOADED
+ } state;
+};
+
+#define HCI_MIN_ADV_MONITOR_HANDLE 1
+#define HCI_MAX_ADV_MONITOR_NUM_HANDLES 32
+#define HCI_MAX_ADV_MONITOR_NUM_PATTERNS 16
+#define HCI_ADV_MONITOR_EXT_NONE 1
+#define HCI_ADV_MONITOR_EXT_MSFT 2
+
#define HCI_MAX_SHORT_NAME_LENGTH 10
+#define HCI_CONN_HANDLE_UNSET 0xffff
+#define HCI_CONN_HANDLE_MAX 0x0eff
+
/* Min encryption key size to match with SMP */
#define HCI_MIN_ENC_KEY_SIZE 7
@@ -244,10 +369,12 @@ struct hci_dev {
__u8 max_page;
__u8 features[HCI_MAX_PAGES][8];
__u8 le_features[8];
- __u8 le_white_list_size;
+ __u8 le_accept_list_size;
__u8 le_resolv_list_size;
__u8 le_num_of_adv_sets;
__u8 le_states[8];
+ __u8 mesh_ad_types[16];
+ __u8 mesh_send_ref;
__u8 commands[64];
__u8 hci_ver;
__u16 hci_rev;
@@ -256,10 +383,11 @@ struct hci_dev {
__u16 lmp_subver;
__u16 voice_setting;
__u8 num_iac;
- __u8 stored_max_keys;
- __u8 stored_num_keys;
+ __u16 stored_max_keys;
+ __u16 stored_num_keys;
__u8 io_capability;
__s8 inq_tx_power;
+ __u8 err_data_reporting;
__u16 page_scan_interval;
__u16 page_scan_window;
__u8 page_scan_type;
@@ -269,6 +397,14 @@ struct hci_dev {
__u8 le_scan_type;
__u16 le_scan_interval;
__u16 le_scan_window;
+ __u16 le_scan_int_suspend;
+ __u16 le_scan_window_suspend;
+ __u16 le_scan_int_discovery;
+ __u16 le_scan_window_discovery;
+ __u16 le_scan_int_adv_monitor;
+ __u16 le_scan_window_adv_monitor;
+ __u16 le_scan_int_connect;
+ __u16 le_scan_window_connect;
__u16 le_conn_min_interval;
__u16 le_conn_max_interval;
__u16 le_conn_latency;
@@ -286,15 +422,33 @@ struct hci_dev {
__u16 conn_info_max_age;
__u16 auth_payload_timeout;
__u8 min_enc_key_size;
+ __u8 max_enc_key_size;
+ __u8 pairing_opts;
__u8 ssp_debug_mode;
__u8 hw_error_code;
__u32 clock;
+ __u16 advmon_allowlist_duration;
+ __u16 advmon_no_filter_duration;
+ __u8 enable_advmon_interleave_scan;
__u16 devid_source;
__u16 devid_vendor;
__u16 devid_product;
__u16 devid_version;
+ __u8 def_page_scan_type;
+ __u16 def_page_scan_int;
+ __u16 def_page_scan_window;
+ __u8 def_inq_scan_type;
+ __u16 def_inq_scan_int;
+ __u16 def_inq_scan_window;
+ __u16 def_br_lsto;
+ __u16 def_page_timeout;
+ __u16 def_multi_adv_rotation_duration;
+ __u16 def_le_autoconnect_timeout;
+ __s8 min_le_tx_power;
+ __s8 max_le_tx_power;
+
__u16 pkt_type;
__u16 esco_type;
__u16 link_policy;
@@ -327,13 +481,16 @@ struct hci_dev {
unsigned int acl_cnt;
unsigned int sco_cnt;
unsigned int le_cnt;
+ unsigned int iso_cnt;
unsigned int acl_mtu;
unsigned int sco_mtu;
unsigned int le_mtu;
+ unsigned int iso_mtu;
unsigned int acl_pkts;
unsigned int sco_pkts;
unsigned int le_pkts;
+ unsigned int iso_pkts;
__u16 block_len;
__u16 block_mtu;
@@ -353,6 +510,11 @@ struct hci_dev {
struct work_struct power_on;
struct delayed_work power_off;
struct work_struct error_reset;
+ struct work_struct cmd_sync_work;
+ struct list_head cmd_sync_work_list;
+ struct mutex cmd_sync_work_lock;
+ struct work_struct cmd_sync_cancel_work;
+ struct work_struct reenable_adv_work;
__u16 discov_timeout;
struct delayed_work discov_off;
@@ -360,16 +522,12 @@ struct hci_dev {
struct delayed_work service_cache;
struct delayed_work cmd_timer;
+ struct delayed_work ncmd_timer;
struct work_struct rx_work;
struct work_struct cmd_work;
struct work_struct tx_work;
- struct work_struct discov_update;
- struct work_struct bg_scan_update;
- struct work_struct scan_update;
- struct work_struct connectable_update;
- struct work_struct discoverable_update;
struct delayed_work le_scan_disable;
struct delayed_work le_scan_restart;
@@ -378,6 +536,7 @@ struct hci_dev {
struct sk_buff_head cmd_q;
struct sk_buff *sent_cmd;
+ struct sk_buff *recv_event;
struct mutex req_lock;
wait_queue_head_t req_wait_q;
@@ -389,22 +548,39 @@ struct hci_dev {
void *smp_bredr_data;
struct discovery_state discovery;
+
+ int discovery_old_state;
+ bool discovery_paused;
+ int advertising_old_state;
+ bool advertising_paused;
+
+ struct notifier_block suspend_notifier;
+ enum suspended_state suspend_state_next;
+ enum suspended_state suspend_state;
+ bool scanning_paused;
+ bool suspended;
+ u8 wake_reason;
+ bdaddr_t wake_addr;
+ u8 wake_addr_type;
+
struct hci_conn_hash conn_hash;
+ struct list_head mesh_pending;
struct list_head mgmt_pending;
- struct list_head blacklist;
- struct list_head whitelist;
+ struct list_head reject_list;
+ struct list_head accept_list;
struct list_head uuids;
struct list_head link_keys;
struct list_head long_term_keys;
struct list_head identity_resolving_keys;
struct list_head remote_oob_data;
- struct list_head le_white_list;
+ struct list_head le_accept_list;
struct list_head le_resolv_list;
struct list_head le_conn_params;
struct list_head pend_le_conns;
struct list_head pend_le_reports;
struct list_head blocked_keys;
+ struct list_head local_codecs;
struct hci_dev_stats stat;
@@ -419,12 +595,15 @@ struct hci_dev {
struct rfkill *rfkill;
DECLARE_BITMAP(dev_flags, __HCI_NUM_FLAGS);
+ hci_conn_flags_t conn_flags;
__s8 adv_tx_power;
- __u8 adv_data[HCI_MAX_AD_LENGTH];
+ __u8 adv_data[HCI_MAX_EXT_AD_LENGTH];
__u8 adv_data_len;
- __u8 scan_rsp_data[HCI_MAX_AD_LENGTH];
+ __u8 scan_rsp_data[HCI_MAX_EXT_AD_LENGTH];
__u8 scan_rsp_data_len;
+ __u8 per_adv_data[HCI_MAX_PER_AD_LENGTH];
+ __u8 per_adv_data_len;
struct list_head adv_instances;
unsigned int adv_instance_cnt;
@@ -432,15 +611,42 @@ struct hci_dev {
__u16 adv_instance_timeout;
struct delayed_work adv_instance_expire;
+ struct idr adv_monitors_idr;
+ unsigned int adv_monitors_cnt;
+
__u8 irk[16];
__u32 rpa_timeout;
struct delayed_work rpa_expired;
bdaddr_t rpa;
+ struct delayed_work mesh_send_done;
+
+ enum {
+ INTERLEAVE_SCAN_NONE,
+ INTERLEAVE_SCAN_NO_FILTER,
+ INTERLEAVE_SCAN_ALLOWLIST
+ } interleave_scan_state;
+
+ struct delayed_work interleave_scan;
+
+ struct list_head monitored_devices;
+ bool advmon_pend_notify;
+
#if IS_ENABLED(CONFIG_BT_LEDS)
struct led_trigger *power_led;
#endif
+#if IS_ENABLED(CONFIG_BT_MSFTEXT)
+ __u16 msft_opcode;
+ void *msft_data;
+ bool msft_curve_validity;
+#endif
+
+#if IS_ENABLED(CONFIG_BT_AOSPEXT)
+ bool aosp_capable;
+ bool aosp_quality_report;
+#endif
+
int (*open)(struct hci_dev *hdev);
int (*close)(struct hci_dev *hdev);
int (*flush)(struct hci_dev *hdev);
@@ -453,10 +659,23 @@ struct hci_dev {
int (*set_diag)(struct hci_dev *hdev, bool enable);
int (*set_bdaddr)(struct hci_dev *hdev, const bdaddr_t *bdaddr);
void (*cmd_timeout)(struct hci_dev *hdev);
+ bool (*wakeup)(struct hci_dev *hdev);
+ int (*set_quality_report)(struct hci_dev *hdev, bool enable);
+ int (*get_data_path_id)(struct hci_dev *hdev, __u8 *data_path);
+ int (*get_codec_config_data)(struct hci_dev *hdev, __u8 type,
+ struct bt_codec *codec, __u8 *vnd_len,
+ __u8 **vnd_data);
};
#define HCI_PHY_HANDLE(handle) (handle & 0xff)
+enum conn_reasons {
+ CONN_REASON_PAIR_DEVICE,
+ CONN_REASON_L2CAP_CHAN,
+ CONN_REASON_SCO_CONNECT,
+ CONN_REASON_ISO_CONNECT,
+};
+
struct hci_conn {
struct list_head list;
@@ -470,7 +689,9 @@ struct hci_conn {
__u8 init_addr_type;
bdaddr_t resp_addr;
__u8 resp_addr_type;
+ __u8 adv_instance;
__u16 handle;
+ __u16 sync_handle;
__u16 state;
__u8 mode;
__u8 type;
@@ -501,13 +722,18 @@ struct hci_conn {
__u16 le_supv_timeout;
__u8 le_adv_data[HCI_MAX_AD_LENGTH];
__u8 le_adv_data_len;
+ __u8 le_per_adv_data[HCI_MAX_PER_AD_LENGTH];
+ __u8 le_per_adv_data_len;
__u8 le_tx_phy;
__u8 le_rx_phy;
__s8 rssi;
__s8 tx_power;
__s8 max_tx_power;
+ struct bt_iso_qos iso_qos;
unsigned long flags;
+ enum conn_reasons conn_reason;
+
__u32 clock;
__u16 clock_accuracy;
@@ -534,13 +760,17 @@ struct hci_conn {
struct hci_dev *hdev;
void *l2cap_data;
void *sco_data;
+ void *iso_data;
struct amp_mgr *amp_mgr;
struct hci_conn *link;
+ struct bt_codec codec;
void (*connect_cfm_cb) (struct hci_conn *conn, u8 status);
void (*security_cfm_cb) (struct hci_conn *conn, u8 status);
void (*disconn_cfm_cb) (struct hci_conn *conn, u8 reason);
+
+ void (*cleanup)(struct hci_conn *conn);
};
struct hci_chan {
@@ -550,6 +780,7 @@ struct hci_chan {
struct sk_buff_head data_q;
unsigned int sent;
__u8 state;
+ bool amp;
};
struct hci_conn_params {
@@ -575,6 +806,8 @@ struct hci_conn_params {
struct hci_conn *conn;
bool explicit_connect;
+ hci_conn_flags_t flags;
+ u8 privacy_mode;
};
extern struct list_head hci_dev_list;
@@ -594,9 +827,17 @@ extern struct mutex hci_cb_list_lock;
do { \
hci_dev_clear_flag(hdev, HCI_LE_SCAN); \
hci_dev_clear_flag(hdev, HCI_LE_ADV); \
+ hci_dev_clear_flag(hdev, HCI_LL_RPA_RESOLUTION);\
hci_dev_clear_flag(hdev, HCI_PERIODIC_INQ); \
+ hci_dev_clear_flag(hdev, HCI_QUALITY_REPORT); \
} while (0)
+#define hci_dev_le_state_simultaneous(hdev) \
+ (test_bit(HCI_QUIRK_VALID_LE_STATES, &hdev->quirks) && \
+ (hdev->le_states[4] & 0x08) && /* Central */ \
+ (hdev->le_states[4] & 0x40) && /* Peripheral */ \
+ (hdev->le_states[3] & 0x10)) /* Simultaneous */
+
/* ----- HCI interface to upper protocols ----- */
int l2cap_connect_ind(struct hci_dev *hdev, bdaddr_t *bdaddr);
int l2cap_disconn_ind(struct hci_conn *hcon);
@@ -617,6 +858,21 @@ static inline void sco_recv_scodata(struct hci_conn *hcon, struct sk_buff *skb)
}
#endif
+#if IS_ENABLED(CONFIG_BT_LE)
+int iso_connect_ind(struct hci_dev *hdev, bdaddr_t *bdaddr, __u8 *flags);
+void iso_recv(struct hci_conn *hcon, struct sk_buff *skb, u16 flags);
+#else
+static inline int iso_connect_ind(struct hci_dev *hdev, bdaddr_t *bdaddr,
+ __u8 *flags)
+{
+ return 0;
+}
+static inline void iso_recv(struct hci_conn *hcon, struct sk_buff *skb,
+ u16 flags)
+{
+}
+#endif
+
/* ----- Inquiry cache ----- */
#define INQUIRY_CACHE_AGE_MAX (HZ*30) /* 30 seconds */
#define INQUIRY_ENTRY_AGE_MAX (HZ*60) /* 60 seconds */
@@ -701,6 +957,7 @@ enum {
HCI_CONN_NEW_LINK_KEY,
HCI_CONN_SCANNING,
HCI_CONN_AUTH_FAILURE,
+ HCI_CONN_PER_ADV,
};
static inline bool hci_conn_ssp_enabled(struct hci_conn *conn)
@@ -731,12 +988,15 @@ static inline void hci_conn_hash_add(struct hci_dev *hdev, struct hci_conn *c)
case LE_LINK:
h->le_num++;
if (c->role == HCI_ROLE_SLAVE)
- h->le_num_slave++;
+ h->le_num_peripheral++;
break;
case SCO_LINK:
case ESCO_LINK:
h->sco_num++;
break;
+ case ISO_LINK:
+ h->iso_num++;
+ break;
}
}
@@ -757,12 +1017,15 @@ static inline void hci_conn_hash_del(struct hci_dev *hdev, struct hci_conn *c)
case LE_LINK:
h->le_num--;
if (c->role == HCI_ROLE_SLAVE)
- h->le_num_slave--;
+ h->le_num_peripheral--;
break;
case SCO_LINK:
case ESCO_LINK:
h->sco_num--;
break;
+ case ISO_LINK:
+ h->iso_num--;
+ break;
}
}
@@ -779,6 +1042,8 @@ static inline unsigned int hci_conn_num(struct hci_dev *hdev, __u8 type)
case SCO_LINK:
case ESCO_LINK:
return h->sco_num;
+ case ISO_LINK:
+ return h->iso_num;
default:
return 0;
}
@@ -788,7 +1053,7 @@ static inline unsigned int hci_conn_count(struct hci_dev *hdev)
{
struct hci_conn_hash *c = &hdev->conn_hash;
- return c->acl_num + c->amp_num + c->sco_num + c->le_num;
+ return c->acl_num + c->amp_num + c->sco_num + c->le_num + c->iso_num;
}
static inline __u8 hci_conn_lookup_type(struct hci_dev *hdev, __u16 handle)
@@ -811,6 +1076,29 @@ static inline __u8 hci_conn_lookup_type(struct hci_dev *hdev, __u16 handle)
return type;
}
+static inline struct hci_conn *hci_conn_hash_lookup_bis(struct hci_dev *hdev,
+ bdaddr_t *ba,
+ __u8 big, __u8 bis)
+{
+ struct hci_conn_hash *h = &hdev->conn_hash;
+ struct hci_conn *c;
+
+ rcu_read_lock();
+
+ list_for_each_entry_rcu(c, &h->list, list) {
+ if (bacmp(&c->dst, ba) || c->type != ISO_LINK)
+ continue;
+
+ if (c->iso_qos.big == big && c->iso_qos.bis == bis) {
+ rcu_read_unlock();
+ return c;
+ }
+ }
+ rcu_read_unlock();
+
+ return NULL;
+}
+
static inline struct hci_conn *hci_conn_hash_lookup_handle(struct hci_dev *hdev,
__u16 handle)
{
@@ -874,6 +1162,76 @@ static inline struct hci_conn *hci_conn_hash_lookup_le(struct hci_dev *hdev,
return NULL;
}
+static inline struct hci_conn *hci_conn_hash_lookup_cis(struct hci_dev *hdev,
+ bdaddr_t *ba,
+ __u8 ba_type)
+{
+ struct hci_conn_hash *h = &hdev->conn_hash;
+ struct hci_conn *c;
+
+ rcu_read_lock();
+
+ list_for_each_entry_rcu(c, &h->list, list) {
+ if (c->type != ISO_LINK)
+ continue;
+
+ if (ba_type == c->dst_type && !bacmp(&c->dst, ba)) {
+ rcu_read_unlock();
+ return c;
+ }
+ }
+
+ rcu_read_unlock();
+
+ return NULL;
+}
+
+static inline struct hci_conn *hci_conn_hash_lookup_cig(struct hci_dev *hdev,
+ __u8 handle)
+{
+ struct hci_conn_hash *h = &hdev->conn_hash;
+ struct hci_conn *c;
+
+ rcu_read_lock();
+
+ list_for_each_entry_rcu(c, &h->list, list) {
+ if (c->type != ISO_LINK)
+ continue;
+
+ if (handle == c->iso_qos.cig) {
+ rcu_read_unlock();
+ return c;
+ }
+ }
+
+ rcu_read_unlock();
+
+ return NULL;
+}
+
+static inline struct hci_conn *hci_conn_hash_lookup_big(struct hci_dev *hdev,
+ __u8 handle)
+{
+ struct hci_conn_hash *h = &hdev->conn_hash;
+ struct hci_conn *c;
+
+ rcu_read_lock();
+
+ list_for_each_entry_rcu(c, &h->list, list) {
+ if (bacmp(&c->dst, BDADDR_ANY) || c->type != ISO_LINK)
+ continue;
+
+ if (handle == c->iso_qos.big) {
+ rcu_read_unlock();
+ return c;
+ }
+ }
+
+ rcu_read_unlock();
+
+ return NULL;
+}
+
static inline struct hci_conn *hci_conn_hash_lookup_state(struct hci_dev *hdev,
__u8 type, __u16 state)
{
@@ -894,6 +1252,27 @@ static inline struct hci_conn *hci_conn_hash_lookup_state(struct hci_dev *hdev,
return NULL;
}
+typedef void (*hci_conn_func_t)(struct hci_conn *conn, void *data);
+static inline void hci_conn_hash_list_state(struct hci_dev *hdev,
+ hci_conn_func_t func, __u8 type,
+ __u16 state, void *data)
+{
+ struct hci_conn_hash *h = &hdev->conn_hash;
+ struct hci_conn *c;
+
+ if (!func)
+ return;
+
+ rcu_read_lock();
+
+ list_for_each_entry_rcu(c, &h->list, list) {
+ if (c->type == type && c->state == state)
+ func(c, data);
+ }
+
+ rcu_read_unlock();
+}
+
static inline struct hci_conn *hci_lookup_le_connect(struct hci_dev *hdev)
{
struct hci_conn_hash *h = &hdev->conn_hash;
@@ -917,6 +1296,8 @@ static inline struct hci_conn *hci_lookup_le_connect(struct hci_dev *hdev)
int hci_disconnect(struct hci_conn *conn, __u8 reason);
bool hci_setup_sync(struct hci_conn *conn, __u16 handle);
void hci_sco_setup(struct hci_conn *conn, __u8 status);
+bool hci_iso_setup_path(struct hci_conn *conn);
+int hci_le_create_cis(struct hci_conn *conn);
struct hci_conn *hci_conn_add(struct hci_dev *hdev, int type, bdaddr_t *dst,
u8 role);
@@ -931,14 +1312,27 @@ struct hci_chan *hci_chan_lookup_handle(struct hci_dev *hdev, __u16 handle);
struct hci_conn *hci_connect_le_scan(struct hci_dev *hdev, bdaddr_t *dst,
u8 dst_type, u8 sec_level,
- u16 conn_timeout);
+ u16 conn_timeout,
+ enum conn_reasons conn_reason);
struct hci_conn *hci_connect_le(struct hci_dev *hdev, bdaddr_t *dst,
- u8 dst_type, u8 sec_level, u16 conn_timeout,
- u8 role, bdaddr_t *direct_rpa);
+ u8 dst_type, bool dst_resolved, u8 sec_level,
+ u16 conn_timeout, u8 role);
struct hci_conn *hci_connect_acl(struct hci_dev *hdev, bdaddr_t *dst,
- u8 sec_level, u8 auth_type);
+ u8 sec_level, u8 auth_type,
+ enum conn_reasons conn_reason);
struct hci_conn *hci_connect_sco(struct hci_dev *hdev, int type, bdaddr_t *dst,
- __u16 setting);
+ __u16 setting, struct bt_codec *codec);
+struct hci_conn *hci_bind_cis(struct hci_dev *hdev, bdaddr_t *dst,
+ __u8 dst_type, struct bt_iso_qos *qos);
+struct hci_conn *hci_connect_cis(struct hci_dev *hdev, bdaddr_t *dst,
+ __u8 dst_type, struct bt_iso_qos *qos);
+struct hci_conn *hci_connect_bis(struct hci_dev *hdev, bdaddr_t *dst,
+ __u8 dst_type, struct bt_iso_qos *qos,
+ __u8 data_len, __u8 *data);
+int hci_pa_create_sync(struct hci_dev *hdev, bdaddr_t *dst, __u8 dst_type,
+ __u8 sid);
+int hci_le_big_create_sync(struct hci_dev *hdev, struct bt_iso_qos *qos,
+ __u16 sync_handle, __u8 num_bis, __u8 bis[]);
int hci_conn_check_link_mode(struct hci_conn *conn);
int hci_conn_check_secure(struct hci_conn *conn, __u8 sec_level);
int hci_conn_security(struct hci_conn *conn, __u8 sec_level, __u8 auth_type,
@@ -947,7 +1341,7 @@ int hci_conn_switch_role(struct hci_conn *conn, __u8 role);
void hci_conn_enter_active_mode(struct hci_conn *conn, __u8 force_active);
-void hci_le_conn_failed(struct hci_conn *conn, u8 status);
+void hci_conn_failed(struct hci_conn *conn, u8 status);
/*
* hci_conn_get() and hci_conn_put() are used to control the life-time of an
@@ -1058,13 +1452,27 @@ static inline void hci_set_drvdata(struct hci_dev *hdev, void *data)
dev_set_drvdata(&hdev->dev, data);
}
+static inline void *hci_get_priv(struct hci_dev *hdev)
+{
+ return (char *)hdev + sizeof(*hdev);
+}
+
struct hci_dev *hci_dev_get(int index);
struct hci_dev *hci_get_route(bdaddr_t *dst, bdaddr_t *src, u8 src_type);
-struct hci_dev *hci_alloc_dev(void);
+struct hci_dev *hci_alloc_dev_priv(int sizeof_priv);
+
+static inline struct hci_dev *hci_alloc_dev(void)
+{
+ return hci_alloc_dev_priv(0);
+}
+
void hci_free_dev(struct hci_dev *hdev);
int hci_register_dev(struct hci_dev *hdev);
void hci_unregister_dev(struct hci_dev *hdev);
+void hci_release_dev(struct hci_dev *hdev);
+int hci_register_suspend_notifier(struct hci_dev *hdev);
+int hci_unregister_suspend_notifier(struct hci_dev *hdev);
int hci_suspend_dev(struct hci_dev *hdev);
int hci_resume_dev(struct hci_dev *hdev);
int hci_reset_dev(struct hci_dev *hdev);
@@ -1072,6 +1480,21 @@ int hci_recv_frame(struct hci_dev *hdev, struct sk_buff *skb);
int hci_recv_diag(struct hci_dev *hdev, struct sk_buff *skb);
__printf(2, 3) void hci_set_hw_info(struct hci_dev *hdev, const char *fmt, ...);
__printf(2, 3) void hci_set_fw_info(struct hci_dev *hdev, const char *fmt, ...);
+
+static inline void hci_set_msft_opcode(struct hci_dev *hdev, __u16 opcode)
+{
+#if IS_ENABLED(CONFIG_BT_MSFTEXT)
+ hdev->msft_opcode = opcode;
+#endif
+}
+
+static inline void hci_set_aosp_capable(struct hci_dev *hdev)
+{
+#if IS_ENABLED(CONFIG_BT_AOSPEXT)
+ hdev->aosp_capable = true;
+#endif
+}
+
int hci_dev_open(__u16 dev);
int hci_dev_close(__u16 dev);
int hci_dev_do_close(struct hci_dev *hdev);
@@ -1090,12 +1513,19 @@ struct bdaddr_list *hci_bdaddr_list_lookup(struct list_head *list,
struct bdaddr_list_with_irk *hci_bdaddr_list_lookup_with_irk(
struct list_head *list, bdaddr_t *bdaddr,
u8 type);
+struct bdaddr_list_with_flags *
+hci_bdaddr_list_lookup_with_flags(struct list_head *list, bdaddr_t *bdaddr,
+ u8 type);
int hci_bdaddr_list_add(struct list_head *list, bdaddr_t *bdaddr, u8 type);
int hci_bdaddr_list_add_with_irk(struct list_head *list, bdaddr_t *bdaddr,
- u8 type, u8 *peer_irk, u8 *local_irk);
+ u8 type, u8 *peer_irk, u8 *local_irk);
+int hci_bdaddr_list_add_with_flags(struct list_head *list, bdaddr_t *bdaddr,
+ u8 type, u32 flags);
int hci_bdaddr_list_del(struct list_head *list, bdaddr_t *bdaddr, u8 type);
int hci_bdaddr_list_del_with_irk(struct list_head *list, bdaddr_t *bdaddr,
- u8 type);
+ u8 type);
+int hci_bdaddr_list_del_with_flags(struct list_head *list, bdaddr_t *bdaddr,
+ u8 type);
void hci_bdaddr_list_clear(struct list_head *list);
struct hci_conn_params *hci_conn_params_lookup(struct hci_dev *hdev,
@@ -1149,12 +1579,30 @@ int hci_remove_remote_oob_data(struct hci_dev *hdev, bdaddr_t *bdaddr,
void hci_adv_instances_clear(struct hci_dev *hdev);
struct adv_info *hci_find_adv_instance(struct hci_dev *hdev, u8 instance);
struct adv_info *hci_get_next_instance(struct hci_dev *hdev, u8 instance);
-int hci_add_adv_instance(struct hci_dev *hdev, u8 instance, u32 flags,
+struct adv_info *hci_add_adv_instance(struct hci_dev *hdev, u8 instance,
+ u32 flags, u16 adv_data_len, u8 *adv_data,
+ u16 scan_rsp_len, u8 *scan_rsp_data,
+ u16 timeout, u16 duration, s8 tx_power,
+ u32 min_interval, u32 max_interval,
+ u8 mesh_handle);
+struct adv_info *hci_add_per_instance(struct hci_dev *hdev, u8 instance,
+ u32 flags, u8 data_len, u8 *data,
+ u32 min_interval, u32 max_interval);
+int hci_set_adv_instance_data(struct hci_dev *hdev, u8 instance,
u16 adv_data_len, u8 *adv_data,
- u16 scan_rsp_len, u8 *scan_rsp_data,
- u16 timeout, u16 duration);
+ u16 scan_rsp_len, u8 *scan_rsp_data);
int hci_remove_adv_instance(struct hci_dev *hdev, u8 instance);
void hci_adv_instances_set_rpa_expired(struct hci_dev *hdev, bool rpa_expired);
+u32 hci_adv_instance_flags(struct hci_dev *hdev, u8 instance);
+bool hci_adv_instance_is_scannable(struct hci_dev *hdev, u8 instance);
+
+void hci_adv_monitors_clear(struct hci_dev *hdev);
+void hci_free_adv_monitor(struct hci_dev *hdev, struct adv_monitor *monitor);
+int hci_add_adv_monitor(struct hci_dev *hdev, struct adv_monitor *monitor);
+int hci_remove_single_adv_monitor(struct hci_dev *hdev, u16 handle);
+int hci_remove_all_adv_monitor(struct hci_dev *hdev);
+bool hci_is_adv_monitoring(struct hci_dev *hdev);
+int hci_get_adv_monitor_offload_ext(struct hci_dev *hdev);
void hci_event_packet(struct hci_dev *hdev, struct sk_buff *skb);
@@ -1177,6 +1625,7 @@ void hci_conn_del_sysfs(struct hci_conn *conn);
#define lmp_le_capable(dev) ((dev)->features[0][4] & LMP_LE)
#define lmp_sniffsubr_capable(dev) ((dev)->features[0][5] & LMP_SNIFF_SUBR)
#define lmp_pause_enc_capable(dev) ((dev)->features[0][5] & LMP_PAUSE_ENC)
+#define lmp_esco_2m_capable(dev) ((dev)->features[0][5] & LMP_EDR_ESCO_2M)
#define lmp_ext_inq_capable(dev) ((dev)->features[0][6] & LMP_EXT_INQ)
#define lmp_le_br_capable(dev) (!!((dev)->features[0][6] & LMP_SIMUL_LE_BR))
#define lmp_ssp_capable(dev) ((dev)->features[0][6] & LMP_SIMPLE_PAIR)
@@ -1191,8 +1640,8 @@ void hci_conn_del_sysfs(struct hci_conn *conn);
#define lmp_edr_5slot_capable(dev) ((dev)->features[0][5] & LMP_EDR_5SLOT)
/* ----- Extended LMP capabilities ----- */
-#define lmp_csb_master_capable(dev) ((dev)->features[2][0] & LMP_CSB_MASTER)
-#define lmp_csb_slave_capable(dev) ((dev)->features[2][0] & LMP_CSB_SLAVE)
+#define lmp_cpb_central_capable(dev) ((dev)->features[2][0] & LMP_CPB_CENTRAL)
+#define lmp_cpb_peripheral_capable(dev) ((dev)->features[2][0] & LMP_CPB_PERIPHERAL)
#define lmp_sync_train_capable(dev) ((dev)->features[2][0] & LMP_SYNC_TRAIN)
#define lmp_sync_scan_capable(dev) ((dev)->features[2][0] & LMP_SYNC_SCAN)
#define lmp_sc_capable(dev) ((dev)->features[2][1] & LMP_SC)
@@ -1208,6 +1657,10 @@ void hci_conn_del_sysfs(struct hci_conn *conn);
!hci_dev_test_flag(dev, HCI_AUTO_OFF))
#define bredr_sc_enabled(dev) (lmp_sc_capable(dev) && \
hci_dev_test_flag(dev, HCI_SC_ENABLED))
+#define rpa_valid(dev) (bacmp(&dev->rpa, BDADDR_ANY) && \
+ !hci_dev_test_flag(dev, HCI_RPA_EXPIRED))
+#define adv_rpa_valid(adv) (bacmp(&adv->random_addr, BDADDR_ANY) && \
+ !adv->rpa_expired)
#define scan_1m(dev) (((dev)->le_tx_def_phys & HCI_LE_SET_PHY_1M) || \
((dev)->le_rx_def_phys & HCI_LE_SET_PHY_1M))
@@ -1218,6 +1671,22 @@ void hci_conn_del_sysfs(struct hci_conn *conn);
#define scan_coded(dev) (((dev)->le_tx_def_phys & HCI_LE_SET_PHY_CODED) || \
((dev)->le_rx_def_phys & HCI_LE_SET_PHY_CODED))
+#define ll_privacy_capable(dev) ((dev)->le_features[0] & HCI_LE_LL_PRIVACY)
+
+/* Use LL Privacy based address resolution if supported */
+#define use_ll_privacy(dev) (ll_privacy_capable(dev) && \
+ hci_dev_test_flag(dev, HCI_ENABLE_LL_PRIVACY))
+
+#define privacy_mode_capable(dev) (use_ll_privacy(dev) && \
+ (hdev->commands[39] & 0x04))
+
+/* Use enhanced synchronous connection if command is supported and its quirk
+ * has not been set.
+ */
+#define enhanced_sync_conn_capable(dev) \
+ (((dev)->commands[29] & 0x08) && \
+ !test_bit(HCI_QUIRK_BROKEN_ENHANCED_SETUP_SYNC_CONN, &(dev)->quirks))
+
/* Use ext scanning if set ext scan param and ext scan enable is supported */
#define use_ext_scan(dev) (((dev)->commands[37] & 0x20) && \
((dev)->commands[37] & 0x40))
@@ -1227,6 +1696,27 @@ void hci_conn_del_sysfs(struct hci_conn *conn);
/* Extended advertising support */
#define ext_adv_capable(dev) (((dev)->le_features[1] & HCI_LE_EXT_ADV))
+/* BLUETOOTH CORE SPECIFICATION Version 5.3 | Vol 4, Part E page 1789:
+ *
+ * C24: Mandatory if the LE Controller supports Connection State and either
+ * LE Feature (LL Privacy) or LE Feature (Extended Advertising) is supported
+ */
+#define use_enhanced_conn_complete(dev) (ll_privacy_capable(dev) || \
+ ext_adv_capable(dev))
+
+/* Periodic advertising support */
+#define per_adv_capable(dev) (((dev)->le_features[1] & HCI_LE_PERIODIC_ADV))
+
+/* CIS Master/Slave and BIS support */
+#define iso_capable(dev) (cis_capable(dev) || bis_capable(dev))
+#define cis_capable(dev) \
+ (cis_central_capable(dev) || cis_peripheral_capable(dev))
+#define cis_central_capable(dev) \
+ ((dev)->le_features[3] & HCI_LE_CIS_CENTRAL)
+#define cis_peripheral_capable(dev) \
+ ((dev)->le_features[3] & HCI_LE_CIS_PERIPHERAL)
+#define bis_capable(dev) ((dev)->le_features[3] & HCI_LE_ISO_BROADCASTER)
+
/* ----- HCI protocols ----- */
#define HCI_PROTO_DEFER 0x01
@@ -1241,6 +1731,9 @@ static inline int hci_proto_connect_ind(struct hci_dev *hdev, bdaddr_t *bdaddr,
case ESCO_LINK:
return sco_connect_ind(hdev, bdaddr, flags);
+ case ISO_LINK:
+ return iso_connect_ind(hdev, bdaddr, flags);
+
default:
BT_ERR("unknown link type %d", type);
return -EINVAL;
@@ -1320,16 +1813,34 @@ static inline void hci_auth_cfm(struct hci_conn *conn, __u8 status)
conn->security_cfm_cb(conn, status);
}
-static inline void hci_encrypt_cfm(struct hci_conn *conn, __u8 status,
- __u8 encrypt)
+static inline void hci_encrypt_cfm(struct hci_conn *conn, __u8 status)
{
struct hci_cb *cb;
+ __u8 encrypt;
+
+ if (conn->state == BT_CONFIG) {
+ if (!status)
+ conn->state = BT_CONNECTED;
+
+ hci_connect_cfm(conn, status);
+ hci_conn_drop(conn);
+ return;
+ }
- if (conn->sec_level == BT_SECURITY_SDP)
- conn->sec_level = BT_SECURITY_LOW;
+ if (!test_bit(HCI_CONN_ENCRYPT, &conn->flags))
+ encrypt = 0x00;
+ else if (test_bit(HCI_CONN_AES_CCM, &conn->flags))
+ encrypt = 0x02;
+ else
+ encrypt = 0x01;
- if (conn->pending_sec_level > conn->sec_level)
- conn->sec_level = conn->pending_sec_level;
+ if (!status) {
+ if (conn->sec_level == BT_SECURITY_SDP)
+ conn->sec_level = BT_SECURITY_LOW;
+
+ if (conn->pending_sec_level > conn->sec_level)
+ conn->sec_level = conn->pending_sec_level;
+ }
mutex_lock(&hci_cb_list_lock);
list_for_each_entry(cb, &hci_cb_list, list) {
@@ -1367,43 +1878,6 @@ static inline void hci_role_switch_cfm(struct hci_conn *conn, __u8 status,
mutex_unlock(&hci_cb_list_lock);
}
-static inline void *eir_get_data(u8 *eir, size_t eir_len, u8 type,
- size_t *data_len)
-{
- size_t parsed = 0;
-
- if (eir_len < 2)
- return NULL;
-
- while (parsed < eir_len - 1) {
- u8 field_len = eir[0];
-
- if (field_len == 0)
- break;
-
- parsed += field_len + 1;
-
- if (parsed > eir_len)
- break;
-
- if (eir[1] != type) {
- eir += field_len + 1;
- continue;
- }
-
- /* Zero length data */
- if (field_len == 1)
- return NULL;
-
- if (data_len)
- *data_len = field_len - 1;
-
- return &eir[2];
- }
-
- return NULL;
-}
-
static inline bool hci_bdaddr_is_rpa(bdaddr_t *bdaddr, u8 addr_type)
{
if (addr_type != ADDR_LE_DEV_RANDOM)
@@ -1460,10 +1934,6 @@ static inline int hci_check_conn_params(u16 min, u16 max, u16 latency,
int hci_register_cb(struct hci_cb *hcb);
int hci_unregister_cb(struct hci_cb *hcb);
-struct sk_buff *__hci_cmd_sync(struct hci_dev *hdev, u16 opcode, u32 plen,
- const void *param, u32 timeout);
-struct sk_buff *__hci_cmd_sync_ev(struct hci_dev *hdev, u16 opcode, u32 plen,
- const void *param, u8 event, u32 timeout);
int __hci_cmd_send(struct hci_dev *hdev, u16 opcode, u32 plen,
const void *param);
@@ -1471,11 +1941,12 @@ int hci_send_cmd(struct hci_dev *hdev, __u16 opcode, __u32 plen,
const void *param);
void hci_send_acl(struct hci_chan *chan, struct sk_buff *skb, __u16 flags);
void hci_send_sco(struct hci_conn *conn, struct sk_buff *skb);
+void hci_send_iso(struct hci_conn *conn, struct sk_buff *skb);
void *hci_sent_cmd_data(struct hci_dev *hdev, __u16 opcode);
+void *hci_recv_event_data(struct hci_dev *hdev, __u8 event);
-struct sk_buff *hci_cmd_sync(struct hci_dev *hdev, u16 opcode, u32 plen,
- const void *param, u32 timeout);
+u32 hci_conn_get_phy(struct hci_conn *conn);
/* ----- HCI Sockets ----- */
void hci_send_to_sock(struct hci_dev *hdev, struct sk_buff *skb);
@@ -1492,6 +1963,7 @@ void hci_sock_dev_event(struct hci_dev *hdev, int event);
#define HCI_MGMT_NO_HDEV BIT(1)
#define HCI_MGMT_UNTRUSTED BIT(2)
#define HCI_MGMT_UNCONFIGURED BIT(3)
+#define HCI_MGMT_HDEV_OPTIONAL BIT(4)
struct hci_mgmt_handler {
int (*func) (struct sock *sk, struct hci_dev *hdev, void *data,
@@ -1529,8 +2001,15 @@ void hci_mgmt_chan_unregister(struct hci_mgmt_chan *c);
#define DISCOV_INTERLEAVED_INQUIRY_LEN 0x04
#define DISCOV_BREDR_INQUIRY_LEN 0x08
#define DISCOV_LE_RESTART_DELAY msecs_to_jiffies(200) /* msec */
-#define DISCOV_LE_FAST_ADV_INT_MIN 100 /* msec */
-#define DISCOV_LE_FAST_ADV_INT_MAX 150 /* msec */
+#define DISCOV_LE_FAST_ADV_INT_MIN 0x00A0 /* 100 msec */
+#define DISCOV_LE_FAST_ADV_INT_MAX 0x00F0 /* 150 msec */
+#define DISCOV_LE_PER_ADV_INT_MIN 0x00A0 /* 200 msec */
+#define DISCOV_LE_PER_ADV_INT_MAX 0x00A0 /* 200 msec */
+#define DISCOV_LE_ADV_MESH_MIN 0x00A0 /* 100 msec */
+#define DISCOV_LE_ADV_MESH_MAX 0x00A0 /* 100 msec */
+#define INTERVAL_TO_MS(x) (((x) * 10) / 0x10)
+
+#define NAME_RESOLVE_DURATION msecs_to_jiffies(10240) /* 10.24 sec */
void mgmt_fill_version_info(void *ver);
int mgmt_new_settings(struct hci_dev *hdev);
@@ -1542,7 +2021,7 @@ void __mgmt_power_off(struct hci_dev *hdev);
void mgmt_new_link_key(struct hci_dev *hdev, struct link_key *key,
bool persistent);
void mgmt_device_connected(struct hci_dev *hdev, struct hci_conn *conn,
- u32 flags, u8 *name, u8 name_len);
+ u8 *name, u8 name_len);
void mgmt_device_disconnected(struct hci_dev *hdev, bdaddr_t *bdaddr,
u8 link_type, u8 addr_type, u8 reason,
bool mgmt_connected);
@@ -1573,7 +2052,6 @@ int mgmt_user_passkey_notify(struct hci_dev *hdev, bdaddr_t *bdaddr,
u8 entered);
void mgmt_auth_failed(struct hci_conn *conn, u8 status);
void mgmt_auth_enable_complete(struct hci_dev *hdev, u8 status);
-void mgmt_ssp_enable_complete(struct hci_dev *hdev, u8 enable, u8 status);
void mgmt_set_class_of_dev_complete(struct hci_dev *hdev, u8 *dev_class,
u8 status);
void mgmt_set_local_name_complete(struct hci_dev *hdev, u8 *name, u8 status);
@@ -1581,10 +2059,14 @@ void mgmt_start_discovery_complete(struct hci_dev *hdev, u8 status);
void mgmt_stop_discovery_complete(struct hci_dev *hdev, u8 status);
void mgmt_device_found(struct hci_dev *hdev, bdaddr_t *bdaddr, u8 link_type,
u8 addr_type, u8 *dev_class, s8 rssi, u32 flags,
- u8 *eir, u16 eir_len, u8 *scan_rsp, u8 scan_rsp_len);
+ u8 *eir, u16 eir_len, u8 *scan_rsp, u8 scan_rsp_len,
+ u64 instant);
void mgmt_remote_name(struct hci_dev *hdev, bdaddr_t *bdaddr, u8 link_type,
u8 addr_type, s8 rssi, u8 *name, u8 name_len);
void mgmt_discovering(struct hci_dev *hdev, u8 discovering);
+void mgmt_suspending(struct hci_dev *hdev, u8 state);
+void mgmt_resuming(struct hci_dev *hdev, u8 reason, bdaddr_t *bdaddr,
+ u8 addr_type);
bool mgmt_powering_down(struct hci_dev *hdev);
void mgmt_new_ltk(struct hci_dev *hdev, struct smp_ltk *key, bool persistent);
void mgmt_new_irk(struct hci_dev *hdev, struct smp_irk *irk, bool persistent);
@@ -1595,15 +2077,17 @@ void mgmt_new_conn_param(struct hci_dev *hdev, bdaddr_t *bdaddr,
u16 max_interval, u16 latency, u16 timeout);
void mgmt_smp_complete(struct hci_conn *conn, bool complete);
bool mgmt_get_connectable(struct hci_dev *hdev);
-void mgmt_set_connectable_complete(struct hci_dev *hdev, u8 status);
-void mgmt_set_discoverable_complete(struct hci_dev *hdev, u8 status);
u8 mgmt_get_adv_discov_flags(struct hci_dev *hdev);
void mgmt_advertising_added(struct sock *sk, struct hci_dev *hdev,
u8 instance);
void mgmt_advertising_removed(struct sock *sk, struct hci_dev *hdev,
u8 instance);
+void mgmt_adv_monitor_removed(struct hci_dev *hdev, u16 handle);
int mgmt_phy_configuration_changed(struct hci_dev *hdev, struct sock *skip);
+void mgmt_adv_monitor_device_lost(struct hci_dev *hdev, u16 handle,
+ bdaddr_t *bdaddr, u8 addr_type);
+int hci_abort_conn(struct hci_conn *conn, u8 reason);
u8 hci_le_conn_update(struct hci_conn *conn, u16 min, u16 max, u16 latency,
u16 to_multiplier);
void hci_le_start_enc(struct hci_conn *conn, __le16 ediv, __le64 rand,
@@ -1616,4 +2100,9 @@ void hci_copy_identity_address(struct hci_dev *hdev, bdaddr_t *bdaddr,
#define SCO_AIRMODE_CVSD 0x0000
#define SCO_AIRMODE_TRANSP 0x0003
+#define LOCAL_CODEC_ACL_MASK BIT(0)
+#define LOCAL_CODEC_SCO_MASK BIT(1)
+
+#define TRANSPORT_TYPE_MAX 0x04
+
#endif /* __HCI_CORE_H */
diff --git a/include/net/bluetooth/hci_sock.h b/include/net/bluetooth/hci_sock.h
index 8e9138acdae1..9949870f7d78 100644
--- a/include/net/bluetooth/hci_sock.h
+++ b/include/net/bluetooth/hci_sock.h
@@ -31,8 +31,8 @@
#define HCI_TIME_STAMP 3
/* CMSG flags */
-#define HCI_CMSG_DIR 0x0001
-#define HCI_CMSG_TSTAMP 0x0002
+#define HCI_CMSG_DIR 0x01
+#define HCI_CMSG_TSTAMP 0x02
struct sockaddr_hci {
sa_family_t hci_family;
@@ -144,19 +144,19 @@ struct hci_dev_req {
struct hci_dev_list_req {
__u16 dev_num;
- struct hci_dev_req dev_req[0]; /* hci_dev_req structures */
+ struct hci_dev_req dev_req[]; /* hci_dev_req structures */
};
struct hci_conn_list_req {
__u16 dev_id;
__u16 conn_num;
- struct hci_conn_info conn_info[0];
+ struct hci_conn_info conn_info[];
};
struct hci_conn_info_req {
bdaddr_t bdaddr;
__u8 type;
- struct hci_conn_info conn_info[0];
+ struct hci_conn_info conn_info[];
};
struct hci_auth_info_req {
diff --git a/include/net/bluetooth/hci_sync.h b/include/net/bluetooth/hci_sync.h
new file mode 100644
index 000000000000..17f5a4c32f36
--- /dev/null
+++ b/include/net/bluetooth/hci_sync.h
@@ -0,0 +1,131 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * BlueZ - Bluetooth protocol stack for Linux
+ *
+ * Copyright (C) 2021 Intel Corporation
+ */
+
+typedef int (*hci_cmd_sync_work_func_t)(struct hci_dev *hdev, void *data);
+typedef void (*hci_cmd_sync_work_destroy_t)(struct hci_dev *hdev, void *data,
+ int err);
+
+struct hci_cmd_sync_work_entry {
+ struct list_head list;
+ hci_cmd_sync_work_func_t func;
+ void *data;
+ hci_cmd_sync_work_destroy_t destroy;
+};
+
+struct adv_info;
+/* Function with sync suffix shall not be called with hdev->lock held as they
+ * wait the command to complete and in the meantime an event could be received
+ * which could attempt to acquire hdev->lock causing a deadlock.
+ */
+struct sk_buff *__hci_cmd_sync(struct hci_dev *hdev, u16 opcode, u32 plen,
+ const void *param, u32 timeout);
+struct sk_buff *hci_cmd_sync(struct hci_dev *hdev, u16 opcode, u32 plen,
+ const void *param, u32 timeout);
+struct sk_buff *__hci_cmd_sync_ev(struct hci_dev *hdev, u16 opcode, u32 plen,
+ const void *param, u8 event, u32 timeout);
+struct sk_buff *__hci_cmd_sync_sk(struct hci_dev *hdev, u16 opcode, u32 plen,
+ const void *param, u8 event, u32 timeout,
+ struct sock *sk);
+int __hci_cmd_sync_status(struct hci_dev *hdev, u16 opcode, u32 plen,
+ const void *param, u32 timeout);
+int __hci_cmd_sync_status_sk(struct hci_dev *hdev, u16 opcode, u32 plen,
+ const void *param, u8 event, u32 timeout,
+ struct sock *sk);
+
+void hci_cmd_sync_init(struct hci_dev *hdev);
+void hci_cmd_sync_clear(struct hci_dev *hdev);
+void hci_cmd_sync_cancel(struct hci_dev *hdev, int err);
+void __hci_cmd_sync_cancel(struct hci_dev *hdev, int err);
+
+int hci_cmd_sync_queue(struct hci_dev *hdev, hci_cmd_sync_work_func_t func,
+ void *data, hci_cmd_sync_work_destroy_t destroy);
+
+int hci_update_eir_sync(struct hci_dev *hdev);
+int hci_update_class_sync(struct hci_dev *hdev);
+
+int hci_update_eir_sync(struct hci_dev *hdev);
+int hci_update_class_sync(struct hci_dev *hdev);
+int hci_update_name_sync(struct hci_dev *hdev);
+int hci_write_ssp_mode_sync(struct hci_dev *hdev, u8 mode);
+
+int hci_get_random_address(struct hci_dev *hdev, bool require_privacy,
+ bool use_rpa, struct adv_info *adv_instance,
+ u8 *own_addr_type, bdaddr_t *rand_addr);
+
+int hci_update_random_address_sync(struct hci_dev *hdev, bool require_privacy,
+ bool rpa, u8 *own_addr_type);
+
+int hci_update_scan_rsp_data_sync(struct hci_dev *hdev, u8 instance);
+int hci_update_adv_data_sync(struct hci_dev *hdev, u8 instance);
+int hci_update_adv_data(struct hci_dev *hdev, u8 instance);
+int hci_schedule_adv_instance_sync(struct hci_dev *hdev, u8 instance,
+ bool force);
+
+int hci_setup_ext_adv_instance_sync(struct hci_dev *hdev, u8 instance);
+int hci_start_ext_adv_sync(struct hci_dev *hdev, u8 instance);
+int hci_enable_ext_advertising_sync(struct hci_dev *hdev, u8 instance);
+int hci_enable_advertising_sync(struct hci_dev *hdev);
+int hci_enable_advertising(struct hci_dev *hdev);
+
+int hci_start_per_adv_sync(struct hci_dev *hdev, u8 instance, u8 data_len,
+ u8 *data, u32 flags, u16 min_interval,
+ u16 max_interval, u16 sync_interval);
+
+int hci_remove_advertising_sync(struct hci_dev *hdev, struct sock *sk,
+ u8 instance, bool force);
+int hci_disable_advertising_sync(struct hci_dev *hdev);
+int hci_clear_adv_instance_sync(struct hci_dev *hdev, struct sock *sk,
+ u8 instance, bool force);
+int hci_update_passive_scan_sync(struct hci_dev *hdev);
+int hci_update_passive_scan(struct hci_dev *hdev);
+int hci_read_rssi_sync(struct hci_dev *hdev, __le16 handle);
+int hci_read_tx_power_sync(struct hci_dev *hdev, __le16 handle, u8 type);
+int hci_write_sc_support_sync(struct hci_dev *hdev, u8 val);
+int hci_read_clock_sync(struct hci_dev *hdev, struct hci_cp_read_clock *cp);
+
+int hci_write_fast_connectable_sync(struct hci_dev *hdev, bool enable);
+int hci_update_scan_sync(struct hci_dev *hdev);
+int hci_update_scan(struct hci_dev *hdev);
+
+int hci_write_le_host_supported_sync(struct hci_dev *hdev, u8 le, u8 simul);
+int hci_remove_ext_adv_instance_sync(struct hci_dev *hdev, u8 instance,
+ struct sock *sk);
+int hci_remove_ext_adv_instance(struct hci_dev *hdev, u8 instance);
+struct sk_buff *hci_read_local_oob_data_sync(struct hci_dev *hdev, bool ext,
+ struct sock *sk);
+
+int hci_reset_sync(struct hci_dev *hdev);
+int hci_dev_open_sync(struct hci_dev *hdev);
+int hci_dev_close_sync(struct hci_dev *hdev);
+
+int hci_powered_update_sync(struct hci_dev *hdev);
+int hci_set_powered_sync(struct hci_dev *hdev, u8 val);
+
+int hci_update_discoverable_sync(struct hci_dev *hdev);
+int hci_update_discoverable(struct hci_dev *hdev);
+
+int hci_update_connectable_sync(struct hci_dev *hdev);
+
+int hci_start_discovery_sync(struct hci_dev *hdev);
+int hci_stop_discovery_sync(struct hci_dev *hdev);
+
+int hci_suspend_sync(struct hci_dev *hdev);
+int hci_resume_sync(struct hci_dev *hdev);
+
+struct hci_conn;
+
+int hci_abort_conn_sync(struct hci_dev *hdev, struct hci_conn *conn, u8 reason);
+
+int hci_le_create_conn_sync(struct hci_dev *hdev, struct hci_conn *conn);
+
+int hci_le_remove_cig_sync(struct hci_dev *hdev, u8 handle);
+
+int hci_le_terminate_big_sync(struct hci_dev *hdev, u8 handle, u8 reason);
+
+int hci_le_big_terminate_sync(struct hci_dev *hdev, u8 handle);
+
+int hci_le_pa_terminate_sync(struct hci_dev *hdev, u16 handle);
diff --git a/include/net/bluetooth/iso.h b/include/net/bluetooth/iso.h
new file mode 100644
index 000000000000..3f4fe8b78e1b
--- /dev/null
+++ b/include/net/bluetooth/iso.h
@@ -0,0 +1,32 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * BlueZ - Bluetooth protocol stack for Linux
+ *
+ * Copyright (C) 2022 Intel Corporation
+ */
+
+#ifndef __ISO_H
+#define __ISO_H
+
+/* ISO defaults */
+#define ISO_DEFAULT_MTU 251
+#define ISO_MAX_NUM_BIS 0x1f
+
+/* ISO socket broadcast address */
+struct sockaddr_iso_bc {
+ bdaddr_t bc_bdaddr;
+ __u8 bc_bdaddr_type;
+ __u8 bc_sid;
+ __u8 bc_num_bis;
+ __u8 bc_bis[ISO_MAX_NUM_BIS];
+};
+
+/* ISO socket address */
+struct sockaddr_iso {
+ sa_family_t iso_family;
+ bdaddr_t iso_bdaddr;
+ __u8 iso_bdaddr_type;
+ struct sockaddr_iso_bc iso_bc[];
+};
+
+#endif /* __ISO_H */
diff --git a/include/net/bluetooth/l2cap.h b/include/net/bluetooth/l2cap.h
index 093aedebdf0c..2f766e3437ce 100644
--- a/include/net/bluetooth/l2cap.h
+++ b/include/net/bluetooth/l2cap.h
@@ -47,6 +47,7 @@
#define L2CAP_DEFAULT_ACC_LAT 0xFFFFFFFF
#define L2CAP_BREDR_MAX_PAYLOAD 1019 /* 3-DH5 packet */
#define L2CAP_LE_MIN_MTU 23
+#define L2CAP_ECRED_CONN_SCID_MAX 5
#define L2CAP_DISC_TIMEOUT msecs_to_jiffies(100)
#define L2CAP_DISC_REJ_TIMEOUT msecs_to_jiffies(5000)
@@ -119,6 +120,10 @@ struct l2cap_conninfo {
#define L2CAP_LE_CONN_REQ 0x14
#define L2CAP_LE_CONN_RSP 0x15
#define L2CAP_LE_CREDITS 0x16
+#define L2CAP_ECRED_CONN_REQ 0x17
+#define L2CAP_ECRED_CONN_RSP 0x18
+#define L2CAP_ECRED_RECONF_REQ 0x19
+#define L2CAP_ECRED_RECONF_RSP 0x1a
/* L2CAP extended feature mask */
#define L2CAP_FEAT_FLOWCTL 0x00000001
@@ -202,6 +207,7 @@ struct l2cap_hdr {
__le16 len;
__le16 cid;
} __packed;
+#define L2CAP_LEN_SIZE 2
#define L2CAP_HDR_SIZE 4
#define L2CAP_ENH_HDR_SIZE 6
#define L2CAP_EXT_HDR_SIZE 8
@@ -290,6 +296,8 @@ struct l2cap_conn_rsp {
#define L2CAP_CR_LE_ENCRYPTION 0x0008
#define L2CAP_CR_LE_INVALID_SCID 0x0009
#define L2CAP_CR_LE_SCID_IN_USE 0X000A
+#define L2CAP_CR_LE_UNACCEPT_PARAMS 0X000B
+#define L2CAP_CR_LE_INVALID_PARAMS 0X000C
/* connect/create channel status */
#define L2CAP_CS_NO_INFO 0x0000
@@ -299,14 +307,14 @@ struct l2cap_conn_rsp {
struct l2cap_conf_req {
__le16 dcid;
__le16 flags;
- __u8 data[0];
+ __u8 data[];
} __packed;
struct l2cap_conf_rsp {
__le16 scid;
__le16 flags;
__le16 result;
- __u8 data[0];
+ __u8 data[];
} __packed;
#define L2CAP_CONF_SUCCESS 0x0000
@@ -322,7 +330,7 @@ struct l2cap_conf_rsp {
struct l2cap_conf_opt {
__u8 type;
__u8 len;
- __u8 val[0];
+ __u8 val[];
} __packed;
#define L2CAP_CONF_OPT_SIZE 2
@@ -359,6 +367,7 @@ struct l2cap_conf_rfc {
* ever be used in the BR/EDR configuration phase.
*/
#define L2CAP_MODE_LE_FLOWCTL 0x80
+#define L2CAP_MODE_EXT_FLOWCTL 0x81
struct l2cap_conf_efs {
__u8 id;
@@ -392,7 +401,7 @@ struct l2cap_info_req {
struct l2cap_info_rsp {
__le16 type;
__le16 result;
- __u8 data[0];
+ __u8 data[];
} __packed;
struct l2cap_create_chan_req {
@@ -483,6 +492,40 @@ struct l2cap_le_credits {
__le16 credits;
} __packed;
+#define L2CAP_ECRED_MIN_MTU 64
+#define L2CAP_ECRED_MIN_MPS 64
+#define L2CAP_ECRED_MAX_CID 5
+
+struct l2cap_ecred_conn_req {
+ __le16 psm;
+ __le16 mtu;
+ __le16 mps;
+ __le16 credits;
+ __le16 scid[];
+} __packed;
+
+struct l2cap_ecred_conn_rsp {
+ __le16 mtu;
+ __le16 mps;
+ __le16 credits;
+ __le16 result;
+ __le16 dcid[];
+};
+
+struct l2cap_ecred_reconf_req {
+ __le16 mtu;
+ __le16 mps;
+ __le16 scid[];
+} __packed;
+
+#define L2CAP_RECONF_SUCCESS 0x0000
+#define L2CAP_RECONF_INVALID_MTU 0x0001
+#define L2CAP_RECONF_INVALID_MPS 0x0002
+
+struct l2cap_ecred_reconf_rsp {
+ __le16 result;
+} __packed;
+
/* ----- L2CAP channels and connections ----- */
struct l2cap_seq_list {
__u16 head;
@@ -620,9 +663,12 @@ struct l2cap_ops {
void (*suspend) (struct l2cap_chan *chan);
void (*set_shutdown) (struct l2cap_chan *chan);
long (*get_sndtimeo) (struct l2cap_chan *chan);
+ struct pid *(*get_peer_pid) (struct l2cap_chan *chan);
struct sk_buff *(*alloc_skb) (struct l2cap_chan *chan,
unsigned long hdr_len,
unsigned long len, int nb);
+ int (*filter) (struct l2cap_chan * chan,
+ struct sk_buff *skb);
};
struct l2cap_conn {
@@ -724,6 +770,7 @@ enum {
FLAG_EFS_ENABLE,
FLAG_DEFER_SETUP,
FLAG_LE_CONN_REQ_SENT,
+ FLAG_ECRED_CONN_REQ_SENT,
FLAG_PENDING_SECURITY,
FLAG_HOLD_HCI_CONN,
};
@@ -800,6 +847,7 @@ enum {
};
void l2cap_chan_hold(struct l2cap_chan *c);
+struct l2cap_chan *l2cap_chan_hold_unless_zero(struct l2cap_chan *c);
void l2cap_chan_put(struct l2cap_chan *c);
static inline void l2cap_chan_lock(struct l2cap_chan *chan)
@@ -917,12 +965,14 @@ static inline long l2cap_chan_no_get_sndtimeo(struct l2cap_chan *chan)
}
extern bool disable_ertm;
+extern bool enable_ecred;
int l2cap_init_sockets(void);
void l2cap_cleanup_sockets(void);
bool l2cap_is_socket(struct socket *sock);
void __l2cap_le_connect_rsp_defer(struct l2cap_chan *chan);
+void __l2cap_ecred_conn_rsp_defer(struct l2cap_chan *chan);
void __l2cap_connect_rsp_defer(struct l2cap_chan *chan);
int l2cap_add_psm(struct l2cap_chan *chan, bdaddr_t *src, __le16 psm);
@@ -932,6 +982,7 @@ struct l2cap_chan *l2cap_chan_create(void);
void l2cap_chan_close(struct l2cap_chan *chan, int reason);
int l2cap_chan_connect(struct l2cap_chan *chan, __le16 psm, u16 cid,
bdaddr_t *dst, u8 dst_type);
+int l2cap_chan_reconfigure(struct l2cap_chan *chan, __u16 mtu);
int l2cap_chan_send(struct l2cap_chan *chan, struct msghdr *msg, size_t len);
void l2cap_chan_busy(struct l2cap_chan *chan, int busy);
int l2cap_chan_check_security(struct l2cap_chan *chan, bool initiator);
@@ -939,6 +990,9 @@ void l2cap_chan_set_defaults(struct l2cap_chan *chan);
int l2cap_ertm_init(struct l2cap_chan *chan);
void l2cap_chan_add(struct l2cap_conn *conn, struct l2cap_chan *chan);
void __l2cap_chan_add(struct l2cap_conn *conn, struct l2cap_chan *chan);
+typedef void (*l2cap_chan_func_t)(struct l2cap_chan *chan, void *data);
+void l2cap_chan_list(struct l2cap_conn *conn, l2cap_chan_func_t func,
+ void *data);
void l2cap_chan_del(struct l2cap_chan *chan, int err);
void l2cap_send_conn_req(struct l2cap_chan *chan);
void l2cap_move_start(struct l2cap_chan *chan);
diff --git a/include/net/bluetooth/mgmt.h b/include/net/bluetooth/mgmt.h
index a90666af05bd..743f6f59dff8 100644
--- a/include/net/bluetooth/mgmt.h
+++ b/include/net/bluetooth/mgmt.h
@@ -52,6 +52,12 @@ struct mgmt_hdr {
__le16 len;
} __packed;
+struct mgmt_tlv {
+ __le16 type;
+ __u8 length;
+ __u8 value[];
+} __packed;
+
struct mgmt_addr_info {
bdaddr_t bdaddr;
__u8 type;
@@ -70,14 +76,14 @@ struct mgmt_rp_read_version {
struct mgmt_rp_read_commands {
__le16 num_commands;
__le16 num_events;
- __le16 opcodes[0];
+ __le16 opcodes[];
} __packed;
#define MGMT_OP_READ_INDEX_LIST 0x0003
#define MGMT_READ_INDEX_LIST_SIZE 0
struct mgmt_rp_read_index_list {
__le16 num_controllers;
- __le16 index[0];
+ __le16 index[];
} __packed;
/* Reserve one extra byte for names in management messages so that they
@@ -101,7 +107,8 @@ struct mgmt_rp_read_index_list {
#define MGMT_SETTING_PRIVACY 0x00002000
#define MGMT_SETTING_CONFIGURATION 0x00004000
#define MGMT_SETTING_STATIC_ADDRESS 0x00008000
-#define MGMT_SETTING_PHY_CONFIGURATION 0x00010000
+#define MGMT_SETTING_PHY_CONFIGURATION 0x00010000
+#define MGMT_SETTING_WIDEBAND_SPEECH 0x00020000
#define MGMT_OP_READ_INFO 0x0004
#define MGMT_READ_INFO_SIZE 0
@@ -182,7 +189,7 @@ struct mgmt_link_key_info {
struct mgmt_cp_load_link_keys {
__u8 debug_keys;
__le16 key_count;
- struct mgmt_link_key_info keys[0];
+ struct mgmt_link_key_info keys[];
} __packed;
#define MGMT_LOAD_LINK_KEYS_SIZE 3
@@ -195,7 +202,7 @@ struct mgmt_cp_load_link_keys {
struct mgmt_ltk_info {
struct mgmt_addr_info addr;
__u8 type;
- __u8 master;
+ __u8 initiator;
__u8 enc_size;
__le16 ediv;
__le64 rand;
@@ -205,7 +212,7 @@ struct mgmt_ltk_info {
#define MGMT_OP_LOAD_LONG_TERM_KEYS 0x0013
struct mgmt_cp_load_long_term_keys {
__le16 key_count;
- struct mgmt_ltk_info keys[0];
+ struct mgmt_ltk_info keys[];
} __packed;
#define MGMT_LOAD_LONG_TERM_KEYS_SIZE 2
@@ -222,7 +229,7 @@ struct mgmt_rp_disconnect {
#define MGMT_GET_CONNECTIONS_SIZE 0
struct mgmt_rp_get_connections {
__le16 conn_count;
- struct mgmt_addr_info addr[0];
+ struct mgmt_addr_info addr[];
} __packed;
#define MGMT_OP_PIN_CODE_REPLY 0x0016
@@ -412,7 +419,7 @@ struct mgmt_irk_info {
#define MGMT_OP_LOAD_IRKS 0x0030
struct mgmt_cp_load_irks {
__le16 irk_count;
- struct mgmt_irk_info irks[0];
+ struct mgmt_irk_info irks[];
} __packed;
#define MGMT_LOAD_IRKS_SIZE 2
@@ -464,7 +471,7 @@ struct mgmt_conn_param {
#define MGMT_OP_LOAD_CONN_PARAM 0x0035
struct mgmt_cp_load_conn_param {
__le16 param_count;
- struct mgmt_conn_param params[0];
+ struct mgmt_conn_param params[];
} __packed;
#define MGMT_LOAD_CONN_PARAM_SIZE 2
@@ -472,7 +479,7 @@ struct mgmt_cp_load_conn_param {
#define MGMT_READ_UNCONF_INDEX_LIST_SIZE 0
struct mgmt_rp_read_unconf_index_list {
__le16 num_controllers;
- __le16 index[0];
+ __le16 index[];
} __packed;
#define MGMT_OPTION_EXTERNAL_CONFIG 0x00000001
@@ -503,7 +510,7 @@ struct mgmt_cp_start_service_discovery {
__u8 type;
__s8 rssi;
__le16 uuid_count;
- __u8 uuids[0][16];
+ __u8 uuids[][16];
} __packed;
#define MGMT_START_SERVICE_DISCOVERY_SIZE 4
@@ -515,7 +522,7 @@ struct mgmt_cp_read_local_oob_ext_data {
struct mgmt_rp_read_local_oob_ext_data {
__u8 type;
__le16 eir_len;
- __u8 eir[0];
+ __u8 eir[];
} __packed;
#define MGMT_OP_READ_EXT_INDEX_LIST 0x003C
@@ -526,7 +533,7 @@ struct mgmt_rp_read_ext_index_list {
__le16 index;
__u8 type;
__u8 bus;
- } entry[0];
+ } entry[];
} __packed;
#define MGMT_OP_READ_ADV_FEATURES 0x0003D
@@ -537,7 +544,7 @@ struct mgmt_rp_read_adv_features {
__u8 max_scan_rsp_len;
__u8 max_instances;
__u8 num_instances;
- __u8 instance[0];
+ __u8 instance[];
} __packed;
#define MGMT_OP_ADD_ADVERTISING 0x003E
@@ -548,7 +555,7 @@ struct mgmt_cp_add_advertising {
__le16 timeout;
__u8 adv_data_len;
__u8 scan_rsp_len;
- __u8 data[0];
+ __u8 data[];
} __packed;
#define MGMT_ADD_ADVERTISING_SIZE 11
struct mgmt_rp_add_advertising {
@@ -565,6 +572,13 @@ struct mgmt_rp_add_advertising {
#define MGMT_ADV_FLAG_SEC_1M BIT(7)
#define MGMT_ADV_FLAG_SEC_2M BIT(8)
#define MGMT_ADV_FLAG_SEC_CODED BIT(9)
+#define MGMT_ADV_FLAG_CAN_SET_TX_POWER BIT(10)
+#define MGMT_ADV_FLAG_HW_OFFLOAD BIT(11)
+#define MGMT_ADV_PARAM_DURATION BIT(12)
+#define MGMT_ADV_PARAM_TIMEOUT BIT(13)
+#define MGMT_ADV_PARAM_INTERVALS BIT(14)
+#define MGMT_ADV_PARAM_TX_POWER BIT(15)
+#define MGMT_ADV_PARAM_SCAN_RSP BIT(16)
#define MGMT_ADV_FLAG_SEC_MASK (MGMT_ADV_FLAG_SEC_1M | MGMT_ADV_FLAG_SEC_2M | \
MGMT_ADV_FLAG_SEC_CODED)
@@ -602,7 +616,7 @@ struct mgmt_rp_read_ext_info {
__le32 supported_settings;
__le32 current_settings;
__le16 eir_len;
- __u8 eir[0];
+ __u8 eir[];
} __packed;
#define MGMT_OP_SET_APPEARANCE 0x0043
@@ -612,7 +626,7 @@ struct mgmt_cp_set_appearance {
#define MGMT_SET_APPEARANCE_SIZE 2
#define MGMT_OP_GET_PHY_CONFIGURATION 0x0044
-struct mgmt_rp_get_phy_confguration {
+struct mgmt_rp_get_phy_configuration {
__le32 supported_phys;
__le32 configurable_phys;
__le32 selected_phys;
@@ -649,7 +663,7 @@ struct mgmt_rp_get_phy_confguration {
MGMT_PHY_LE_CODED_RX)
#define MGMT_OP_SET_PHY_CONFIGURATION 0x0045
-struct mgmt_cp_set_phy_confguration {
+struct mgmt_cp_set_phy_configuration {
__le32 selected_phys;
} __packed;
#define MGMT_SET_PHY_CONFIGURATION_SIZE 4
@@ -667,15 +681,204 @@ struct mgmt_blocked_key_info {
struct mgmt_cp_set_blocked_keys {
__le16 key_count;
- struct mgmt_blocked_key_info keys[0];
+ struct mgmt_blocked_key_info keys[];
} __packed;
#define MGMT_OP_SET_BLOCKED_KEYS_SIZE 2
+#define MGMT_OP_SET_WIDEBAND_SPEECH 0x0047
+
+#define MGMT_CAP_SEC_FLAGS 0x01
+#define MGMT_CAP_MAX_ENC_KEY_SIZE 0x02
+#define MGMT_CAP_SMP_MAX_ENC_KEY_SIZE 0x03
+#define MGMT_CAP_LE_TX_PWR 0x04
+
+#define MGMT_OP_READ_CONTROLLER_CAP 0x0048
+#define MGMT_READ_CONTROLLER_CAP_SIZE 0
+struct mgmt_rp_read_controller_cap {
+ __le16 cap_len;
+ __u8 cap[];
+} __packed;
+
+#define MGMT_OP_READ_EXP_FEATURES_INFO 0x0049
+#define MGMT_READ_EXP_FEATURES_INFO_SIZE 0
+struct mgmt_rp_read_exp_features_info {
+ __le16 feature_count;
+ struct {
+ __u8 uuid[16];
+ __le32 flags;
+ } features[];
+} __packed;
+
+#define MGMT_OP_SET_EXP_FEATURE 0x004a
+struct mgmt_cp_set_exp_feature {
+ __u8 uuid[16];
+ __u8 param[];
+} __packed;
+#define MGMT_SET_EXP_FEATURE_SIZE 16
+struct mgmt_rp_set_exp_feature {
+ __u8 uuid[16];
+ __le32 flags;
+} __packed;
+
+#define MGMT_OP_READ_DEF_SYSTEM_CONFIG 0x004b
+#define MGMT_READ_DEF_SYSTEM_CONFIG_SIZE 0
+
+#define MGMT_OP_SET_DEF_SYSTEM_CONFIG 0x004c
+#define MGMT_SET_DEF_SYSTEM_CONFIG_SIZE 0
+
+#define MGMT_OP_READ_DEF_RUNTIME_CONFIG 0x004d
+#define MGMT_READ_DEF_RUNTIME_CONFIG_SIZE 0
+
+#define MGMT_OP_SET_DEF_RUNTIME_CONFIG 0x004e
+#define MGMT_SET_DEF_RUNTIME_CONFIG_SIZE 0
+
+#define MGMT_OP_GET_DEVICE_FLAGS 0x004F
+#define MGMT_GET_DEVICE_FLAGS_SIZE 7
+struct mgmt_cp_get_device_flags {
+ struct mgmt_addr_info addr;
+} __packed;
+struct mgmt_rp_get_device_flags {
+ struct mgmt_addr_info addr;
+ __le32 supported_flags;
+ __le32 current_flags;
+} __packed;
+
+#define MGMT_OP_SET_DEVICE_FLAGS 0x0050
+#define MGMT_SET_DEVICE_FLAGS_SIZE 11
+struct mgmt_cp_set_device_flags {
+ struct mgmt_addr_info addr;
+ __le32 current_flags;
+} __packed;
+struct mgmt_rp_set_device_flags {
+ struct mgmt_addr_info addr;
+} __packed;
+
+#define MGMT_ADV_MONITOR_FEATURE_MASK_OR_PATTERNS BIT(0)
+
+#define MGMT_OP_READ_ADV_MONITOR_FEATURES 0x0051
+#define MGMT_READ_ADV_MONITOR_FEATURES_SIZE 0
+struct mgmt_rp_read_adv_monitor_features {
+ __le32 supported_features;
+ __le32 enabled_features;
+ __le16 max_num_handles;
+ __u8 max_num_patterns;
+ __le16 num_handles;
+ __le16 handles[];
+} __packed;
+
+struct mgmt_adv_pattern {
+ __u8 ad_type;
+ __u8 offset;
+ __u8 length;
+ __u8 value[31];
+} __packed;
+
+#define MGMT_OP_ADD_ADV_PATTERNS_MONITOR 0x0052
+struct mgmt_cp_add_adv_patterns_monitor {
+ __u8 pattern_count;
+ struct mgmt_adv_pattern patterns[];
+} __packed;
+#define MGMT_ADD_ADV_PATTERNS_MONITOR_SIZE 1
+struct mgmt_rp_add_adv_patterns_monitor {
+ __le16 monitor_handle;
+} __packed;
+
+#define MGMT_OP_REMOVE_ADV_MONITOR 0x0053
+struct mgmt_cp_remove_adv_monitor {
+ __le16 monitor_handle;
+} __packed;
+#define MGMT_REMOVE_ADV_MONITOR_SIZE 2
+struct mgmt_rp_remove_adv_monitor {
+ __le16 monitor_handle;
+} __packed;
+
+#define MGMT_OP_ADD_EXT_ADV_PARAMS 0x0054
+struct mgmt_cp_add_ext_adv_params {
+ __u8 instance;
+ __le32 flags;
+ __le16 duration;
+ __le16 timeout;
+ __le32 min_interval;
+ __le32 max_interval;
+ __s8 tx_power;
+} __packed;
+#define MGMT_ADD_EXT_ADV_PARAMS_MIN_SIZE 18
+struct mgmt_rp_add_ext_adv_params {
+ __u8 instance;
+ __s8 tx_power;
+ __u8 max_adv_data_len;
+ __u8 max_scan_rsp_len;
+} __packed;
+
+#define MGMT_OP_ADD_EXT_ADV_DATA 0x0055
+struct mgmt_cp_add_ext_adv_data {
+ __u8 instance;
+ __u8 adv_data_len;
+ __u8 scan_rsp_len;
+ __u8 data[];
+} __packed;
+#define MGMT_ADD_EXT_ADV_DATA_SIZE 3
+struct mgmt_rp_add_ext_adv_data {
+ __u8 instance;
+} __packed;
+
+struct mgmt_adv_rssi_thresholds {
+ __s8 high_threshold;
+ __le16 high_threshold_timeout;
+ __s8 low_threshold;
+ __le16 low_threshold_timeout;
+ __u8 sampling_period;
+} __packed;
+
+#define MGMT_OP_ADD_ADV_PATTERNS_MONITOR_RSSI 0x0056
+struct mgmt_cp_add_adv_patterns_monitor_rssi {
+ struct mgmt_adv_rssi_thresholds rssi;
+ __u8 pattern_count;
+ struct mgmt_adv_pattern patterns[];
+} __packed;
+#define MGMT_ADD_ADV_PATTERNS_MONITOR_RSSI_SIZE 8
+#define MGMT_OP_SET_MESH_RECEIVER 0x0057
+struct mgmt_cp_set_mesh {
+ __u8 enable;
+ __le16 window;
+ __le16 period;
+ __u8 num_ad_types;
+ __u8 ad_types[];
+} __packed;
+#define MGMT_SET_MESH_RECEIVER_SIZE 6
+
+#define MGMT_OP_MESH_READ_FEATURES 0x0058
+#define MGMT_MESH_READ_FEATURES_SIZE 0
+#define MESH_HANDLES_MAX 3
+struct mgmt_rp_mesh_read_features {
+ __le16 index;
+ __u8 max_handles;
+ __u8 used_handles;
+ __u8 handles[MESH_HANDLES_MAX];
+} __packed;
+
+#define MGMT_OP_MESH_SEND 0x0059
+struct mgmt_cp_mesh_send {
+ struct mgmt_addr_info addr;
+ __le64 instant;
+ __le16 delay;
+ __u8 cnt;
+ __u8 adv_data_len;
+ __u8 adv_data[];
+} __packed;
+#define MGMT_MESH_SEND_SIZE 19
+
+#define MGMT_OP_MESH_SEND_CANCEL 0x005A
+struct mgmt_cp_mesh_send_cancel {
+ __u8 handle;
+} __packed;
+#define MGMT_MESH_SEND_CANCEL_SIZE 1
+
#define MGMT_EV_CMD_COMPLETE 0x0001
struct mgmt_ev_cmd_complete {
__le16 opcode;
__u8 status;
- __u8 data[0];
+ __u8 data[];
} __packed;
#define MGMT_EV_CMD_STATUS 0x0002
@@ -723,7 +926,7 @@ struct mgmt_ev_device_connected {
struct mgmt_addr_info addr;
__le32 flags;
__le16 eir_len;
- __u8 eir[0];
+ __u8 eir[];
} __packed;
#define MGMT_DEV_DISCONN_UNKNOWN 0x00
@@ -731,6 +934,7 @@ struct mgmt_ev_device_connected {
#define MGMT_DEV_DISCONN_LOCAL_HOST 0x02
#define MGMT_DEV_DISCONN_REMOTE 0x03
#define MGMT_DEV_DISCONN_AUTH_FAILURE 0x04
+#define MGMT_DEV_DISCONN_LOCAL_HOST_SUSPEND 0x05
#define MGMT_EV_DEVICE_DISCONNECTED 0x000C
struct mgmt_ev_device_disconnected {
@@ -768,9 +972,11 @@ struct mgmt_ev_auth_failed {
__u8 status;
} __packed;
-#define MGMT_DEV_FOUND_CONFIRM_NAME 0x01
-#define MGMT_DEV_FOUND_LEGACY_PAIRING 0x02
-#define MGMT_DEV_FOUND_NOT_CONNECTABLE 0x04
+#define MGMT_DEV_FOUND_CONFIRM_NAME 0x01
+#define MGMT_DEV_FOUND_LEGACY_PAIRING 0x02
+#define MGMT_DEV_FOUND_NOT_CONNECTABLE 0x04
+#define MGMT_DEV_FOUND_INITIATED_CONN 0x08
+#define MGMT_DEV_FOUND_NAME_REQUEST_FAILED 0x10
#define MGMT_EV_DEVICE_FOUND 0x0012
struct mgmt_ev_device_found {
@@ -778,7 +984,7 @@ struct mgmt_ev_device_found {
__s8 rssi;
__le32 flags;
__le16 eir_len;
- __u8 eir[0];
+ __u8 eir[];
} __packed;
#define MGMT_EV_DISCOVERING 0x0013
@@ -873,7 +1079,7 @@ struct mgmt_ev_ext_index {
struct mgmt_ev_local_oob_data_updated {
__u8 type;
__le16 eir_len;
- __u8 eir[0];
+ __u8 eir[];
} __packed;
#define MGMT_EV_ADVERTISING_ADDED 0x0023
@@ -889,10 +1095,80 @@ struct mgmt_ev_advertising_removed {
#define MGMT_EV_EXT_INFO_CHANGED 0x0025
struct mgmt_ev_ext_info_changed {
__le16 eir_len;
- __u8 eir[0];
+ __u8 eir[];
} __packed;
#define MGMT_EV_PHY_CONFIGURATION_CHANGED 0x0026
struct mgmt_ev_phy_configuration_changed {
__le32 selected_phys;
} __packed;
+
+#define MGMT_EV_EXP_FEATURE_CHANGED 0x0027
+struct mgmt_ev_exp_feature_changed {
+ __u8 uuid[16];
+ __le32 flags;
+} __packed;
+
+#define MGMT_EV_DEVICE_FLAGS_CHANGED 0x002a
+struct mgmt_ev_device_flags_changed {
+ struct mgmt_addr_info addr;
+ __le32 supported_flags;
+ __le32 current_flags;
+} __packed;
+
+#define MGMT_EV_ADV_MONITOR_ADDED 0x002b
+struct mgmt_ev_adv_monitor_added {
+ __le16 monitor_handle;
+} __packed;
+
+#define MGMT_EV_ADV_MONITOR_REMOVED 0x002c
+struct mgmt_ev_adv_monitor_removed {
+ __le16 monitor_handle;
+} __packed;
+
+#define MGMT_EV_CONTROLLER_SUSPEND 0x002d
+struct mgmt_ev_controller_suspend {
+ __u8 suspend_state;
+} __packed;
+
+#define MGMT_EV_CONTROLLER_RESUME 0x002e
+struct mgmt_ev_controller_resume {
+ __u8 wake_reason;
+ struct mgmt_addr_info addr;
+} __packed;
+
+#define MGMT_WAKE_REASON_NON_BT_WAKE 0x0
+#define MGMT_WAKE_REASON_UNEXPECTED 0x1
+#define MGMT_WAKE_REASON_REMOTE_WAKE 0x2
+
+#define MGMT_EV_ADV_MONITOR_DEVICE_FOUND 0x002f
+struct mgmt_ev_adv_monitor_device_found {
+ __le16 monitor_handle;
+ struct mgmt_addr_info addr;
+ __s8 rssi;
+ __le32 flags;
+ __le16 eir_len;
+ __u8 eir[];
+} __packed;
+
+#define MGMT_EV_ADV_MONITOR_DEVICE_LOST 0x0030
+struct mgmt_ev_adv_monitor_device_lost {
+ __le16 monitor_handle;
+ struct mgmt_addr_info addr;
+} __packed;
+
+#define MGMT_EV_MESH_DEVICE_FOUND 0x0031
+struct mgmt_ev_mesh_device_found {
+ struct mgmt_addr_info addr;
+ __s8 rssi;
+ __le64 instant;
+ __le32 flags;
+ __le16 eir_len;
+ __u8 eir[];
+} __packed;
+
+
+#define MGMT_EV_MESH_PACKET_CMPLT 0x0032
+struct mgmt_ev_mesh_pkt_cmplt {
+ __u8 handle;
+} __packed;
diff --git a/include/net/bluetooth/rfcomm.h b/include/net/bluetooth/rfcomm.h
index da4acefe39c8..99d26879b02a 100644
--- a/include/net/bluetooth/rfcomm.h
+++ b/include/net/bluetooth/rfcomm.h
@@ -34,7 +34,6 @@
#define RFCOMM_DEFAULT_MTU 127
#define RFCOMM_DEFAULT_CREDITS 7
-#define RFCOMM_MAX_L2CAP_MTU 1013
#define RFCOMM_MAX_CREDITS 40
#define RFCOMM_SKB_HEAD_RESERVE 8
@@ -356,7 +355,7 @@ struct rfcomm_dev_info {
struct rfcomm_dev_list_req {
u16 dev_num;
- struct rfcomm_dev_info dev_info[0];
+ struct rfcomm_dev_info dev_info[];
};
int rfcomm_dev_ioctl(struct sock *sk, unsigned int cmd, void __user *arg);
diff --git a/include/net/bluetooth/sco.h b/include/net/bluetooth/sco.h
index f40ddb4264fc..1aa2e14b6c94 100644
--- a/include/net/bluetooth/sco.h
+++ b/include/net/bluetooth/sco.h
@@ -46,4 +46,6 @@ struct sco_conninfo {
__u8 dev_class[3];
};
+#define SCO_CMSG_PKT_STATUS 0x01
+
#endif /* __SCO_H */
diff --git a/include/net/bond_3ad.h b/include/net/bond_3ad.h
index c8696a230b7d..a016f275cb01 100644
--- a/include/net/bond_3ad.h
+++ b/include/net/bond_3ad.h
@@ -15,8 +15,6 @@
#define PKT_TYPE_LACPDU cpu_to_be16(ETH_P_SLOW)
#define AD_TIMER_INTERVAL 100 /*msec*/
-#define MULTICAST_LACPDU_ADDR {0x01, 0x80, 0xC2, 0x00, 0x00, 0x02}
-
#define AD_LACP_SLOW 0
#define AD_LACP_FAST 1
@@ -262,7 +260,7 @@ struct ad_system {
struct ad_bond_info {
struct ad_system system; /* 802.3ad system structure */
struct bond_3ad_stats stats;
- u32 agg_select_timer; /* Timer to select aggregator after all adapter's hand shakes */
+ atomic_t agg_select_timer; /* Timer to select aggregator after all adapter's hand shakes */
u16 aggregator_identifier;
};
@@ -290,7 +288,7 @@ static inline const char *bond_3ad_churn_desc(churn_state_t state)
}
/* ========== AD Exported functions to the main bonding code ========== */
-void bond_3ad_initialize(struct bonding *bond, u16 tick_resolution);
+void bond_3ad_initialize(struct bonding *bond);
void bond_3ad_bind_slave(struct slave *slave);
void bond_3ad_unbind_slave(struct slave *slave);
void bond_3ad_state_machine_handler(struct work_struct *);
@@ -303,6 +301,7 @@ int __bond_3ad_get_active_agg_info(struct bonding *bond,
int bond_3ad_lacpdu_recv(const struct sk_buff *skb, struct bonding *bond,
struct slave *slave);
int bond_3ad_set_carrier(struct bonding *bond);
+void bond_3ad_update_lacp_active(struct bonding *bond);
void bond_3ad_update_lacp_rate(struct bonding *bond);
void bond_3ad_update_ad_actor_settings(struct bonding *bond);
int bond_3ad_stats_fill(struct sk_buff *skb, struct bond_3ad_stats *stats);
diff --git a/include/net/bond_alb.h b/include/net/bond_alb.h
index b3504fcd773d..191c36afa1f4 100644
--- a/include/net/bond_alb.h
+++ b/include/net/bond_alb.h
@@ -126,7 +126,7 @@ struct tlb_slave_info {
struct alb_bond_info {
struct tlb_client_info *tx_hashtbl; /* Dynamically allocated */
u32 unbalanced_load;
- int tx_rebalance_counter;
+ atomic_t tx_rebalance_counter;
int lp_counter;
/* -------- rlb parameters -------- */
int rlb_enabled;
@@ -158,6 +158,10 @@ void bond_alb_handle_link_change(struct bonding *bond, struct slave *slave, char
void bond_alb_handle_active_change(struct bonding *bond, struct slave *new_slave);
int bond_alb_xmit(struct sk_buff *skb, struct net_device *bond_dev);
int bond_tlb_xmit(struct sk_buff *skb, struct net_device *bond_dev);
+struct slave *bond_xmit_alb_slave_get(struct bonding *bond,
+ struct sk_buff *skb);
+struct slave *bond_xmit_tlb_slave_get(struct bonding *bond,
+ struct sk_buff *skb);
void bond_alb_monitor(struct work_struct *);
int bond_alb_set_mac_address(struct net_device *bond_dev, void *addr);
void bond_alb_clear_vlan(struct bonding *bond, unsigned short vlan_id);
diff --git a/include/net/bond_options.h b/include/net/bond_options.h
index 9d382f2f0bc5..69292ecc0325 100644
--- a/include/net/bond_options.h
+++ b/include/net/bond_options.h
@@ -7,6 +7,14 @@
#ifndef _NET_BOND_OPTIONS_H
#define _NET_BOND_OPTIONS_H
+#include <linux/bits.h>
+#include <linux/limits.h>
+#include <linux/types.h>
+#include <linux/string.h>
+
+struct netlink_ext_ack;
+struct nlattr;
+
#define BOND_OPT_MAX_NAMELEN 32
#define BOND_OPT_VALID(opt) ((opt) < BOND_OPT_LAST)
#define BOND_MODE_ALL_EX(x) (~(x))
@@ -64,19 +72,30 @@ enum {
BOND_OPT_AD_USER_PORT_KEY,
BOND_OPT_NUM_PEER_NOTIF_ALIAS,
BOND_OPT_PEER_NOTIF_DELAY,
+ BOND_OPT_LACP_ACTIVE,
+ BOND_OPT_MISSED_MAX,
+ BOND_OPT_NS_TARGETS,
+ BOND_OPT_PRIO,
BOND_OPT_LAST
};
/* This structure is used for storing option values and for passing option
* values when changing an option. The logic when used as an arg is as follows:
- * - if string != NULL -> parse it, if the opt is RAW type then return it, else
- * return the parse result
- * - if string == NULL -> parse value
+ * - if value != ULLONG_MAX -> parse value
+ * - if string != NULL -> parse string
+ * - if the opt is RAW data and length less than maxlen,
+ * copy the data to extra storage
*/
+
+#define BOND_OPT_EXTRA_MAXLEN 16
struct bond_opt_value {
char *string;
u64 value;
u32 flags;
+ union {
+ char extra[BOND_OPT_EXTRA_MAXLEN];
+ struct net_device *slave_dev;
+ };
};
struct bonding;
@@ -100,7 +119,8 @@ struct bond_option {
};
int __bond_opt_set(struct bonding *bond, unsigned int option,
- struct bond_opt_value *val);
+ struct bond_opt_value *val,
+ struct nlattr *bad_attr, struct netlink_ext_ack *extack);
int __bond_opt_set_notify(struct bonding *bond, unsigned int option,
struct bond_opt_value *val);
int bond_opt_tryset_rtnl(struct bonding *bond, unsigned int option, char *buf);
@@ -116,18 +136,29 @@ const struct bond_opt_value *bond_opt_get_val(unsigned int option, u64 val);
* When value is ULLONG_MAX then string will be used.
*/
static inline void __bond_opt_init(struct bond_opt_value *optval,
- char *string, u64 value)
+ char *string, u64 value,
+ void *extra, size_t extra_len)
{
memset(optval, 0, sizeof(*optval));
optval->value = ULLONG_MAX;
- if (value == ULLONG_MAX)
- optval->string = string;
- else
+ if (value != ULLONG_MAX)
optval->value = value;
+ else if (string)
+ optval->string = string;
+
+ if (extra && extra_len <= BOND_OPT_EXTRA_MAXLEN)
+ memcpy(optval->extra, extra, extra_len);
}
-#define bond_opt_initval(optval, value) __bond_opt_init(optval, NULL, value)
-#define bond_opt_initstr(optval, str) __bond_opt_init(optval, str, ULLONG_MAX)
+#define bond_opt_initval(optval, value) __bond_opt_init(optval, NULL, value, NULL, 0)
+#define bond_opt_initstr(optval, str) __bond_opt_init(optval, str, ULLONG_MAX, NULL, 0)
+#define bond_opt_initextra(optval, extra, extra_len) \
+ __bond_opt_init(optval, NULL, ULLONG_MAX, extra, extra_len)
+#define bond_opt_slave_initval(optval, slave_dev, value) \
+ __bond_opt_init(optval, NULL, value, slave_dev, sizeof(struct net_device *))
void bond_option_arp_ip_targets_clear(struct bonding *bond);
+#if IS_ENABLED(CONFIG_IPV6)
+void bond_option_ns_ip6_targets_clear(struct bonding *bond);
+#endif
#endif /* _NET_BOND_OPTIONS_H */
diff --git a/include/net/bonding.h b/include/net/bonding.h
index 3d56b026bb9e..e999f851738b 100644
--- a/include/net/bonding.h
+++ b/include/net/bonding.h
@@ -29,8 +29,11 @@
#include <net/bond_3ad.h>
#include <net/bond_alb.h>
#include <net/bond_options.h>
+#include <net/ipv6.h>
+#include <net/addrconf.h>
#define BOND_MAX_ARP_TARGETS 16
+#define BOND_MAX_NS_TARGETS BOND_MAX_ARP_TARGETS
#define BOND_DEFAULT_MIIMON 100
@@ -86,6 +89,11 @@
#define bond_for_each_slave_rcu(bond, pos, iter) \
netdev_for_each_lower_private_rcu((bond)->dev, pos, iter)
+#define BOND_XFRM_FEATURES (NETIF_F_HW_ESP | NETIF_F_HW_ESP_TX_CSUM | \
+ NETIF_F_GSO_ESP)
+
+#define BOND_TLS_FEATURES (NETIF_F_HW_TLS_TX | NETIF_F_HW_TLS_RX)
+
#ifdef CONFIG_NET_POLL_CONTROLLER
extern atomic_t netpoll_block_tx;
@@ -116,6 +124,7 @@ struct bond_params {
int xmit_policy;
int miimon;
u8 num_peer_notif;
+ u8 missed_max;
int arp_interval;
int arp_validate;
int arp_all_targets;
@@ -124,6 +133,7 @@ struct bond_params {
int updelay;
int downdelay;
int peer_notif_delay;
+ int lacp_active;
int lacp_fast;
unsigned int min_links;
int ad_select;
@@ -139,22 +149,21 @@ struct bond_params {
struct reciprocal_value reciprocal_packets_per_slave;
u16 ad_actor_sys_prio;
u16 ad_user_port_key;
+#if IS_ENABLED(CONFIG_IPV6)
+ struct in6_addr ns_targets[BOND_MAX_NS_TARGETS];
+#endif
/* 2 bytes of padding : see ether_addr_equal_64bits() */
u8 ad_actor_system[ETH_ALEN + 2];
};
-struct bond_parm_tbl {
- char *modename;
- int mode;
-};
-
struct slave {
struct net_device *dev; /* first - useful for panic debug */
struct bonding *bond; /* our master */
int delay;
- /* all three in jiffies */
+ /* all 4 in jiffies */
unsigned long last_link_up;
+ unsigned long last_tx;
unsigned long last_rx;
unsigned long target_last_arp_rx[BOND_MAX_ARP_TARGETS];
s8 link; /* one of BOND_LINK_XXXX */
@@ -170,6 +179,7 @@ struct slave {
u32 speed;
u16 queue_id;
u8 perm_hwaddr[MAX_ADDR_LEN];
+ int prio;
struct ad_slave_info *ad_info;
struct tlb_slave_info tlb_info;
#ifdef CONFIG_NET_POLL_CONTROLLER
@@ -180,10 +190,15 @@ struct slave {
struct rtnl_link_stats64 slave_stats;
};
+static inline struct slave *to_slave(struct kobject *kobj)
+{
+ return container_of(kobj, struct slave, kobj);
+}
+
struct bond_up_slave {
unsigned int count;
struct rcu_head rcu;
- struct slave *arr[0];
+ struct slave *arr[];
};
/*
@@ -191,6 +206,11 @@ struct bond_up_slave {
*/
#define BOND_LINK_NOCHANGE -1
+struct bond_ipsec {
+ struct list_head list;
+ struct xfrm_state *xs;
+};
+
/*
* Here are the locking policies for the two bonding locks:
* Get rcu_read_lock when reading or RTNL when writing slave list.
@@ -200,7 +220,8 @@ struct bonding {
struct slave __rcu *curr_active_slave;
struct slave __rcu *current_arp_slave;
struct slave __rcu *primary_slave;
- struct bond_up_slave __rcu *slave_arr; /* Array of usable slaves */
+ struct bond_up_slave __rcu *usable_slaves;
+ struct bond_up_slave __rcu *all_slaves;
bool force_primary;
s32 slave_cnt; /* never change this value outside the attach/detach wrappers */
int (*recv_probe)(const struct sk_buff *, struct bonding *,
@@ -221,7 +242,7 @@ struct bonding {
char proc_file_name[IFNAMSIZ];
#endif /* CONFIG_PROC_FS */
struct list_head bond_list;
- u32 rr_tx_counter;
+ u32 __percpu *rr_tx_counter;
struct ad_bond_info ad_info;
struct alb_bond_info alb_info;
struct bond_params params;
@@ -237,7 +258,12 @@ struct bonding {
struct dentry *debug_dir;
#endif /* CONFIG_DEBUG_FS */
struct rtnl_link_stats64 bond_stats;
- struct lock_class_key stats_lock_key;
+#ifdef CONFIG_XFRM_OFFLOAD
+ struct list_head ipsec_list;
+ /* protecting ipsec_list */
+ spinlock_t ipsec_lock;
+#endif /* CONFIG_XFRM_OFFLOAD */
+ struct bpf_prog *xdp_prog;
};
#define bond_slave_get_rcu(dev) \
@@ -254,6 +280,8 @@ struct bond_vlan_tag {
unsigned short vlan_id;
};
+bool bond_sk_check(struct bonding *bond);
+
/**
* Returns NULL if the net_device does not belong to any of the bond's slaves
*
@@ -326,7 +354,7 @@ static inline bool bond_uses_primary(struct bonding *bond)
static inline struct net_device *bond_option_active_slave_get_rcu(struct bonding *bond)
{
- struct slave *slave = rcu_dereference(bond->curr_active_slave);
+ struct slave *slave = rcu_dereference_rtnl(bond->curr_active_slave);
return bond_uses_primary(bond) && slave ? slave->dev : NULL;
}
@@ -479,6 +507,15 @@ static inline int bond_is_ip_target_ok(__be32 addr)
return !ipv4_is_lbcast(addr) && !ipv4_is_zeronet(addr);
}
+#if IS_ENABLED(CONFIG_IPV6)
+static inline int bond_is_ip6_target_ok(struct in6_addr *addr)
+{
+ return !ipv6_addr_any(addr) &&
+ !ipv6_addr_loopback(addr) &&
+ !ipv6_addr_is_multicast(addr);
+}
+#endif
+
/* Get the oldest arp which we've received on this slave for bond's
* arp_targets.
*/
@@ -504,19 +541,28 @@ static inline unsigned long slave_last_rx(struct bonding *bond,
return slave->last_rx;
}
+static inline void slave_update_last_tx(struct slave *slave)
+{
+ WRITE_ONCE(slave->last_tx, jiffies);
+}
+
+static inline unsigned long slave_last_tx(struct slave *slave)
+{
+ return READ_ONCE(slave->last_tx);
+}
+
#ifdef CONFIG_NET_POLL_CONTROLLER
-static inline void bond_netpoll_send_skb(const struct slave *slave,
+static inline netdev_tx_t bond_netpoll_send_skb(const struct slave *slave,
struct sk_buff *skb)
{
- struct netpoll *np = slave->np;
-
- if (np)
- netpoll_send_skb(np, skb);
+ return netpoll_send_skb(slave->np, skb);
}
#else
-static inline void bond_netpoll_send_skb(const struct slave *slave,
+static inline netdev_tx_t bond_netpoll_send_skb(const struct slave *slave,
struct sk_buff *skb)
{
+ BUG();
+ return NETDEV_TX_OK;
}
#endif
@@ -609,8 +655,8 @@ struct bond_net {
struct class_attribute class_attr_bonding_masters;
};
-int bond_arp_rcv(const struct sk_buff *skb, struct bonding *bond, struct slave *slave);
-void bond_dev_queue_xmit(struct bonding *bond, struct sk_buff *skb, struct net_device *slave_dev);
+int bond_rcv_validate(const struct sk_buff *skb, struct bonding *bond, struct slave *slave);
+netdev_tx_t bond_dev_queue_xmit(struct bonding *bond, struct sk_buff *skb, struct net_device *slave_dev);
int bond_create(struct net *net, const char *name);
int bond_create_sysfs(struct bond_net *net);
void bond_destroy_sysfs(struct bond_net *net);
@@ -680,20 +726,6 @@ static inline struct slave *bond_slave_has_mac(struct bonding *bond,
}
/* Caller must hold rcu_read_lock() for read */
-static inline struct slave *bond_slave_has_mac_rcu(struct bonding *bond,
- const u8 *mac)
-{
- struct list_head *iter;
- struct slave *tmp;
-
- bond_for_each_slave_rcu(bond, tmp, iter)
- if (ether_addr_equal_64bits(mac, tmp->dev->dev_addr))
- return tmp;
-
- return NULL;
-}
-
-/* Caller must hold rcu_read_lock() for read */
static inline bool bond_slave_has_mac_rx(struct bonding *bond, const u8 *mac)
{
struct list_head *iter;
@@ -730,23 +762,38 @@ static inline int bond_get_targets_ip(__be32 *targets, __be32 ip)
return -1;
}
+#if IS_ENABLED(CONFIG_IPV6)
+static inline int bond_get_targets_ip6(struct in6_addr *targets, struct in6_addr *ip)
+{
+ int i;
+
+ for (i = 0; i < BOND_MAX_NS_TARGETS; i++)
+ if (ipv6_addr_equal(&targets[i], ip))
+ return i;
+ else if (ipv6_addr_any(&targets[i]))
+ break;
+
+ return -1;
+}
+#endif
+
/* exported from bond_main.c */
extern unsigned int bond_net_id;
-extern const struct bond_parm_tbl bond_lacp_tbl[];
-extern const struct bond_parm_tbl xmit_hashtype_tbl[];
-extern const struct bond_parm_tbl arp_validate_tbl[];
-extern const struct bond_parm_tbl arp_all_targets_tbl[];
-extern const struct bond_parm_tbl fail_over_mac_tbl[];
-extern const struct bond_parm_tbl pri_reselect_tbl[];
-extern struct bond_parm_tbl ad_select_tbl[];
/* exported from bond_netlink.c */
extern struct rtnl_link_ops bond_link_ops;
-static inline void bond_tx_drop(struct net_device *dev, struct sk_buff *skb)
+/* exported from bond_sysfs_slave.c */
+extern const struct sysfs_ops slave_sysfs_ops;
+
+/* exported from bond_3ad.c */
+extern const u8 lacpdu_mcast_addr[];
+
+static inline netdev_tx_t bond_tx_drop(struct net_device *dev, struct sk_buff *skb)
{
- atomic_long_inc(&dev->tx_dropped);
+ dev_core_stats_tx_dropped_inc(dev);
dev_kfree_skb_any(skb);
+ return NET_XMIT_DROP;
}
#endif /* _NET_BONDING_H */
diff --git a/include/net/bpf_sk_storage.h b/include/net/bpf_sk_storage.h
index 8e4f831d2e52..2926f1f00d65 100644
--- a/include/net/bpf_sk_storage.h
+++ b/include/net/bpf_sk_storage.h
@@ -3,21 +3,61 @@
#ifndef _BPF_SK_STORAGE_H
#define _BPF_SK_STORAGE_H
+#include <linux/rculist.h>
+#include <linux/list.h>
+#include <linux/hash.h>
+#include <linux/types.h>
+#include <linux/spinlock.h>
+#include <linux/bpf.h>
+#include <net/sock.h>
+#include <uapi/linux/sock_diag.h>
+#include <uapi/linux/btf.h>
+#include <linux/bpf_local_storage.h>
+
struct sock;
void bpf_sk_storage_free(struct sock *sk);
extern const struct bpf_func_proto bpf_sk_storage_get_proto;
extern const struct bpf_func_proto bpf_sk_storage_delete_proto;
+extern const struct bpf_func_proto bpf_sk_storage_get_tracing_proto;
+extern const struct bpf_func_proto bpf_sk_storage_delete_tracing_proto;
+
+struct bpf_local_storage_elem;
+struct bpf_sk_storage_diag;
+struct sk_buff;
+struct nlattr;
#ifdef CONFIG_BPF_SYSCALL
int bpf_sk_storage_clone(const struct sock *sk, struct sock *newsk);
+struct bpf_sk_storage_diag *
+bpf_sk_storage_diag_alloc(const struct nlattr *nla_stgs);
+void bpf_sk_storage_diag_free(struct bpf_sk_storage_diag *diag);
+int bpf_sk_storage_diag_put(struct bpf_sk_storage_diag *diag,
+ struct sock *sk, struct sk_buff *skb,
+ int stg_array_type,
+ unsigned int *res_diag_size);
#else
static inline int bpf_sk_storage_clone(const struct sock *sk,
struct sock *newsk)
{
return 0;
}
+static inline struct bpf_sk_storage_diag *
+bpf_sk_storage_diag_alloc(const struct nlattr *nla)
+{
+ return NULL;
+}
+static inline void bpf_sk_storage_diag_free(struct bpf_sk_storage_diag *diag)
+{
+}
+static inline int bpf_sk_storage_diag_put(struct bpf_sk_storage_diag *diag,
+ struct sock *sk, struct sk_buff *skb,
+ int stg_array_type,
+ unsigned int *res_diag_size)
+{
+ return 0;
+}
#endif
#endif /* _BPF_SK_STORAGE_H */
diff --git a/include/net/busy_poll.h b/include/net/busy_poll.h
index 86e028388bad..f90f0021f5f2 100644
--- a/include/net/busy_poll.h
+++ b/include/net/busy_poll.h
@@ -23,6 +23,8 @@
*/
#define MIN_NAPI_ID ((unsigned int)(NR_CPUS + 1))
+#define BUSY_POLL_BUDGET 8
+
#ifdef CONFIG_NET_RX_BUSY_POLL
struct napi_struct;
@@ -31,19 +33,19 @@ extern unsigned int sysctl_net_busy_poll __read_mostly;
static inline bool net_busy_loop_on(void)
{
- return sysctl_net_busy_poll;
+ return READ_ONCE(sysctl_net_busy_poll);
}
static inline bool sk_can_busy_loop(const struct sock *sk)
{
- return sk->sk_ll_usec && !signal_pending(current);
+ return READ_ONCE(sk->sk_ll_usec) && !signal_pending(current);
}
bool sk_busy_loop_end(void *p, unsigned long start_time);
void napi_busy_loop(unsigned int napi_id,
bool (*loop_end)(void *, unsigned long),
- void *loop_end_arg);
+ void *loop_end_arg, bool prefer_busy_poll, u16 budget);
#else /* CONFIG_NET_RX_BUSY_POLL */
static inline unsigned long net_busy_loop_on(void)
@@ -105,7 +107,9 @@ static inline void sk_busy_loop(struct sock *sk, int nonblock)
unsigned int napi_id = READ_ONCE(sk->sk_napi_id);
if (napi_id >= MIN_NAPI_ID)
- napi_busy_loop(napi_id, nonblock ? NULL : sk_busy_loop_end, sk);
+ napi_busy_loop(napi_id, nonblock ? NULL : sk_busy_loop_end, sk,
+ READ_ONCE(sk->sk_prefer_busy_poll),
+ READ_ONCE(sk->sk_busy_poll_budget) ?: BUSY_POLL_BUDGET);
#endif
}
@@ -114,7 +118,11 @@ static inline void skb_mark_napi_id(struct sk_buff *skb,
struct napi_struct *napi)
{
#ifdef CONFIG_NET_RX_BUSY_POLL
- skb->napi_id = napi->napi_id;
+ /* If the skb was already marked with a valid NAPI ID, avoid overwriting
+ * it.
+ */
+ if (skb->napi_id < MIN_NAPI_ID)
+ skb->napi_id = napi->napi_id;
#endif
}
@@ -122,18 +130,47 @@ static inline void skb_mark_napi_id(struct sk_buff *skb,
static inline void sk_mark_napi_id(struct sock *sk, const struct sk_buff *skb)
{
#ifdef CONFIG_NET_RX_BUSY_POLL
+ if (unlikely(READ_ONCE(sk->sk_napi_id) != skb->napi_id))
+ WRITE_ONCE(sk->sk_napi_id, skb->napi_id);
+#endif
+ sk_rx_queue_update(sk, skb);
+}
+
+/* Variant of sk_mark_napi_id() for passive flow setup,
+ * as sk->sk_napi_id and sk->sk_rx_queue_mapping content
+ * needs to be set.
+ */
+static inline void sk_mark_napi_id_set(struct sock *sk,
+ const struct sk_buff *skb)
+{
+#ifdef CONFIG_NET_RX_BUSY_POLL
WRITE_ONCE(sk->sk_napi_id, skb->napi_id);
#endif
sk_rx_queue_set(sk, skb);
}
+static inline void __sk_mark_napi_id_once(struct sock *sk, unsigned int napi_id)
+{
+#ifdef CONFIG_NET_RX_BUSY_POLL
+ if (!READ_ONCE(sk->sk_napi_id))
+ WRITE_ONCE(sk->sk_napi_id, napi_id);
+#endif
+}
+
/* variant used for unconnected sockets */
static inline void sk_mark_napi_id_once(struct sock *sk,
const struct sk_buff *skb)
{
#ifdef CONFIG_NET_RX_BUSY_POLL
- if (!READ_ONCE(sk->sk_napi_id))
- WRITE_ONCE(sk->sk_napi_id, skb->napi_id);
+ __sk_mark_napi_id_once(sk, skb->napi_id);
+#endif
+}
+
+static inline void sk_mark_napi_id_once_xdp(struct sock *sk,
+ const struct xdp_buff *xdp)
+{
+#ifdef CONFIG_NET_RX_BUSY_POLL
+ __sk_mark_napi_id_once(sk, xdp->rxq->napi_id);
#endif
}
diff --git a/include/net/caif/caif_dev.h b/include/net/caif/caif_dev.h
index 48ecca8530ff..b655d8666f55 100644
--- a/include/net/caif/caif_dev.h
+++ b/include/net/caif/caif_dev.h
@@ -119,7 +119,7 @@ void caif_free_client(struct cflayer *adap_layer);
* The link_support layer is used to add any Link Layer specific
* framing.
*/
-void caif_enroll_dev(struct net_device *dev, struct caif_dev_common *caifdev,
+int caif_enroll_dev(struct net_device *dev, struct caif_dev_common *caifdev,
struct cflayer *link_support, int head_room,
struct cflayer **layer, int (**rcv_func)(
struct sk_buff *, struct net_device *,
diff --git a/include/net/caif/caif_hsi.h b/include/net/caif/caif_hsi.h
deleted file mode 100644
index 552cf68d28d2..000000000000
--- a/include/net/caif/caif_hsi.h
+++ /dev/null
@@ -1,200 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0-only */
-/*
- * Copyright (C) ST-Ericsson AB 2010
- * Author: Daniel Martensson / daniel.martensson@stericsson.com
- * Dmitry.Tarnyagin / dmitry.tarnyagin@stericsson.com
- */
-
-#ifndef CAIF_HSI_H_
-#define CAIF_HSI_H_
-
-#include <net/caif/caif_layer.h>
-#include <net/caif/caif_device.h>
-#include <linux/atomic.h>
-
-/*
- * Maximum number of CAIF frames that can reside in the same HSI frame.
- */
-#define CFHSI_MAX_PKTS 15
-
-/*
- * Maximum number of bytes used for the frame that can be embedded in the
- * HSI descriptor.
- */
-#define CFHSI_MAX_EMB_FRM_SZ 96
-
-/*
- * Decides if HSI buffers should be prefilled with 0xFF pattern for easier
- * debugging. Both TX and RX buffers will be filled before the transfer.
- */
-#define CFHSI_DBG_PREFILL 0
-
-/* Structure describing a HSI packet descriptor. */
-#pragma pack(1) /* Byte alignment. */
-struct cfhsi_desc {
- u8 header;
- u8 offset;
- u16 cffrm_len[CFHSI_MAX_PKTS];
- u8 emb_frm[CFHSI_MAX_EMB_FRM_SZ];
-};
-#pragma pack() /* Default alignment. */
-
-/* Size of the complete HSI packet descriptor. */
-#define CFHSI_DESC_SZ (sizeof(struct cfhsi_desc))
-
-/*
- * Size of the complete HSI packet descriptor excluding the optional embedded
- * CAIF frame.
- */
-#define CFHSI_DESC_SHORT_SZ (CFHSI_DESC_SZ - CFHSI_MAX_EMB_FRM_SZ)
-
-/*
- * Maximum bytes transferred in one transfer.
- */
-#define CFHSI_MAX_CAIF_FRAME_SZ 4096
-
-#define CFHSI_MAX_PAYLOAD_SZ (CFHSI_MAX_PKTS * CFHSI_MAX_CAIF_FRAME_SZ)
-
-/* Size of the complete HSI TX buffer. */
-#define CFHSI_BUF_SZ_TX (CFHSI_DESC_SZ + CFHSI_MAX_PAYLOAD_SZ)
-
-/* Size of the complete HSI RX buffer. */
-#define CFHSI_BUF_SZ_RX ((2 * CFHSI_DESC_SZ) + CFHSI_MAX_PAYLOAD_SZ)
-
-/* Bitmasks for the HSI descriptor. */
-#define CFHSI_PIGGY_DESC (0x01 << 7)
-
-#define CFHSI_TX_STATE_IDLE 0
-#define CFHSI_TX_STATE_XFER 1
-
-#define CFHSI_RX_STATE_DESC 0
-#define CFHSI_RX_STATE_PAYLOAD 1
-
-/* Bitmasks for power management. */
-#define CFHSI_WAKE_UP 0
-#define CFHSI_WAKE_UP_ACK 1
-#define CFHSI_WAKE_DOWN_ACK 2
-#define CFHSI_AWAKE 3
-#define CFHSI_WAKELOCK_HELD 4
-#define CFHSI_SHUTDOWN 5
-#define CFHSI_FLUSH_FIFO 6
-
-#ifndef CFHSI_INACTIVITY_TOUT
-#define CFHSI_INACTIVITY_TOUT (1 * HZ)
-#endif /* CFHSI_INACTIVITY_TOUT */
-
-#ifndef CFHSI_WAKE_TOUT
-#define CFHSI_WAKE_TOUT (3 * HZ)
-#endif /* CFHSI_WAKE_TOUT */
-
-#ifndef CFHSI_MAX_RX_RETRIES
-#define CFHSI_MAX_RX_RETRIES (10 * HZ)
-#endif
-
-/* Structure implemented by the CAIF HSI driver. */
-struct cfhsi_cb_ops {
- void (*tx_done_cb) (struct cfhsi_cb_ops *drv);
- void (*rx_done_cb) (struct cfhsi_cb_ops *drv);
- void (*wake_up_cb) (struct cfhsi_cb_ops *drv);
- void (*wake_down_cb) (struct cfhsi_cb_ops *drv);
-};
-
-/* Structure implemented by HSI device. */
-struct cfhsi_ops {
- int (*cfhsi_up) (struct cfhsi_ops *dev);
- int (*cfhsi_down) (struct cfhsi_ops *dev);
- int (*cfhsi_tx) (u8 *ptr, int len, struct cfhsi_ops *dev);
- int (*cfhsi_rx) (u8 *ptr, int len, struct cfhsi_ops *dev);
- int (*cfhsi_wake_up) (struct cfhsi_ops *dev);
- int (*cfhsi_wake_down) (struct cfhsi_ops *dev);
- int (*cfhsi_get_peer_wake) (struct cfhsi_ops *dev, bool *status);
- int (*cfhsi_fifo_occupancy) (struct cfhsi_ops *dev, size_t *occupancy);
- int (*cfhsi_rx_cancel)(struct cfhsi_ops *dev);
- struct cfhsi_cb_ops *cb_ops;
-};
-
-/* Structure holds status of received CAIF frames processing */
-struct cfhsi_rx_state {
- int state;
- int nfrms;
- int pld_len;
- int retries;
- bool piggy_desc;
-};
-
-/* Priority mapping */
-enum {
- CFHSI_PRIO_CTL = 0,
- CFHSI_PRIO_VI,
- CFHSI_PRIO_VO,
- CFHSI_PRIO_BEBK,
- CFHSI_PRIO_LAST,
-};
-
-struct cfhsi_config {
- u32 inactivity_timeout;
- u32 aggregation_timeout;
- u32 head_align;
- u32 tail_align;
- u32 q_high_mark;
- u32 q_low_mark;
-};
-
-/* Structure implemented by CAIF HSI drivers. */
-struct cfhsi {
- struct caif_dev_common cfdev;
- struct net_device *ndev;
- struct platform_device *pdev;
- struct sk_buff_head qhead[CFHSI_PRIO_LAST];
- struct cfhsi_cb_ops cb_ops;
- struct cfhsi_ops *ops;
- int tx_state;
- struct cfhsi_rx_state rx_state;
- struct cfhsi_config cfg;
- int rx_len;
- u8 *rx_ptr;
- u8 *tx_buf;
- u8 *rx_buf;
- u8 *rx_flip_buf;
- spinlock_t lock;
- int flow_off_sent;
- struct list_head list;
- struct work_struct wake_up_work;
- struct work_struct wake_down_work;
- struct work_struct out_of_sync_work;
- struct workqueue_struct *wq;
- wait_queue_head_t wake_up_wait;
- wait_queue_head_t wake_down_wait;
- wait_queue_head_t flush_fifo_wait;
- struct timer_list inactivity_timer;
- struct timer_list rx_slowpath_timer;
-
- /* TX aggregation */
- int aggregation_len;
- struct timer_list aggregation_timer;
-
- unsigned long bits;
-};
-extern struct platform_driver cfhsi_driver;
-
-/**
- * enum ifla_caif_hsi - CAIF HSI NetlinkRT parameters.
- * @IFLA_CAIF_HSI_INACTIVITY_TOUT: Inactivity timeout before
- * taking the HSI wakeline down, in milliseconds.
- * When using RT Netlink to create, destroy or configure a CAIF HSI interface,
- * enum ifla_caif_hsi is used to specify the configuration attributes.
- */
-enum ifla_caif_hsi {
- __IFLA_CAIF_HSI_UNSPEC,
- __IFLA_CAIF_HSI_INACTIVITY_TOUT,
- __IFLA_CAIF_HSI_AGGREGATION_TOUT,
- __IFLA_CAIF_HSI_HEAD_ALIGN,
- __IFLA_CAIF_HSI_TAIL_ALIGN,
- __IFLA_CAIF_HSI_QHIGH_WATERMARK,
- __IFLA_CAIF_HSI_QLOW_WATERMARK,
- __IFLA_CAIF_HSI_MAX
-};
-
-struct cfhsi_ops *cfhsi_get_ops(void);
-
-#endif /* CAIF_HSI_H_ */
diff --git a/include/net/caif/caif_layer.h b/include/net/caif/caif_layer.h
index 064094101cb5..51f7bb42a936 100644
--- a/include/net/caif/caif_layer.h
+++ b/include/net/caif/caif_layer.h
@@ -156,7 +156,7 @@ struct cflayer {
* CAIF packets upwards in the stack.
* Packet handling rules:
* - The CAIF packet (cfpkt) ownership is passed to the
- * called receive function. This means that the the
+ * called receive function. This means that the
* packet cannot be accessed after passing it to the
* above layer using up->receive().
*
@@ -184,7 +184,7 @@ struct cflayer {
* CAIF packet downwards in the stack.
* Packet handling rules:
* - The CAIF packet (cfpkt) ownership is passed to the
- * transmit function. This means that the the packet
+ * transmit function. This means that the packet
* cannot be accessed after passing it to the below
* layer using dn->transmit().
*
diff --git a/include/net/caif/caif_spi.h b/include/net/caif/caif_spi.h
deleted file mode 100644
index a0bf4cbce71b..000000000000
--- a/include/net/caif/caif_spi.h
+++ /dev/null
@@ -1,155 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0-only */
-/*
- * Copyright (C) ST-Ericsson AB 2010
- * Author: Daniel Martensson / Daniel.Martensson@stericsson.com
- */
-
-#ifndef CAIF_SPI_H_
-#define CAIF_SPI_H_
-
-#include <net/caif/caif_device.h>
-
-#define SPI_CMD_WR 0x00
-#define SPI_CMD_RD 0x01
-#define SPI_CMD_EOT 0x02
-#define SPI_CMD_IND 0x04
-
-#define SPI_DMA_BUF_LEN 8192
-
-#define WL_SZ 2 /* 16 bits. */
-#define SPI_CMD_SZ 4 /* 32 bits. */
-#define SPI_IND_SZ 4 /* 32 bits. */
-
-#define SPI_XFER 0
-#define SPI_SS_ON 1
-#define SPI_SS_OFF 2
-#define SPI_TERMINATE 3
-
-/* Minimum time between different levels is 50 microseconds. */
-#define MIN_TRANSITION_TIME_USEC 50
-
-/* Defines for calculating duration of SPI transfers for a particular
- * number of bytes.
- */
-#define SPI_MASTER_CLK_MHZ 13
-#define SPI_XFER_TIME_USEC(bytes, clk) (((bytes) * 8) / clk)
-
-/* Normally this should be aligned on the modem in order to benefit from full
- * duplex transfers. However a size of 8188 provokes errors when running with
- * the modem. These errors occur when packet sizes approaches 4 kB of data.
- */
-#define CAIF_MAX_SPI_FRAME 4092
-
-/* Maximum number of uplink CAIF frames that can reside in the same SPI frame.
- * This number should correspond with the modem setting. The application side
- * CAIF accepts any number of embedded downlink CAIF frames.
- */
-#define CAIF_MAX_SPI_PKTS 9
-
-/* Decides if SPI buffers should be prefilled with 0xFF pattern for easier
- * debugging. Both TX and RX buffers will be filled before the transfer.
- */
-#define CFSPI_DBG_PREFILL 0
-
-/* Structure describing a SPI transfer. */
-struct cfspi_xfer {
- u16 tx_dma_len;
- u16 rx_dma_len;
- void *va_tx[2];
- dma_addr_t pa_tx[2];
- void *va_rx;
- dma_addr_t pa_rx;
-};
-
-/* Structure implemented by the SPI interface. */
-struct cfspi_ifc {
- void (*ss_cb) (bool assert, struct cfspi_ifc *ifc);
- void (*xfer_done_cb) (struct cfspi_ifc *ifc);
- void *priv;
-};
-
-/* Structure implemented by SPI clients. */
-struct cfspi_dev {
- int (*init_xfer) (struct cfspi_xfer *xfer, struct cfspi_dev *dev);
- void (*sig_xfer) (bool xfer, struct cfspi_dev *dev);
- struct cfspi_ifc *ifc;
- char *name;
- u32 clk_mhz;
- void *priv;
-};
-
-/* Enumeration describing the CAIF SPI state. */
-enum cfspi_state {
- CFSPI_STATE_WAITING = 0,
- CFSPI_STATE_AWAKE,
- CFSPI_STATE_FETCH_PKT,
- CFSPI_STATE_GET_NEXT,
- CFSPI_STATE_INIT_XFER,
- CFSPI_STATE_WAIT_ACTIVE,
- CFSPI_STATE_SIG_ACTIVE,
- CFSPI_STATE_WAIT_XFER_DONE,
- CFSPI_STATE_XFER_DONE,
- CFSPI_STATE_WAIT_INACTIVE,
- CFSPI_STATE_SIG_INACTIVE,
- CFSPI_STATE_DELIVER_PKT,
- CFSPI_STATE_MAX,
-};
-
-/* Structure implemented by SPI physical interfaces. */
-struct cfspi {
- struct caif_dev_common cfdev;
- struct net_device *ndev;
- struct platform_device *pdev;
- struct sk_buff_head qhead;
- struct sk_buff_head chead;
- u16 cmd;
- u16 tx_cpck_len;
- u16 tx_npck_len;
- u16 rx_cpck_len;
- u16 rx_npck_len;
- struct cfspi_ifc ifc;
- struct cfspi_xfer xfer;
- struct cfspi_dev *dev;
- unsigned long state;
- struct work_struct work;
- struct workqueue_struct *wq;
- struct list_head list;
- int flow_off_sent;
- u32 qd_low_mark;
- u32 qd_high_mark;
- struct completion comp;
- wait_queue_head_t wait;
- spinlock_t lock;
- bool flow_stop;
- bool slave;
- bool slave_talked;
-#ifdef CONFIG_DEBUG_FS
- enum cfspi_state dbg_state;
- u16 pcmd;
- u16 tx_ppck_len;
- u16 rx_ppck_len;
- struct dentry *dbgfs_dir;
- struct dentry *dbgfs_state;
- struct dentry *dbgfs_frame;
-#endif /* CONFIG_DEBUG_FS */
-};
-
-extern int spi_frm_align;
-extern int spi_up_head_align;
-extern int spi_up_tail_align;
-extern int spi_down_head_align;
-extern int spi_down_tail_align;
-extern struct platform_driver cfspi_spi_driver;
-
-void cfspi_dbg_state(struct cfspi *cfspi, int state);
-int cfspi_xmitfrm(struct cfspi *cfspi, u8 *buf, size_t len);
-int cfspi_xmitlen(struct cfspi *cfspi);
-int cfspi_rxfrm(struct cfspi *cfspi, u8 *buf, size_t len);
-int cfspi_spi_remove(struct platform_device *pdev);
-int cfspi_spi_probe(struct platform_device *pdev);
-int cfspi_xmitfrm(struct cfspi *cfspi, u8 *buf, size_t len);
-int cfspi_xmitlen(struct cfspi *cfspi);
-int cfspi_rxfrm(struct cfspi *cfspi, u8 *buf, size_t len);
-void cfspi_xfer(struct work_struct *work);
-
-#endif /* CAIF_SPI_H_ */
diff --git a/include/net/caif/cfcnfg.h b/include/net/caif/cfcnfg.h
index 2aa5e91d8457..8819ff4db35a 100644
--- a/include/net/caif/cfcnfg.h
+++ b/include/net/caif/cfcnfg.h
@@ -62,7 +62,7 @@ void cfcnfg_remove(struct cfcnfg *cfg);
* @fcs: Specify if checksum is used in CAIF Framing Layer.
* @head_room: Head space needed by link specific protocol.
*/
-void
+int
cfcnfg_add_phy_layer(struct cfcnfg *cnfg,
struct net_device *dev, struct cflayer *phy_layer,
enum cfcnfg_phy_preference pref,
diff --git a/include/net/caif/cfserl.h b/include/net/caif/cfserl.h
index 14a55e03bb3c..67cce8757175 100644
--- a/include/net/caif/cfserl.h
+++ b/include/net/caif/cfserl.h
@@ -9,4 +9,5 @@
#include <net/caif/caif_layer.h>
struct cflayer *cfserl_create(int instance, bool use_stx);
+void cfserl_release(struct cflayer *layer);
#endif
diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h
index f22bd6c838a3..e09ff87146c1 100644
--- a/include/net/cfg80211.h
+++ b/include/net/cfg80211.h
@@ -7,9 +7,11 @@
* Copyright 2006-2010 Johannes Berg <johannes@sipsolutions.net>
* Copyright 2013-2014 Intel Mobile Communications GmbH
* Copyright 2015-2017 Intel Deutschland GmbH
- * Copyright (C) 2018-2019 Intel Corporation
+ * Copyright (C) 2018-2021 Intel Corporation
*/
+#include <linux/ethtool.h>
+#include <uapi/linux/rfkill.h>
#include <linux/netdevice.h>
#include <linux/debugfs.h>
#include <linux/list.h>
@@ -20,6 +22,7 @@
#include <linux/if_ether.h>
#include <linux/ieee80211.h>
#include <linux/net.h>
+#include <linux/rfkill.h>
#include <net/regulatory.h>
/**
@@ -72,12 +75,12 @@ struct wiphy;
*
* @IEEE80211_CHAN_DISABLED: This channel is disabled.
* @IEEE80211_CHAN_NO_IR: do not initiate radiation, this includes
- * sending probe requests or beaconing.
+ * sending probe requests or beaconing.
* @IEEE80211_CHAN_RADAR: Radar detection is required on this channel.
* @IEEE80211_CHAN_NO_HT40PLUS: extension channel above this channel
- * is not permitted.
+ * is not permitted.
* @IEEE80211_CHAN_NO_HT40MINUS: extension channel below this channel
- * is not permitted.
+ * is not permitted.
* @IEEE80211_CHAN_NO_OFDM: OFDM is not allowed on this channel.
* @IEEE80211_CHAN_NO_80MHZ: If the driver supports 80 MHz on the band,
* this flag indicates that an 80 MHz channel cannot use this
@@ -95,7 +98,23 @@ struct wiphy;
* on this channel.
* @IEEE80211_CHAN_NO_10MHZ: 10 MHz bandwidth is not permitted
* on this channel.
- *
+ * @IEEE80211_CHAN_NO_HE: HE operation is not permitted on this channel.
+ * @IEEE80211_CHAN_1MHZ: 1 MHz bandwidth is permitted
+ * on this channel.
+ * @IEEE80211_CHAN_2MHZ: 2 MHz bandwidth is permitted
+ * on this channel.
+ * @IEEE80211_CHAN_4MHZ: 4 MHz bandwidth is permitted
+ * on this channel.
+ * @IEEE80211_CHAN_8MHZ: 8 MHz bandwidth is permitted
+ * on this channel.
+ * @IEEE80211_CHAN_16MHZ: 16 MHz bandwidth is permitted
+ * on this channel.
+ * @IEEE80211_CHAN_NO_320MHZ: If the driver supports 320 MHz on the band,
+ * this flag indicates that a 320 MHz channel cannot use this
+ * channel as the control or any of the secondary channels.
+ * This may be due to the driver or due to regulatory bandwidth
+ * restrictions.
+ * @IEEE80211_CHAN_NO_EHT: EHT operation is not permitted on this channel.
*/
enum ieee80211_channel_flags {
IEEE80211_CHAN_DISABLED = 1<<0,
@@ -111,6 +130,14 @@ enum ieee80211_channel_flags {
IEEE80211_CHAN_IR_CONCURRENT = 1<<10,
IEEE80211_CHAN_NO_20MHZ = 1<<11,
IEEE80211_CHAN_NO_10MHZ = 1<<12,
+ IEEE80211_CHAN_NO_HE = 1<<13,
+ IEEE80211_CHAN_1MHZ = 1<<14,
+ IEEE80211_CHAN_2MHZ = 1<<15,
+ IEEE80211_CHAN_4MHZ = 1<<16,
+ IEEE80211_CHAN_8MHZ = 1<<17,
+ IEEE80211_CHAN_16MHZ = 1<<18,
+ IEEE80211_CHAN_NO_320MHZ = 1<<19,
+ IEEE80211_CHAN_NO_EHT = 1<<20,
};
#define IEEE80211_CHAN_NO_HT40 \
@@ -126,6 +153,7 @@ enum ieee80211_channel_flags {
* with cfg80211.
*
* @center_freq: center frequency in MHz
+ * @freq_offset: offset from @center_freq, in KHz
* @hw_value: hardware-specific value for the channel
* @flags: channel flags from &enum ieee80211_channel_flags.
* @orig_flags: channel flags at registration time, used by regulatory
@@ -147,6 +175,7 @@ enum ieee80211_channel_flags {
struct ieee80211_channel {
enum nl80211_band band;
u32 center_freq;
+ u16 freq_offset;
u16 hw_value;
u32 flags;
int max_antenna_gain;
@@ -250,13 +279,36 @@ struct ieee80211_rate {
* struct ieee80211_he_obss_pd - AP settings for spatial reuse
*
* @enable: is the feature enabled.
+ * @sr_ctrl: The SR Control field of SRP element.
+ * @non_srg_max_offset: non-SRG maximum tx power offset
* @min_offset: minimal tx power offset an associated station shall use
* @max_offset: maximum tx power offset an associated station shall use
+ * @bss_color_bitmap: bitmap that indicates the BSS color values used by
+ * members of the SRG
+ * @partial_bssid_bitmap: bitmap that indicates the partial BSSID values
+ * used by members of the SRG
*/
struct ieee80211_he_obss_pd {
bool enable;
+ u8 sr_ctrl;
+ u8 non_srg_max_offset;
u8 min_offset;
u8 max_offset;
+ u8 bss_color_bitmap[8];
+ u8 partial_bssid_bitmap[8];
+};
+
+/**
+ * struct cfg80211_he_bss_color - AP settings for BSS coloring
+ *
+ * @color: the current color.
+ * @enabled: HE BSS color is used
+ * @partial: define the AID equation.
+ */
+struct cfg80211_he_bss_color {
+ u8 color;
+ bool enabled;
+ bool partial;
};
/**
@@ -316,7 +368,50 @@ struct ieee80211_sta_he_cap {
};
/**
- * struct ieee80211_sband_iftype_data
+ * struct ieee80211_eht_mcs_nss_supp - EHT max supported NSS per MCS
+ *
+ * See P802.11be_D1.3 Table 9-401k - "Subfields of the Supported EHT-MCS
+ * and NSS Set field"
+ *
+ * @only_20mhz: MCS/NSS support for 20 MHz-only STA.
+ * @bw: MCS/NSS support for 80, 160 and 320 MHz
+ * @bw._80: MCS/NSS support for BW <= 80 MHz
+ * @bw._160: MCS/NSS support for BW = 160 MHz
+ * @bw._320: MCS/NSS support for BW = 320 MHz
+ */
+struct ieee80211_eht_mcs_nss_supp {
+ union {
+ struct ieee80211_eht_mcs_nss_supp_20mhz_only only_20mhz;
+ struct {
+ struct ieee80211_eht_mcs_nss_supp_bw _80;
+ struct ieee80211_eht_mcs_nss_supp_bw _160;
+ struct ieee80211_eht_mcs_nss_supp_bw _320;
+ } __packed bw;
+ } __packed;
+} __packed;
+
+#define IEEE80211_EHT_PPE_THRES_MAX_LEN 32
+
+/**
+ * struct ieee80211_sta_eht_cap - STA's EHT capabilities
+ *
+ * This structure describes most essential parameters needed
+ * to describe 802.11be EHT capabilities for a STA.
+ *
+ * @has_eht: true iff EHT data is valid.
+ * @eht_cap_elem: Fixed portion of the eht capabilities element.
+ * @eht_mcs_nss_supp: The supported NSS/MCS combinations.
+ * @eht_ppe_thres: Holds the PPE Thresholds data.
+ */
+struct ieee80211_sta_eht_cap {
+ bool has_eht;
+ struct ieee80211_eht_cap_elem_fixed eht_cap_elem;
+ struct ieee80211_eht_mcs_nss_supp eht_mcs_nss_supp;
+ u8 eht_ppe_thres[IEEE80211_EHT_PPE_THRES_MAX_LEN];
+};
+
+/**
+ * struct ieee80211_sband_iftype_data - sband data per interface type
*
* This structure encapsulates sband data that is relevant for the
* interface types defined in @types_mask. Each type in the
@@ -324,10 +419,22 @@ struct ieee80211_sta_he_cap {
*
* @types_mask: interface types mask
* @he_cap: holds the HE capabilities
+ * @he_6ghz_capa: HE 6 GHz capabilities, must be filled in for a
+ * 6 GHz band channel (and 0 may be valid value).
+ * @eht_cap: STA's EHT capabilities
+ * @vendor_elems: vendor element(s) to advertise
+ * @vendor_elems.data: vendor element(s) data
+ * @vendor_elems.len: vendor element(s) length
*/
struct ieee80211_sband_iftype_data {
u16 types_mask;
struct ieee80211_sta_he_cap he_cap;
+ struct ieee80211_he_6ghz_capa he_6ghz_capa;
+ struct ieee80211_sta_eht_cap eht_cap;
+ struct {
+ const u8 *data;
+ unsigned int len;
+ } vendor_elems;
};
/**
@@ -385,12 +492,28 @@ struct ieee80211_edmg {
};
/**
+ * struct ieee80211_sta_s1g_cap - STA's S1G capabilities
+ *
+ * This structure describes most essential parameters needed
+ * to describe 802.11ah S1G capabilities for a STA.
+ *
+ * @s1g: is STA an S1G STA
+ * @cap: S1G capabilities information
+ * @nss_mcs: Supported NSS MCS set
+ */
+struct ieee80211_sta_s1g_cap {
+ bool s1g;
+ u8 cap[10]; /* use S1G_CAPAB_ */
+ u8 nss_mcs[5];
+};
+
+/**
* struct ieee80211_supported_band - frequency band definition
*
* This structure describes a frequency band a wiphy
* is able to operate in.
*
- * @channels: Array of channels the hardware can operate in
+ * @channels: Array of channels the hardware can operate with
* in this band.
* @band: the band this structure represents
* @n_channels: Number of channels in @channels
@@ -400,7 +523,9 @@ struct ieee80211_edmg {
* @n_bitrates: Number of bitrates in @bitrates
* @ht_cap: HT capabilities in this band
* @vht_cap: VHT capabilities in this band
+ * @s1g_cap: S1G capabilities in this band
* @edmg_cap: EDMG capabilities in this band
+ * @s1g_cap: S1G capabilities in this band (S1B band only, of course)
* @n_iftype_data: number of iftype data entries
* @iftype_data: interface type data entries. Note that the bits in
* @types_mask inside this structure cannot overlap (i.e. only
@@ -415,6 +540,7 @@ struct ieee80211_supported_band {
int n_bitrates;
struct ieee80211_sta_ht_cap ht_cap;
struct ieee80211_sta_vht_cap vht_cap;
+ struct ieee80211_sta_s1g_cap s1g_cap;
struct ieee80211_edmg edmg_cap;
u16 n_iftype_data;
const struct ieee80211_sband_iftype_data *iftype_data;
@@ -468,15 +594,43 @@ ieee80211_get_he_iftype_cap(const struct ieee80211_supported_band *sband,
}
/**
- * ieee80211_get_he_sta_cap - return HE capabilities for an sband's STA
+ * ieee80211_get_he_6ghz_capa - return HE 6 GHz capabilities
* @sband: the sband to search for the STA on
+ * @iftype: the iftype to search for
*
- * Return: pointer to the struct ieee80211_sta_he_cap, or NULL is none found
+ * Return: the 6GHz capabilities
*/
-static inline const struct ieee80211_sta_he_cap *
-ieee80211_get_he_sta_cap(const struct ieee80211_supported_band *sband)
+static inline __le16
+ieee80211_get_he_6ghz_capa(const struct ieee80211_supported_band *sband,
+ enum nl80211_iftype iftype)
{
- return ieee80211_get_he_iftype_cap(sband, NL80211_IFTYPE_STATION);
+ const struct ieee80211_sband_iftype_data *data =
+ ieee80211_get_sband_iftype_data(sband, iftype);
+
+ if (WARN_ON(!data || !data->he_cap.has_he))
+ return 0;
+
+ return data->he_6ghz_capa.capa;
+}
+
+/**
+ * ieee80211_get_eht_iftype_cap - return ETH capabilities for an sband's iftype
+ * @sband: the sband to search for the iftype on
+ * @iftype: enum nl80211_iftype
+ *
+ * Return: pointer to the struct ieee80211_sta_eht_cap, or NULL is none found
+ */
+static inline const struct ieee80211_sta_eht_cap *
+ieee80211_get_eht_iftype_cap(const struct ieee80211_supported_band *sband,
+ enum nl80211_iftype iftype)
+{
+ const struct ieee80211_sband_iftype_data *data =
+ ieee80211_get_sband_iftype_data(sband, iftype);
+
+ if (data && data->eht_cap.has_eht)
+ return &data->eht_cap;
+
+ return NULL;
}
/**
@@ -589,6 +743,7 @@ struct key_params {
* If edmg is requested (i.e. the .channels member is non-zero),
* chan will define the primary channel and all other
* parameters are ignored.
+ * @freq1_offset: offset from @center_freq1, in KHz
*/
struct cfg80211_chan_def {
struct ieee80211_channel *chan;
@@ -596,6 +751,80 @@ struct cfg80211_chan_def {
u32 center_freq1;
u32 center_freq2;
struct ieee80211_edmg edmg;
+ u16 freq1_offset;
+};
+
+/*
+ * cfg80211_bitrate_mask - masks for bitrate control
+ */
+struct cfg80211_bitrate_mask {
+ struct {
+ u32 legacy;
+ u8 ht_mcs[IEEE80211_HT_MCS_MASK_LEN];
+ u16 vht_mcs[NL80211_VHT_NSS_MAX];
+ u16 he_mcs[NL80211_HE_NSS_MAX];
+ enum nl80211_txrate_gi gi;
+ enum nl80211_he_gi he_gi;
+ enum nl80211_he_ltf he_ltf;
+ } control[NUM_NL80211_BANDS];
+};
+
+
+/**
+ * struct cfg80211_tid_cfg - TID specific configuration
+ * @config_override: Flag to notify driver to reset TID configuration
+ * of the peer.
+ * @tids: bitmap of TIDs to modify
+ * @mask: bitmap of attributes indicating which parameter changed,
+ * similar to &nl80211_tid_config_supp.
+ * @noack: noack configuration value for the TID
+ * @retry_long: retry count value
+ * @retry_short: retry count value
+ * @ampdu: Enable/Disable MPDU aggregation
+ * @rtscts: Enable/Disable RTS/CTS
+ * @amsdu: Enable/Disable MSDU aggregation
+ * @txrate_type: Tx bitrate mask type
+ * @txrate_mask: Tx bitrate to be applied for the TID
+ */
+struct cfg80211_tid_cfg {
+ bool config_override;
+ u8 tids;
+ u64 mask;
+ enum nl80211_tid_config noack;
+ u8 retry_long, retry_short;
+ enum nl80211_tid_config ampdu;
+ enum nl80211_tid_config rtscts;
+ enum nl80211_tid_config amsdu;
+ enum nl80211_tx_rate_setting txrate_type;
+ struct cfg80211_bitrate_mask txrate_mask;
+};
+
+/**
+ * struct cfg80211_tid_config - TID configuration
+ * @peer: Station's MAC address
+ * @n_tid_conf: Number of TID specific configurations to be applied
+ * @tid_conf: Configuration change info
+ */
+struct cfg80211_tid_config {
+ const u8 *peer;
+ u32 n_tid_conf;
+ struct cfg80211_tid_cfg tid_conf[];
+};
+
+/**
+ * struct cfg80211_fils_aad - FILS AAD data
+ * @macaddr: STA MAC address
+ * @kek: FILS KEK
+ * @kek_len: FILS KEK length
+ * @snonce: STA Nonce
+ * @anonce: AP Nonce
+ */
+struct cfg80211_fils_aad {
+ const u8 *macaddr;
+ const u8 *kek;
+ u8 kek_len;
+ const u8 *snonce;
+ const u8 *anonce;
};
/**
@@ -650,6 +879,7 @@ cfg80211_chandef_identical(const struct cfg80211_chan_def *chandef1,
return (chandef1->chan == chandef2->chan &&
chandef1->width == chandef2->width &&
chandef1->center_freq1 == chandef2->center_freq1 &&
+ chandef1->freq1_offset == chandef2->freq1_offset &&
chandef1->center_freq2 == chandef2->center_freq2);
}
@@ -709,19 +939,18 @@ int cfg80211_chandef_dfs_required(struct wiphy *wiphy,
enum nl80211_iftype iftype);
/**
- * ieee80211_chandef_rate_flags - returns rate flags for a channel
+ * ieee80211_chanwidth_rate_flags - return rate flags for channel width
+ * @width: the channel width of the channel
*
* In some channel types, not all rates may be used - for example CCK
* rates may not be used in 5/10 MHz channels.
*
- * @chandef: channel definition for the channel
- *
- * Returns: rate flags which apply for this channel
+ * Returns: rate flags which apply for this channel width
*/
static inline enum ieee80211_rate_flags
-ieee80211_chandef_rate_flags(struct cfg80211_chan_def *chandef)
+ieee80211_chanwidth_rate_flags(enum nl80211_chan_width width)
{
- switch (chandef->width) {
+ switch (width) {
case NL80211_CHAN_WIDTH_5:
return IEEE80211_RATE_SUPPORTS_5MHZ;
case NL80211_CHAN_WIDTH_10:
@@ -733,6 +962,20 @@ ieee80211_chandef_rate_flags(struct cfg80211_chan_def *chandef)
}
/**
+ * ieee80211_chandef_rate_flags - returns rate flags for a channel
+ * @chandef: channel definition for the channel
+ *
+ * See ieee80211_chanwidth_rate_flags().
+ *
+ * Returns: rate flags which apply for this channel
+ */
+static inline enum ieee80211_rate_flags
+ieee80211_chandef_rate_flags(struct cfg80211_chan_def *chandef)
+{
+ return ieee80211_chanwidth_rate_flags(chandef->width);
+}
+
+/**
* ieee80211_chandef_max_power - maximum transmission power for the chandef
*
* In some regulations, the transmit power may depend on the configured channel
@@ -760,6 +1003,17 @@ ieee80211_chandef_max_power(struct cfg80211_chan_def *chandef)
}
/**
+ * cfg80211_any_usable_channels - check for usable channels
+ * @wiphy: the wiphy to check for
+ * @band_mask: which bands to check on
+ * @prohibited_flags: which channels to not consider usable,
+ * %IEEE80211_CHAN_DISABLED is always taken into account
+ */
+bool cfg80211_any_usable_channels(struct wiphy *wiphy,
+ unsigned long band_mask,
+ u32 prohibited_flags);
+
+/**
* enum survey_info_flags - survey information flags
*
* @SURVEY_INFO_NOISE_DBM: noise (in dBm) was filled in
@@ -822,6 +1076,7 @@ struct survey_info {
};
#define CFG80211_MAX_WEP_KEYS 4
+#define CFG80211_MAX_NUM_AKM_SUITES 10
/**
* struct cfg80211_crypto_settings - Crypto settings
@@ -842,6 +1097,8 @@ struct survey_info {
* protocol frames.
* @control_port_over_nl80211: TRUE if userspace expects to exchange control
* port frames over NL80211 instead of the network interface.
+ * @control_port_no_preauth: disables pre-auth rx over the nl80211 control
+ * port for mac80211
* @wep_keys: static WEP keys, if not NULL points to an array of
* CFG80211_MAX_WEP_KEYS WEP keys
* @wep_tx_key: key index (0..3) of the default TX static WEP key
@@ -849,6 +1106,21 @@ struct survey_info {
* @sae_pwd: password for SAE authentication (for devices supporting SAE
* offload)
* @sae_pwd_len: length of SAE password (for devices supporting SAE offload)
+ * @sae_pwe: The mechanisms allowed for SAE PWE derivation:
+ *
+ * NL80211_SAE_PWE_UNSPECIFIED
+ * Not-specified, used to indicate userspace did not specify any
+ * preference. The driver should follow its internal policy in
+ * such a scenario.
+ *
+ * NL80211_SAE_PWE_HUNT_AND_PECK
+ * Allow hunting-and-pecking loop only
+ *
+ * NL80211_SAE_PWE_HASH_TO_ELEMENT
+ * Allow hash-to-element only
+ *
+ * NL80211_SAE_PWE_BOTH
+ * Allow either hunting-and-pecking loop or hash-to-element
*/
struct cfg80211_crypto_settings {
u32 wpa_versions;
@@ -856,20 +1128,53 @@ struct cfg80211_crypto_settings {
int n_ciphers_pairwise;
u32 ciphers_pairwise[NL80211_MAX_NR_CIPHER_SUITES];
int n_akm_suites;
- u32 akm_suites[NL80211_MAX_NR_AKM_SUITES];
+ u32 akm_suites[CFG80211_MAX_NUM_AKM_SUITES];
bool control_port;
__be16 control_port_ethertype;
bool control_port_no_encrypt;
bool control_port_over_nl80211;
+ bool control_port_no_preauth;
struct key_params *wep_keys;
int wep_tx_key;
const u8 *psk;
const u8 *sae_pwd;
u8 sae_pwd_len;
+ enum nl80211_sae_pwe_mechanism sae_pwe;
+};
+
+/**
+ * struct cfg80211_mbssid_config - AP settings for multi bssid
+ *
+ * @tx_wdev: pointer to the transmitted interface in the MBSSID set
+ * @index: index of this AP in the multi bssid group.
+ * @ema: set to true if the beacons should be sent out in EMA mode.
+ */
+struct cfg80211_mbssid_config {
+ struct wireless_dev *tx_wdev;
+ u8 index;
+ bool ema;
+};
+
+/**
+ * struct cfg80211_mbssid_elems - Multiple BSSID elements
+ *
+ * @cnt: Number of elements in array %elems.
+ *
+ * @elem: Array of multiple BSSID element(s) to be added into Beacon frames.
+ * @elem.data: Data for multiple BSSID elements.
+ * @elem.len: Length of data.
+ */
+struct cfg80211_mbssid_elems {
+ u8 cnt;
+ struct {
+ const u8 *data;
+ size_t len;
+ } elem[];
};
/**
* struct cfg80211_beacon_data - beacon data
+ * @link_id: the link ID for the AP MLD link sending this beacon
* @head: head portion of beacon (before TIM IE)
* or %NULL if not changed
* @tail: tail portion of beacon (after TIM IE)
@@ -886,6 +1191,7 @@ struct cfg80211_crypto_settings {
* @assocresp_ies_len: length of assocresp_ies in octets
* @probe_resp_len: length of probe response template (@probe_resp)
* @probe_resp: probe response template (AP mode only)
+ * @mbssid_ies: multiple BSSID elements
* @ftm_responder: enable FTM responder functionality; -1 for no change
* (which also implies no change in LCI/civic location data)
* @lci: Measurement Report element content, starting with Measurement Token
@@ -894,8 +1200,13 @@ struct cfg80211_crypto_settings {
* Token (measurement type 11)
* @lci_len: LCI data length
* @civicloc_len: Civic location data length
+ * @he_bss_color: BSS Color settings
+ * @he_bss_color_valid: indicates whether bss color
+ * attribute is present in beacon data or not.
*/
struct cfg80211_beacon_data {
+ unsigned int link_id;
+
const u8 *head, *tail;
const u8 *beacon_ies;
const u8 *proberesp_ies;
@@ -903,6 +1214,7 @@ struct cfg80211_beacon_data {
const u8 *probe_resp;
const u8 *lci;
const u8 *civicloc;
+ struct cfg80211_mbssid_elems *mbssid_ies;
s8 ftm_responder;
size_t head_len, tail_len;
@@ -912,6 +1224,8 @@ struct cfg80211_beacon_data {
size_t probe_resp_len;
size_t lci_len;
size_t civicloc_len;
+ struct cfg80211_he_bss_color he_bss_color;
+ bool he_bss_color_valid;
};
struct mac_address {
@@ -934,27 +1248,37 @@ struct cfg80211_acl_data {
struct mac_address mac_addrs[];
};
-/*
- * cfg80211_bitrate_mask - masks for bitrate control
+/**
+ * struct cfg80211_fils_discovery - FILS discovery parameters from
+ * IEEE Std 802.11ai-2016, Annex C.3 MIB detail.
+ *
+ * @min_interval: Minimum packet interval in TUs (0 - 10000)
+ * @max_interval: Maximum packet interval in TUs (0 - 10000)
+ * @tmpl_len: Template length
+ * @tmpl: Template data for FILS discovery frame including the action
+ * frame headers.
*/
-struct cfg80211_bitrate_mask {
- struct {
- u32 legacy;
- u8 ht_mcs[IEEE80211_HT_MCS_MASK_LEN];
- u16 vht_mcs[NL80211_VHT_NSS_MAX];
- enum nl80211_txrate_gi gi;
- } control[NUM_NL80211_BANDS];
+struct cfg80211_fils_discovery {
+ u32 min_interval;
+ u32 max_interval;
+ size_t tmpl_len;
+ const u8 *tmpl;
};
/**
- * enum cfg80211_ap_settings_flags - AP settings flags
+ * struct cfg80211_unsol_bcast_probe_resp - Unsolicited broadcast probe
+ * response parameters in 6GHz.
*
- * Used by cfg80211_ap_settings
- *
- * @AP_SETTINGS_EXTERNAL_AUTH_SUPPORT: AP supports external authentication
+ * @interval: Packet interval in TUs. Maximum allowed is 20 TU, as mentioned
+ * in IEEE P802.11ax/D6.0 26.17.2.3.2 - AP behavior for fast passive
+ * scanning
+ * @tmpl_len: Template length
+ * @tmpl: Template data for probe response
*/
-enum cfg80211_ap_settings_flags {
- AP_SETTINGS_EXTERNAL_AUTH_SUPPORT = BIT(0),
+struct cfg80211_unsol_bcast_probe_resp {
+ u32 interval;
+ size_t tmpl_len;
+ const u8 *tmpl;
};
/**
@@ -985,11 +1309,19 @@ enum cfg80211_ap_settings_flags {
* @ht_cap: HT capabilities (or %NULL if HT isn't enabled)
* @vht_cap: VHT capabilities (or %NULL if VHT isn't enabled)
* @he_cap: HE capabilities (or %NULL if HE isn't enabled)
+ * @eht_cap: EHT capabilities (or %NULL if EHT isn't enabled)
+ * @eht_oper: EHT operation IE (or %NULL if EHT isn't enabled)
* @ht_required: stations must support HT
* @vht_required: stations must support VHT
* @twt_responder: Enable Target Wait Time
+ * @he_required: stations must support HE
+ * @sae_h2e_required: stations must support direct H2E technique in SAE
* @flags: flags, as defined in enum cfg80211_ap_settings_flags
* @he_obss_pd: OBSS Packet Detection settings
+ * @he_oper: HE operation IE (or %NULL if HE isn't enabled)
+ * @fils_discovery: FILS discovery transmission parameters
+ * @unsol_bcast_probe_resp: Unsolicited broadcast probe response parameters
+ * @mbssid_config: AP settings for multiple bssid
*/
struct cfg80211_ap_settings {
struct cfg80211_chan_def chandef;
@@ -1014,10 +1346,16 @@ struct cfg80211_ap_settings {
const struct ieee80211_ht_cap *ht_cap;
const struct ieee80211_vht_cap *vht_cap;
const struct ieee80211_he_cap_elem *he_cap;
- bool ht_required, vht_required;
+ const struct ieee80211_he_operation *he_oper;
+ const struct ieee80211_eht_cap_elem *eht_cap;
+ const struct ieee80211_eht_operation *eht_oper;
+ bool ht_required, vht_required, he_required, sae_h2e_required;
bool twt_responder;
u32 flags;
struct ieee80211_he_obss_pd he_obss_pd;
+ struct cfg80211_fils_discovery fils_discovery;
+ struct cfg80211_unsol_bcast_probe_resp unsol_bcast_probe_resp;
+ struct cfg80211_mbssid_config mbssid_config;
};
/**
@@ -1049,7 +1387,26 @@ struct cfg80211_csa_settings {
u8 count;
};
-#define CFG80211_MAX_NUM_DIFFERENT_CHANNELS 10
+/**
+ * struct cfg80211_color_change_settings - color change settings
+ *
+ * Used for bss color change
+ *
+ * @beacon_color_change: beacon data while performing the color countdown
+ * @counter_offset_beacon: offsets of the counters within the beacon (tail)
+ * @counter_offset_presp: offsets of the counters within the probe response
+ * @beacon_next: beacon data to be used after the color change
+ * @count: number of beacons until the color change
+ * @color: the color used after the change
+ */
+struct cfg80211_color_change_settings {
+ struct cfg80211_beacon_data beacon_color_change;
+ u16 counter_offset_beacon;
+ u16 counter_offset_presp;
+ struct cfg80211_beacon_data beacon_next;
+ u8 count;
+ u8 color;
+};
/**
* struct iface_combination_params - input parameters for interface combinations
@@ -1080,6 +1437,7 @@ struct iface_combination_params {
* @STATION_PARAM_APPLY_UAPSD: apply new uAPSD parameters (uapsd_queues, max_sp)
* @STATION_PARAM_APPLY_CAPABILITY: apply new capability
* @STATION_PARAM_APPLY_PLINK_STATE: apply new plink state
+ * @STATION_PARAM_APPLY_STA_TXPOWER: apply tx power for STA
*
* Not all station parameters have in-band "no change" signalling,
* for those that don't these flags will are used.
@@ -1088,7 +1446,6 @@ enum station_parameters_apply_mask {
STATION_PARAM_APPLY_UAPSD = BIT(0),
STATION_PARAM_APPLY_CAPABILITY = BIT(1),
STATION_PARAM_APPLY_PLINK_STATE = BIT(2),
- STATION_PARAM_APPLY_STA_TXPOWER = BIT(3),
};
/**
@@ -1112,14 +1469,66 @@ struct sta_txpwr {
};
/**
- * struct station_parameters - station parameters
+ * struct link_station_parameters - link station parameters
*
- * Used to change and create a new station.
+ * Used to change and create a new link station.
*
- * @vlan: vlan interface station should belong to
+ * @mld_mac: MAC address of the station
+ * @link_id: the link id (-1 for non-MLD station)
+ * @link_mac: MAC address of the link
* @supported_rates: supported rates in IEEE 802.11 format
* (or NULL for no change)
* @supported_rates_len: number of supported rates
+ * @ht_capa: HT capabilities of station
+ * @vht_capa: VHT capabilities of station
+ * @opmode_notif: operating mode field from Operating Mode Notification
+ * @opmode_notif_used: information if operating mode field is used
+ * @he_capa: HE capabilities of station
+ * @he_capa_len: the length of the HE capabilities
+ * @txpwr: transmit power for an associated station
+ * @txpwr_set: txpwr field is set
+ * @he_6ghz_capa: HE 6 GHz Band capabilities of station
+ * @eht_capa: EHT capabilities of station
+ * @eht_capa_len: the length of the EHT capabilities
+ */
+struct link_station_parameters {
+ const u8 *mld_mac;
+ int link_id;
+ const u8 *link_mac;
+ const u8 *supported_rates;
+ u8 supported_rates_len;
+ const struct ieee80211_ht_cap *ht_capa;
+ const struct ieee80211_vht_cap *vht_capa;
+ u8 opmode_notif;
+ bool opmode_notif_used;
+ const struct ieee80211_he_cap_elem *he_capa;
+ u8 he_capa_len;
+ struct sta_txpwr txpwr;
+ bool txpwr_set;
+ const struct ieee80211_he_6ghz_capa *he_6ghz_capa;
+ const struct ieee80211_eht_cap_elem *eht_capa;
+ u8 eht_capa_len;
+};
+
+/**
+ * struct link_station_del_parameters - link station deletion parameters
+ *
+ * Used to delete a link station entry (or all stations).
+ *
+ * @mld_mac: MAC address of the station
+ * @link_id: the link id
+ */
+struct link_station_del_parameters {
+ const u8 *mld_mac;
+ u32 link_id;
+};
+
+/**
+ * struct station_parameters - station parameters
+ *
+ * Used to change and create a new station.
+ *
+ * @vlan: vlan interface station should belong to
* @sta_flags_mask: station flags that changed
* (bitmask of BIT(%NL80211_STA_FLAG_...))
* @sta_flags_set: station flags values
@@ -1130,8 +1539,6 @@ struct sta_txpwr {
* @peer_aid: mesh peer AID or zero for no change
* @plink_action: plink action to take
* @plink_state: set the peer link state for a station
- * @ht_capa: HT capabilities of station
- * @vht_capa: VHT capabilities of station
* @uapsd_queues: bitmap of queues configured for uapsd. same format
* as the AC bitmap in the QoS info field
* @max_sp: max Service Period. same format as the MAX_SP in the
@@ -1148,15 +1555,11 @@ struct sta_txpwr {
* @supported_channels_len: number of supported channels
* @supported_oper_classes: supported oper classes in IEEE 802.11 format
* @supported_oper_classes_len: number of supported operating classes
- * @opmode_notif: operating mode field from Operating Mode Notification
- * @opmode_notif_used: information if operating mode field is used
* @support_p2p_ps: information if station supports P2P PS mechanism
- * @he_capa: HE capabilities of station
- * @he_capa_len: the length of the HE capabilities
* @airtime_weight: airtime scheduler weight for this station
+ * @link_sta_params: link related params.
*/
struct station_parameters {
- const u8 *supported_rates;
struct net_device *vlan;
u32 sta_flags_mask, sta_flags_set;
u32 sta_modify_mask;
@@ -1164,11 +1567,8 @@ struct station_parameters {
u16 aid;
u16 vlan_id;
u16 peer_aid;
- u8 supported_rates_len;
u8 plink_action;
u8 plink_state;
- const struct ieee80211_ht_cap *ht_capa;
- const struct ieee80211_vht_cap *vht_capa;
u8 uapsd_queues;
u8 max_sp;
enum nl80211_mesh_power_mode local_pm;
@@ -1179,13 +1579,9 @@ struct station_parameters {
u8 supported_channels_len;
const u8 *supported_oper_classes;
u8 supported_oper_classes_len;
- u8 opmode_notif;
- bool opmode_notif_used;
int support_p2p_ps;
- const struct ieee80211_he_cap_elem *he_capa;
- u8 he_capa_len;
u16 airtime_weight;
- struct sta_txpwr txpwr;
+ struct link_station_parameters link_sta_params;
};
/**
@@ -1251,7 +1647,7 @@ int cfg80211_check_station_change(struct wiphy *wiphy,
enum cfg80211_station_type statype);
/**
- * enum station_info_rate_flags - bitrate info flags
+ * enum rate_info_flags - bitrate info flags
*
* Used by the driver to indicate the specific rate transmission
* type for 802.11n transmissions.
@@ -1262,6 +1658,8 @@ int cfg80211_check_station_change(struct wiphy *wiphy,
* @RATE_INFO_FLAGS_DMG: 60GHz MCS
* @RATE_INFO_FLAGS_HE_MCS: HE MCS information
* @RATE_INFO_FLAGS_EDMG: 60GHz MCS in EDMG mode
+ * @RATE_INFO_FLAGS_EXTENDED_SC_DMG: 60GHz extended SC MCS
+ * @RATE_INFO_FLAGS_EHT_MCS: EHT MCS information
*/
enum rate_info_flags {
RATE_INFO_FLAGS_MCS = BIT(0),
@@ -1270,6 +1668,8 @@ enum rate_info_flags {
RATE_INFO_FLAGS_DMG = BIT(3),
RATE_INFO_FLAGS_HE_MCS = BIT(4),
RATE_INFO_FLAGS_EDMG = BIT(5),
+ RATE_INFO_FLAGS_EXTENDED_SC_DMG = BIT(6),
+ RATE_INFO_FLAGS_EHT_MCS = BIT(7),
};
/**
@@ -1284,6 +1684,8 @@ enum rate_info_flags {
* @RATE_INFO_BW_80: 80 MHz bandwidth
* @RATE_INFO_BW_160: 160 MHz bandwidth
* @RATE_INFO_BW_HE_RU: bandwidth determined by HE RU allocation
+ * @RATE_INFO_BW_320: 320 MHz bandwidth
+ * @RATE_INFO_BW_EHT_RU: bandwidth determined by EHT RU allocation
*/
enum rate_info_bw {
RATE_INFO_BW_20 = 0,
@@ -1293,6 +1695,8 @@ enum rate_info_bw {
RATE_INFO_BW_80,
RATE_INFO_BW_160,
RATE_INFO_BW_HE_RU,
+ RATE_INFO_BW_320,
+ RATE_INFO_BW_EHT_RU,
};
/**
@@ -1310,6 +1714,9 @@ enum rate_info_bw {
* @he_ru_alloc: HE RU allocation (from &enum nl80211_he_ru_alloc,
* only valid if bw is %RATE_INFO_BW_HE_RU)
* @n_bonded_ch: In case of EDMG the number of bonded channels (1-4)
+ * @eht_gi: EHT guard interval (from &enum nl80211_eht_gi)
+ * @eht_ru_alloc: EHT RU allocation (from &enum nl80211_eht_ru_alloc,
+ * only valid if bw is %RATE_INFO_BW_EHT_RU)
*/
struct rate_info {
u8 flags;
@@ -1321,10 +1728,12 @@ struct rate_info {
u8 he_dcm;
u8 he_ru_alloc;
u8 n_bonded_ch;
+ u8 eht_gi;
+ u8 eht_ru_alloc;
};
/**
- * enum station_info_rate_flags - bitrate info flags
+ * enum bss_param_flags - bitrate info flags
*
* Used by the driver to indicate the specific rate transmission
* type for 802.11n transmissions.
@@ -1472,6 +1881,7 @@ struct cfg80211_tid_stats {
* an FCS error. This counter should be incremented only when TA of the
* received packet with an FCS error matches the peer MAC address.
* @airtime_link_metric: mesh airtime link metric.
+ * @connected_to_as: true if mesh STA has a path to authentication server
*/
struct station_info {
u64 filled;
@@ -1529,6 +1939,56 @@ struct station_info {
u32 fcs_err_count;
u32 airtime_link_metric;
+
+ u8 connected_to_as;
+};
+
+/**
+ * struct cfg80211_sar_sub_specs - sub specs limit
+ * @power: power limitation in 0.25dbm
+ * @freq_range_index: index the power limitation applies to
+ */
+struct cfg80211_sar_sub_specs {
+ s32 power;
+ u32 freq_range_index;
+};
+
+/**
+ * struct cfg80211_sar_specs - sar limit specs
+ * @type: it's set with power in 0.25dbm or other types
+ * @num_sub_specs: number of sar sub specs
+ * @sub_specs: memory to hold the sar sub specs
+ */
+struct cfg80211_sar_specs {
+ enum nl80211_sar_type type;
+ u32 num_sub_specs;
+ struct cfg80211_sar_sub_specs sub_specs[];
+};
+
+
+/**
+ * struct cfg80211_sar_freq_ranges - sar frequency ranges
+ * @start_freq: start range edge frequency
+ * @end_freq: end range edge frequency
+ */
+struct cfg80211_sar_freq_ranges {
+ u32 start_freq;
+ u32 end_freq;
+};
+
+/**
+ * struct cfg80211_sar_capa - sar limit capability
+ * @type: it's set via power in 0.25dbm or other types
+ * @num_freq_ranges: number of frequency ranges
+ * @freq_ranges: memory to hold the freq ranges.
+ *
+ * Note: WLAN driver may append new ranges or split an existing
+ * range to small ones and then append them.
+ */
+struct cfg80211_sar_capa {
+ enum nl80211_sar_type type;
+ u32 num_freq_ranges;
+ const struct cfg80211_sar_freq_ranges *freq_ranges;
};
#if IS_ENABLED(CONFIG_CFG80211)
@@ -1655,8 +2115,9 @@ struct mpath_info {
* (or NULL for no change)
* @basic_rates_len: number of basic rates
* @ap_isolate: do not forward packets between connected stations
+ * (0 = no, 1 = yes, -1 = do not change)
* @ht_opmode: HT Operation mode
- * (u16 = opmode, -1 = do not change)
+ * (u16 = opmode, -1 = do not change)
* @p2p_ctwindow: P2P CT Window (-1 = no change)
* @p2p_opp_ps: P2P opportunistic PS (-1 = no change)
*/
@@ -1740,10 +2201,18 @@ struct bss_parameters {
* @plink_timeout: If no tx activity is seen from a STA we've established
* peering with for longer than this time (in seconds), then remove it
* from the STA's list of peers. Default is 30 minutes.
+ * @dot11MeshConnectedToAuthServer: if set to true then this mesh STA
+ * will advertise that it is connected to a authentication server
+ * in the mesh formation field.
* @dot11MeshConnectedToMeshGate: if set to true, advertise that this STA is
* connected to a mesh gate in mesh formation info. If false, the
* value in mesh formation is determined by the presence of root paths
* in the mesh path table
+ * @dot11MeshNolearn: Try to avoid multi-hop path discovery (e.g. PREQ/PREP
+ * for HWMP) if the destination is a direct neighbor. Note that this might
+ * not be the optimal decision as a multi-hop route might be better. So
+ * if using this setting you will likely also want to disable
+ * dot11MeshForwarding and use another mesh routing protocol on top.
*/
struct mesh_config {
u16 dot11MeshRetryTimeout;
@@ -1764,6 +2233,7 @@ struct mesh_config {
u16 dot11MeshHWMPnetDiameterTraversalTime;
u8 dot11MeshHWMPRootMode;
bool dot11MeshConnectedToMeshGate;
+ bool dot11MeshConnectedToAuthServer;
u16 dot11MeshHWMPRannInterval;
bool dot11MeshGateAnnouncementProtocol;
bool dot11MeshForwarding;
@@ -1775,6 +2245,7 @@ struct mesh_config {
enum nl80211_mesh_power_mode power_mode;
u16 dot11MeshAwakeWindowDuration;
u32 plink_timeout;
+ bool dot11MeshNolearn;
};
/**
@@ -1845,6 +2316,7 @@ struct ocb_setup {
* @cwmax: Maximum contention window [a value of the form 2^n-1 in the range
* 1..32767]
* @aifs: Arbitration interframe space [0..255]
+ * @link_id: link_id or -1 for non-MLD
*/
struct ieee80211_txq_params {
enum nl80211_ac ac;
@@ -1852,6 +2324,7 @@ struct ieee80211_txq_params {
u16 cwmin;
u16 cwmax;
u8 aifs;
+ int link_id;
};
/**
@@ -1903,6 +2376,27 @@ struct cfg80211_scan_info {
};
/**
+ * struct cfg80211_scan_6ghz_params - relevant for 6 GHz only
+ *
+ * @short_ssid: short ssid to scan for
+ * @bssid: bssid to scan for
+ * @channel_idx: idx of the channel in the channel array in the scan request
+ * which the above info relvant to
+ * @unsolicited_probe: the AP transmits unsolicited probe response every 20 TU
+ * @short_ssid_valid: @short_ssid is valid and can be used
+ * @psc_no_listen: when set, and the channel is a PSC channel, no need to wait
+ * 20 TUs before starting to send probe requests.
+ */
+struct cfg80211_scan_6ghz_params {
+ u32 short_ssid;
+ u32 channel_idx;
+ u8 bssid[ETH_ALEN];
+ bool unsolicited_probe;
+ bool short_ssid_valid;
+ bool psc_no_listen;
+};
+
+/**
* struct cfg80211_scan_request - scan request description
*
* @ssids: SSIDs to scan for (active scan only)
@@ -1929,6 +2423,10 @@ struct cfg80211_scan_info {
* @mac_addr_mask: MAC address mask used with randomisation, bits that
* are 0 in the mask should be randomised, bits that are 1 should
* be taken from the @mac_addr
+ * @scan_6ghz: relevant for split scan request only,
+ * true if this is the second scan request
+ * @n_6ghz_params: number of 6 GHz params
+ * @scan_6ghz_params: 6 GHz params
* @bssid: BSSID to scan for (most commonly, the wildcard BSSID)
*/
struct cfg80211_scan_request {
@@ -1956,9 +2454,12 @@ struct cfg80211_scan_request {
struct cfg80211_scan_info info;
bool notified;
bool no_cck;
+ bool scan_6ghz;
+ u32 n_6ghz_params;
+ struct cfg80211_scan_6ghz_params *scan_6ghz_params;
/* keep last */
- struct ieee80211_channel *channels[0];
+ struct ieee80211_channel *channels[];
};
static inline void get_random_mask_addr(u8 *buf, const u8 *addr, const u8 *mask)
@@ -2032,8 +2533,8 @@ struct cfg80211_bss_select_adjust {
* @ie_len: length of ie in octets
* @flags: bit field of flags controlling operation
* @match_sets: sets of parameters to be matched for a scan result
- * entry to be considered valid and to be passed to the host
- * (others are filtered out).
+ * entry to be considered valid and to be passed to the host
+ * (others are filtered out).
* If ommited, all results are passed.
* @n_match_sets: number of match sets
* @report_results: indicates that results were reported for this request
@@ -2104,7 +2605,7 @@ struct cfg80211_sched_scan_request {
struct list_head list;
/* keep last */
- struct ieee80211_channel *channels[0];
+ struct ieee80211_channel *channels[];
};
/**
@@ -2226,7 +2727,7 @@ struct cfg80211_bss {
u8 bssid_index;
u8 max_bssid_indicator;
- u8 priv[0] __aligned(sizeof(void *));
+ u8 priv[] __aligned(sizeof(void *));
};
/**
@@ -2251,7 +2752,7 @@ const struct element *ieee80211_bss_get_elem(struct cfg80211_bss *bss, u8 id);
*/
static inline const u8 *ieee80211_bss_get_ie(struct cfg80211_bss *bss, u8 id)
{
- return (void *)ieee80211_bss_get_elem(bss, id);
+ return (const void *)ieee80211_bss_get_elem(bss, id);
}
@@ -2274,6 +2775,12 @@ static inline const u8 *ieee80211_bss_get_ie(struct cfg80211_bss *bss, u8 id)
* Authentication algorithm number, i.e., starting at the Authentication
* transaction sequence number field.
* @auth_data_len: Length of auth_data buffer in octets
+ * @link_id: if >= 0, indicates authentication should be done as an MLD,
+ * the interface address is included as the MLD address and the
+ * necessary link (with the given link_id) will be created (and
+ * given an MLD address) by the driver
+ * @ap_mld_addr: AP MLD address in case of authentication request with
+ * an AP MLD, valid iff @link_id >= 0
*/
struct cfg80211_auth_request {
struct cfg80211_bss *bss;
@@ -2281,9 +2788,25 @@ struct cfg80211_auth_request {
size_t ie_len;
enum nl80211_auth_type auth_type;
const u8 *key;
- u8 key_len, key_idx;
+ u8 key_len;
+ s8 key_idx;
const u8 *auth_data;
size_t auth_data_len;
+ s8 link_id;
+ const u8 *ap_mld_addr;
+};
+
+/**
+ * struct cfg80211_assoc_link - per-link information for MLO association
+ * @bss: the BSS pointer, see also &struct cfg80211_assoc_request::bss;
+ * if this is %NULL for a link, that link is not requested
+ * @elems: extra elements for the per-STA profile for this link
+ * @elems_len: length of the elements
+ */
+struct cfg80211_assoc_link {
+ struct cfg80211_bss *bss;
+ const u8 *elems;
+ size_t elems_len;
};
/**
@@ -2296,12 +2819,20 @@ struct cfg80211_auth_request {
* authentication capability. Drivers can offload authentication to
* userspace if this flag is set. Only applicable for cfg80211_connect()
* request (connect callback).
+ * @ASSOC_REQ_DISABLE_HE: Disable HE
+ * @ASSOC_REQ_DISABLE_EHT: Disable EHT
+ * @CONNECT_REQ_MLO_SUPPORT: Userspace indicates support for handling MLD links.
+ * Drivers shall disable MLO features for the current association if this
+ * flag is not set.
*/
enum cfg80211_assoc_req_flags {
ASSOC_REQ_DISABLE_HT = BIT(0),
ASSOC_REQ_DISABLE_VHT = BIT(1),
ASSOC_REQ_USE_RRM = BIT(2),
CONNECT_REQ_EXTERNAL_AUTH_SUPPORT = BIT(3),
+ ASSOC_REQ_DISABLE_HE = BIT(4),
+ ASSOC_REQ_DISABLE_EHT = BIT(5),
+ CONNECT_REQ_MLO_SUPPORT = BIT(6),
};
/**
@@ -2313,6 +2844,8 @@ enum cfg80211_assoc_req_flags {
* given a reference that it must give back to cfg80211_send_rx_assoc()
* or to cfg80211_assoc_timeout(). To ensure proper refcounting, new
* association requests while already associating must be rejected.
+ * This also applies to the @links.bss parameter, which is used instead
+ * of this one (it is %NULL) for MLO associations.
* @ie: Extra IEs to add to (Re)Association Request frame or %NULL
* @ie_len: Length of ie buffer in octets
* @use_mfp: Use management frame protection (IEEE 802.11w) in this association
@@ -2335,6 +2868,13 @@ enum cfg80211_assoc_req_flags {
* @fils_nonces: FILS nonces (part of AAD) for protecting (Re)Association
* Request/Response frame or %NULL if FILS is not used. This field starts
* with 16 octets of STA Nonce followed by 16 octets of AP Nonce.
+ * @s1g_capa: S1G capability override
+ * @s1g_capa_mask: S1G capability override mask
+ * @links: per-link information for MLO connections
+ * @link_id: >= 0 for MLO connections, where links are given, and indicates
+ * the link on which the association request should be sent
+ * @ap_mld_addr: AP MLD address in case of MLO association request,
+ * valid iff @link_id >= 0
*/
struct cfg80211_assoc_request {
struct cfg80211_bss *bss;
@@ -2349,6 +2889,10 @@ struct cfg80211_assoc_request {
const u8 *fils_kek;
size_t fils_kek_len;
const u8 *fils_nonces;
+ struct ieee80211_s1g_cap s1g_capa, s1g_capa_mask;
+ struct cfg80211_assoc_link links[IEEE80211_MLD_MAX_NUM_LINKS];
+ const u8 *ap_mld_addr;
+ s8 link_id;
};
/**
@@ -2357,7 +2901,7 @@ struct cfg80211_assoc_request {
* This structure provides information needed to complete IEEE 802.11
* deauthentication.
*
- * @bssid: the BSSID of the BSS to deauthenticate from
+ * @bssid: the BSSID or AP MLD address to deauthenticate from
* @ie: Extra IEs to add to Deauthentication frame or %NULL
* @ie_len: Length of ie buffer in octets
* @reason_code: The reason code for the deauthentication
@@ -2378,7 +2922,7 @@ struct cfg80211_deauth_request {
* This structure provides information needed to complete IEEE 802.11
* disassociation.
*
- * @bss: the BSS to disassociate from
+ * @ap_addr: the BSSID or AP MLD address to disassociate from
* @ie: Extra IEs to add to Disassociation frame or %NULL
* @ie_len: Length of ie buffer in octets
* @reason_code: The reason code for the disassociation
@@ -2386,7 +2930,7 @@ struct cfg80211_deauth_request {
* Disassociation frame is to be transmitted.
*/
struct cfg80211_disassoc_request {
- struct cfg80211_bss *bss;
+ const u8 *ap_addr;
const u8 *ie;
size_t ie_len;
u16 reason_code;
@@ -2426,7 +2970,7 @@ struct cfg80211_disassoc_request {
* will be used in ht_capa. Un-supported values will be ignored.
* @ht_capa_mask: The bits of ht_capa which are to be used.
* @wep_keys: static WEP keys, if not NULL points to an array of
- * CFG80211_MAX_WEP_KEYS WEP keys
+ * CFG80211_MAX_WEP_KEYS WEP keys
* @wep_tx_key: key index (0..3) of the default TX static WEP key
*/
struct cfg80211_ibss_params {
@@ -2631,6 +3175,17 @@ enum wiphy_params_flags {
* @cache_id: 2-octet cache identifier advertized by a FILS AP identifying the
* scope of PMKSA. This is valid only if @ssid_len is non-zero (may be
* %NULL).
+ * @pmk_lifetime: Maximum lifetime for PMKSA in seconds
+ * (dot11RSNAConfigPMKLifetime) or 0 if not specified.
+ * The configured PMKSA must not be used for PMKSA caching after
+ * expiration and any keys derived from this PMK become invalid on
+ * expiration, i.e., the current association must be dropped if the PMK
+ * used for it expires.
+ * @pmk_reauth_threshold: Threshold time for reauthentication (percentage of
+ * PMK lifetime, dot11RSNAConfigPMKReauthThreshold) or 0 if not specified.
+ * Drivers are expected to trigger a full authentication instead of using
+ * this PMKSA for caching when reassociating to a new BSS after this
+ * threshold to generate a new PMK before the current one expires.
*/
struct cfg80211_pmksa {
const u8 *bssid;
@@ -2640,6 +3195,8 @@ struct cfg80211_pmksa {
const u8 *ssid;
size_t ssid_len;
const u8 *cache_id;
+ u32 pmk_lifetime;
+ u8 pmk_reauth_threshold;
};
/**
@@ -2814,12 +3371,17 @@ struct cfg80211_wowlan_wakeup {
/**
* struct cfg80211_gtk_rekey_data - rekey data
- * @kek: key encryption key (NL80211_KEK_LEN bytes)
- * @kck: key confirmation key (NL80211_KCK_LEN bytes)
+ * @kek: key encryption key (@kek_len bytes)
+ * @kck: key confirmation key (@kck_len bytes)
* @replay_ctr: replay counter (NL80211_REPLAY_CTR_LEN bytes)
+ * @kek_len: length of kek
+ * @kck_len: length of kck
+ * @akm: akm (oui, id)
*/
struct cfg80211_gtk_rekey_data {
const u8 *kek, *kck, *replay_ctr;
+ u32 akm;
+ u8 kek_len, kck_len;
};
/**
@@ -2851,6 +3413,9 @@ struct cfg80211_update_ft_ies_params {
* @dont_wait_for_ack: tells the low level not to wait for an ack
* @n_csa_offsets: length of csa_offsets array
* @csa_offsets: array of all the csa offsets in the frame
+ * @link_id: for MLO, the link ID to transmit on, -1 if not given; note
+ * that the link ID isn't validated (much), it's in range but the
+ * link might not exist (or be used by the receiver STA)
*/
struct cfg80211_mgmt_tx_params {
struct ieee80211_channel *chan;
@@ -2862,6 +3427,7 @@ struct cfg80211_mgmt_tx_params {
bool dont_wait_for_ack;
int n_csa_offsets;
const u16 *csa_offsets;
+ int link_id;
};
/**
@@ -3175,6 +3741,7 @@ struct cfg80211_pmsr_ftm_result {
* @type: type of the measurement reported, note that we only support reporting
* one type at a time, but you can report multiple results separately and
* they're all aggregated for userspace.
+ * @ftm: FTM result
*/
struct cfg80211_pmsr_result {
u64 host_time, ap_tsf;
@@ -3204,6 +3771,17 @@ struct cfg80211_pmsr_result {
* @ftmr_retries: number of retries for FTM request
* @request_lci: request LCI information
* @request_civicloc: request civic location information
+ * @trigger_based: use trigger based ranging for the measurement
+ * If neither @trigger_based nor @non_trigger_based is set,
+ * EDCA based ranging will be used.
+ * @non_trigger_based: use non trigger based ranging for the measurement
+ * If neither @trigger_based nor @non_trigger_based is set,
+ * EDCA based ranging will be used.
+ * @lmr_feedback: negotiate for I2R LMR feedback. Only valid if either
+ * @trigger_based or @non_trigger_based is set.
+ * @bss_color: the bss color of the responder. Optional. Set to zero to
+ * indicate the driver should set the BSS color. Only valid if
+ * @non_trigger_based or @trigger_based is set.
*
* See also nl80211 for the respective attribute documentation.
*/
@@ -3213,11 +3791,15 @@ struct cfg80211_pmsr_ftm_request_peer {
u8 requested:1,
asap:1,
request_lci:1,
- request_civicloc:1;
+ request_civicloc:1,
+ trigger_based:1,
+ non_trigger_based:1,
+ lmr_feedback:1;
u8 num_bursts_exp;
u8 burst_duration;
u8 ftms_per_burst;
u8 ftmr_retries;
+ u8 bss_color;
};
/**
@@ -3293,6 +3875,21 @@ struct cfg80211_update_owe_info {
};
/**
+ * struct mgmt_frame_regs - management frame registrations data
+ * @global_stypes: bitmap of management frame subtypes registered
+ * for the entire device
+ * @interface_stypes: bitmap of management frame subtypes registered
+ * for the given interface
+ * @global_mcast_stypes: mcast RX is needed globally for these subtypes
+ * @interface_mcast_stypes: mcast RX is needed on this interface
+ * for these subtypes
+ */
+struct mgmt_frame_regs {
+ u32 global_stypes, interface_stypes;
+ u32 global_mcast_stypes, interface_mcast_stypes;
+};
+
+/**
* struct cfg80211_ops - backend description for wireless configuration
*
* This struct is registered by fullmac card drivers and/or wireless stacks
@@ -3301,9 +3898,10 @@ struct cfg80211_update_owe_info {
* All callbacks except where otherwise noted should return 0
* on success or a negative error code.
*
- * All operations are currently invoked under rtnl for consistency with the
- * wireless extensions but this is subject to reevaluation as soon as this
- * code is used more widely and we have a first user without wext.
+ * All operations are invoked with the wiphy mutex held. The RTNL may be
+ * held in addition (due to wireless extensions) but this cannot be relied
+ * upon except in cases where documented below. Note that due to ordering,
+ * the RTNL also cannot be acquired in any handlers.
*
* @suspend: wiphy device needs to be suspended. The variable @wow will
* be %NULL or contain the enabled Wake-on-Wireless triggers that are
@@ -3318,27 +3916,48 @@ struct cfg80211_update_owe_info {
* the new netdev in the wiphy's network namespace! Returns the struct
* wireless_dev, or an ERR_PTR. For P2P device wdevs, the driver must
* also set the address member in the wdev.
+ * This additionally holds the RTNL to be able to do netdev changes.
*
* @del_virtual_intf: remove the virtual interface
+ * This additionally holds the RTNL to be able to do netdev changes.
*
* @change_virtual_intf: change type/configuration of virtual interface,
* keep the struct wireless_dev's iftype updated.
+ * This additionally holds the RTNL to be able to do netdev changes.
+ *
+ * @add_intf_link: Add a new MLO link to the given interface. Note that
+ * the wdev->link[] data structure has been updated, so the new link
+ * address is available.
+ * @del_intf_link: Remove an MLO link from the given interface.
*
* @add_key: add a key with the given parameters. @mac_addr will be %NULL
- * when adding a group key.
+ * when adding a group key. @link_id will be -1 for non-MLO connection.
+ * For MLO connection, @link_id will be >= 0 for group key and -1 for
+ * pairwise key, @mac_addr will be peer's MLD address for MLO pairwise key.
*
* @get_key: get information about the key with the given parameters.
* @mac_addr will be %NULL when requesting information for a group
* key. All pointers given to the @callback function need not be valid
* after it returns. This function should return an error if it is
* not possible to retrieve the key, -ENOENT if it doesn't exist.
+ * @link_id will be -1 for non-MLO connection. For MLO connection,
+ * @link_id will be >= 0 for group key and -1 for pairwise key, @mac_addr
+ * will be peer's MLD address for MLO pairwise key.
*
* @del_key: remove a key given the @mac_addr (%NULL for a group key)
- * and @key_index, return -ENOENT if the key doesn't exist.
+ * and @key_index, return -ENOENT if the key doesn't exist. @link_id will
+ * be -1 for non-MLO connection. For MLO connection, @link_id will be >= 0
+ * for group key and -1 for pairwise key, @mac_addr will be peer's MLD
+ * address for MLO pairwise key.
+ *
+ * @set_default_key: set the default key on an interface. @link_id will be >= 0
+ * for MLO connection and -1 for non-MLO connection.
*
- * @set_default_key: set the default key on an interface
+ * @set_default_mgmt_key: set the default management frame key on an interface.
+ * @link_id will be >= 0 for MLO connection and -1 for non-MLO connection.
*
- * @set_default_mgmt_key: set the default management frame key on an interface
+ * @set_default_beacon_key: set the default Beacon frame key on an interface.
+ * @link_id will be >= 0 for MLO connection and -1 for non-MLO connection.
*
* @set_rekey_data: give the data necessary for GTK rekeying to the driver
*
@@ -3458,8 +4077,6 @@ struct cfg80211_update_owe_info {
* @get_tx_power: store the current TX power into the dbm variable;
* return 0 if successful
*
- * @set_wds_peer: set the WDS peer for a WDS interface
- *
* @rfkill_poll: polls the hw rfkill line, use cfg80211 reporting
* functions to adjust rfkill hw state
*
@@ -3514,8 +4131,8 @@ struct cfg80211_update_owe_info {
* The driver should not call cfg80211_sched_scan_stopped() for a requested
* stop (when this method returns 0).
*
- * @mgmt_frame_register: Notify driver that a management frame type was
- * registered. The callback is allowed to sleep.
+ * @update_mgmt_frame_registrations: Notify the driver that management frame
+ * registrations were updated. The callback is allowed to sleep.
*
* @set_antenna: Set antenna configuration (tx_ant, rx_ant) on the device.
* Parameters are bitmaps of allowed antennas to use for TX/RX. Drivers may
@@ -3639,6 +4256,30 @@ struct cfg80211_update_owe_info {
*
* @probe_mesh_link: Probe direct Mesh peer's link quality by sending data frame
* and overrule HWMP path selection algorithm.
+ * @set_tid_config: TID specific configuration, this can be peer or BSS specific
+ * This callback may sleep.
+ * @reset_tid_config: Reset TID specific configuration for the peer, for the
+ * given TIDs. This callback may sleep.
+ *
+ * @set_sar_specs: Update the SAR (TX power) settings.
+ *
+ * @color_change: Initiate a color change.
+ *
+ * @set_fils_aad: Set FILS AAD data to the AP driver so that the driver can use
+ * those to decrypt (Re)Association Request and encrypt (Re)Association
+ * Response frame.
+ *
+ * @set_radar_background: Configure dedicated offchannel chain available for
+ * radar/CAC detection on some hw. This chain can't be used to transmit
+ * or receive frames and it is bounded to a running wdev.
+ * Background radar/CAC detection allows to avoid the CAC downtime
+ * switching to a different channel during CAC detection on the selected
+ * radar channel.
+ * The caller is expected to set chandef pointer to NULL in order to
+ * disable background CAC/radar detection.
+ * @add_link_station: Add a link to a station.
+ * @mod_link_station: Modify a link of a station.
+ * @del_link_station: Remove a link of a station.
*/
struct cfg80211_ops {
int (*suspend)(struct wiphy *wiphy, struct cfg80211_wowlan *wow);
@@ -3657,27 +4298,40 @@ struct cfg80211_ops {
enum nl80211_iftype type,
struct vif_params *params);
+ int (*add_intf_link)(struct wiphy *wiphy,
+ struct wireless_dev *wdev,
+ unsigned int link_id);
+ void (*del_intf_link)(struct wiphy *wiphy,
+ struct wireless_dev *wdev,
+ unsigned int link_id);
+
int (*add_key)(struct wiphy *wiphy, struct net_device *netdev,
- u8 key_index, bool pairwise, const u8 *mac_addr,
- struct key_params *params);
+ int link_id, u8 key_index, bool pairwise,
+ const u8 *mac_addr, struct key_params *params);
int (*get_key)(struct wiphy *wiphy, struct net_device *netdev,
- u8 key_index, bool pairwise, const u8 *mac_addr,
- void *cookie,
+ int link_id, u8 key_index, bool pairwise,
+ const u8 *mac_addr, void *cookie,
void (*callback)(void *cookie, struct key_params*));
int (*del_key)(struct wiphy *wiphy, struct net_device *netdev,
- u8 key_index, bool pairwise, const u8 *mac_addr);
+ int link_id, u8 key_index, bool pairwise,
+ const u8 *mac_addr);
int (*set_default_key)(struct wiphy *wiphy,
- struct net_device *netdev,
+ struct net_device *netdev, int link_id,
u8 key_index, bool unicast, bool multicast);
int (*set_default_mgmt_key)(struct wiphy *wiphy,
- struct net_device *netdev,
+ struct net_device *netdev, int link_id,
u8 key_index);
+ int (*set_default_beacon_key)(struct wiphy *wiphy,
+ struct net_device *netdev,
+ int link_id,
+ u8 key_index);
int (*start_ap)(struct wiphy *wiphy, struct net_device *dev,
struct cfg80211_ap_settings *settings);
int (*change_beacon)(struct wiphy *wiphy, struct net_device *dev,
struct cfg80211_beacon_data *info);
- int (*stop_ap)(struct wiphy *wiphy, struct net_device *dev);
+ int (*stop_ap)(struct wiphy *wiphy, struct net_device *dev,
+ unsigned int link_id);
int (*add_station)(struct wiphy *wiphy, struct net_device *dev,
@@ -3773,9 +4427,6 @@ struct cfg80211_ops {
int (*get_tx_power)(struct wiphy *wiphy, struct wireless_dev *wdev,
int *dbm);
- int (*set_wds_peer)(struct wiphy *wiphy, struct net_device *dev,
- const u8 *addr);
-
void (*rfkill_poll)(struct wiphy *wiphy);
#ifdef CONFIG_NL80211_TESTMODE
@@ -3788,6 +4439,7 @@ struct cfg80211_ops {
int (*set_bitrate_mask)(struct wiphy *wiphy,
struct net_device *dev,
+ unsigned int link_id,
const u8 *peer,
const struct cfg80211_bitrate_mask *mask);
@@ -3831,9 +4483,9 @@ struct cfg80211_ops {
struct net_device *dev,
u32 rate, u32 pkts, u32 intvl);
- void (*mgmt_frame_register)(struct wiphy *wiphy,
- struct wireless_dev *wdev,
- u16 frame_type, bool reg);
+ void (*update_mgmt_frame_registrations)(struct wiphy *wiphy,
+ struct wireless_dev *wdev,
+ struct mgmt_frame_regs *upd);
int (*set_antenna)(struct wiphy *wiphy, u32 tx_ant, u32 rx_ant);
int (*get_antenna)(struct wiphy *wiphy, u32 *tx_ant, u32 *rx_ant);
@@ -3863,6 +4515,7 @@ struct cfg80211_ops {
int (*get_channel)(struct wiphy *wiphy,
struct wireless_dev *wdev,
+ unsigned int link_id,
struct cfg80211_chan_def *chandef);
int (*start_p2p_device)(struct wiphy *wiphy,
@@ -3899,6 +4552,7 @@ struct cfg80211_ops {
struct cfg80211_qos_map *qos_map);
int (*set_ap_chanwidth)(struct wiphy *wiphy, struct net_device *dev,
+ unsigned int link_id,
struct cfg80211_chan_def *chandef);
int (*add_tx_ts)(struct wiphy *wiphy, struct net_device *dev,
@@ -3945,7 +4599,8 @@ struct cfg80211_ops {
struct net_device *dev,
const u8 *buf, size_t len,
const u8 *dest, const __be16 proto,
- const bool noencrypt);
+ const bool noencrypt, int link_id,
+ u64 *cookie);
int (*get_ftm_responder_stats)(struct wiphy *wiphy,
struct net_device *dev,
@@ -3959,6 +4614,25 @@ struct cfg80211_ops {
struct cfg80211_update_owe_info *owe_info);
int (*probe_mesh_link)(struct wiphy *wiphy, struct net_device *dev,
const u8 *buf, size_t len);
+ int (*set_tid_config)(struct wiphy *wiphy, struct net_device *dev,
+ struct cfg80211_tid_config *tid_conf);
+ int (*reset_tid_config)(struct wiphy *wiphy, struct net_device *dev,
+ const u8 *peer, u8 tids);
+ int (*set_sar_specs)(struct wiphy *wiphy,
+ struct cfg80211_sar_specs *sar);
+ int (*color_change)(struct wiphy *wiphy,
+ struct net_device *dev,
+ struct cfg80211_color_change_settings *params);
+ int (*set_fils_aad)(struct wiphy *wiphy, struct net_device *dev,
+ struct cfg80211_fils_aad *fils_aad);
+ int (*set_radar_background)(struct wiphy *wiphy,
+ struct cfg80211_chan_def *chandef);
+ int (*add_link_station)(struct wiphy *wiphy, struct net_device *dev,
+ struct link_station_parameters *params);
+ int (*mod_link_station)(struct wiphy *wiphy, struct net_device *dev,
+ struct link_station_parameters *params);
+ int (*del_link_station)(struct wiphy *wiphy, struct net_device *dev,
+ struct link_station_del_parameters *params);
};
/*
@@ -3969,6 +4643,8 @@ struct cfg80211_ops {
/**
* enum wiphy_flags - wiphy capability flags
*
+ * @WIPHY_FLAG_SPLIT_SCAN_6GHZ: if set to true, the scan request will be split
+ * into two, first for legacy bands and second for UHB.
* @WIPHY_FLAG_NETNS_OK: if not set, do not allow changing the netns of this
* wiphy at all
* @WIPHY_FLAG_PS_ON_BY_DEFAULT: if set to true, powersave will be enabled
@@ -4007,11 +4683,16 @@ struct cfg80211_ops {
* beaconing mode (AP, IBSS, Mesh, ...).
* @WIPHY_FLAG_HAS_STATIC_WEP: The device supports static WEP key installation
* before connection.
+ * @WIPHY_FLAG_SUPPORTS_EXT_KEK_KCK: The device supports bigger kek and kck keys
+ * @WIPHY_FLAG_SUPPORTS_MLO: This is a temporary flag gating the MLO APIs,
+ * in order to not have them reachable in normal drivers, until we have
+ * complete feature/interface combinations/etc. advertisement. No driver
+ * should set this flag for now.
*/
enum wiphy_flags {
- /* use hole at 0 */
- /* use hole at 1 */
- /* use hole at 2 */
+ WIPHY_FLAG_SUPPORTS_EXT_KEK_KCK = BIT(0),
+ WIPHY_FLAG_SUPPORTS_MLO = BIT(1),
+ WIPHY_FLAG_SPLIT_SCAN_6GHZ = BIT(2),
WIPHY_FLAG_NETNS_OK = BIT(3),
WIPHY_FLAG_PS_ON_BY_DEFAULT = BIT(4),
WIPHY_FLAG_4ADDR_AP = BIT(5),
@@ -4331,19 +5012,32 @@ struct wiphy_vendor_command {
* 802.11-2012 8.4.2.29 for the defined fields.
* @extended_capabilities_mask: mask of the valid values
* @extended_capabilities_len: length of the extended capabilities
+ * @eml_capabilities: EML capabilities (for MLO)
+ * @mld_capa_and_ops: MLD capabilities and operations (for MLO)
*/
struct wiphy_iftype_ext_capab {
enum nl80211_iftype iftype;
const u8 *extended_capabilities;
const u8 *extended_capabilities_mask;
u8 extended_capabilities_len;
+ u16 eml_capabilities;
+ u16 mld_capa_and_ops;
};
/**
+ * cfg80211_get_iftype_ext_capa - lookup interface type extended capability
+ * @wiphy: the wiphy to look up from
+ * @type: the interface type to look up
+ */
+const struct wiphy_iftype_ext_capab *
+cfg80211_get_iftype_ext_capa(struct wiphy *wiphy, enum nl80211_iftype type);
+
+/**
* struct cfg80211_pmsr_capabilities - cfg80211 peer measurement capabilities
* @max_peers: maximum number of peers in a single measurement
* @report_ap_tsf: can report assoc AP's TSF for radio resource measurement
* @randomize_mac_addr: can randomize MAC address for measurement
+ * @ftm: FTM measurement data
* @ftm.supported: FTM measurement is supported
* @ftm.asap: ASAP-mode is supported
* @ftm.non_asap: non-ASAP-mode is supported
@@ -4356,6 +5050,8 @@ struct wiphy_iftype_ext_capab {
* forbid using the value 15 to let the responder pick)
* @ftm.max_ftms_per_burst: maximum FTMs per burst supported (set to 0 if
* not limited)
+ * @ftm.trigger_based: trigger based ranging measurement is supported
+ * @ftm.non_trigger_based: non trigger based ranging measurement is supported
*/
struct cfg80211_pmsr_capabilities {
unsigned int max_peers;
@@ -4371,24 +5067,50 @@ struct cfg80211_pmsr_capabilities {
asap:1,
non_asap:1,
request_lci:1,
- request_civicloc:1;
+ request_civicloc:1,
+ trigger_based:1,
+ non_trigger_based:1;
} ftm;
};
/**
+ * struct wiphy_iftype_akm_suites - This structure encapsulates supported akm
+ * suites for interface types defined in @iftypes_mask. Each type in the
+ * @iftypes_mask must be unique across all instances of iftype_akm_suites.
+ *
+ * @iftypes_mask: bitmask of interfaces types
+ * @akm_suites: points to an array of supported akm suites
+ * @n_akm_suites: number of supported AKM suites
+ */
+struct wiphy_iftype_akm_suites {
+ u16 iftypes_mask;
+ const u32 *akm_suites;
+ int n_akm_suites;
+};
+
+/**
* struct wiphy - wireless hardware description
+ * @mtx: mutex for the data (structures) of this device
* @reg_notifier: the driver's regulatory notification callback,
* note that if your driver uses wiphy_apply_custom_regulatory()
* the reg_notifier's request can be passed as NULL
* @regd: the driver's regulatory domain, if one was requested via
- * the regulatory_hint() API. This can be used by the driver
+ * the regulatory_hint() API. This can be used by the driver
* on the reg_notifier() if it chooses to ignore future
* regulatory domain changes caused by other drivers.
* @signal_type: signal type reported in &struct cfg80211_bss.
* @cipher_suites: supported cipher suites
* @n_cipher_suites: number of supported cipher suites
- * @akm_suites: supported AKM suites
+ * @akm_suites: supported AKM suites. These are the default AKMs supported if
+ * the supported AKMs not advertized for a specific interface type in
+ * iftype_akm_suites.
* @n_akm_suites: number of supported AKM suites
+ * @iftype_akm_suites: array of supported akm suites info per interface type.
+ * Note that the bits in @iftypes_mask inside this structure cannot
+ * overlap (i.e. only one occurrence of each type is allowed across all
+ * instances of iftype_akm_suites).
+ * @num_iftype_akm_suites: number of interface types for which supported akm
+ * suites are specified separately.
* @retry_short: Retry limit for short frames (dot11ShortRetryLimit)
* @retry_long: Retry limit for long frames (dot11LongRetryLimit)
* @frag_threshold: Fragmentation threshold (dot11FragmentationThreshold);
@@ -4409,10 +5131,11 @@ struct cfg80211_pmsr_capabilities {
* the same number of arbitrary MAC addresses.
* @registered: protects ->resume and ->suspend sysfs callbacks against
* unregister hardware
- * @debugfsdir: debugfs directory used for this wiphy, will be renamed
- * automatically on wiphy renames
- * @dev: (virtual) struct device for this wiphy
- * @registered: helps synchronize suspend/resume with wiphy unregister
+ * @debugfsdir: debugfs directory used for this wiphy (ieee80211/<wiphyname>).
+ * It will be renamed automatically on wiphy renames
+ * @dev: (virtual) struct device for this wiphy. The item in
+ * /sys/class/ieee80211/ points to this. You need use set_wiphy_dev()
+ * (see below).
* @wext: wireless extension handlers
* @priv: driver private data (sized according to wiphy_new() parameter)
* @interface_modes: bitmask of interfaces types valid for this wiphy,
@@ -4523,12 +5246,6 @@ struct cfg80211_pmsr_capabilities {
* and probe responses. This value should be set if the driver
* wishes to limit the number of csa counters. Default (0) means
* infinite.
- * @max_adj_channel_rssi_comp: max offset of between the channel on which the
- * frame was sent and the channel on which the frame was heard for which
- * the reported rssi is still valid. If a driver is able to compensate the
- * low rssi when a frame is heard on different channel, then it should set
- * this variable to the maximal offset for which it can compensate.
- * This value should be set in MHz.
* @bss_select_support: bitmask indicating the BSS selection criteria supported
* by the driver in the .connect() callback. The bit position maps to the
* attribute indices defined in &enum nl80211_bss_select_attr.
@@ -4542,17 +5259,50 @@ struct cfg80211_pmsr_capabilities {
* @txq_memory_limit: configuration internal TX queue memory limit
* @txq_quantum: configuration of internal TX queue scheduler quantum
*
+ * @tx_queue_len: allow setting transmit queue len for drivers not using
+ * wake_tx_queue
+ *
* @support_mbssid: can HW support association with nontransmitted AP
* @support_only_he_mbssid: don't parse MBSSID elements if it is not
* HE AP, in order to avoid compatibility issues.
* @support_mbssid must be set for this to have any effect.
*
* @pmsr_capa: peer measurement capabilities
+ *
+ * @tid_config_support: describes the per-TID config support that the
+ * device has
+ * @tid_config_support.vif: bitmap of attributes (configurations)
+ * supported by the driver for each vif
+ * @tid_config_support.peer: bitmap of attributes (configurations)
+ * supported by the driver for each peer
+ * @tid_config_support.max_retry: maximum supported retry count for
+ * long/short retry configuration
+ *
+ * @max_data_retry_count: maximum supported per TID retry count for
+ * configuration through the %NL80211_TID_CONFIG_ATTR_RETRY_SHORT and
+ * %NL80211_TID_CONFIG_ATTR_RETRY_LONG attributes
+ * @sar_capa: SAR control capabilities
+ * @rfkill: a pointer to the rfkill structure
+ *
+ * @mbssid_max_interfaces: maximum number of interfaces supported by the driver
+ * in a multiple BSSID set. This field must be set to a non-zero value
+ * by the driver to advertise MBSSID support.
+ * @ema_max_profile_periodicity: maximum profile periodicity supported by
+ * the driver. Setting this field to a non-zero value indicates that the
+ * driver supports enhanced multi-BSSID advertisements (EMA AP).
+ * @max_num_akm_suites: maximum number of AKM suites allowed for
+ * configuration through %NL80211_CMD_CONNECT, %NL80211_CMD_ASSOCIATE and
+ * %NL80211_CMD_START_AP. Set to NL80211_MAX_NR_AKM_SUITES if not set by
+ * driver. If set by driver minimum allowed value is
+ * NL80211_MAX_NR_AKM_SUITES in order to avoid compatibility issues with
+ * legacy userspace and maximum allowed value is
+ * CFG80211_MAX_NUM_AKM_SUITES.
*/
struct wiphy {
+ struct mutex mtx;
+
/* assign these fields before you register the wiphy */
- /* permanent MAC address(es) */
u8 perm_addr[ETH_ALEN];
u8 addr_mask[ETH_ALEN];
@@ -4595,6 +5345,9 @@ struct wiphy {
int n_akm_suites;
const u32 *akm_suites;
+ const struct wiphy_iftype_akm_suites *iftype_akm_suites;
+ unsigned int num_iftype_akm_suites;
+
u8 retry_short;
u8 retry_long;
u32 frag_threshold;
@@ -4616,11 +5369,6 @@ struct wiphy {
u32 available_antennas_tx;
u32 available_antennas_rx;
- /*
- * Bitmap of supported protocols for probe response offloading
- * see &enum nl80211_probe_resp_offload_support_attr. Only valid
- * when the wiphy flag @WIPHY_FLAG_AP_PROBE_RESP_OFFLOAD is set.
- */
u32 probe_resp_offload;
const u8 *extended_capabilities, *extended_capabilities_mask;
@@ -4629,16 +5377,10 @@ struct wiphy {
const struct wiphy_iftype_ext_capab *iftype_ext_capab;
unsigned int num_iftype_ext_capab;
- /* If multiple wiphys are registered and you're handed e.g.
- * a regular netdev with assigned ieee80211_ptr, you won't
- * know whether it points to a wiphy your driver has registered
- * or not. Assign this to something global to your driver to
- * help determine whether you own this wiphy or not. */
const void *privid;
struct ieee80211_supported_band *bands[NUM_NL80211_BANDS];
- /* Lets us get back the wiphy on the callback */
void (*reg_notifier)(struct wiphy *wiphy,
struct regulatory_request *request);
@@ -4646,14 +5388,10 @@ struct wiphy {
const struct ieee80211_regdomain __rcu *regd;
- /* the item in /sys/class/ieee80211/ points to this,
- * you need use set_wiphy_dev() (see below) */
struct device dev;
- /* protects ->resume, ->suspend sysfs callbacks against unregister hw */
bool registered;
- /* dir in debugfs: ieee80211/<wiphyname> */
struct dentry *debugfsdir;
const struct ieee80211_ht_cap *ht_capa_mod_mask;
@@ -4661,7 +5399,6 @@ struct wiphy {
struct list_head wdev_list;
- /* the network namespace this phy lives in currently */
possible_net_t _net;
#ifdef CONFIG_CFG80211_WEXT
@@ -4677,7 +5414,6 @@ struct wiphy {
u16 max_ap_assoc_sta;
u8 max_num_csa_counters;
- u8 max_adj_channel_rssi_comp;
u32 bss_select_support;
@@ -4687,12 +5423,29 @@ struct wiphy {
u32 txq_memory_limit;
u32 txq_quantum;
+ unsigned long tx_queue_len;
+
u8 support_mbssid:1,
support_only_he_mbssid:1;
const struct cfg80211_pmsr_capabilities *pmsr_capa;
- char priv[0] __aligned(NETDEV_ALIGN);
+ struct {
+ u64 peer, vif;
+ u8 max_retry;
+ } tid_config_support;
+
+ u8 max_data_retry_count;
+
+ const struct cfg80211_sar_capa *sar_capa;
+
+ struct rfkill *rfkill;
+
+ u8 mbssid_max_interfaces;
+ u8 ema_max_profile_periodicity;
+ u16 max_num_akm_suites;
+
+ char priv[] __aligned(NETDEV_ALIGN);
};
static inline struct net *wiphy_net(struct wiphy *wiphy)
@@ -4806,6 +5559,37 @@ static inline struct wiphy *wiphy_new(const struct cfg80211_ops *ops,
*/
int wiphy_register(struct wiphy *wiphy);
+/* this is a define for better error reporting (file/line) */
+#define lockdep_assert_wiphy(wiphy) lockdep_assert_held(&(wiphy)->mtx)
+
+/**
+ * rcu_dereference_wiphy - rcu_dereference with debug checking
+ * @wiphy: the wiphy to check the locking on
+ * @p: The pointer to read, prior to dereferencing
+ *
+ * Do an rcu_dereference(p), but check caller either holds rcu_read_lock()
+ * or RTNL. Note: Please prefer wiphy_dereference() or rcu_dereference().
+ */
+#define rcu_dereference_wiphy(wiphy, p) \
+ rcu_dereference_check(p, lockdep_is_held(&wiphy->mtx))
+
+/**
+ * wiphy_dereference - fetch RCU pointer when updates are prevented by wiphy mtx
+ * @wiphy: the wiphy to check the locking on
+ * @p: The pointer to read, prior to dereferencing
+ *
+ * Return the value of the specified RCU-protected pointer, but omit the
+ * READ_ONCE(), because caller holds the wiphy mutex used for updates.
+ */
+#define wiphy_dereference(wiphy, p) \
+ rcu_dereference_protected(p, lockdep_is_held(&wiphy->mtx))
+
+/**
+ * get_wiphy_regdom - get custom regdomain for the given wiphy
+ * @wiphy: the wiphy to get the regdomain from
+ */
+const struct ieee80211_regdomain *get_wiphy_regdom(struct wiphy *wiphy);
+
/**
* wiphy_unregister - deregister a wiphy from cfg80211
*
@@ -4831,13 +5615,45 @@ struct cfg80211_cached_keys;
struct cfg80211_cqm_config;
/**
+ * wiphy_lock - lock the wiphy
+ * @wiphy: the wiphy to lock
+ *
+ * This is mostly exposed so it can be done around registering and
+ * unregistering netdevs that aren't created through cfg80211 calls,
+ * since that requires locking in cfg80211 when the notifiers is
+ * called, but that cannot differentiate which way it's called.
+ *
+ * When cfg80211 ops are called, the wiphy is already locked.
+ */
+static inline void wiphy_lock(struct wiphy *wiphy)
+ __acquires(&wiphy->mtx)
+{
+ mutex_lock(&wiphy->mtx);
+ __acquire(&wiphy->mtx);
+}
+
+/**
+ * wiphy_unlock - unlock the wiphy again
+ * @wiphy: the wiphy to unlock
+ */
+static inline void wiphy_unlock(struct wiphy *wiphy)
+ __releases(&wiphy->mtx)
+{
+ __release(&wiphy->mtx);
+ mutex_unlock(&wiphy->mtx);
+}
+
+/**
* struct wireless_dev - wireless device state
*
* For netdevs, this structure must be allocated by the driver
* that uses the ieee80211_ptr field in struct net_device (this
* is intentional so it can be allocated along with the netdev.)
* It need not be registered then as netdev registration will
- * be intercepted by cfg80211 to see the new wireless device.
+ * be intercepted by cfg80211 to see the new wireless device,
+ * however, drivers must lock the wiphy before registering or
+ * unregistering netdevs if they pre-create any netdevs (in ops
+ * called from cfg80211, the wiphy is already locked.)
*
* For non-netdev uses, it must also be allocated by the driver
* in response to the cfg80211 callbacks that require it, as
@@ -4846,20 +5662,16 @@ struct cfg80211_cqm_config;
*
* @wiphy: pointer to hardware description
* @iftype: interface type
+ * @registered: is this wdev already registered with cfg80211
+ * @registering: indicates we're doing registration under wiphy lock
+ * for the notifier
* @list: (private) Used to collect the interfaces
* @netdev: (private) Used to reference back to the netdev, may be %NULL
* @identifier: (private) Identifier used in nl80211 to identify this
* wireless device if it has no netdev
- * @current_bss: (private) Used by the internal configuration code
- * @chandef: (private) Used by the internal configuration code to track
- * the user-set channel definition.
- * @preset_chandef: (private) Used by the internal configuration code to
- * track the channel to be used for AP later
+ * @u: union containing data specific to @iftype
+ * @connected: indicates if connected or not (STA mode)
* @bssid: (private) Used by the internal configuration code
- * @ssid: (private) Used by the internal configuration code
- * @ssid_len: (private) Used by the internal configuration code
- * @mesh_id_len: (private) Used by the internal configuration code
- * @mesh_id_up_len: (private) Used by the internal configuration code
* @wext: (private) Used by the internal wireless extensions compat code
* @wext.ibss: (private) IBSS data part of wext handling
* @wext.connect: (private) connection handling data
@@ -4877,7 +5689,8 @@ struct cfg80211_cqm_config;
* netdev and may otherwise be used by driver read-only, will be update
* by cfg80211 on change_interface
* @mgmt_registrations: list of registrations for management frames
- * @mgmt_registrations_lock: lock for the list
+ * @mgmt_registrations_need_update: mgmt registrations were updated,
+ * need to propagate the update to the driver
* @mtx: mutex used to lock data in this struct, may be used by drivers
* and some API functions require it held
* @beacon_interval: beacon interval used on this device for transmitting
@@ -4898,8 +5711,6 @@ struct cfg80211_cqm_config;
* @conn_owner_nlportid: (private) connection owner socket port ID
* @disconnect_wk: (private) auto-disconnect work
* @disconnect_bssid: (private) the BSSID to use for auto-disconnect
- * @ibss_fixed: (private) IBSS is using fixed BSSID
- * @ibss_dfs_possible: (private) IBSS may change to a DFS channel
* @event_list: (private) list for internal event processing
* @event_lock: (private) lock for event list
* @owner_nlportid: (private) owner socket port ID
@@ -4908,6 +5719,11 @@ struct cfg80211_cqm_config;
* @pmsr_list: (private) peer measurement requests
* @pmsr_lock: (private) peer measurements requests/results lock
* @pmsr_free_wk: (private) peer measurements cleanup work
+ * @unprot_beacon_reported: (private) timestamp of last
+ * unprotected beacon report
+ * @links: array of %IEEE80211_MLD_MAX_NUM_LINKS elements containing @addr
+ * @ap and @client for each link
+ * @valid_links: bitmap describing what elements of @links are valid
*/
struct wireless_dev {
struct wiphy *wiphy;
@@ -4920,17 +5736,15 @@ struct wireless_dev {
u32 identifier;
struct list_head mgmt_registrations;
- spinlock_t mgmt_registrations_lock;
+ u8 mgmt_registrations_need_update:1;
struct mutex mtx;
- bool use_4addr, is_running;
+ bool use_4addr, is_running, registered, registering;
u8 address[ETH_ALEN] __aligned(sizeof(u16));
/* currently used for IBSS and SME - might be rearranged later */
- u8 ssid[IEEE80211_MAX_SSID_LEN];
- u8 ssid_len, mesh_id_len, mesh_id_up_len;
struct cfg80211_conn *conn;
struct cfg80211_cached_keys *connect_keys;
enum ieee80211_bss_type conn_bss_type;
@@ -4942,23 +5756,17 @@ struct wireless_dev {
struct list_head event_list;
spinlock_t event_lock;
- struct cfg80211_internal_bss *current_bss; /* associated / joined */
- struct cfg80211_chan_def preset_chandef;
- struct cfg80211_chan_def chandef;
-
- bool ibss_fixed;
- bool ibss_dfs_possible;
+ u8 connected:1;
bool ps;
int ps_timeout;
- int beacon_interval;
-
u32 ap_unexpected_nlportid;
u32 owner_nlportid;
bool nl_owner_dead;
+ /* FIXME: need to rework radar detection for MLO */
bool cac_started;
unsigned long cac_start_time;
unsigned int cac_time_ms;
@@ -4984,9 +5792,55 @@ struct wireless_dev {
struct list_head pmsr_list;
spinlock_t pmsr_lock;
struct work_struct pmsr_free_wk;
+
+ unsigned long unprot_beacon_reported;
+
+ union {
+ struct {
+ u8 connected_addr[ETH_ALEN] __aligned(2);
+ u8 ssid[IEEE80211_MAX_SSID_LEN];
+ u8 ssid_len;
+ } client;
+ struct {
+ int beacon_interval;
+ struct cfg80211_chan_def preset_chandef;
+ struct cfg80211_chan_def chandef;
+ u8 id[IEEE80211_MAX_SSID_LEN];
+ u8 id_len, id_up_len;
+ } mesh;
+ struct {
+ struct cfg80211_chan_def preset_chandef;
+ u8 ssid[IEEE80211_MAX_SSID_LEN];
+ u8 ssid_len;
+ } ap;
+ struct {
+ struct cfg80211_internal_bss *current_bss;
+ struct cfg80211_chan_def chandef;
+ int beacon_interval;
+ u8 ssid[IEEE80211_MAX_SSID_LEN];
+ u8 ssid_len;
+ } ibss;
+ struct {
+ struct cfg80211_chan_def chandef;
+ } ocb;
+ } u;
+
+ struct {
+ u8 addr[ETH_ALEN] __aligned(2);
+ union {
+ struct {
+ unsigned int beacon_interval;
+ struct cfg80211_chan_def chandef;
+ } ap;
+ struct {
+ struct cfg80211_internal_bss *current_bss;
+ } client;
+ };
+ } links[IEEE80211_MLD_MAX_NUM_LINKS];
+ u16 valid_links;
};
-static inline u8 *wdev_address(struct wireless_dev *wdev)
+static inline const u8 *wdev_address(struct wireless_dev *wdev)
{
if (wdev->netdev)
return wdev->netdev->dev_addr;
@@ -5013,35 +5867,148 @@ static inline void *wdev_priv(struct wireless_dev *wdev)
}
/**
+ * wdev_chandef - return chandef pointer from wireless_dev
+ * @wdev: the wdev
+ * @link_id: the link ID for MLO
+ *
+ * Return: The chandef depending on the mode, or %NULL.
+ */
+struct cfg80211_chan_def *wdev_chandef(struct wireless_dev *wdev,
+ unsigned int link_id);
+
+static inline void WARN_INVALID_LINK_ID(struct wireless_dev *wdev,
+ unsigned int link_id)
+{
+ WARN_ON(link_id && !wdev->valid_links);
+ WARN_ON(wdev->valid_links &&
+ !(wdev->valid_links & BIT(link_id)));
+}
+
+#define for_each_valid_link(link_info, link_id) \
+ for (link_id = 0; \
+ link_id < ((link_info)->valid_links ? \
+ ARRAY_SIZE((link_info)->links) : 1); \
+ link_id++) \
+ if (!(link_info)->valid_links || \
+ ((link_info)->valid_links & BIT(link_id)))
+
+/**
* DOC: Utility functions
*
* cfg80211 offers a number of utility functions that can be useful.
*/
/**
+ * ieee80211_channel_equal - compare two struct ieee80211_channel
+ *
+ * @a: 1st struct ieee80211_channel
+ * @b: 2nd struct ieee80211_channel
+ * Return: true if center frequency of @a == @b
+ */
+static inline bool
+ieee80211_channel_equal(struct ieee80211_channel *a,
+ struct ieee80211_channel *b)
+{
+ return (a->center_freq == b->center_freq &&
+ a->freq_offset == b->freq_offset);
+}
+
+/**
+ * ieee80211_channel_to_khz - convert ieee80211_channel to frequency in KHz
+ * @chan: struct ieee80211_channel to convert
+ * Return: The corresponding frequency (in KHz)
+ */
+static inline u32
+ieee80211_channel_to_khz(const struct ieee80211_channel *chan)
+{
+ return MHZ_TO_KHZ(chan->center_freq) + chan->freq_offset;
+}
+
+/**
+ * ieee80211_s1g_channel_width - get allowed channel width from @chan
+ *
+ * Only allowed for band NL80211_BAND_S1GHZ
+ * @chan: channel
+ * Return: The allowed channel width for this center_freq
+ */
+enum nl80211_chan_width
+ieee80211_s1g_channel_width(const struct ieee80211_channel *chan);
+
+/**
+ * ieee80211_channel_to_freq_khz - convert channel number to frequency
+ * @chan: channel number
+ * @band: band, necessary due to channel number overlap
+ * Return: The corresponding frequency (in KHz), or 0 if the conversion failed.
+ */
+u32 ieee80211_channel_to_freq_khz(int chan, enum nl80211_band band);
+
+/**
* ieee80211_channel_to_frequency - convert channel number to frequency
* @chan: channel number
* @band: band, necessary due to channel number overlap
* Return: The corresponding frequency (in MHz), or 0 if the conversion failed.
*/
-int ieee80211_channel_to_frequency(int chan, enum nl80211_band band);
+static inline int
+ieee80211_channel_to_frequency(int chan, enum nl80211_band band)
+{
+ return KHZ_TO_MHZ(ieee80211_channel_to_freq_khz(chan, band));
+}
+
+/**
+ * ieee80211_freq_khz_to_channel - convert frequency to channel number
+ * @freq: center frequency in KHz
+ * Return: The corresponding channel, or 0 if the conversion failed.
+ */
+int ieee80211_freq_khz_to_channel(u32 freq);
/**
* ieee80211_frequency_to_channel - convert frequency to channel number
- * @freq: center frequency
+ * @freq: center frequency in MHz
* Return: The corresponding channel, or 0 if the conversion failed.
*/
-int ieee80211_frequency_to_channel(int freq);
+static inline int
+ieee80211_frequency_to_channel(int freq)
+{
+ return ieee80211_freq_khz_to_channel(MHZ_TO_KHZ(freq));
+}
+
+/**
+ * ieee80211_get_channel_khz - get channel struct from wiphy for specified
+ * frequency
+ * @wiphy: the struct wiphy to get the channel for
+ * @freq: the center frequency (in KHz) of the channel
+ * Return: The channel struct from @wiphy at @freq.
+ */
+struct ieee80211_channel *
+ieee80211_get_channel_khz(struct wiphy *wiphy, u32 freq);
/**
* ieee80211_get_channel - get channel struct from wiphy for specified frequency
*
* @wiphy: the struct wiphy to get the channel for
- * @freq: the center frequency of the channel
- *
+ * @freq: the center frequency (in MHz) of the channel
* Return: The channel struct from @wiphy at @freq.
*/
-struct ieee80211_channel *ieee80211_get_channel(struct wiphy *wiphy, int freq);
+static inline struct ieee80211_channel *
+ieee80211_get_channel(struct wiphy *wiphy, int freq)
+{
+ return ieee80211_get_channel_khz(wiphy, MHZ_TO_KHZ(freq));
+}
+
+/**
+ * cfg80211_channel_is_psc - Check if the channel is a 6 GHz PSC
+ * @chan: control channel to check
+ *
+ * The Preferred Scanning Channels (PSC) are defined in
+ * Draft IEEE P802.11ax/D5.0, 26.17.2.3.3
+ */
+static inline bool cfg80211_channel_is_psc(struct ieee80211_channel *chan)
+{
+ if (chan->band != NL80211_BAND_6GHZ)
+ return false;
+
+ return ieee80211_frequency_to_channel(chan->center_freq) % 16 == 5;
+}
/**
* ieee80211_get_response_rate - get basic rate for a given rate
@@ -5055,7 +6022,7 @@ struct ieee80211_channel *ieee80211_get_channel(struct wiphy *wiphy, int freq);
* which is, for this function, given as a bitmap of indices of
* rates in the band's bitrate table.
*/
-struct ieee80211_rate *
+const struct ieee80211_rate *
ieee80211_get_response_rate(struct ieee80211_supported_band *sband,
u32 basic_rates, int bitrate);
@@ -5074,7 +6041,7 @@ u32 ieee80211_mandatory_rates(struct ieee80211_supported_band *sband,
* Radiotap parsing functions -- for controlled injection support
*
* Implemented in net/wireless/radiotap.c
- * Documentation in Documentation/networking/radiotap-headers.txt
+ * Documentation in Documentation/networking/radiotap-headers.rst
*/
struct radiotap_align_size {
@@ -5201,11 +6168,12 @@ unsigned int ieee80211_get_mesh_hdrlen(struct ieee80211s_hdr *meshhdr);
* @addr: the device MAC address
* @iftype: the virtual interface type
* @data_offset: offset of payload after the 802.11 header
+ * @is_amsdu: true if the 802.11 header is A-MSDU
* Return: 0 on success. Non-zero on error.
*/
int ieee80211_data_to_8023_exthdr(struct sk_buff *skb, struct ethhdr *ehdr,
const u8 *addr, enum nl80211_iftype iftype,
- u8 data_offset);
+ u8 data_offset, bool is_amsdu);
/**
* ieee80211_data_to_8023 - convert an 802.11 data frame to 802.3
@@ -5217,7 +6185,7 @@ int ieee80211_data_to_8023_exthdr(struct sk_buff *skb, struct ethhdr *ehdr,
static inline int ieee80211_data_to_8023(struct sk_buff *skb, const u8 *addr,
enum nl80211_iftype iftype)
{
- return ieee80211_data_to_8023_exthdr(skb, NULL, addr, iftype, 0);
+ return ieee80211_data_to_8023_exthdr(skb, NULL, addr, iftype, 0, false);
}
/**
@@ -5229,7 +6197,7 @@ static inline int ieee80211_data_to_8023(struct sk_buff *skb, const u8 *addr,
*
* @skb: The input A-MSDU frame without any headers.
* @list: The output list of 802.3 frames. It must be allocated and
- * initialized by by the caller.
+ * initialized by the caller.
* @addr: The device MAC address.
* @iftype: The device interface type.
* @extra_headroom: The hardware extra headroom for SKBs in the @list.
@@ -5313,9 +6281,9 @@ cfg80211_find_ie_match(u8 eid, const u8 *ies, unsigned int len,
(!match_len && match_offset)))
return NULL;
- return (void *)cfg80211_find_elem_match(eid, ies, len,
- match, match_len,
- match_offset ?
+ return (const void *)cfg80211_find_elem_match(eid, ies, len,
+ match, match_len,
+ match_offset ?
match_offset - 2 : 0);
}
@@ -5442,7 +6410,7 @@ static inline const u8 *
cfg80211_find_vendor_ie(unsigned int oui, int oui_type,
const u8 *ies, unsigned int len)
{
- return (void *)cfg80211_find_vendor_elem(oui, oui_type, ies, len);
+ return (const void *)cfg80211_find_vendor_elem(oui, oui_type, ies, len);
}
/**
@@ -5467,9 +6435,9 @@ void cfg80211_send_layer2_update(struct net_device *dev, const u8 *addr);
* @wiphy: the wireless device giving the hint (used only for reporting
* conflicts)
* @alpha2: the ISO/IEC 3166 alpha2 the driver claims its regulatory domain
- * should be in. If @rd is set this should be NULL. Note that if you
- * set this to NULL you should still set rd->alpha2 to some accepted
- * alpha2.
+ * should be in. If @rd is set this should be NULL. Note that if you
+ * set this to NULL you should still set rd->alpha2 to some accepted
+ * alpha2.
*
* Wireless drivers can use this function to hint to the wireless core
* what it believes should be the current regulatory domain by
@@ -5504,18 +6472,18 @@ int regulatory_set_wiphy_regd(struct wiphy *wiphy,
struct ieee80211_regdomain *rd);
/**
- * regulatory_set_wiphy_regd_sync_rtnl - set regdom for self-managed drivers
+ * regulatory_set_wiphy_regd_sync - set regdom for self-managed drivers
* @wiphy: the wireless device we want to process the regulatory domain on
* @rd: the regulatory domain information to use for this wiphy
*
- * This functions requires the RTNL to be held and applies the new regdomain
- * synchronously to this wiphy. For more details see
- * regulatory_set_wiphy_regd().
+ * This functions requires the RTNL and the wiphy mutex to be held and
+ * applies the new regdomain synchronously to this wiphy. For more details
+ * see regulatory_set_wiphy_regd().
*
* Return: 0 on success. -EINVAL, -EPERM
*/
-int regulatory_set_wiphy_regd_sync_rtnl(struct wiphy *wiphy,
- struct ieee80211_regdomain *rd);
+int regulatory_set_wiphy_regd_sync(struct wiphy *wiphy,
+ struct ieee80211_regdomain *rd);
/**
* wiphy_apply_custom_regulatory - apply a custom driver regulatory domain
@@ -5633,7 +6601,7 @@ void cfg80211_sched_scan_results(struct wiphy *wiphy, u64 reqid);
void cfg80211_sched_scan_stopped(struct wiphy *wiphy, u64 reqid);
/**
- * cfg80211_sched_scan_stopped_rtnl - notify that the scheduled scan has stopped
+ * cfg80211_sched_scan_stopped_locked - notify that the scheduled scan has stopped
*
* @wiphy: the wiphy on which the scheduled scan stopped
* @reqid: identifier for the related scheduled scan request
@@ -5641,9 +6609,9 @@ void cfg80211_sched_scan_stopped(struct wiphy *wiphy, u64 reqid);
* The driver can call this function to inform cfg80211 that the
* scheduled scan had to be stopped, for whatever reason. The driver
* is then called back via the sched_scan_stop operation when done.
- * This function should be called with rtnl locked.
+ * This function should be called with the wiphy mutex held.
*/
-void cfg80211_sched_scan_stopped_rtnl(struct wiphy *wiphy, u64 reqid);
+void cfg80211_sched_scan_stopped_locked(struct wiphy *wiphy, u64 reqid);
/**
* cfg80211_inform_bss_frame_data - inform cfg80211 of a received BSS frame
@@ -5753,6 +6721,19 @@ enum cfg80211_bss_frame_type {
};
/**
+ * cfg80211_get_ies_channel_number - returns the channel number from ies
+ * @ie: IEs
+ * @ielen: length of IEs
+ * @band: enum nl80211_band of the channel
+ * @ftype: frame type
+ *
+ * Returns the channel number, or -1 if none could be determined.
+ */
+int cfg80211_get_ies_channel_number(const u8 *ie, size_t ielen,
+ enum nl80211_band band,
+ enum cfg80211_bss_frame_type ftype);
+
+/**
* cfg80211_inform_bss_data - inform cfg80211 of a new BSS
*
* @wiphy: the wiphy reporting the BSS
@@ -5940,16 +6921,36 @@ void cfg80211_rx_mlme_mgmt(struct net_device *dev, const u8 *buf, size_t len);
void cfg80211_auth_timeout(struct net_device *dev, const u8 *addr);
/**
- * cfg80211_rx_assoc_resp - notification of processed association response
- * @dev: network device
+ * struct cfg80211_rx_assoc_resp - association response data
* @bss: the BSS that association was requested with, ownership of the pointer
- * moves to cfg80211 in this call
+ * moves to cfg80211 in the call to cfg80211_rx_assoc_resp()
* @buf: (Re)Association Response frame (header + body)
* @len: length of the frame data
* @uapsd_queues: bitmap of queues configured for uapsd. Same format
* as the AC bitmap in the QoS info field
* @req_ies: information elements from the (Re)Association Request frame
* @req_ies_len: length of req_ies data
+ * @ap_mld_addr: AP MLD address (in case of MLO)
+ * @links: per-link information indexed by link ID, use links[0] for
+ * non-MLO connections
+ */
+struct cfg80211_rx_assoc_resp {
+ const u8 *buf;
+ size_t len;
+ const u8 *req_ies;
+ size_t req_ies_len;
+ int uapsd_queues;
+ const u8 *ap_mld_addr;
+ struct {
+ const u8 *addr;
+ struct cfg80211_bss *bss;
+ } links[IEEE80211_MLD_MAX_NUM_LINKS];
+};
+
+/**
+ * cfg80211_rx_assoc_resp - notification of processed association response
+ * @dev: network device
+ * @data: association response data, &struct cfg80211_rx_assoc_resp
*
* After being asked to associate via cfg80211_ops::assoc() the driver must
* call either this function or cfg80211_auth_timeout().
@@ -5957,53 +6958,61 @@ void cfg80211_auth_timeout(struct net_device *dev, const u8 *addr);
* This function may sleep. The caller must hold the corresponding wdev's mutex.
*/
void cfg80211_rx_assoc_resp(struct net_device *dev,
- struct cfg80211_bss *bss,
- const u8 *buf, size_t len,
- int uapsd_queues,
- const u8 *req_ies, size_t req_ies_len);
+ struct cfg80211_rx_assoc_resp *data);
/**
- * cfg80211_assoc_timeout - notification of timed out association
- * @dev: network device
- * @bss: The BSS entry with which association timed out.
- *
- * This function may sleep. The caller must hold the corresponding wdev's mutex.
+ * struct cfg80211_assoc_failure - association failure data
+ * @ap_mld_addr: AP MLD address, or %NULL
+ * @bss: list of BSSes, must use entry 0 for non-MLO connections
+ * (@ap_mld_addr is %NULL)
+ * @timeout: indicates the association failed due to timeout, otherwise
+ * the association was abandoned for a reason reported through some
+ * other API (e.g. deauth RX)
*/
-void cfg80211_assoc_timeout(struct net_device *dev, struct cfg80211_bss *bss);
+struct cfg80211_assoc_failure {
+ const u8 *ap_mld_addr;
+ struct cfg80211_bss *bss[IEEE80211_MLD_MAX_NUM_LINKS];
+ bool timeout;
+};
/**
- * cfg80211_abandon_assoc - notify cfg80211 of abandoned association attempt
+ * cfg80211_assoc_failure - notification of association failure
* @dev: network device
- * @bss: The BSS entry with which association was abandoned.
+ * @data: data describing the association failure
*
- * Call this whenever - for reasons reported through other API, like deauth RX,
- * an association attempt was abandoned.
* This function may sleep. The caller must hold the corresponding wdev's mutex.
*/
-void cfg80211_abandon_assoc(struct net_device *dev, struct cfg80211_bss *bss);
+void cfg80211_assoc_failure(struct net_device *dev,
+ struct cfg80211_assoc_failure *data);
/**
* cfg80211_tx_mlme_mgmt - notification of transmitted deauth/disassoc frame
* @dev: network device
* @buf: 802.11 frame (header + body)
* @len: length of the frame data
+ * @reconnect: immediate reconnect is desired (include the nl80211 attribute)
*
* This function is called whenever deauthentication has been processed in
* station mode. This includes both received deauthentication frames and
* locally generated ones. This function may sleep. The caller must hold the
* corresponding wdev's mutex.
*/
-void cfg80211_tx_mlme_mgmt(struct net_device *dev, const u8 *buf, size_t len);
+void cfg80211_tx_mlme_mgmt(struct net_device *dev, const u8 *buf, size_t len,
+ bool reconnect);
/**
* cfg80211_rx_unprot_mlme_mgmt - notification of unprotected mlme mgmt frame
* @dev: network device
- * @buf: deauthentication frame (header + body)
+ * @buf: received management frame (header + body)
* @len: length of the frame data
*
* This function is called whenever a received deauthentication or dissassoc
* frame has been dropped in station mode because of MFP being used but the
- * frame was not protected. This function may sleep.
+ * frame was not protected. This is also used to notify reception of a Beacon
+ * frame that was dropped because it did not include a valid MME MIC while
+ * beacon protection was enabled (BIGTK configured in station mode).
+ *
+ * This function may sleep.
*/
void cfg80211_rx_unprot_mlme_mgmt(struct net_device *dev,
const u8 *buf, size_t len);
@@ -6044,12 +7053,14 @@ void cfg80211_ibss_joined(struct net_device *dev, const u8 *bssid,
struct ieee80211_channel *channel, gfp_t gfp);
/**
- * cfg80211_notify_new_candidate - notify cfg80211 of a new mesh peer candidate
+ * cfg80211_notify_new_peer_candidate - notify cfg80211 of a new mesh peer
+ * candidate
*
* @dev: network device
* @macaddr: the MAC address of the new candidate
* @ie: information elements advertised by the peer candidate
* @ie_len: length of the information elements buffer
+ * @sig_dbm: signal level in dBm
* @gfp: allocation flags
*
* This function notifies cfg80211 that the mesh peer candidate has been
@@ -6075,11 +7086,19 @@ void cfg80211_notify_new_peer_candidate(struct net_device *dev,
*/
/**
- * wiphy_rfkill_set_hw_state - notify cfg80211 about hw block state
+ * wiphy_rfkill_set_hw_state_reason - notify cfg80211 about hw block state
* @wiphy: the wiphy
* @blocked: block status
+ * @reason: one of reasons in &enum rfkill_hard_block_reasons
*/
-void wiphy_rfkill_set_hw_state(struct wiphy *wiphy, bool blocked);
+void wiphy_rfkill_set_hw_state_reason(struct wiphy *wiphy, bool blocked,
+ enum rfkill_hard_block_reasons reason);
+
+static inline void wiphy_rfkill_set_hw_state(struct wiphy *wiphy, bool blocked)
+{
+ wiphy_rfkill_set_hw_state_reason(wiphy, blocked,
+ RFKILL_HARD_BLOCK_SIGNAL);
+}
/**
* wiphy_rfkill_start_polling - start polling rfkill
@@ -6091,7 +7110,10 @@ void wiphy_rfkill_start_polling(struct wiphy *wiphy);
* wiphy_rfkill_stop_polling - stop polling rfkill
* @wiphy: the wiphy
*/
-void wiphy_rfkill_stop_polling(struct wiphy *wiphy);
+static inline void wiphy_rfkill_stop_polling(struct wiphy *wiphy)
+{
+ rfkill_pause_polling(wiphy->rfkill);
+}
/**
* DOC: Vendor commands
@@ -6173,7 +7195,7 @@ cfg80211_vendor_cmd_alloc_reply_skb(struct wiphy *wiphy, int approxlen)
int cfg80211_vendor_cmd_reply(struct sk_buff *skb);
/**
- * cfg80211_vendor_cmd_get_sender
+ * cfg80211_vendor_cmd_get_sender - get the current sender netlink ID
* @wiphy: the wiphy
*
* Return the current netlink port ID in a vendor command handler.
@@ -6403,13 +7425,6 @@ struct cfg80211_fils_resp_params {
* indicate that this is a failure, but without a status code.
* @timeout_reason is used to report the reason for the timeout in that
* case.
- * @bssid: The BSSID of the AP (may be %NULL)
- * @bss: Entry of bss to which STA got connected to, can be obtained through
- * cfg80211_get_bss() (may be %NULL). But it is recommended to store the
- * bss from the connect_request and hold a reference to it and return
- * through this param to avoid a warning if the bss is expired during the
- * connection, esp. for those drivers implementing connect op.
- * Only one parameter among @bssid and @bss needs to be specified.
* @req_ie: Association request IEs (may be %NULL)
* @req_ie_len: Association request IEs length
* @resp_ie: Association response IEs (may be %NULL)
@@ -6421,17 +7436,41 @@ struct cfg80211_fils_resp_params {
* not known. This value is used only if @status < 0 to indicate that the
* failure is due to a timeout and not due to explicit rejection by the AP.
* This value is ignored in other cases (@status >= 0).
+ * @valid_links: For MLO connection, BIT mask of the valid link ids. Otherwise
+ * zero.
+ * @ap_mld_addr: For MLO connection, MLD address of the AP. Otherwise %NULL.
+ * @links : For MLO connection, contains link info for the valid links indicated
+ * using @valid_links. For non-MLO connection, links[0] contains the
+ * connected AP info.
+ * @links.addr: For MLO connection, MAC address of the STA link. Otherwise
+ * %NULL.
+ * @links.bssid: For MLO connection, MAC address of the AP link. For non-MLO
+ * connection, links[0].bssid points to the BSSID of the AP (may be %NULL).
+ * @links.bss: For MLO connection, entry of bss to which STA link is connected.
+ * For non-MLO connection, links[0].bss points to entry of bss to which STA
+ * is connected. It can be obtained through cfg80211_get_bss() (may be
+ * %NULL). It is recommended to store the bss from the connect_request and
+ * hold a reference to it and return through this param to avoid a warning
+ * if the bss is expired during the connection, esp. for those drivers
+ * implementing connect op. Only one parameter among @bssid and @bss needs
+ * to be specified.
*/
struct cfg80211_connect_resp_params {
int status;
- const u8 *bssid;
- struct cfg80211_bss *bss;
const u8 *req_ie;
size_t req_ie_len;
const u8 *resp_ie;
size_t resp_ie_len;
struct cfg80211_fils_resp_params fils;
enum nl80211_timeout_reason timeout_reason;
+
+ const u8 *ap_mld_addr;
+ u16 valid_links;
+ struct {
+ const u8 *addr;
+ const u8 *bssid;
+ struct cfg80211_bss *bss;
+ } links[IEEE80211_MLD_MAX_NUM_LINKS];
};
/**
@@ -6501,8 +7540,8 @@ cfg80211_connect_bss(struct net_device *dev, const u8 *bssid,
memset(&params, 0, sizeof(params));
params.status = status;
- params.bssid = bssid;
- params.bss = bss;
+ params.links[0].bssid = bssid;
+ params.links[0].bss = bss;
params.req_ie = req_ie;
params.req_ie_len = req_ie_len;
params.resp_ie = resp_ie;
@@ -6573,24 +7612,40 @@ cfg80211_connect_timeout(struct net_device *dev, const u8 *bssid,
/**
* struct cfg80211_roam_info - driver initiated roaming information
*
- * @channel: the channel of the new AP
- * @bss: entry of bss to which STA got roamed (may be %NULL if %bssid is set)
- * @bssid: the BSSID of the new AP (may be %NULL if %bss is set)
* @req_ie: association request IEs (maybe be %NULL)
* @req_ie_len: association request IEs length
* @resp_ie: association response IEs (may be %NULL)
* @resp_ie_len: assoc response IEs length
* @fils: FILS related roaming information.
+ * @valid_links: For MLO roaming, BIT mask of the new valid links is set.
+ * Otherwise zero.
+ * @ap_mld_addr: For MLO roaming, MLD address of the new AP. Otherwise %NULL.
+ * @links : For MLO roaming, contains new link info for the valid links set in
+ * @valid_links. For non-MLO roaming, links[0] contains the new AP info.
+ * @links.addr: For MLO roaming, MAC address of the STA link. Otherwise %NULL.
+ * @links.bssid: For MLO roaming, MAC address of the new AP link. For non-MLO
+ * roaming, links[0].bssid points to the BSSID of the new AP. May be
+ * %NULL if %links.bss is set.
+ * @links.channel: the channel of the new AP.
+ * @links.bss: For MLO roaming, entry of new bss to which STA link got
+ * roamed. For non-MLO roaming, links[0].bss points to entry of bss to
+ * which STA got roamed (may be %NULL if %links.bssid is set)
*/
struct cfg80211_roam_info {
- struct ieee80211_channel *channel;
- struct cfg80211_bss *bss;
- const u8 *bssid;
const u8 *req_ie;
size_t req_ie_len;
const u8 *resp_ie;
size_t resp_ie_len;
struct cfg80211_fils_resp_params fils;
+
+ const u8 *ap_mld_addr;
+ u16 valid_links;
+ struct {
+ const u8 *addr;
+ const u8 *bssid;
+ struct ieee80211_channel *channel;
+ struct cfg80211_bss *bss;
+ } links[IEEE80211_MLD_MAX_NUM_LINKS];
};
/**
@@ -6757,6 +7812,80 @@ void cfg80211_conn_failed(struct net_device *dev, const u8 *mac_addr,
gfp_t gfp);
/**
+ * struct cfg80211_rx_info - received management frame info
+ *
+ * @freq: Frequency on which the frame was received in kHz
+ * @sig_dbm: signal strength in dBm, or 0 if unknown
+ * @have_link_id: indicates the frame was received on a link of
+ * an MLD, i.e. the @link_id field is valid
+ * @link_id: the ID of the link the frame was received on
+ * @buf: Management frame (header + body)
+ * @len: length of the frame data
+ * @flags: flags, as defined in enum nl80211_rxmgmt_flags
+ * @rx_tstamp: Hardware timestamp of frame RX in nanoseconds
+ * @ack_tstamp: Hardware timestamp of ack TX in nanoseconds
+ */
+struct cfg80211_rx_info {
+ int freq;
+ int sig_dbm;
+ bool have_link_id;
+ u8 link_id;
+ const u8 *buf;
+ size_t len;
+ u32 flags;
+ u64 rx_tstamp;
+ u64 ack_tstamp;
+};
+
+/**
+ * cfg80211_rx_mgmt_ext - management frame notification with extended info
+ * @wdev: wireless device receiving the frame
+ * @info: RX info as defined in struct cfg80211_rx_info
+ *
+ * This function is called whenever an Action frame is received for a station
+ * mode interface, but is not processed in kernel.
+ *
+ * Return: %true if a user space application has registered for this frame.
+ * For action frames, that makes it responsible for rejecting unrecognized
+ * action frames; %false otherwise, in which case for action frames the
+ * driver is responsible for rejecting the frame.
+ */
+bool cfg80211_rx_mgmt_ext(struct wireless_dev *wdev,
+ struct cfg80211_rx_info *info);
+
+/**
+ * cfg80211_rx_mgmt_khz - notification of received, unprocessed management frame
+ * @wdev: wireless device receiving the frame
+ * @freq: Frequency on which the frame was received in KHz
+ * @sig_dbm: signal strength in dBm, or 0 if unknown
+ * @buf: Management frame (header + body)
+ * @len: length of the frame data
+ * @flags: flags, as defined in enum nl80211_rxmgmt_flags
+ *
+ * This function is called whenever an Action frame is received for a station
+ * mode interface, but is not processed in kernel.
+ *
+ * Return: %true if a user space application has registered for this frame.
+ * For action frames, that makes it responsible for rejecting unrecognized
+ * action frames; %false otherwise, in which case for action frames the
+ * driver is responsible for rejecting the frame.
+ */
+static inline bool cfg80211_rx_mgmt_khz(struct wireless_dev *wdev, int freq,
+ int sig_dbm, const u8 *buf, size_t len,
+ u32 flags)
+{
+ struct cfg80211_rx_info info = {
+ .freq = freq,
+ .sig_dbm = sig_dbm,
+ .buf = buf,
+ .len = len,
+ .flags = flags
+ };
+
+ return cfg80211_rx_mgmt_ext(wdev, &info);
+}
+
+/**
* cfg80211_rx_mgmt - notification of received, unprocessed management frame
* @wdev: wireless device receiving the frame
* @freq: Frequency on which the frame was received in MHz
@@ -6773,8 +7902,52 @@ void cfg80211_conn_failed(struct net_device *dev, const u8 *mac_addr,
* action frames; %false otherwise, in which case for action frames the
* driver is responsible for rejecting the frame.
*/
-bool cfg80211_rx_mgmt(struct wireless_dev *wdev, int freq, int sig_dbm,
- const u8 *buf, size_t len, u32 flags);
+static inline bool cfg80211_rx_mgmt(struct wireless_dev *wdev, int freq,
+ int sig_dbm, const u8 *buf, size_t len,
+ u32 flags)
+{
+ struct cfg80211_rx_info info = {
+ .freq = MHZ_TO_KHZ(freq),
+ .sig_dbm = sig_dbm,
+ .buf = buf,
+ .len = len,
+ .flags = flags
+ };
+
+ return cfg80211_rx_mgmt_ext(wdev, &info);
+}
+
+/**
+ * struct cfg80211_tx_status - TX status for management frame information
+ *
+ * @cookie: Cookie returned by cfg80211_ops::mgmt_tx()
+ * @tx_tstamp: hardware TX timestamp in nanoseconds
+ * @ack_tstamp: hardware ack RX timestamp in nanoseconds
+ * @buf: Management frame (header + body)
+ * @len: length of the frame data
+ * @ack: Whether frame was acknowledged
+ */
+struct cfg80211_tx_status {
+ u64 cookie;
+ u64 tx_tstamp;
+ u64 ack_tstamp;
+ const u8 *buf;
+ size_t len;
+ bool ack;
+};
+
+/**
+ * cfg80211_mgmt_tx_status_ext - TX status notification with extended info
+ * @wdev: wireless device receiving the frame
+ * @status: TX status data
+ * @gfp: context flags
+ *
+ * This function is called whenever a management frame was requested to be
+ * transmitted with cfg80211_ops::mgmt_tx() to report the TX status of the
+ * transmission attempt with extended info.
+ */
+void cfg80211_mgmt_tx_status_ext(struct wireless_dev *wdev,
+ struct cfg80211_tx_status *status, gfp_t gfp);
/**
* cfg80211_mgmt_tx_status - notification of TX status for management frame
@@ -6789,9 +7962,37 @@ bool cfg80211_rx_mgmt(struct wireless_dev *wdev, int freq, int sig_dbm,
* transmitted with cfg80211_ops::mgmt_tx() to report the TX status of the
* transmission attempt.
*/
-void cfg80211_mgmt_tx_status(struct wireless_dev *wdev, u64 cookie,
- const u8 *buf, size_t len, bool ack, gfp_t gfp);
+static inline void cfg80211_mgmt_tx_status(struct wireless_dev *wdev,
+ u64 cookie, const u8 *buf,
+ size_t len, bool ack, gfp_t gfp)
+{
+ struct cfg80211_tx_status status = {
+ .cookie = cookie,
+ .buf = buf,
+ .len = len,
+ .ack = ack
+ };
+ cfg80211_mgmt_tx_status_ext(wdev, &status, gfp);
+}
+
+/**
+ * cfg80211_control_port_tx_status - notification of TX status for control
+ * port frames
+ * @wdev: wireless device receiving the frame
+ * @cookie: Cookie returned by cfg80211_ops::tx_control_port()
+ * @buf: Data frame (header + body)
+ * @len: length of the frame data
+ * @ack: Whether frame was acknowledged
+ * @gfp: context flags
+ *
+ * This function is called whenever a control port frame was requested to be
+ * transmitted with cfg80211_ops::tx_control_port() to report the TX status of
+ * the transmission attempt.
+ */
+void cfg80211_control_port_tx_status(struct wireless_dev *wdev, u64 cookie,
+ const u8 *buf, size_t len, bool ack,
+ gfp_t gfp);
/**
* cfg80211_rx_control_port - notification about a received control port frame
@@ -6866,15 +8067,33 @@ void cfg80211_cqm_txe_notify(struct net_device *dev, const u8 *peer,
void cfg80211_cqm_beacon_loss_notify(struct net_device *dev, gfp_t gfp);
/**
- * cfg80211_radar_event - radar detection event
+ * __cfg80211_radar_event - radar detection event
* @wiphy: the wiphy
* @chandef: chandef for the current channel
+ * @offchan: the radar has been detected on the offchannel chain
* @gfp: context flags
*
* This function is called when a radar is detected on the current chanenl.
*/
-void cfg80211_radar_event(struct wiphy *wiphy,
- struct cfg80211_chan_def *chandef, gfp_t gfp);
+void __cfg80211_radar_event(struct wiphy *wiphy,
+ struct cfg80211_chan_def *chandef,
+ bool offchan, gfp_t gfp);
+
+static inline void
+cfg80211_radar_event(struct wiphy *wiphy,
+ struct cfg80211_chan_def *chandef,
+ gfp_t gfp)
+{
+ __cfg80211_radar_event(wiphy, chandef, false, gfp);
+}
+
+static inline void
+cfg80211_background_radar_event(struct wiphy *wiphy,
+ struct cfg80211_chan_def *chandef,
+ gfp_t gfp)
+{
+ __cfg80211_radar_event(wiphy, chandef, true, gfp);
+}
/**
* cfg80211_sta_opmode_change_notify - STA's ht/vht operation mode change event
@@ -6905,6 +8124,14 @@ void cfg80211_cac_event(struct net_device *netdev,
const struct cfg80211_chan_def *chandef,
enum nl80211_radar_event event, gfp_t gfp);
+/**
+ * cfg80211_background_cac_abort - Channel Availability Check offchan abort event
+ * @wiphy: the wiphy
+ *
+ * This function is called by the driver when a Channel Availability Check
+ * (CAC) is aborted by a offchannel dedicated chain.
+ */
+void cfg80211_background_cac_abort(struct wiphy *wiphy);
/**
* cfg80211_gtk_rekey_notify - notify userspace about driver rekeying
@@ -6973,6 +8200,21 @@ void cfg80211_probe_status(struct net_device *dev, const u8 *addr,
bool is_valid_ack_signal, gfp_t gfp);
/**
+ * cfg80211_report_obss_beacon_khz - report beacon from other APs
+ * @wiphy: The wiphy that received the beacon
+ * @frame: the frame
+ * @len: length of the frame
+ * @freq: frequency the frame was received on in KHz
+ * @sig_dbm: signal strength in dBm, or 0 if unknown
+ *
+ * Use this function to report to userspace when a beacon was
+ * received. It is not useful to call this when there is no
+ * netdev that is in AP/GO mode.
+ */
+void cfg80211_report_obss_beacon_khz(struct wiphy *wiphy, const u8 *frame,
+ size_t len, int freq, int sig_dbm);
+
+/**
* cfg80211_report_obss_beacon - report beacon from other APs
* @wiphy: The wiphy that received the beacon
* @frame: the frame
@@ -6984,9 +8226,13 @@ void cfg80211_probe_status(struct net_device *dev, const u8 *addr,
* received. It is not useful to call this when there is no
* netdev that is in AP/GO mode.
*/
-void cfg80211_report_obss_beacon(struct wiphy *wiphy,
- const u8 *frame, size_t len,
- int freq, int sig_dbm);
+static inline void cfg80211_report_obss_beacon(struct wiphy *wiphy,
+ const u8 *frame, size_t len,
+ int freq, int sig_dbm)
+{
+ cfg80211_report_obss_beacon_khz(wiphy, frame, len, MHZ_TO_KHZ(freq),
+ sig_dbm);
+}
/**
* cfg80211_reg_can_beacon - check if beaconing is allowed
@@ -7012,7 +8258,7 @@ bool cfg80211_reg_can_beacon(struct wiphy *wiphy,
* also checks if IR-relaxation conditions apply, to allow beaconing under
* more permissive conditions.
*
- * Requires the RTNL to be held.
+ * Requires the wiphy mutex to be held.
*/
bool cfg80211_reg_can_beacon_relax(struct wiphy *wiphy,
struct cfg80211_chan_def *chandef,
@@ -7022,18 +8268,22 @@ bool cfg80211_reg_can_beacon_relax(struct wiphy *wiphy,
* cfg80211_ch_switch_notify - update wdev channel and notify userspace
* @dev: the device which switched channels
* @chandef: the new channel definition
+ * @link_id: the link ID for MLO, must be 0 for non-MLO
*
* Caller must acquire wdev_lock, therefore must only be called from sleepable
* driver context!
*/
void cfg80211_ch_switch_notify(struct net_device *dev,
- struct cfg80211_chan_def *chandef);
+ struct cfg80211_chan_def *chandef,
+ unsigned int link_id);
/*
* cfg80211_ch_switch_started_notify - notify channel switch start
* @dev: the device on which the channel switch started
* @chandef: the future channel definition
+ * @link_id: the link ID for MLO, must be 0 for non-MLO
* @count: the number of TBTTs until the channel switch happens
+ * @quiet: whether or not immediate quiet was requested by the AP
*
* Inform the userspace about the channel switch that has just
* started, so that it can take appropriate actions (eg. starting
@@ -7041,7 +8291,8 @@ void cfg80211_ch_switch_notify(struct net_device *dev,
*/
void cfg80211_ch_switch_started_notify(struct net_device *dev,
struct cfg80211_chan_def *chandef,
- u8 count);
+ unsigned int link_id, u8 count,
+ bool quiet);
/**
* ieee80211_operating_class_to_band - convert operating class to band
@@ -7065,6 +8316,19 @@ bool ieee80211_operating_class_to_band(u8 operating_class,
bool ieee80211_chandef_to_operating_class(struct cfg80211_chan_def *chandef,
u8 *op_class);
+/**
+ * ieee80211_chandef_to_khz - convert chandef to frequency in KHz
+ *
+ * @chandef: the chandef to convert
+ *
+ * Returns the center frequency of chandef (1st segment) in KHz.
+ */
+static inline u32
+ieee80211_chandef_to_khz(const struct cfg80211_chan_def *chandef)
+{
+ return MHZ_TO_KHZ(chandef->center_freq1) + chandef->freq1_offset;
+}
+
/*
* cfg80211_tdls_oper_request - request userspace to perform TDLS operation
* @dev: the device on which the operation is requested
@@ -7096,20 +8360,49 @@ u32 cfg80211_calculate_bitrate(struct rate_info *rate);
* cfg80211_unregister_wdev - remove the given wdev
* @wdev: struct wireless_dev to remove
*
- * Call this function only for wdevs that have no netdev assigned,
- * e.g. P2P Devices. It removes the device from the list so that
- * it can no longer be used. It is necessary to call this function
- * even when cfg80211 requests the removal of the interface by
- * calling the del_virtual_intf() callback. The function must also
- * be called when the driver wishes to unregister the wdev, e.g.
- * when the device is unbound from the driver.
+ * This function removes the device so it can no longer be used. It is necessary
+ * to call this function even when cfg80211 requests the removal of the device
+ * by calling the del_virtual_intf() callback. The function must also be called
+ * when the driver wishes to unregister the wdev, e.g. when the hardware device
+ * is unbound from the driver.
*
- * Requires the RTNL to be held.
+ * Requires the RTNL and wiphy mutex to be held.
*/
void cfg80211_unregister_wdev(struct wireless_dev *wdev);
/**
- * struct cfg80211_ft_event - FT Information Elements
+ * cfg80211_register_netdevice - register the given netdev
+ * @dev: the netdev to register
+ *
+ * Note: In contexts coming from cfg80211 callbacks, you must call this rather
+ * than register_netdevice(), unregister_netdev() is impossible as the RTNL is
+ * held. Otherwise, both register_netdevice() and register_netdev() are usable
+ * instead as well.
+ *
+ * Requires the RTNL and wiphy mutex to be held.
+ */
+int cfg80211_register_netdevice(struct net_device *dev);
+
+/**
+ * cfg80211_unregister_netdevice - unregister the given netdev
+ * @dev: the netdev to register
+ *
+ * Note: In contexts coming from cfg80211 callbacks, you must call this rather
+ * than unregister_netdevice(), unregister_netdev() is impossible as the RTNL
+ * is held. Otherwise, both unregister_netdevice() and unregister_netdev() are
+ * usable instead as well.
+ *
+ * Requires the RTNL and wiphy mutex to be held.
+ */
+static inline void cfg80211_unregister_netdevice(struct net_device *dev)
+{
+#if IS_ENABLED(CONFIG_CFG80211)
+ cfg80211_unregister_wdev(dev->ieee80211_ptr);
+#endif
+}
+
+/**
+ * struct cfg80211_ft_event_params - FT Information Elements
* @ies: FT IEs
* @ies_len: length of the FT IE in bytes
* @target_ap: target AP's MAC address
@@ -7462,6 +8755,18 @@ bool cfg80211_iftype_allowed(struct wiphy *wiphy, enum nl80211_iftype iftype,
bool is_4addr, u8 check_swif);
+/**
+ * cfg80211_assoc_comeback - notification of association that was
+ * temporarly rejected with a comeback
+ * @netdev: network device
+ * @ap_addr: AP (MLD) address that rejected the assocation
+ * @timeout: timeout interval value TUs.
+ *
+ * this function may sleep. the caller must hold the corresponding wdev's mutex.
+ */
+void cfg80211_assoc_comeback(struct net_device *netdev,
+ const u8 *ap_addr, u32 timeout);
+
/* Logging, debugging and troubleshooting/diagnostic helpers. */
/* wiphy_printk helpers, similar to dev_printk */
@@ -7482,6 +8787,8 @@ bool cfg80211_iftype_allowed(struct wiphy *wiphy, enum nl80211_iftype iftype,
dev_notice(&(wiphy)->dev, format, ##args)
#define wiphy_info(wiphy, format, args...) \
dev_info(&(wiphy)->dev, format, ##args)
+#define wiphy_info_once(wiphy, format, args...) \
+ dev_info_once(&(wiphy)->dev, format, ##args)
#define wiphy_err_ratelimited(wiphy, format, args...) \
dev_err_ratelimited(&(wiphy)->dev, format, ##args)
@@ -7523,4 +8830,77 @@ void cfg80211_update_owe_info_event(struct net_device *netdev,
struct cfg80211_update_owe_info *owe_info,
gfp_t gfp);
+/**
+ * cfg80211_bss_flush - resets all the scan entries
+ * @wiphy: the wiphy
+ */
+void cfg80211_bss_flush(struct wiphy *wiphy);
+
+/**
+ * cfg80211_bss_color_notify - notify about bss color event
+ * @dev: network device
+ * @gfp: allocation flags
+ * @cmd: the actual event we want to notify
+ * @count: the number of TBTTs until the color change happens
+ * @color_bitmap: representations of the colors that the local BSS is aware of
+ */
+int cfg80211_bss_color_notify(struct net_device *dev, gfp_t gfp,
+ enum nl80211_commands cmd, u8 count,
+ u64 color_bitmap);
+
+/**
+ * cfg80211_obss_color_collision_notify - notify about bss color collision
+ * @dev: network device
+ * @color_bitmap: representations of the colors that the local BSS is aware of
+ * @gfp: allocation flags
+ */
+static inline int cfg80211_obss_color_collision_notify(struct net_device *dev,
+ u64 color_bitmap, gfp_t gfp)
+{
+ return cfg80211_bss_color_notify(dev, gfp,
+ NL80211_CMD_OBSS_COLOR_COLLISION,
+ 0, color_bitmap);
+}
+
+/**
+ * cfg80211_color_change_started_notify - notify color change start
+ * @dev: the device on which the color is switched
+ * @count: the number of TBTTs until the color change happens
+ *
+ * Inform the userspace about the color change that has started.
+ */
+static inline int cfg80211_color_change_started_notify(struct net_device *dev,
+ u8 count)
+{
+ return cfg80211_bss_color_notify(dev, GFP_KERNEL,
+ NL80211_CMD_COLOR_CHANGE_STARTED,
+ count, 0);
+}
+
+/**
+ * cfg80211_color_change_aborted_notify - notify color change abort
+ * @dev: the device on which the color is switched
+ *
+ * Inform the userspace about the color change that has aborted.
+ */
+static inline int cfg80211_color_change_aborted_notify(struct net_device *dev)
+{
+ return cfg80211_bss_color_notify(dev, GFP_KERNEL,
+ NL80211_CMD_COLOR_CHANGE_ABORTED,
+ 0, 0);
+}
+
+/**
+ * cfg80211_color_change_notify - notify color change completion
+ * @dev: the device on which the color was switched
+ *
+ * Inform the userspace about the color change that has completed.
+ */
+static inline int cfg80211_color_change_notify(struct net_device *dev)
+{
+ return cfg80211_bss_color_notify(dev, GFP_KERNEL,
+ NL80211_CMD_COLOR_CHANGE_COMPLETED,
+ 0, 0);
+}
+
#endif /* __NET_CFG80211_H */
diff --git a/include/net/cfg802154.h b/include/net/cfg802154.h
index 6f86073a5d7d..d8d8719315fd 100644
--- a/include/net/cfg802154.h
+++ b/include/net/cfg802154.h
@@ -203,8 +203,8 @@ struct wpan_phy {
/* PHY depended MAC PIB values */
- /* 802.15.4 acronym: Tdsym in usec */
- u8 symbol_duration;
+ /* 802.15.4 acronym: Tdsym in nsec */
+ u32 symbol_duration;
/* lifs and sifs periods timing */
u16 lifs_period;
u16 sifs_period;
@@ -214,7 +214,7 @@ struct wpan_phy {
/* the network namespace this phy lives in currently */
possible_net_t _net;
- char priv[0] __aligned(NETDEV_ALIGN);
+ char priv[] __aligned(NETDEV_ALIGN);
};
static inline struct net *wpan_phy_net(struct wpan_phy *wpan_phy)
@@ -227,6 +227,16 @@ static inline void wpan_phy_net_set(struct wpan_phy *wpan_phy, struct net *net)
write_pnet(&wpan_phy->_net, net);
}
+/**
+ * struct ieee802154_addr - IEEE802.15.4 device address
+ * @mode: Address mode from frame header. Can be one of:
+ * - @IEEE802154_ADDR_NONE
+ * - @IEEE802154_ADDR_SHORT
+ * - @IEEE802154_ADDR_LONG
+ * @pan_id: The PAN ID this address belongs to
+ * @short_addr: address if @mode is @IEEE802154_ADDR_SHORT
+ * @extended_addr: address if @mode is @IEEE802154_ADDR_LONG
+ */
struct ieee802154_addr {
u8 mode;
__le16 pan_id;
@@ -363,6 +373,7 @@ struct wpan_dev {
#define to_phy(_dev) container_of(_dev, struct wpan_phy, dev)
+#if IS_ENABLED(CONFIG_IEEE802154) || IS_ENABLED(CONFIG_6LOWPAN)
static inline int
wpan_dev_hard_header(struct sk_buff *skb, struct net_device *dev,
const struct ieee802154_addr *daddr,
@@ -373,6 +384,7 @@ wpan_dev_hard_header(struct sk_buff *skb, struct net_device *dev,
return wpan_dev->header_ops->create(skb, dev, daddr, saddr, len);
}
+#endif
struct wpan_phy *
wpan_phy_new(const struct cfg802154_ops *ops, size_t priv_size);
@@ -405,4 +417,6 @@ static inline const char *wpan_phy_name(struct wpan_phy *phy)
return dev_name(&phy->dev);
}
+void ieee802154_configure_durations(struct wpan_phy *phy);
+
#endif /* __NET_CFG802154_H */
diff --git a/include/net/checksum.h b/include/net/checksum.h
index 97bf4885a962..6bc783b7a06c 100644
--- a/include/net/checksum.h
+++ b/include/net/checksum.h
@@ -22,39 +22,39 @@
#include <asm/checksum.h>
#ifndef _HAVE_ARCH_COPY_AND_CSUM_FROM_USER
-static inline
+static __always_inline
__wsum csum_and_copy_from_user (const void __user *src, void *dst,
- int len, __wsum sum, int *err_ptr)
+ int len)
{
- if (access_ok(src, len))
- return csum_partial_copy_from_user(src, dst, len, sum, err_ptr);
-
- if (len)
- *err_ptr = -EFAULT;
-
- return sum;
+ if (copy_from_user(dst, src, len))
+ return 0;
+ return csum_partial(dst, len, ~0U);
}
#endif
#ifndef HAVE_CSUM_COPY_USER
-static __inline__ __wsum csum_and_copy_to_user
-(const void *src, void __user *dst, int len, __wsum sum, int *err_ptr)
+static __always_inline __wsum csum_and_copy_to_user
+(const void *src, void __user *dst, int len)
{
- sum = csum_partial(src, len, sum);
+ __wsum sum = csum_partial(src, len, ~0U);
- if (access_ok(dst, len)) {
- if (copy_to_user(dst, src, len) == 0)
- return sum;
- }
- if (len)
- *err_ptr = -EFAULT;
+ if (copy_to_user(dst, src, len) == 0)
+ return sum;
+ return 0;
+}
+#endif
- return (__force __wsum)-1; /* invalid checksum */
+#ifndef _HAVE_ARCH_CSUM_AND_COPY
+static __always_inline __wsum
+csum_partial_copy_nocheck(const void *src, void *dst, int len)
+{
+ memcpy(dst, src, len);
+ return csum_partial(dst, len, 0);
}
#endif
#ifndef HAVE_ARCH_CSUM_ADD
-static inline __wsum csum_add(__wsum csum, __wsum addend)
+static __always_inline __wsum csum_add(__wsum csum, __wsum addend)
{
u32 res = (__force u32)csum;
res += (__force u32)addend;
@@ -62,12 +62,12 @@ static inline __wsum csum_add(__wsum csum, __wsum addend)
}
#endif
-static inline __wsum csum_sub(__wsum csum, __wsum addend)
+static __always_inline __wsum csum_sub(__wsum csum, __wsum addend)
{
return csum_add(csum, ~addend);
}
-static inline __sum16 csum16_add(__sum16 csum, __be16 addend)
+static __always_inline __sum16 csum16_add(__sum16 csum, __be16 addend)
{
u16 res = (__force u16)csum;
@@ -75,53 +75,58 @@ static inline __sum16 csum16_add(__sum16 csum, __be16 addend)
return (__force __sum16)(res + (res < (__force u16)addend));
}
-static inline __sum16 csum16_sub(__sum16 csum, __be16 addend)
+static __always_inline __sum16 csum16_sub(__sum16 csum, __be16 addend)
{
return csum16_add(csum, ~addend);
}
-static inline __wsum
-csum_block_add(__wsum csum, __wsum csum2, int offset)
+#ifndef HAVE_ARCH_CSUM_SHIFT
+static __always_inline __wsum csum_shift(__wsum sum, int offset)
{
- u32 sum = (__force u32)csum2;
-
/* rotate sum to align it with a 16b boundary */
if (offset & 1)
- sum = ror32(sum, 8);
+ return (__force __wsum)ror32((__force u32)sum, 8);
+ return sum;
+}
+#endif
- return csum_add(csum, (__force __wsum)sum);
+static __always_inline __wsum
+csum_block_add(__wsum csum, __wsum csum2, int offset)
+{
+ return csum_add(csum, csum_shift(csum2, offset));
}
-static inline __wsum
+static __always_inline __wsum
csum_block_add_ext(__wsum csum, __wsum csum2, int offset, int len)
{
return csum_block_add(csum, csum2, offset);
}
-static inline __wsum
+static __always_inline __wsum
csum_block_sub(__wsum csum, __wsum csum2, int offset)
{
return csum_block_add(csum, ~csum2, offset);
}
-static inline __wsum csum_unfold(__sum16 n)
+static __always_inline __wsum csum_unfold(__sum16 n)
{
return (__force __wsum)n;
}
-static inline __wsum csum_partial_ext(const void *buff, int len, __wsum sum)
+static __always_inline
+__wsum csum_partial_ext(const void *buff, int len, __wsum sum)
{
return csum_partial(buff, len, sum);
}
#define CSUM_MANGLED_0 ((__force __sum16)0xffff)
-static inline void csum_replace_by_diff(__sum16 *sum, __wsum diff)
+static __always_inline void csum_replace_by_diff(__sum16 *sum, __wsum diff)
{
*sum = csum_fold(csum_add(diff, ~csum_unfold(*sum)));
}
-static inline void csum_replace4(__sum16 *sum, __be32 from, __be32 to)
+static __always_inline void csum_replace4(__sum16 *sum, __be32 from, __be32 to)
{
__wsum tmp = csum_sub(~csum_unfold(*sum), (__force __wsum)from);
@@ -134,11 +139,16 @@ static inline void csum_replace4(__sum16 *sum, __be32 from, __be32 to)
* m : old value of a 16bit field
* m' : new value of a 16bit field
*/
-static inline void csum_replace2(__sum16 *sum, __be16 old, __be16 new)
+static __always_inline void csum_replace2(__sum16 *sum, __be16 old, __be16 new)
{
*sum = ~csum16_add(csum16_sub(~(*sum), old), new);
}
+static inline void csum_replace(__wsum *csum, __wsum old, __wsum new)
+{
+ *csum = csum_add(csum_sub(*csum, old), new);
+}
+
struct sk_buff;
void inet_proto_csum_replace4(__sum16 *sum, struct sk_buff *skb,
__be32 from, __be32 to, bool pseudohdr);
@@ -148,16 +158,16 @@ void inet_proto_csum_replace16(__sum16 *sum, struct sk_buff *skb,
void inet_proto_csum_replace_by_diff(__sum16 *sum, struct sk_buff *skb,
__wsum diff, bool pseudohdr);
-static inline void inet_proto_csum_replace2(__sum16 *sum, struct sk_buff *skb,
- __be16 from, __be16 to,
- bool pseudohdr)
+static __always_inline
+void inet_proto_csum_replace2(__sum16 *sum, struct sk_buff *skb,
+ __be16 from, __be16 to, bool pseudohdr)
{
inet_proto_csum_replace4(sum, skb, (__force __be32)from,
(__force __be32)to, pseudohdr);
}
-static inline __wsum remcsum_adjust(void *ptr, __wsum csum,
- int start, int offset)
+static __always_inline __wsum remcsum_adjust(void *ptr, __wsum csum,
+ int start, int offset)
{
__sum16 *psum = (__sum16 *)(ptr + offset);
__wsum delta;
@@ -173,9 +183,13 @@ static inline __wsum remcsum_adjust(void *ptr, __wsum csum,
return delta;
}
-static inline void remcsum_unadjust(__sum16 *psum, __wsum delta)
+static __always_inline void remcsum_unadjust(__sum16 *psum, __wsum delta)
{
*psum = csum_fold(csum_sub(delta, (__force __wsum)*psum));
}
+static __always_inline __wsum wsum_negate(__wsum val)
+{
+ return (__force __wsum)-((__force u32)val);
+}
#endif
diff --git a/include/net/cipso_ipv4.h b/include/net/cipso_ipv4.h
index 428b6725b248..53dd7d988a2d 100644
--- a/include/net/cipso_ipv4.h
+++ b/include/net/cipso_ipv4.h
@@ -150,18 +150,6 @@ static inline int cipso_v4_doi_walk(u32 *skip_cnt,
{
return 0;
}
-
-static inline int cipso_v4_doi_domhsh_add(struct cipso_v4_doi *doi_def,
- const char *domain)
-{
- return -ENOSYS;
-}
-
-static inline int cipso_v4_doi_domhsh_remove(struct cipso_v4_doi *doi_def,
- const char *domain)
-{
- return 0;
-}
#endif /* CONFIG_NETLABEL */
/*
diff --git a/include/net/cls_cgroup.h b/include/net/cls_cgroup.h
index 4295de3e6a4b..7e78e7d6f015 100644
--- a/include/net/cls_cgroup.h
+++ b/include/net/cls_cgroup.h
@@ -45,9 +45,14 @@ static inline void sock_update_classid(struct sock_cgroup_data *skcd)
sock_cgroup_set_classid(skcd, classid);
}
+static inline u32 __task_get_classid(struct task_struct *task)
+{
+ return task_cls_state(task)->classid;
+}
+
static inline u32 task_get_classid(const struct sk_buff *skb)
{
- u32 classid = task_cls_state(current)->classid;
+ u32 classid = __task_get_classid(current);
/* Due to the nature of the classifier it is required to ignore all
* packets originating from softirq context as accessing `current'
diff --git a/include/net/codel.h b/include/net/codel.h
index a6e428f80135..5fed2f16cb8d 100644
--- a/include/net/codel.h
+++ b/include/net/codel.h
@@ -44,8 +44,6 @@
#include <linux/types.h>
#include <linux/ktime.h>
#include <linux/skbuff.h>
-#include <net/pkt_sched.h>
-#include <net/inet_ecn.h>
/* Controlling Queue Delay (CoDel) algorithm
* =========================================
@@ -102,6 +100,9 @@ static inline u32 codel_time_to_us(codel_time_t val)
* @interval: width of moving time window
* @mtu: device mtu, or minimal queue backlog in bytes.
* @ecn: is Explicit Congestion Notification enabled
+ * @ce_threshold_selector: apply ce_threshold to packets matching this value
+ * in the diffserv/ECN byte of the IP header
+ * @ce_threshold_mask: mask to apply to ce_threshold_selector comparison
*/
struct codel_params {
codel_time_t target;
@@ -109,6 +110,8 @@ struct codel_params {
codel_time_t interval;
u32 mtu;
bool ecn;
+ u8 ce_threshold_selector;
+ u8 ce_threshold_mask;
};
/**
diff --git a/include/net/codel_impl.h b/include/net/codel_impl.h
index d289b91dcd65..78a27ac73070 100644
--- a/include/net/codel_impl.h
+++ b/include/net/codel_impl.h
@@ -49,11 +49,15 @@
* Implemented on linux by Dave Taht and Eric Dumazet
*/
+#include <net/inet_ecn.h>
+
static void codel_params_init(struct codel_params *params)
{
params->interval = MS2TIME(100);
params->target = MS2TIME(5);
params->ce_threshold = CODEL_DISABLED_THRESHOLD;
+ params->ce_threshold_mask = 0;
+ params->ce_threshold_selector = 0;
params->ecn = false;
}
@@ -246,9 +250,19 @@ static struct sk_buff *codel_dequeue(void *ctx,
vars->rec_inv_sqrt);
}
end:
- if (skb && codel_time_after(vars->ldelay, params->ce_threshold) &&
- INET_ECN_set_ce(skb))
- stats->ce_mark++;
+ if (skb && codel_time_after(vars->ldelay, params->ce_threshold)) {
+ bool set_ce = true;
+
+ if (params->ce_threshold_mask) {
+ int dsfield = skb_get_dsfield(skb);
+
+ set_ce = (dsfield >= 0 &&
+ (((u8)dsfield & params->ce_threshold_mask) ==
+ params->ce_threshold_selector));
+ }
+ if (set_ce && INET_ECN_set_ce(skb))
+ stats->ce_mark++;
+ }
return skb;
}
diff --git a/include/net/codel_qdisc.h b/include/net/codel_qdisc.h
index 098630f83a55..7d3d9219f4fe 100644
--- a/include/net/codel_qdisc.h
+++ b/include/net/codel_qdisc.h
@@ -49,6 +49,9 @@
* Implemented on linux by Dave Taht and Eric Dumazet
*/
+#include <net/codel.h>
+#include <net/pkt_sched.h>
+
/* Qdiscs using codel plugin must use codel_skb_cb in their own cb[] */
struct codel_skb_cb {
codel_time_t enqueue_time;
diff --git a/include/net/compat.h b/include/net/compat.h
index f277653c7e17..84c163f40f38 100644
--- a/include/net/compat.h
+++ b/include/net/compat.h
@@ -5,8 +5,6 @@
struct sock;
-#if defined(CONFIG_COMPAT)
-
#include <linux/compat.h>
struct compat_msghdr {
@@ -30,27 +28,68 @@ struct compat_cmsghdr {
compat_int_t cmsg_type;
};
-#else /* defined(CONFIG_COMPAT) */
-/*
- * To avoid compiler warnings:
- */
-#define compat_msghdr msghdr
-#define compat_mmsghdr mmsghdr
-#endif /* defined(CONFIG_COMPAT) */
+struct compat_rtentry {
+ u32 rt_pad1;
+ struct sockaddr rt_dst; /* target address */
+ struct sockaddr rt_gateway; /* gateway addr (RTF_GATEWAY) */
+ struct sockaddr rt_genmask; /* target network mask (IP) */
+ unsigned short rt_flags;
+ short rt_pad2;
+ u32 rt_pad3;
+ unsigned char rt_tos;
+ unsigned char rt_class;
+ short rt_pad4;
+ short rt_metric; /* +1 for binary compatibility! */
+ compat_uptr_t rt_dev; /* forcing the device at add */
+ u32 rt_mtu; /* per route MTU/Window */
+ u32 rt_window; /* Window clamping */
+ unsigned short rt_irtt; /* Initial RTT */
+};
+int __get_compat_msghdr(struct msghdr *kmsg, struct compat_msghdr *msg,
+ struct sockaddr __user **save_addr);
int get_compat_msghdr(struct msghdr *, struct compat_msghdr __user *,
struct sockaddr __user **, struct iovec **);
-struct sock_fprog __user *get_compat_bpf_fprog(char __user *optval);
int put_cmsg_compat(struct msghdr*, int, int, int, void *);
int cmsghdr_from_user_compat_to_kern(struct msghdr *, struct sock *,
unsigned char *, int);
-int compat_mc_setsockopt(struct sock *, int, int, char __user *, unsigned int,
- int (*)(struct sock *, int, int, char __user *,
- unsigned int));
-int compat_mc_getsockopt(struct sock *, int, int, char __user *, int __user *,
- int (*)(struct sock *, int, int, char __user *,
- int __user *));
+struct compat_group_req {
+ __u32 gr_interface;
+ struct __kernel_sockaddr_storage gr_group
+ __aligned(4);
+} __packed;
+
+struct compat_group_source_req {
+ __u32 gsr_interface;
+ struct __kernel_sockaddr_storage gsr_group
+ __aligned(4);
+ struct __kernel_sockaddr_storage gsr_source
+ __aligned(4);
+} __packed;
+
+struct compat_group_filter {
+ union {
+ struct {
+ __u32 gf_interface_aux;
+ struct __kernel_sockaddr_storage gf_group_aux
+ __aligned(4);
+ __u32 gf_fmode_aux;
+ __u32 gf_numsrc_aux;
+ struct __kernel_sockaddr_storage gf_slist[1]
+ __aligned(4);
+ } __packed;
+ struct {
+ __u32 gf_interface;
+ struct __kernel_sockaddr_storage gf_group
+ __aligned(4);
+ __u32 gf_fmode;
+ __u32 gf_numsrc;
+ struct __kernel_sockaddr_storage gf_slist_flex[]
+ __aligned(4);
+ } __packed;
+ };
+} __packed;
#endif /* NET_COMPAT_H */
diff --git a/include/net/datalink.h b/include/net/datalink.h
index a9663229b913..c837ffc7ebf8 100644
--- a/include/net/datalink.h
+++ b/include/net/datalink.h
@@ -2,6 +2,13 @@
#ifndef _NET_INET_DATALINK_H_
#define _NET_INET_DATALINK_H_
+#include <linux/list.h>
+
+struct llc_sap;
+struct net_device;
+struct packet_type;
+struct sk_buff;
+
struct datalink_proto {
unsigned char type[8];
@@ -12,7 +19,7 @@ struct datalink_proto {
int (*rcvfunc)(struct sk_buff *, struct net_device *,
struct packet_type *, struct net_device *);
int (*request)(struct datalink_proto *, struct sk_buff *,
- unsigned char *);
+ const unsigned char *);
struct list_head node;
};
diff --git a/include/net/dcbevent.h b/include/net/dcbevent.h
index 43e34131a53f..02700262f71a 100644
--- a/include/net/dcbevent.h
+++ b/include/net/dcbevent.h
@@ -8,6 +8,8 @@
#ifndef _DCB_EVENT_H
#define _DCB_EVENT_H
+struct notifier_block;
+
enum dcbevent_notif_type {
DCB_APP_EVENT = 1,
};
diff --git a/include/net/dcbnl.h b/include/net/dcbnl.h
index e4ad58c4062c..2b2d86fb3131 100644
--- a/include/net/dcbnl.h
+++ b/include/net/dcbnl.h
@@ -10,6 +10,8 @@
#include <linux/dcbnl.h>
+struct net_device;
+
struct dcb_app_type {
int ifindex;
struct dcb_app app;
diff --git a/include/net/devlink.h b/include/net/devlink.h
index ce5cea428fdc..ba6b8b094943 100644
--- a/include/net/devlink.h
+++ b/include/net/devlink.h
@@ -16,70 +16,111 @@
#include <linux/workqueue.h>
#include <linux/refcount.h>
#include <net/net_namespace.h>
+#include <net/flow_offload.h>
#include <uapi/linux/devlink.h>
+#include <linux/xarray.h>
+#include <linux/firmware.h>
-struct devlink_ops;
-
-struct devlink {
- struct list_head list;
- struct list_head port_list;
- struct list_head sb_list;
- struct list_head dpipe_table_list;
- struct list_head resource_list;
- struct list_head param_list;
- struct list_head region_list;
- u32 snapshot_id;
- struct list_head reporter_list;
- struct mutex reporters_lock; /* protects reporter_list */
- struct devlink_dpipe_headers *dpipe_headers;
- struct list_head trap_list;
- struct list_head trap_group_list;
- const struct devlink_ops *ops;
- struct device *dev;
- possible_net_t _net;
- struct mutex lock;
- u8 reload_failed:1,
- reload_enabled:1,
- registered:1;
- char priv[0] __aligned(NETDEV_ALIGN);
-};
+struct devlink;
+struct devlink_linecard;
struct devlink_port_phys_attrs {
u32 port_number; /* Same value as "split group".
* A physical port which is visible to the user
* for a given port flavour.
*/
- u32 split_subport_number;
+ u32 split_subport_number; /* If the port is split, this is the number of subport. */
};
+/**
+ * struct devlink_port_pci_pf_attrs - devlink port's PCI PF attributes
+ * @controller: Associated controller number
+ * @pf: Associated PCI PF number for this port.
+ * @external: when set, indicates if a port is for an external controller
+ */
struct devlink_port_pci_pf_attrs {
- u16 pf; /* Associated PCI PF for this port. */
+ u32 controller;
+ u16 pf;
+ u8 external:1;
};
+/**
+ * struct devlink_port_pci_vf_attrs - devlink port's PCI VF attributes
+ * @controller: Associated controller number
+ * @pf: Associated PCI PF number for this port.
+ * @vf: Associated PCI VF for of the PCI PF for this port.
+ * @external: when set, indicates if a port is for an external controller
+ */
struct devlink_port_pci_vf_attrs {
- u16 pf; /* Associated PCI PF for this port. */
- u16 vf; /* Associated PCI VF for of the PCI PF for this port. */
+ u32 controller;
+ u16 pf;
+ u16 vf;
+ u8 external:1;
+};
+
+/**
+ * struct devlink_port_pci_sf_attrs - devlink port's PCI SF attributes
+ * @controller: Associated controller number
+ * @sf: Associated PCI SF for of the PCI PF for this port.
+ * @pf: Associated PCI PF number for this port.
+ * @external: when set, indicates if a port is for an external controller
+ */
+struct devlink_port_pci_sf_attrs {
+ u32 controller;
+ u32 sf;
+ u16 pf;
+ u8 external:1;
};
+/**
+ * struct devlink_port_attrs - devlink port object
+ * @flavour: flavour of the port
+ * @split: indicates if this is split port
+ * @splittable: indicates if the port can be split.
+ * @lanes: maximum number of lanes the port supports. 0 value is not passed to netlink.
+ * @switch_id: if the port is part of switch, this is buffer with ID, otherwise this is NULL
+ * @phys: physical port attributes
+ * @pci_pf: PCI PF port attributes
+ * @pci_vf: PCI VF port attributes
+ * @pci_sf: PCI SF port attributes
+ */
struct devlink_port_attrs {
- u8 set:1,
- split:1,
- switch_port:1;
+ u8 split:1,
+ splittable:1;
+ u32 lanes;
enum devlink_port_flavour flavour;
struct netdev_phys_item_id switch_id;
union {
struct devlink_port_phys_attrs phys;
struct devlink_port_pci_pf_attrs pci_pf;
struct devlink_port_pci_vf_attrs pci_vf;
+ struct devlink_port_pci_sf_attrs pci_sf;
+ };
+};
+
+struct devlink_rate {
+ struct list_head list;
+ enum devlink_rate_type type;
+ struct devlink *devlink;
+ void *priv;
+ u64 tx_share;
+ u64 tx_max;
+
+ struct devlink_rate *parent;
+ union {
+ struct devlink_port *devlink_port;
+ struct {
+ char *name;
+ refcount_t refcnt;
+ };
};
};
struct devlink_port {
struct list_head list;
- struct list_head param_list;
+ struct list_head region_list;
struct devlink *devlink;
unsigned int index;
- bool registered;
spinlock_t type_lock; /* Protects type and type_dev
* pointer consistency.
*/
@@ -87,7 +128,61 @@ struct devlink_port {
enum devlink_port_type desired_type;
void *type_dev;
struct devlink_port_attrs attrs;
+ u8 attrs_set:1,
+ switch_port:1,
+ registered:1,
+ initialized:1;
struct delayed_work type_warn_dw;
+ struct list_head reporter_list;
+ struct mutex reporters_lock; /* Protects reporter_list */
+
+ struct devlink_rate *devlink_rate;
+ struct devlink_linecard *linecard;
+};
+
+struct devlink_port_new_attrs {
+ enum devlink_port_flavour flavour;
+ unsigned int port_index;
+ u32 controller;
+ u32 sfnum;
+ u16 pfnum;
+ u8 port_index_valid:1,
+ controller_valid:1,
+ sfnum_valid:1;
+};
+
+/**
+ * struct devlink_linecard_ops - Linecard operations
+ * @provision: callback to provision the linecard slot with certain
+ * type of linecard. As a result of this operation,
+ * driver is expected to eventually (could be after
+ * the function call returns) call one of:
+ * devlink_linecard_provision_set()
+ * devlink_linecard_provision_fail()
+ * @unprovision: callback to unprovision the linecard slot. As a result
+ * of this operation, driver is expected to eventually
+ * (could be after the function call returns) call
+ * devlink_linecard_provision_clear()
+ * devlink_linecard_provision_fail()
+ * @same_provision: callback to ask the driver if linecard is already
+ * provisioned in the same way user asks this linecard to be
+ * provisioned.
+ * @types_count: callback to get number of supported types
+ * @types_get: callback to get next type in list
+ */
+struct devlink_linecard_ops {
+ int (*provision)(struct devlink_linecard *linecard, void *priv,
+ const char *type, const void *type_priv,
+ struct netlink_ext_ack *extack);
+ int (*unprovision)(struct devlink_linecard *linecard, void *priv,
+ struct netlink_ext_ack *extack);
+ bool (*same_provision)(struct devlink_linecard *linecard, void *priv,
+ const char *type, const void *type_priv);
+ unsigned int (*types_count)(struct devlink_linecard *linecard,
+ void *priv);
+ void (*types_get)(struct devlink_linecard *linecard,
+ void *priv, unsigned int index, const char **type,
+ const void **type_priv);
};
struct devlink_sb_pool_info {
@@ -303,35 +398,10 @@ devlink_resource_size_params_init(struct devlink_resource_size_params *size_para
typedef u64 devlink_resource_occ_get_t(void *priv);
-/**
- * struct devlink_resource - devlink resource
- * @name: name of the resource
- * @id: id, per devlink instance
- * @size: size of the resource
- * @size_new: updated size of the resource, reload is needed
- * @size_valid: valid in case the total size of the resource is valid
- * including its children
- * @parent: parent resource
- * @size_params: size parameters
- * @list: parent list
- * @resource_list: list of child resources
- */
-struct devlink_resource {
- const char *name;
- u64 id;
- u64 size;
- u64 size_new;
- bool size_valid;
- struct devlink_resource *parent;
- struct devlink_resource_size_params size_params;
- struct list_head list;
- struct list_head resource_list;
- devlink_resource_occ_get_t *occ_get;
- void *occ_get_priv;
-};
-
#define DEVLINK_RESOURCE_ID_PARENT_TOP 0
+#define DEVLINK_RESOURCE_GENERIC_NAME_PORTS "physical_ports"
+
#define __DEVLINK_PARAM_MAX_STRING_VALUE 32
enum devlink_param_type {
DEVLINK_PARAM_TYPE_U8,
@@ -355,6 +425,25 @@ struct devlink_param_gset_ctx {
};
/**
+ * struct devlink_flash_notify - devlink dev flash notify data
+ * @status_msg: current status string
+ * @component: firmware component being updated
+ * @done: amount of work completed of total amount
+ * @total: amount of work expected to be done
+ * @timeout: expected max timeout in seconds
+ *
+ * These are values to be given to userland to be displayed in order
+ * to show current activity in a firmware update process.
+ */
+struct devlink_flash_notify {
+ const char *status_msg;
+ const char *component;
+ unsigned long done;
+ unsigned long total;
+ unsigned long timeout;
+};
+
+/**
* struct devlink_param - devlink configuration parameter data
* @name: name of the parameter
* @generic: indicates if the parameter is generic or driver specific
@@ -389,7 +478,6 @@ struct devlink_param_item {
const struct devlink_param *param;
union devlink_param_value driverinit_value;
bool driverinit_value_valid;
- bool published;
};
enum devlink_param_generic_id {
@@ -403,6 +491,13 @@ enum devlink_param_generic_id {
DEVLINK_PARAM_GENERIC_ID_FW_LOAD_POLICY,
DEVLINK_PARAM_GENERIC_ID_RESET_DEV_ON_DRV_PROBE,
DEVLINK_PARAM_GENERIC_ID_ENABLE_ROCE,
+ DEVLINK_PARAM_GENERIC_ID_ENABLE_REMOTE_DEV_RESET,
+ DEVLINK_PARAM_GENERIC_ID_ENABLE_ETH,
+ DEVLINK_PARAM_GENERIC_ID_ENABLE_RDMA,
+ DEVLINK_PARAM_GENERIC_ID_ENABLE_VNET,
+ DEVLINK_PARAM_GENERIC_ID_ENABLE_IWARP,
+ DEVLINK_PARAM_GENERIC_ID_IO_EQ_SIZE,
+ DEVLINK_PARAM_GENERIC_ID_EVENT_EQ_SIZE,
/* add new param generic ids above here*/
__DEVLINK_PARAM_GENERIC_ID_MAX,
@@ -440,6 +535,27 @@ enum devlink_param_generic_id {
#define DEVLINK_PARAM_GENERIC_ENABLE_ROCE_NAME "enable_roce"
#define DEVLINK_PARAM_GENERIC_ENABLE_ROCE_TYPE DEVLINK_PARAM_TYPE_BOOL
+#define DEVLINK_PARAM_GENERIC_ENABLE_REMOTE_DEV_RESET_NAME "enable_remote_dev_reset"
+#define DEVLINK_PARAM_GENERIC_ENABLE_REMOTE_DEV_RESET_TYPE DEVLINK_PARAM_TYPE_BOOL
+
+#define DEVLINK_PARAM_GENERIC_ENABLE_ETH_NAME "enable_eth"
+#define DEVLINK_PARAM_GENERIC_ENABLE_ETH_TYPE DEVLINK_PARAM_TYPE_BOOL
+
+#define DEVLINK_PARAM_GENERIC_ENABLE_RDMA_NAME "enable_rdma"
+#define DEVLINK_PARAM_GENERIC_ENABLE_RDMA_TYPE DEVLINK_PARAM_TYPE_BOOL
+
+#define DEVLINK_PARAM_GENERIC_ENABLE_VNET_NAME "enable_vnet"
+#define DEVLINK_PARAM_GENERIC_ENABLE_VNET_TYPE DEVLINK_PARAM_TYPE_BOOL
+
+#define DEVLINK_PARAM_GENERIC_ENABLE_IWARP_NAME "enable_iwarp"
+#define DEVLINK_PARAM_GENERIC_ENABLE_IWARP_TYPE DEVLINK_PARAM_TYPE_BOOL
+
+#define DEVLINK_PARAM_GENERIC_IO_EQ_SIZE_NAME "io_eq_size"
+#define DEVLINK_PARAM_GENERIC_IO_EQ_SIZE_TYPE DEVLINK_PARAM_TYPE_U32
+
+#define DEVLINK_PARAM_GENERIC_EVENT_EQ_SIZE_NAME "event_eq_size"
+#define DEVLINK_PARAM_GENERIC_EVENT_EQ_SIZE_TYPE DEVLINK_PARAM_TYPE_U32
+
#define DEVLINK_PARAM_GENERIC(_id, _cmodes, _get, _set, _validate) \
{ \
.id = DEVLINK_PARAM_GENERIC_ID_##_id, \
@@ -479,6 +595,8 @@ enum devlink_param_generic_id {
#define DEVLINK_INFO_VERSION_GENERIC_FW "fw"
/* Control processor FW version */
#define DEVLINK_INFO_VERSION_GENERIC_FW_MGMT "fw.mgmt"
+/* FW interface specification version */
+#define DEVLINK_INFO_VERSION_GENERIC_FW_MGMT_API "fw.mgmt.api"
/* Data path microcode controlling high-speed packet processing */
#define DEVLINK_INFO_VERSION_GENERIC_FW_APP "fw.app"
/* UNDI software version */
@@ -489,11 +607,68 @@ enum devlink_param_generic_id {
#define DEVLINK_INFO_VERSION_GENERIC_FW_PSID "fw.psid"
/* RoCE FW version */
#define DEVLINK_INFO_VERSION_GENERIC_FW_ROCE "fw.roce"
+/* Firmware bundle identifier */
+#define DEVLINK_INFO_VERSION_GENERIC_FW_BUNDLE_ID "fw.bundle_id"
+
+/**
+ * struct devlink_flash_update_params - Flash Update parameters
+ * @fw: pointer to the firmware data to update from
+ * @component: the flash component to update
+ *
+ * With the exception of fw, drivers must opt-in to parameters by
+ * setting the appropriate bit in the supported_flash_update_params field in
+ * their devlink_ops structure.
+ */
+struct devlink_flash_update_params {
+ const struct firmware *fw;
+ const char *component;
+ u32 overwrite_mask;
+};
+
+#define DEVLINK_SUPPORT_FLASH_UPDATE_OVERWRITE_MASK BIT(0)
struct devlink_region;
struct devlink_info_req;
-typedef void devlink_snapshot_data_dest_t(const void *data);
+/**
+ * struct devlink_region_ops - Region operations
+ * @name: region name
+ * @destructor: callback used to free snapshot memory when deleting
+ * @snapshot: callback to request an immediate snapshot. On success,
+ * the data variable must be updated to point to the snapshot data.
+ * The function will be called while the devlink instance lock is
+ * held.
+ * @priv: Pointer to driver private data for the region operation
+ */
+struct devlink_region_ops {
+ const char *name;
+ void (*destructor)(const void *data);
+ int (*snapshot)(struct devlink *devlink,
+ const struct devlink_region_ops *ops,
+ struct netlink_ext_ack *extack,
+ u8 **data);
+ void *priv;
+};
+
+/**
+ * struct devlink_port_region_ops - Region operations for a port
+ * @name: region name
+ * @destructor: callback used to free snapshot memory when deleting
+ * @snapshot: callback to request an immediate snapshot. On success,
+ * the data variable must be updated to point to the snapshot data.
+ * The function will be called while the devlink instance lock is
+ * held.
+ * @priv: Pointer to driver private data for the region operation
+ */
+struct devlink_port_region_ops {
+ const char *name;
+ void (*destructor)(const void *data);
+ int (*snapshot)(struct devlink_port *port,
+ const struct devlink_port_region_ops *ops,
+ struct netlink_ext_ack *extack,
+ u8 **data);
+ void *priv;
+};
struct devlink_fmsg;
struct devlink_health_reporter;
@@ -511,6 +686,7 @@ enum devlink_health_reporter_state {
* @dump: callback to dump an object
* if priv_ctx is NULL, run a full dump
* @diagnose: callback to diagnose the current status
+ * @test: callback to trigger a test event
*/
struct devlink_health_reporter_ops {
@@ -523,6 +699,51 @@ struct devlink_health_reporter_ops {
int (*diagnose)(struct devlink_health_reporter *reporter,
struct devlink_fmsg *fmsg,
struct netlink_ext_ack *extack);
+ int (*test)(struct devlink_health_reporter *reporter,
+ struct netlink_ext_ack *extack);
+};
+
+/**
+ * struct devlink_trap_metadata - Packet trap metadata.
+ * @trap_name: Trap name.
+ * @trap_group_name: Trap group name.
+ * @input_dev: Input netdevice.
+ * @dev_tracker: refcount tracker for @input_dev.
+ * @fa_cookie: Flow action user cookie.
+ * @trap_type: Trap type.
+ */
+struct devlink_trap_metadata {
+ const char *trap_name;
+ const char *trap_group_name;
+
+ struct net_device *input_dev;
+ netdevice_tracker dev_tracker;
+
+ const struct flow_action_cookie *fa_cookie;
+ enum devlink_trap_type trap_type;
+};
+
+/**
+ * struct devlink_trap_policer - Immutable packet trap policer attributes.
+ * @id: Policer identifier.
+ * @init_rate: Initial rate in packets / sec.
+ * @init_burst: Initial burst size in packets.
+ * @max_rate: Maximum rate.
+ * @min_rate: Minimum rate.
+ * @max_burst: Maximum burst size.
+ * @min_burst: Minimum burst size.
+ *
+ * Describes immutable attributes of packet trap policers that drivers register
+ * with devlink.
+ */
+struct devlink_trap_policer {
+ u32 id;
+ u64 init_rate;
+ u64 init_burst;
+ u64 max_rate;
+ u64 min_rate;
+ u64 max_burst;
+ u64 min_burst;
};
/**
@@ -530,6 +751,7 @@ struct devlink_health_reporter_ops {
* @name: Trap group name.
* @id: Trap group identifier.
* @generic: Whether the trap group is generic or not.
+ * @init_policer_id: Initial policer identifier.
*
* Describes immutable attributes of packet trap groups that drivers register
* with devlink.
@@ -538,9 +760,11 @@ struct devlink_trap_group {
const char *name;
u16 id;
bool generic;
+ u32 init_policer_id;
};
#define DEVLINK_TRAP_METADATA_TYPE_F_IN_PORT BIT(0)
+#define DEVLINK_TRAP_METADATA_TYPE_F_FA_COOKIE BIT(1)
/**
* struct devlink_trap - Immutable packet trap attributes.
@@ -549,7 +773,7 @@ struct devlink_trap_group {
* @generic: Whether the trap is generic or not.
* @id: Trap identifier.
* @name: Trap name.
- * @group: Immutable packet trap group attributes.
+ * @init_group_id: Initial group identifier.
* @metadata_cap: Metadata types that can be provided by the trap.
*
* Describes immutable attributes of packet traps that drivers register with
@@ -561,7 +785,7 @@ struct devlink_trap {
bool generic;
u16 id;
const char *name;
- struct devlink_trap_group group;
+ u16 init_group_id;
u32 metadata_cap;
};
@@ -596,6 +820,71 @@ enum devlink_trap_generic_id {
DEVLINK_TRAP_GENERIC_ID_NON_ROUTABLE,
DEVLINK_TRAP_GENERIC_ID_DECAP_ERROR,
DEVLINK_TRAP_GENERIC_ID_OVERLAY_SMAC_MC,
+ DEVLINK_TRAP_GENERIC_ID_INGRESS_FLOW_ACTION_DROP,
+ DEVLINK_TRAP_GENERIC_ID_EGRESS_FLOW_ACTION_DROP,
+ DEVLINK_TRAP_GENERIC_ID_STP,
+ DEVLINK_TRAP_GENERIC_ID_LACP,
+ DEVLINK_TRAP_GENERIC_ID_LLDP,
+ DEVLINK_TRAP_GENERIC_ID_IGMP_QUERY,
+ DEVLINK_TRAP_GENERIC_ID_IGMP_V1_REPORT,
+ DEVLINK_TRAP_GENERIC_ID_IGMP_V2_REPORT,
+ DEVLINK_TRAP_GENERIC_ID_IGMP_V3_REPORT,
+ DEVLINK_TRAP_GENERIC_ID_IGMP_V2_LEAVE,
+ DEVLINK_TRAP_GENERIC_ID_MLD_QUERY,
+ DEVLINK_TRAP_GENERIC_ID_MLD_V1_REPORT,
+ DEVLINK_TRAP_GENERIC_ID_MLD_V2_REPORT,
+ DEVLINK_TRAP_GENERIC_ID_MLD_V1_DONE,
+ DEVLINK_TRAP_GENERIC_ID_IPV4_DHCP,
+ DEVLINK_TRAP_GENERIC_ID_IPV6_DHCP,
+ DEVLINK_TRAP_GENERIC_ID_ARP_REQUEST,
+ DEVLINK_TRAP_GENERIC_ID_ARP_RESPONSE,
+ DEVLINK_TRAP_GENERIC_ID_ARP_OVERLAY,
+ DEVLINK_TRAP_GENERIC_ID_IPV6_NEIGH_SOLICIT,
+ DEVLINK_TRAP_GENERIC_ID_IPV6_NEIGH_ADVERT,
+ DEVLINK_TRAP_GENERIC_ID_IPV4_BFD,
+ DEVLINK_TRAP_GENERIC_ID_IPV6_BFD,
+ DEVLINK_TRAP_GENERIC_ID_IPV4_OSPF,
+ DEVLINK_TRAP_GENERIC_ID_IPV6_OSPF,
+ DEVLINK_TRAP_GENERIC_ID_IPV4_BGP,
+ DEVLINK_TRAP_GENERIC_ID_IPV6_BGP,
+ DEVLINK_TRAP_GENERIC_ID_IPV4_VRRP,
+ DEVLINK_TRAP_GENERIC_ID_IPV6_VRRP,
+ DEVLINK_TRAP_GENERIC_ID_IPV4_PIM,
+ DEVLINK_TRAP_GENERIC_ID_IPV6_PIM,
+ DEVLINK_TRAP_GENERIC_ID_UC_LB,
+ DEVLINK_TRAP_GENERIC_ID_LOCAL_ROUTE,
+ DEVLINK_TRAP_GENERIC_ID_EXTERNAL_ROUTE,
+ DEVLINK_TRAP_GENERIC_ID_IPV6_UC_DIP_LINK_LOCAL_SCOPE,
+ DEVLINK_TRAP_GENERIC_ID_IPV6_DIP_ALL_NODES,
+ DEVLINK_TRAP_GENERIC_ID_IPV6_DIP_ALL_ROUTERS,
+ DEVLINK_TRAP_GENERIC_ID_IPV6_ROUTER_SOLICIT,
+ DEVLINK_TRAP_GENERIC_ID_IPV6_ROUTER_ADVERT,
+ DEVLINK_TRAP_GENERIC_ID_IPV6_REDIRECT,
+ DEVLINK_TRAP_GENERIC_ID_IPV4_ROUTER_ALERT,
+ DEVLINK_TRAP_GENERIC_ID_IPV6_ROUTER_ALERT,
+ DEVLINK_TRAP_GENERIC_ID_PTP_EVENT,
+ DEVLINK_TRAP_GENERIC_ID_PTP_GENERAL,
+ DEVLINK_TRAP_GENERIC_ID_FLOW_ACTION_SAMPLE,
+ DEVLINK_TRAP_GENERIC_ID_FLOW_ACTION_TRAP,
+ DEVLINK_TRAP_GENERIC_ID_EARLY_DROP,
+ DEVLINK_TRAP_GENERIC_ID_VXLAN_PARSING,
+ DEVLINK_TRAP_GENERIC_ID_LLC_SNAP_PARSING,
+ DEVLINK_TRAP_GENERIC_ID_VLAN_PARSING,
+ DEVLINK_TRAP_GENERIC_ID_PPPOE_PPP_PARSING,
+ DEVLINK_TRAP_GENERIC_ID_MPLS_PARSING,
+ DEVLINK_TRAP_GENERIC_ID_ARP_PARSING,
+ DEVLINK_TRAP_GENERIC_ID_IP_1_PARSING,
+ DEVLINK_TRAP_GENERIC_ID_IP_N_PARSING,
+ DEVLINK_TRAP_GENERIC_ID_GRE_PARSING,
+ DEVLINK_TRAP_GENERIC_ID_UDP_PARSING,
+ DEVLINK_TRAP_GENERIC_ID_TCP_PARSING,
+ DEVLINK_TRAP_GENERIC_ID_IPSEC_PARSING,
+ DEVLINK_TRAP_GENERIC_ID_SCTP_PARSING,
+ DEVLINK_TRAP_GENERIC_ID_DCCP_PARSING,
+ DEVLINK_TRAP_GENERIC_ID_GTP_PARSING,
+ DEVLINK_TRAP_GENERIC_ID_ESP_PARSING,
+ DEVLINK_TRAP_GENERIC_ID_BLACKHOLE_NEXTHOP,
+ DEVLINK_TRAP_GENERIC_ID_DMAC_FILTER,
/* Add new generic trap IDs above */
__DEVLINK_TRAP_GENERIC_ID_MAX,
@@ -608,8 +897,30 @@ enum devlink_trap_generic_id {
enum devlink_trap_group_generic_id {
DEVLINK_TRAP_GROUP_GENERIC_ID_L2_DROPS,
DEVLINK_TRAP_GROUP_GENERIC_ID_L3_DROPS,
+ DEVLINK_TRAP_GROUP_GENERIC_ID_L3_EXCEPTIONS,
DEVLINK_TRAP_GROUP_GENERIC_ID_BUFFER_DROPS,
DEVLINK_TRAP_GROUP_GENERIC_ID_TUNNEL_DROPS,
+ DEVLINK_TRAP_GROUP_GENERIC_ID_ACL_DROPS,
+ DEVLINK_TRAP_GROUP_GENERIC_ID_STP,
+ DEVLINK_TRAP_GROUP_GENERIC_ID_LACP,
+ DEVLINK_TRAP_GROUP_GENERIC_ID_LLDP,
+ DEVLINK_TRAP_GROUP_GENERIC_ID_MC_SNOOPING,
+ DEVLINK_TRAP_GROUP_GENERIC_ID_DHCP,
+ DEVLINK_TRAP_GROUP_GENERIC_ID_NEIGH_DISCOVERY,
+ DEVLINK_TRAP_GROUP_GENERIC_ID_BFD,
+ DEVLINK_TRAP_GROUP_GENERIC_ID_OSPF,
+ DEVLINK_TRAP_GROUP_GENERIC_ID_BGP,
+ DEVLINK_TRAP_GROUP_GENERIC_ID_VRRP,
+ DEVLINK_TRAP_GROUP_GENERIC_ID_PIM,
+ DEVLINK_TRAP_GROUP_GENERIC_ID_UC_LB,
+ DEVLINK_TRAP_GROUP_GENERIC_ID_LOCAL_DELIVERY,
+ DEVLINK_TRAP_GROUP_GENERIC_ID_EXTERNAL_DELIVERY,
+ DEVLINK_TRAP_GROUP_GENERIC_ID_IPV6,
+ DEVLINK_TRAP_GROUP_GENERIC_ID_PTP_EVENT,
+ DEVLINK_TRAP_GROUP_GENERIC_ID_PTP_GENERAL,
+ DEVLINK_TRAP_GROUP_GENERIC_ID_ACL_SAMPLE,
+ DEVLINK_TRAP_GROUP_GENERIC_ID_ACL_TRAP,
+ DEVLINK_TRAP_GROUP_GENERIC_ID_PARSER_ERROR_DROPS,
/* Add new generic trap group IDs above */
__DEVLINK_TRAP_GROUP_GENERIC_ID_MAX,
@@ -671,28 +982,203 @@ enum devlink_trap_group_generic_id {
"decap_error"
#define DEVLINK_TRAP_GENERIC_NAME_OVERLAY_SMAC_MC \
"overlay_smac_is_mc"
+#define DEVLINK_TRAP_GENERIC_NAME_INGRESS_FLOW_ACTION_DROP \
+ "ingress_flow_action_drop"
+#define DEVLINK_TRAP_GENERIC_NAME_EGRESS_FLOW_ACTION_DROP \
+ "egress_flow_action_drop"
+#define DEVLINK_TRAP_GENERIC_NAME_STP \
+ "stp"
+#define DEVLINK_TRAP_GENERIC_NAME_LACP \
+ "lacp"
+#define DEVLINK_TRAP_GENERIC_NAME_LLDP \
+ "lldp"
+#define DEVLINK_TRAP_GENERIC_NAME_IGMP_QUERY \
+ "igmp_query"
+#define DEVLINK_TRAP_GENERIC_NAME_IGMP_V1_REPORT \
+ "igmp_v1_report"
+#define DEVLINK_TRAP_GENERIC_NAME_IGMP_V2_REPORT \
+ "igmp_v2_report"
+#define DEVLINK_TRAP_GENERIC_NAME_IGMP_V3_REPORT \
+ "igmp_v3_report"
+#define DEVLINK_TRAP_GENERIC_NAME_IGMP_V2_LEAVE \
+ "igmp_v2_leave"
+#define DEVLINK_TRAP_GENERIC_NAME_MLD_QUERY \
+ "mld_query"
+#define DEVLINK_TRAP_GENERIC_NAME_MLD_V1_REPORT \
+ "mld_v1_report"
+#define DEVLINK_TRAP_GENERIC_NAME_MLD_V2_REPORT \
+ "mld_v2_report"
+#define DEVLINK_TRAP_GENERIC_NAME_MLD_V1_DONE \
+ "mld_v1_done"
+#define DEVLINK_TRAP_GENERIC_NAME_IPV4_DHCP \
+ "ipv4_dhcp"
+#define DEVLINK_TRAP_GENERIC_NAME_IPV6_DHCP \
+ "ipv6_dhcp"
+#define DEVLINK_TRAP_GENERIC_NAME_ARP_REQUEST \
+ "arp_request"
+#define DEVLINK_TRAP_GENERIC_NAME_ARP_RESPONSE \
+ "arp_response"
+#define DEVLINK_TRAP_GENERIC_NAME_ARP_OVERLAY \
+ "arp_overlay"
+#define DEVLINK_TRAP_GENERIC_NAME_IPV6_NEIGH_SOLICIT \
+ "ipv6_neigh_solicit"
+#define DEVLINK_TRAP_GENERIC_NAME_IPV6_NEIGH_ADVERT \
+ "ipv6_neigh_advert"
+#define DEVLINK_TRAP_GENERIC_NAME_IPV4_BFD \
+ "ipv4_bfd"
+#define DEVLINK_TRAP_GENERIC_NAME_IPV6_BFD \
+ "ipv6_bfd"
+#define DEVLINK_TRAP_GENERIC_NAME_IPV4_OSPF \
+ "ipv4_ospf"
+#define DEVLINK_TRAP_GENERIC_NAME_IPV6_OSPF \
+ "ipv6_ospf"
+#define DEVLINK_TRAP_GENERIC_NAME_IPV4_BGP \
+ "ipv4_bgp"
+#define DEVLINK_TRAP_GENERIC_NAME_IPV6_BGP \
+ "ipv6_bgp"
+#define DEVLINK_TRAP_GENERIC_NAME_IPV4_VRRP \
+ "ipv4_vrrp"
+#define DEVLINK_TRAP_GENERIC_NAME_IPV6_VRRP \
+ "ipv6_vrrp"
+#define DEVLINK_TRAP_GENERIC_NAME_IPV4_PIM \
+ "ipv4_pim"
+#define DEVLINK_TRAP_GENERIC_NAME_IPV6_PIM \
+ "ipv6_pim"
+#define DEVLINK_TRAP_GENERIC_NAME_UC_LB \
+ "uc_loopback"
+#define DEVLINK_TRAP_GENERIC_NAME_LOCAL_ROUTE \
+ "local_route"
+#define DEVLINK_TRAP_GENERIC_NAME_EXTERNAL_ROUTE \
+ "external_route"
+#define DEVLINK_TRAP_GENERIC_NAME_IPV6_UC_DIP_LINK_LOCAL_SCOPE \
+ "ipv6_uc_dip_link_local_scope"
+#define DEVLINK_TRAP_GENERIC_NAME_IPV6_DIP_ALL_NODES \
+ "ipv6_dip_all_nodes"
+#define DEVLINK_TRAP_GENERIC_NAME_IPV6_DIP_ALL_ROUTERS \
+ "ipv6_dip_all_routers"
+#define DEVLINK_TRAP_GENERIC_NAME_IPV6_ROUTER_SOLICIT \
+ "ipv6_router_solicit"
+#define DEVLINK_TRAP_GENERIC_NAME_IPV6_ROUTER_ADVERT \
+ "ipv6_router_advert"
+#define DEVLINK_TRAP_GENERIC_NAME_IPV6_REDIRECT \
+ "ipv6_redirect"
+#define DEVLINK_TRAP_GENERIC_NAME_IPV4_ROUTER_ALERT \
+ "ipv4_router_alert"
+#define DEVLINK_TRAP_GENERIC_NAME_IPV6_ROUTER_ALERT \
+ "ipv6_router_alert"
+#define DEVLINK_TRAP_GENERIC_NAME_PTP_EVENT \
+ "ptp_event"
+#define DEVLINK_TRAP_GENERIC_NAME_PTP_GENERAL \
+ "ptp_general"
+#define DEVLINK_TRAP_GENERIC_NAME_FLOW_ACTION_SAMPLE \
+ "flow_action_sample"
+#define DEVLINK_TRAP_GENERIC_NAME_FLOW_ACTION_TRAP \
+ "flow_action_trap"
+#define DEVLINK_TRAP_GENERIC_NAME_EARLY_DROP \
+ "early_drop"
+#define DEVLINK_TRAP_GENERIC_NAME_VXLAN_PARSING \
+ "vxlan_parsing"
+#define DEVLINK_TRAP_GENERIC_NAME_LLC_SNAP_PARSING \
+ "llc_snap_parsing"
+#define DEVLINK_TRAP_GENERIC_NAME_VLAN_PARSING \
+ "vlan_parsing"
+#define DEVLINK_TRAP_GENERIC_NAME_PPPOE_PPP_PARSING \
+ "pppoe_ppp_parsing"
+#define DEVLINK_TRAP_GENERIC_NAME_MPLS_PARSING \
+ "mpls_parsing"
+#define DEVLINK_TRAP_GENERIC_NAME_ARP_PARSING \
+ "arp_parsing"
+#define DEVLINK_TRAP_GENERIC_NAME_IP_1_PARSING \
+ "ip_1_parsing"
+#define DEVLINK_TRAP_GENERIC_NAME_IP_N_PARSING \
+ "ip_n_parsing"
+#define DEVLINK_TRAP_GENERIC_NAME_GRE_PARSING \
+ "gre_parsing"
+#define DEVLINK_TRAP_GENERIC_NAME_UDP_PARSING \
+ "udp_parsing"
+#define DEVLINK_TRAP_GENERIC_NAME_TCP_PARSING \
+ "tcp_parsing"
+#define DEVLINK_TRAP_GENERIC_NAME_IPSEC_PARSING \
+ "ipsec_parsing"
+#define DEVLINK_TRAP_GENERIC_NAME_SCTP_PARSING \
+ "sctp_parsing"
+#define DEVLINK_TRAP_GENERIC_NAME_DCCP_PARSING \
+ "dccp_parsing"
+#define DEVLINK_TRAP_GENERIC_NAME_GTP_PARSING \
+ "gtp_parsing"
+#define DEVLINK_TRAP_GENERIC_NAME_ESP_PARSING \
+ "esp_parsing"
+#define DEVLINK_TRAP_GENERIC_NAME_BLACKHOLE_NEXTHOP \
+ "blackhole_nexthop"
+#define DEVLINK_TRAP_GENERIC_NAME_DMAC_FILTER \
+ "dmac_filter"
#define DEVLINK_TRAP_GROUP_GENERIC_NAME_L2_DROPS \
"l2_drops"
#define DEVLINK_TRAP_GROUP_GENERIC_NAME_L3_DROPS \
"l3_drops"
+#define DEVLINK_TRAP_GROUP_GENERIC_NAME_L3_EXCEPTIONS \
+ "l3_exceptions"
#define DEVLINK_TRAP_GROUP_GENERIC_NAME_BUFFER_DROPS \
"buffer_drops"
#define DEVLINK_TRAP_GROUP_GENERIC_NAME_TUNNEL_DROPS \
"tunnel_drops"
-
-#define DEVLINK_TRAP_GENERIC(_type, _init_action, _id, _group, _metadata_cap) \
+#define DEVLINK_TRAP_GROUP_GENERIC_NAME_ACL_DROPS \
+ "acl_drops"
+#define DEVLINK_TRAP_GROUP_GENERIC_NAME_STP \
+ "stp"
+#define DEVLINK_TRAP_GROUP_GENERIC_NAME_LACP \
+ "lacp"
+#define DEVLINK_TRAP_GROUP_GENERIC_NAME_LLDP \
+ "lldp"
+#define DEVLINK_TRAP_GROUP_GENERIC_NAME_MC_SNOOPING \
+ "mc_snooping"
+#define DEVLINK_TRAP_GROUP_GENERIC_NAME_DHCP \
+ "dhcp"
+#define DEVLINK_TRAP_GROUP_GENERIC_NAME_NEIGH_DISCOVERY \
+ "neigh_discovery"
+#define DEVLINK_TRAP_GROUP_GENERIC_NAME_BFD \
+ "bfd"
+#define DEVLINK_TRAP_GROUP_GENERIC_NAME_OSPF \
+ "ospf"
+#define DEVLINK_TRAP_GROUP_GENERIC_NAME_BGP \
+ "bgp"
+#define DEVLINK_TRAP_GROUP_GENERIC_NAME_VRRP \
+ "vrrp"
+#define DEVLINK_TRAP_GROUP_GENERIC_NAME_PIM \
+ "pim"
+#define DEVLINK_TRAP_GROUP_GENERIC_NAME_UC_LB \
+ "uc_loopback"
+#define DEVLINK_TRAP_GROUP_GENERIC_NAME_LOCAL_DELIVERY \
+ "local_delivery"
+#define DEVLINK_TRAP_GROUP_GENERIC_NAME_EXTERNAL_DELIVERY \
+ "external_delivery"
+#define DEVLINK_TRAP_GROUP_GENERIC_NAME_IPV6 \
+ "ipv6"
+#define DEVLINK_TRAP_GROUP_GENERIC_NAME_PTP_EVENT \
+ "ptp_event"
+#define DEVLINK_TRAP_GROUP_GENERIC_NAME_PTP_GENERAL \
+ "ptp_general"
+#define DEVLINK_TRAP_GROUP_GENERIC_NAME_ACL_SAMPLE \
+ "acl_sample"
+#define DEVLINK_TRAP_GROUP_GENERIC_NAME_ACL_TRAP \
+ "acl_trap"
+#define DEVLINK_TRAP_GROUP_GENERIC_NAME_PARSER_ERROR_DROPS \
+ "parser_error_drops"
+
+#define DEVLINK_TRAP_GENERIC(_type, _init_action, _id, _group_id, \
+ _metadata_cap) \
{ \
.type = DEVLINK_TRAP_TYPE_##_type, \
.init_action = DEVLINK_TRAP_ACTION_##_init_action, \
.generic = true, \
.id = DEVLINK_TRAP_GENERIC_ID_##_id, \
.name = DEVLINK_TRAP_GENERIC_NAME_##_id, \
- .group = _group, \
+ .init_group_id = _group_id, \
.metadata_cap = _metadata_cap, \
}
-#define DEVLINK_TRAP_DRIVER(_type, _init_action, _id, _name, _group, \
+#define DEVLINK_TRAP_DRIVER(_type, _init_action, _id, _name, _group_id, \
_metadata_cap) \
{ \
.type = DEVLINK_TRAP_TYPE_##_type, \
@@ -700,27 +1186,56 @@ enum devlink_trap_group_generic_id {
.generic = false, \
.id = _id, \
.name = _name, \
- .group = _group, \
+ .init_group_id = _group_id, \
.metadata_cap = _metadata_cap, \
}
-#define DEVLINK_TRAP_GROUP_GENERIC(_id) \
+#define DEVLINK_TRAP_GROUP_GENERIC(_id, _policer_id) \
{ \
.name = DEVLINK_TRAP_GROUP_GENERIC_NAME_##_id, \
.id = DEVLINK_TRAP_GROUP_GENERIC_ID_##_id, \
.generic = true, \
+ .init_policer_id = _policer_id, \
+ }
+
+#define DEVLINK_TRAP_POLICER(_id, _rate, _burst, _max_rate, _min_rate, \
+ _max_burst, _min_burst) \
+ { \
+ .id = _id, \
+ .init_rate = _rate, \
+ .init_burst = _burst, \
+ .max_rate = _max_rate, \
+ .min_rate = _min_rate, \
+ .max_burst = _max_burst, \
+ .min_burst = _min_burst, \
}
+enum {
+ /* device supports reload operations */
+ DEVLINK_F_RELOAD = 1UL << 0,
+};
+
struct devlink_ops {
+ /**
+ * @supported_flash_update_params:
+ * mask of parameters supported by the driver's .flash_update
+ * implemementation.
+ */
+ u32 supported_flash_update_params;
+ unsigned long reload_actions;
+ unsigned long reload_limits;
int (*reload_down)(struct devlink *devlink, bool netns_change,
+ enum devlink_reload_action action,
+ enum devlink_reload_limit limit,
struct netlink_ext_ack *extack);
- int (*reload_up)(struct devlink *devlink,
+ int (*reload_up)(struct devlink *devlink, enum devlink_reload_action action,
+ enum devlink_reload_limit limit, u32 *actions_performed,
struct netlink_ext_ack *extack);
int (*port_type_set)(struct devlink_port *devlink_port,
enum devlink_port_type port_type);
- int (*port_split)(struct devlink *devlink, unsigned int port_index,
+ int (*port_split)(struct devlink *devlink, struct devlink_port *port,
unsigned int count, struct netlink_ext_ack *extack);
- int (*port_unsplit)(struct devlink *devlink, unsigned int port_index,
+ int (*port_unsplit)(struct devlink *devlink, struct devlink_port *port,
struct netlink_ext_ack *extack);
int (*sb_pool_get)(struct devlink *devlink, unsigned int sb_index,
u16 pool_index,
@@ -772,8 +1287,15 @@ struct devlink_ops {
struct netlink_ext_ack *extack);
int (*info_get)(struct devlink *devlink, struct devlink_info_req *req,
struct netlink_ext_ack *extack);
- int (*flash_update)(struct devlink *devlink, const char *file_name,
- const char *component,
+ /**
+ * @flash_update: Device flash update function
+ *
+ * Used to perform a flash update for the device. The set of
+ * parameters supported by the driver should be set in
+ * supported_flash_update_params.
+ */
+ int (*flash_update)(struct devlink *devlink,
+ struct devlink_flash_update_params *params,
struct netlink_ext_ack *extack);
/**
* @trap_init: Trap initialization function.
@@ -798,7 +1320,8 @@ struct devlink_ops {
*/
int (*trap_action_set)(struct devlink *devlink,
const struct devlink_trap *trap,
- enum devlink_trap_action action);
+ enum devlink_trap_action action,
+ struct netlink_ext_ack *extack);
/**
* @trap_group_init: Trap group initialization function.
*
@@ -807,50 +1330,250 @@ struct devlink_ops {
*/
int (*trap_group_init)(struct devlink *devlink,
const struct devlink_trap_group *group);
-};
-
-static inline void *devlink_priv(struct devlink *devlink)
-{
- BUG_ON(!devlink);
- return &devlink->priv;
-}
-
-static inline struct devlink *priv_to_devlink(void *priv)
-{
- BUG_ON(!priv);
- return container_of(priv, struct devlink, priv);
-}
+ /**
+ * @trap_group_set: Trap group parameters set function.
+ *
+ * Note: @policer can be NULL when a policer is being unbound from
+ * @group.
+ */
+ int (*trap_group_set)(struct devlink *devlink,
+ const struct devlink_trap_group *group,
+ const struct devlink_trap_policer *policer,
+ struct netlink_ext_ack *extack);
+ /**
+ * @trap_group_action_set: Trap group action set function.
+ *
+ * If this callback is populated, it will take precedence over looping
+ * over all traps in a group and calling .trap_action_set().
+ */
+ int (*trap_group_action_set)(struct devlink *devlink,
+ const struct devlink_trap_group *group,
+ enum devlink_trap_action action,
+ struct netlink_ext_ack *extack);
+ /**
+ * @trap_drop_counter_get: Trap drop counter get function.
+ *
+ * Should be used by device drivers to report number of packets
+ * that have been dropped, and cannot be passed to the devlink
+ * subsystem by the underlying device.
+ */
+ int (*trap_drop_counter_get)(struct devlink *devlink,
+ const struct devlink_trap *trap,
+ u64 *p_drops);
+ /**
+ * @trap_policer_init: Trap policer initialization function.
+ *
+ * Should be used by device drivers to initialize the trap policer in
+ * the underlying device.
+ */
+ int (*trap_policer_init)(struct devlink *devlink,
+ const struct devlink_trap_policer *policer);
+ /**
+ * @trap_policer_fini: Trap policer de-initialization function.
+ *
+ * Should be used by device drivers to de-initialize the trap policer
+ * in the underlying device.
+ */
+ void (*trap_policer_fini)(struct devlink *devlink,
+ const struct devlink_trap_policer *policer);
+ /**
+ * @trap_policer_set: Trap policer parameters set function.
+ */
+ int (*trap_policer_set)(struct devlink *devlink,
+ const struct devlink_trap_policer *policer,
+ u64 rate, u64 burst,
+ struct netlink_ext_ack *extack);
+ /**
+ * @trap_policer_counter_get: Trap policer counter get function.
+ *
+ * Should be used by device drivers to report number of packets dropped
+ * by the policer.
+ */
+ int (*trap_policer_counter_get)(struct devlink *devlink,
+ const struct devlink_trap_policer *policer,
+ u64 *p_drops);
+ /**
+ * @port_function_hw_addr_get: Port function's hardware address get function.
+ *
+ * Should be used by device drivers to report the hardware address of a function managed
+ * by the devlink port. Driver should return -EOPNOTSUPP if it doesn't support port
+ * function handling for a particular port.
+ *
+ * Note: @extack can be NULL when port notifier queries the port function.
+ */
+ int (*port_function_hw_addr_get)(struct devlink_port *port, u8 *hw_addr,
+ int *hw_addr_len,
+ struct netlink_ext_ack *extack);
+ /**
+ * @port_function_hw_addr_set: Port function's hardware address set function.
+ *
+ * Should be used by device drivers to set the hardware address of a function managed
+ * by the devlink port. Driver should return -EOPNOTSUPP if it doesn't support port
+ * function handling for a particular port.
+ */
+ int (*port_function_hw_addr_set)(struct devlink_port *port,
+ const u8 *hw_addr, int hw_addr_len,
+ struct netlink_ext_ack *extack);
+ /**
+ * port_new() - Add a new port function of a specified flavor
+ * @devlink: Devlink instance
+ * @attrs: attributes of the new port
+ * @extack: extack for reporting error messages
+ * @new_port_index: index of the new port
+ *
+ * Devlink core will call this device driver function upon user request
+ * to create a new port function of a specified flavor and optional
+ * attributes
+ *
+ * Notes:
+ * - Called without devlink instance lock being held. Drivers must
+ * implement own means of synchronization
+ * - On success, drivers must register a port with devlink core
+ *
+ * Return: 0 on success, negative value otherwise.
+ */
+ int (*port_new)(struct devlink *devlink,
+ const struct devlink_port_new_attrs *attrs,
+ struct netlink_ext_ack *extack,
+ unsigned int *new_port_index);
+ /**
+ * port_del() - Delete a port function
+ * @devlink: Devlink instance
+ * @port_index: port function index to delete
+ * @extack: extack for reporting error messages
+ *
+ * Devlink core will call this device driver function upon user request
+ * to delete a previously created port function
+ *
+ * Notes:
+ * - Called without devlink instance lock being held. Drivers must
+ * implement own means of synchronization
+ * - On success, drivers must unregister the corresponding devlink
+ * port
+ *
+ * Return: 0 on success, negative value otherwise.
+ */
+ int (*port_del)(struct devlink *devlink, unsigned int port_index,
+ struct netlink_ext_ack *extack);
+ /**
+ * port_fn_state_get() - Get the state of a port function
+ * @devlink: Devlink instance
+ * @port: The devlink port
+ * @state: Admin configured state
+ * @opstate: Current operational state
+ * @extack: extack for reporting error messages
+ *
+ * Reports the admin and operational state of a devlink port function
+ *
+ * Return: 0 on success, negative value otherwise.
+ */
+ int (*port_fn_state_get)(struct devlink_port *port,
+ enum devlink_port_fn_state *state,
+ enum devlink_port_fn_opstate *opstate,
+ struct netlink_ext_ack *extack);
+ /**
+ * port_fn_state_set() - Set the admin state of a port function
+ * @devlink: Devlink instance
+ * @port: The devlink port
+ * @state: Admin state
+ * @extack: extack for reporting error messages
+ *
+ * Set the admin state of a devlink port function
+ *
+ * Return: 0 on success, negative value otherwise.
+ */
+ int (*port_fn_state_set)(struct devlink_port *port,
+ enum devlink_port_fn_state state,
+ struct netlink_ext_ack *extack);
-static inline struct devlink_port *
-netdev_to_devlink_port(struct net_device *dev)
-{
- if (dev->netdev_ops->ndo_get_devlink_port)
- return dev->netdev_ops->ndo_get_devlink_port(dev);
- return NULL;
-}
+ /**
+ * Rate control callbacks.
+ */
+ int (*rate_leaf_tx_share_set)(struct devlink_rate *devlink_rate, void *priv,
+ u64 tx_share, struct netlink_ext_ack *extack);
+ int (*rate_leaf_tx_max_set)(struct devlink_rate *devlink_rate, void *priv,
+ u64 tx_max, struct netlink_ext_ack *extack);
+ int (*rate_node_tx_share_set)(struct devlink_rate *devlink_rate, void *priv,
+ u64 tx_share, struct netlink_ext_ack *extack);
+ int (*rate_node_tx_max_set)(struct devlink_rate *devlink_rate, void *priv,
+ u64 tx_max, struct netlink_ext_ack *extack);
+ int (*rate_node_new)(struct devlink_rate *rate_node, void **priv,
+ struct netlink_ext_ack *extack);
+ int (*rate_node_del)(struct devlink_rate *rate_node, void *priv,
+ struct netlink_ext_ack *extack);
+ int (*rate_leaf_parent_set)(struct devlink_rate *child,
+ struct devlink_rate *parent,
+ void *priv_child, void *priv_parent,
+ struct netlink_ext_ack *extack);
+ int (*rate_node_parent_set)(struct devlink_rate *child,
+ struct devlink_rate *parent,
+ void *priv_child, void *priv_parent,
+ struct netlink_ext_ack *extack);
+ /**
+ * selftests_check() - queries if selftest is supported
+ * @devlink: devlink instance
+ * @id: test index
+ * @extack: extack for reporting error messages
+ *
+ * Return: true if test is supported by the driver
+ */
+ bool (*selftest_check)(struct devlink *devlink, unsigned int id,
+ struct netlink_ext_ack *extack);
+ /**
+ * selftest_run() - Runs a selftest
+ * @devlink: devlink instance
+ * @id: test index
+ * @extack: extack for reporting error messages
+ *
+ * Return: status of the test
+ */
+ enum devlink_selftest_status
+ (*selftest_run)(struct devlink *devlink, unsigned int id,
+ struct netlink_ext_ack *extack);
+};
-static inline struct devlink *netdev_to_devlink(struct net_device *dev)
-{
- struct devlink_port *devlink_port = netdev_to_devlink_port(dev);
+void *devlink_priv(struct devlink *devlink);
+struct devlink *priv_to_devlink(void *priv);
+struct device *devlink_to_dev(const struct devlink *devlink);
- if (devlink_port)
- return devlink_port->devlink;
- return NULL;
-}
+/* Devlink instance explicit locking */
+void devl_lock(struct devlink *devlink);
+int devl_trylock(struct devlink *devlink);
+void devl_unlock(struct devlink *devlink);
+void devl_assert_locked(struct devlink *devlink);
+bool devl_lock_is_held(struct devlink *devlink);
struct ib_device;
struct net *devlink_net(const struct devlink *devlink);
-void devlink_net_set(struct devlink *devlink, struct net *net);
-struct devlink *devlink_alloc(const struct devlink_ops *ops, size_t priv_size);
-int devlink_register(struct devlink *devlink, struct device *dev);
+/* This call is intended for software devices that can create
+ * devlink instances in other namespaces than init_net.
+ *
+ * Drivers that operate on real HW must use devlink_alloc() instead.
+ */
+struct devlink *devlink_alloc_ns(const struct devlink_ops *ops,
+ size_t priv_size, struct net *net,
+ struct device *dev);
+static inline struct devlink *devlink_alloc(const struct devlink_ops *ops,
+ size_t priv_size,
+ struct device *dev)
+{
+ return devlink_alloc_ns(ops, priv_size, &init_net, dev);
+}
+void devlink_set_features(struct devlink *devlink, u64 features);
+void devlink_register(struct devlink *devlink);
void devlink_unregister(struct devlink *devlink);
-void devlink_reload_enable(struct devlink *devlink);
-void devlink_reload_disable(struct devlink *devlink);
void devlink_free(struct devlink *devlink);
+void devlink_port_init(struct devlink *devlink,
+ struct devlink_port *devlink_port);
+void devlink_port_fini(struct devlink_port *devlink_port);
+int devl_port_register(struct devlink *devlink,
+ struct devlink_port *devlink_port,
+ unsigned int port_index);
int devlink_port_register(struct devlink *devlink,
struct devlink_port *devlink_port,
unsigned int port_index);
+void devl_port_unregister(struct devlink_port *devlink_port);
void devlink_port_unregister(struct devlink_port *devlink_port);
void devlink_port_type_eth_set(struct devlink_port *devlink_port,
struct net_device *netdev);
@@ -858,32 +1581,50 @@ void devlink_port_type_ib_set(struct devlink_port *devlink_port,
struct ib_device *ibdev);
void devlink_port_type_clear(struct devlink_port *devlink_port);
void devlink_port_attrs_set(struct devlink_port *devlink_port,
- enum devlink_port_flavour flavour,
- u32 port_number, bool split,
- u32 split_subport_number,
- const unsigned char *switch_id,
- unsigned char switch_id_len);
-void devlink_port_attrs_pci_pf_set(struct devlink_port *devlink_port,
- const unsigned char *switch_id,
- unsigned char switch_id_len, u16 pf);
-void devlink_port_attrs_pci_vf_set(struct devlink_port *devlink_port,
- const unsigned char *switch_id,
- unsigned char switch_id_len,
- u16 pf, u16 vf);
+ struct devlink_port_attrs *devlink_port_attrs);
+void devlink_port_attrs_pci_pf_set(struct devlink_port *devlink_port, u32 controller,
+ u16 pf, bool external);
+void devlink_port_attrs_pci_vf_set(struct devlink_port *devlink_port, u32 controller,
+ u16 pf, u16 vf, bool external);
+void devlink_port_attrs_pci_sf_set(struct devlink_port *devlink_port,
+ u32 controller, u16 pf, u32 sf,
+ bool external);
+int devl_rate_leaf_create(struct devlink_port *port, void *priv);
+void devl_rate_leaf_destroy(struct devlink_port *devlink_port);
+void devl_rate_nodes_destroy(struct devlink *devlink);
+void devlink_port_linecard_set(struct devlink_port *devlink_port,
+ struct devlink_linecard *linecard);
+struct devlink_linecard *
+devlink_linecard_create(struct devlink *devlink, unsigned int linecard_index,
+ const struct devlink_linecard_ops *ops, void *priv);
+void devlink_linecard_destroy(struct devlink_linecard *linecard);
+void devlink_linecard_provision_set(struct devlink_linecard *linecard,
+ const char *type);
+void devlink_linecard_provision_clear(struct devlink_linecard *linecard);
+void devlink_linecard_provision_fail(struct devlink_linecard *linecard);
+void devlink_linecard_activate(struct devlink_linecard *linecard);
+void devlink_linecard_deactivate(struct devlink_linecard *linecard);
+void devlink_linecard_nested_dl_set(struct devlink_linecard *linecard,
+ struct devlink *nested_devlink);
+int devl_sb_register(struct devlink *devlink, unsigned int sb_index,
+ u32 size, u16 ingress_pools_count,
+ u16 egress_pools_count, u16 ingress_tc_count,
+ u16 egress_tc_count);
int devlink_sb_register(struct devlink *devlink, unsigned int sb_index,
u32 size, u16 ingress_pools_count,
u16 egress_pools_count, u16 ingress_tc_count,
u16 egress_tc_count);
+void devl_sb_unregister(struct devlink *devlink, unsigned int sb_index);
void devlink_sb_unregister(struct devlink *devlink, unsigned int sb_index);
-int devlink_dpipe_table_register(struct devlink *devlink,
- const char *table_name,
- struct devlink_dpipe_table_ops *table_ops,
- void *priv, bool counter_control_extern);
-void devlink_dpipe_table_unregister(struct devlink *devlink,
- const char *table_name);
-int devlink_dpipe_headers_register(struct devlink *devlink,
- struct devlink_dpipe_headers *dpipe_headers);
-void devlink_dpipe_headers_unregister(struct devlink *devlink);
+int devl_dpipe_table_register(struct devlink *devlink,
+ const char *table_name,
+ struct devlink_dpipe_table_ops *table_ops,
+ void *priv, bool counter_control_extern);
+void devl_dpipe_table_unregister(struct devlink *devlink,
+ const char *table_name);
+void devl_dpipe_headers_register(struct devlink *devlink,
+ struct devlink_dpipe_headers *dpipe_headers);
+void devl_dpipe_headers_unregister(struct devlink *devlink);
bool devlink_dpipe_table_counter_enabled(struct devlink *devlink,
const char *table_name);
int devlink_dpipe_entry_ctx_prepare(struct devlink_dpipe_dump_ctx *dump_ctx);
@@ -899,24 +1640,40 @@ extern struct devlink_dpipe_header devlink_dpipe_header_ethernet;
extern struct devlink_dpipe_header devlink_dpipe_header_ipv4;
extern struct devlink_dpipe_header devlink_dpipe_header_ipv6;
+int devl_resource_register(struct devlink *devlink,
+ const char *resource_name,
+ u64 resource_size,
+ u64 resource_id,
+ u64 parent_resource_id,
+ const struct devlink_resource_size_params *size_params);
int devlink_resource_register(struct devlink *devlink,
const char *resource_name,
u64 resource_size,
u64 resource_id,
u64 parent_resource_id,
const struct devlink_resource_size_params *size_params);
-void devlink_resources_unregister(struct devlink *devlink,
- struct devlink_resource *resource);
-int devlink_resource_size_get(struct devlink *devlink,
- u64 resource_id,
- u64 *p_resource_size);
+void devl_resources_unregister(struct devlink *devlink);
+void devlink_resources_unregister(struct devlink *devlink);
+int devl_resource_size_get(struct devlink *devlink,
+ u64 resource_id,
+ u64 *p_resource_size);
+int devl_dpipe_table_resource_set(struct devlink *devlink,
+ const char *table_name, u64 resource_id,
+ u64 resource_units);
int devlink_dpipe_table_resource_set(struct devlink *devlink,
const char *table_name, u64 resource_id,
u64 resource_units);
+void devl_resource_occ_get_register(struct devlink *devlink,
+ u64 resource_id,
+ devlink_resource_occ_get_t *occ_get,
+ void *occ_get_priv);
void devlink_resource_occ_get_register(struct devlink *devlink,
u64 resource_id,
devlink_resource_occ_get_t *occ_get,
void *occ_get_priv);
+void devl_resource_occ_get_unregister(struct devlink *devlink,
+ u64 resource_id);
+
void devlink_resource_occ_get_unregister(struct devlink *devlink,
u64 resource_id);
int devlink_params_register(struct devlink *devlink,
@@ -925,52 +1682,66 @@ int devlink_params_register(struct devlink *devlink,
void devlink_params_unregister(struct devlink *devlink,
const struct devlink_param *params,
size_t params_count);
-void devlink_params_publish(struct devlink *devlink);
-void devlink_params_unpublish(struct devlink *devlink);
-int devlink_port_params_register(struct devlink_port *devlink_port,
- const struct devlink_param *params,
- size_t params_count);
-void devlink_port_params_unregister(struct devlink_port *devlink_port,
- const struct devlink_param *params,
- size_t params_count);
+int devlink_param_register(struct devlink *devlink,
+ const struct devlink_param *param);
+void devlink_param_unregister(struct devlink *devlink,
+ const struct devlink_param *param);
int devlink_param_driverinit_value_get(struct devlink *devlink, u32 param_id,
union devlink_param_value *init_val);
int devlink_param_driverinit_value_set(struct devlink *devlink, u32 param_id,
union devlink_param_value init_val);
-int
-devlink_port_param_driverinit_value_get(struct devlink_port *devlink_port,
- u32 param_id,
- union devlink_param_value *init_val);
-int devlink_port_param_driverinit_value_set(struct devlink_port *devlink_port,
- u32 param_id,
- union devlink_param_value init_val);
void devlink_param_value_changed(struct devlink *devlink, u32 param_id);
-void devlink_port_param_value_changed(struct devlink_port *devlink_port,
- u32 param_id);
-void devlink_param_value_str_fill(union devlink_param_value *dst_val,
- const char *src);
-struct devlink_region *devlink_region_create(struct devlink *devlink,
- const char *region_name,
- u32 region_max_snapshots,
- u64 region_size);
+struct devlink_region *devl_region_create(struct devlink *devlink,
+ const struct devlink_region_ops *ops,
+ u32 region_max_snapshots,
+ u64 region_size);
+struct devlink_region *
+devlink_region_create(struct devlink *devlink,
+ const struct devlink_region_ops *ops,
+ u32 region_max_snapshots, u64 region_size);
+struct devlink_region *
+devlink_port_region_create(struct devlink_port *port,
+ const struct devlink_port_region_ops *ops,
+ u32 region_max_snapshots, u64 region_size);
+void devl_region_destroy(struct devlink_region *region);
void devlink_region_destroy(struct devlink_region *region);
-u32 devlink_region_snapshot_id_get(struct devlink *devlink);
+void devlink_port_region_destroy(struct devlink_region *region);
+
+int devlink_region_snapshot_id_get(struct devlink *devlink, u32 *id);
+void devlink_region_snapshot_id_put(struct devlink *devlink, u32 id);
int devlink_region_snapshot_create(struct devlink_region *region,
- u8 *data, u32 snapshot_id,
- devlink_snapshot_data_dest_t *data_destructor);
+ u8 *data, u32 snapshot_id);
int devlink_info_serial_number_put(struct devlink_info_req *req,
const char *sn);
int devlink_info_driver_name_put(struct devlink_info_req *req,
const char *name);
+int devlink_info_board_serial_number_put(struct devlink_info_req *req,
+ const char *bsn);
+
+enum devlink_info_version_type {
+ DEVLINK_INFO_VERSION_TYPE_NONE,
+ DEVLINK_INFO_VERSION_TYPE_COMPONENT, /* May be used as flash update
+ * component by name.
+ */
+};
+
int devlink_info_version_fixed_put(struct devlink_info_req *req,
const char *version_name,
const char *version_value);
int devlink_info_version_stored_put(struct devlink_info_req *req,
const char *version_name,
const char *version_value);
+int devlink_info_version_stored_put_ext(struct devlink_info_req *req,
+ const char *version_name,
+ const char *version_value,
+ enum devlink_info_version_type version_type);
int devlink_info_version_running_put(struct devlink_info_req *req,
const char *version_name,
const char *version_value);
+int devlink_info_version_running_put_ext(struct devlink_info_req *req,
+ const char *version_name,
+ const char *version_value,
+ enum devlink_info_version_type version_type);
int devlink_fmsg_obj_nest_start(struct devlink_fmsg *fmsg);
int devlink_fmsg_obj_nest_end(struct devlink_fmsg *fmsg);
@@ -981,12 +1752,14 @@ int devlink_fmsg_pair_nest_end(struct devlink_fmsg *fmsg);
int devlink_fmsg_arr_pair_nest_start(struct devlink_fmsg *fmsg,
const char *name);
int devlink_fmsg_arr_pair_nest_end(struct devlink_fmsg *fmsg);
+int devlink_fmsg_binary_pair_nest_start(struct devlink_fmsg *fmsg,
+ const char *name);
+int devlink_fmsg_binary_pair_nest_end(struct devlink_fmsg *fmsg);
-int devlink_fmsg_bool_put(struct devlink_fmsg *fmsg, bool value);
-int devlink_fmsg_u8_put(struct devlink_fmsg *fmsg, u8 value);
int devlink_fmsg_u32_put(struct devlink_fmsg *fmsg, u32 value);
-int devlink_fmsg_u64_put(struct devlink_fmsg *fmsg, u64 value);
int devlink_fmsg_string_put(struct devlink_fmsg *fmsg, const char *value);
+int devlink_fmsg_binary_put(struct devlink_fmsg *fmsg, const void *value,
+ u16 value_len);
int devlink_fmsg_bool_pair_put(struct devlink_fmsg *fmsg, const char *name,
bool value);
@@ -1004,11 +1777,19 @@ int devlink_fmsg_binary_pair_put(struct devlink_fmsg *fmsg, const char *name,
struct devlink_health_reporter *
devlink_health_reporter_create(struct devlink *devlink,
const struct devlink_health_reporter_ops *ops,
- u64 graceful_period, bool auto_recover,
- void *priv);
+ u64 graceful_period, void *priv);
+
+struct devlink_health_reporter *
+devlink_port_health_reporter_create(struct devlink_port *port,
+ const struct devlink_health_reporter_ops *ops,
+ u64 graceful_period, void *priv);
+
void
devlink_health_reporter_destroy(struct devlink_health_reporter *reporter);
+void
+devlink_port_health_reporter_destroy(struct devlink_health_reporter *reporter);
+
void *
devlink_health_reporter_priv(struct devlink_health_reporter *reporter);
int devlink_health_report(struct devlink_health_reporter *reporter,
@@ -1020,31 +1801,65 @@ void
devlink_health_reporter_recovery_done(struct devlink_health_reporter *reporter);
bool devlink_is_reload_failed(const struct devlink *devlink);
+void devlink_remote_reload_actions_performed(struct devlink *devlink,
+ enum devlink_reload_limit limit,
+ u32 actions_performed);
-void devlink_flash_update_begin_notify(struct devlink *devlink);
-void devlink_flash_update_end_notify(struct devlink *devlink);
void devlink_flash_update_status_notify(struct devlink *devlink,
const char *status_msg,
const char *component,
unsigned long done,
unsigned long total);
-
+void devlink_flash_update_timeout_notify(struct devlink *devlink,
+ const char *status_msg,
+ const char *component,
+ unsigned long timeout);
+
+int devl_traps_register(struct devlink *devlink,
+ const struct devlink_trap *traps,
+ size_t traps_count, void *priv);
int devlink_traps_register(struct devlink *devlink,
const struct devlink_trap *traps,
size_t traps_count, void *priv);
+void devl_traps_unregister(struct devlink *devlink,
+ const struct devlink_trap *traps,
+ size_t traps_count);
void devlink_traps_unregister(struct devlink *devlink,
const struct devlink_trap *traps,
size_t traps_count);
-void devlink_trap_report(struct devlink *devlink,
- struct sk_buff *skb, void *trap_ctx,
- struct devlink_port *in_devlink_port);
+void devlink_trap_report(struct devlink *devlink, struct sk_buff *skb,
+ void *trap_ctx, struct devlink_port *in_devlink_port,
+ const struct flow_action_cookie *fa_cookie);
void *devlink_trap_ctx_priv(void *trap_ctx);
+int devl_trap_groups_register(struct devlink *devlink,
+ const struct devlink_trap_group *groups,
+ size_t groups_count);
+int devlink_trap_groups_register(struct devlink *devlink,
+ const struct devlink_trap_group *groups,
+ size_t groups_count);
+void devl_trap_groups_unregister(struct devlink *devlink,
+ const struct devlink_trap_group *groups,
+ size_t groups_count);
+void devlink_trap_groups_unregister(struct devlink *devlink,
+ const struct devlink_trap_group *groups,
+ size_t groups_count);
+int
+devl_trap_policers_register(struct devlink *devlink,
+ const struct devlink_trap_policer *policers,
+ size_t policers_count);
+void
+devl_trap_policers_unregister(struct devlink *devlink,
+ const struct devlink_trap_policer *policers,
+ size_t policers_count);
#if IS_ENABLED(CONFIG_NET_DEVLINK)
-void devlink_compat_running_version(struct net_device *dev,
+struct devlink *__must_check devlink_try_get(struct devlink *devlink);
+void devlink_put(struct devlink *devlink);
+
+void devlink_compat_running_version(struct devlink *devlink,
char *buf, size_t len);
-int devlink_compat_flash_update(struct net_device *dev, const char *file_name);
+int devlink_compat_flash_update(struct devlink *devlink, const char *file_name);
int devlink_compat_phys_port_name_get(struct net_device *dev,
char *name, size_t len);
int devlink_compat_switch_id_get(struct net_device *dev,
@@ -1052,13 +1867,22 @@ int devlink_compat_switch_id_get(struct net_device *dev,
#else
+static inline struct devlink *devlink_try_get(struct devlink *devlink)
+{
+ return NULL;
+}
+
+static inline void devlink_put(struct devlink *devlink)
+{
+}
+
static inline void
-devlink_compat_running_version(struct net_device *dev, char *buf, size_t len)
+devlink_compat_running_version(struct devlink *devlink, char *buf, size_t len)
{
}
static inline int
-devlink_compat_flash_update(struct net_device *dev, const char *file_name)
+devlink_compat_flash_update(struct devlink *devlink, const char *file_name)
{
return -EOPNOTSUPP;
}
diff --git a/include/net/dn.h b/include/net/dn.h
deleted file mode 100644
index 56ab0726c641..000000000000
--- a/include/net/dn.h
+++ /dev/null
@@ -1,231 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-#ifndef _NET_DN_H
-#define _NET_DN_H
-
-#include <linux/dn.h>
-#include <net/sock.h>
-#include <net/flow.h>
-#include <asm/byteorder.h>
-#include <asm/unaligned.h>
-
-struct dn_scp /* Session Control Port */
-{
- unsigned char state;
-#define DN_O 1 /* Open */
-#define DN_CR 2 /* Connect Receive */
-#define DN_DR 3 /* Disconnect Reject */
-#define DN_DRC 4 /* Discon. Rej. Complete*/
-#define DN_CC 5 /* Connect Confirm */
-#define DN_CI 6 /* Connect Initiate */
-#define DN_NR 7 /* No resources */
-#define DN_NC 8 /* No communication */
-#define DN_CD 9 /* Connect Delivery */
-#define DN_RJ 10 /* Rejected */
-#define DN_RUN 11 /* Running */
-#define DN_DI 12 /* Disconnect Initiate */
-#define DN_DIC 13 /* Disconnect Complete */
-#define DN_DN 14 /* Disconnect Notificat */
-#define DN_CL 15 /* Closed */
-#define DN_CN 16 /* Closed Notification */
-
- __le16 addrloc;
- __le16 addrrem;
- __u16 numdat;
- __u16 numoth;
- __u16 numoth_rcv;
- __u16 numdat_rcv;
- __u16 ackxmt_dat;
- __u16 ackxmt_oth;
- __u16 ackrcv_dat;
- __u16 ackrcv_oth;
- __u8 flowrem_sw;
- __u8 flowloc_sw;
-#define DN_SEND 2
-#define DN_DONTSEND 1
-#define DN_NOCHANGE 0
- __u16 flowrem_dat;
- __u16 flowrem_oth;
- __u16 flowloc_dat;
- __u16 flowloc_oth;
- __u8 services_rem;
- __u8 services_loc;
- __u8 info_rem;
- __u8 info_loc;
-
- __u16 segsize_rem;
- __u16 segsize_loc;
-
- __u8 nonagle;
- __u8 multi_ireq;
- __u8 accept_mode;
- unsigned long seg_total; /* Running total of current segment */
-
- struct optdata_dn conndata_in;
- struct optdata_dn conndata_out;
- struct optdata_dn discdata_in;
- struct optdata_dn discdata_out;
- struct accessdata_dn accessdata;
-
- struct sockaddr_dn addr; /* Local address */
- struct sockaddr_dn peer; /* Remote address */
-
- /*
- * In this case the RTT estimation is not specified in the
- * docs, nor is any back off algorithm. Here we follow well
- * known tcp algorithms with a few small variations.
- *
- * snd_window: Max number of packets we send before we wait for
- * an ack to come back. This will become part of a
- * more complicated scheme when we support flow
- * control.
- *
- * nsp_srtt: Round-Trip-Time (x8) in jiffies. This is a rolling
- * average.
- * nsp_rttvar: Round-Trip-Time-Varience (x4) in jiffies. This is the
- * varience of the smoothed average (but calculated in
- * a simpler way than for normal statistical varience
- * calculations).
- *
- * nsp_rxtshift: Backoff counter. Value is zero normally, each time
- * a packet is lost is increases by one until an ack
- * is received. Its used to index an array of backoff
- * multipliers.
- */
-#define NSP_MIN_WINDOW 1
-#define NSP_MAX_WINDOW (0x07fe)
- unsigned long max_window;
- unsigned long snd_window;
-#define NSP_INITIAL_SRTT (HZ)
- unsigned long nsp_srtt;
-#define NSP_INITIAL_RTTVAR (HZ*3)
- unsigned long nsp_rttvar;
-#define NSP_MAXRXTSHIFT 12
- unsigned long nsp_rxtshift;
-
- /*
- * Output queues, one for data, one for otherdata/linkservice
- */
- struct sk_buff_head data_xmit_queue;
- struct sk_buff_head other_xmit_queue;
-
- /*
- * Input queue for other data
- */
- struct sk_buff_head other_receive_queue;
- int other_report;
-
- /*
- * Stuff to do with the slow timer
- */
- unsigned long stamp; /* time of last transmit */
- unsigned long persist;
- int (*persist_fxn)(struct sock *sk);
- unsigned long keepalive;
- void (*keepalive_fxn)(struct sock *sk);
-
-};
-
-static inline struct dn_scp *DN_SK(struct sock *sk)
-{
- return (struct dn_scp *)(sk + 1);
-}
-
-/*
- * src,dst : Source and Destination DECnet addresses
- * hops : Number of hops through the network
- * dst_port, src_port : NSP port numbers
- * services, info : Useful data extracted from conninit messages
- * rt_flags : Routing flags byte
- * nsp_flags : NSP layer flags byte
- * segsize : Size of segment
- * segnum : Number, for data, otherdata and linkservice
- * xmit_count : Number of times we've transmitted this skb
- * stamp : Time stamp of most recent transmission, used in RTT calculations
- * iif: Input interface number
- *
- * As a general policy, this structure keeps all addresses in network
- * byte order, and all else in host byte order. Thus dst, src, dst_port
- * and src_port are in network order. All else is in host order.
- *
- */
-#define DN_SKB_CB(skb) ((struct dn_skb_cb *)(skb)->cb)
-struct dn_skb_cb {
- __le16 dst;
- __le16 src;
- __u16 hops;
- __le16 dst_port;
- __le16 src_port;
- __u8 services;
- __u8 info;
- __u8 rt_flags;
- __u8 nsp_flags;
- __u16 segsize;
- __u16 segnum;
- __u16 xmit_count;
- unsigned long stamp;
- int iif;
-};
-
-static inline __le16 dn_eth2dn(unsigned char *ethaddr)
-{
- return get_unaligned((__le16 *)(ethaddr + 4));
-}
-
-static inline __le16 dn_saddr2dn(struct sockaddr_dn *saddr)
-{
- return *(__le16 *)saddr->sdn_nodeaddr;
-}
-
-static inline void dn_dn2eth(unsigned char *ethaddr, __le16 addr)
-{
- __u16 a = le16_to_cpu(addr);
- ethaddr[0] = 0xAA;
- ethaddr[1] = 0x00;
- ethaddr[2] = 0x04;
- ethaddr[3] = 0x00;
- ethaddr[4] = (__u8)(a & 0xff);
- ethaddr[5] = (__u8)(a >> 8);
-}
-
-static inline void dn_sk_ports_copy(struct flowidn *fld, struct dn_scp *scp)
-{
- fld->fld_sport = scp->addrloc;
- fld->fld_dport = scp->addrrem;
-}
-
-unsigned int dn_mss_from_pmtu(struct net_device *dev, int mtu);
-void dn_register_sysctl(void);
-void dn_unregister_sysctl(void);
-
-#define DN_MENUVER_ACC 0x01
-#define DN_MENUVER_USR 0x02
-#define DN_MENUVER_PRX 0x04
-#define DN_MENUVER_UIC 0x08
-
-struct sock *dn_sklist_find_listener(struct sockaddr_dn *addr);
-struct sock *dn_find_by_skb(struct sk_buff *skb);
-#define DN_ASCBUF_LEN 9
-char *dn_addr2asc(__u16, char *);
-int dn_destroy_timer(struct sock *sk);
-
-int dn_sockaddr2username(struct sockaddr_dn *addr, unsigned char *buf,
- unsigned char type);
-int dn_username2sockaddr(unsigned char *data, int len, struct sockaddr_dn *addr,
- unsigned char *type);
-
-void dn_start_slow_timer(struct sock *sk);
-void dn_stop_slow_timer(struct sock *sk);
-
-extern __le16 decnet_address;
-extern int decnet_debug_level;
-extern int decnet_time_wait;
-extern int decnet_dn_count;
-extern int decnet_di_count;
-extern int decnet_dr_count;
-extern int decnet_no_fc_max_cwnd;
-
-extern long sysctl_decnet_mem[3];
-extern int sysctl_decnet_wmem[3];
-extern int sysctl_decnet_rmem[3];
-
-#endif /* _NET_DN_H */
diff --git a/include/net/dn_dev.h b/include/net/dn_dev.h
deleted file mode 100644
index 595b4f6c1eb1..000000000000
--- a/include/net/dn_dev.h
+++ /dev/null
@@ -1,199 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-#ifndef _NET_DN_DEV_H
-#define _NET_DN_DEV_H
-
-
-struct dn_dev;
-
-struct dn_ifaddr {
- struct dn_ifaddr __rcu *ifa_next;
- struct dn_dev *ifa_dev;
- __le16 ifa_local;
- __le16 ifa_address;
- __u32 ifa_flags;
- __u8 ifa_scope;
- char ifa_label[IFNAMSIZ];
- struct rcu_head rcu;
-};
-
-#define DN_DEV_S_RU 0 /* Run - working normally */
-#define DN_DEV_S_CR 1 /* Circuit Rejected */
-#define DN_DEV_S_DS 2 /* Data Link Start */
-#define DN_DEV_S_RI 3 /* Routing Layer Initialize */
-#define DN_DEV_S_RV 4 /* Routing Layer Verify */
-#define DN_DEV_S_RC 5 /* Routing Layer Complete */
-#define DN_DEV_S_OF 6 /* Off */
-#define DN_DEV_S_HA 7 /* Halt */
-
-
-/*
- * The dn_dev_parms structure contains the set of parameters
- * for each device (hence inclusion in the dn_dev structure)
- * and an array is used to store the default types of supported
- * device (in dn_dev.c).
- *
- * The type field matches the ARPHRD_ constants and is used in
- * searching the list for supported devices when new devices
- * come up.
- *
- * The mode field is used to find out if a device is broadcast,
- * multipoint, or pointopoint. Please note that DECnet thinks
- * different ways about devices to the rest of the kernel
- * so the normal IFF_xxx flags are invalid here. For devices
- * which can be any combination of the previously mentioned
- * attributes, you can set this on a per device basis by
- * installing an up() routine.
- *
- * The device state field, defines the initial state in which the
- * device will come up. In the dn_dev structure, it is the actual
- * state.
- *
- * Things have changed here. I've killed timer1 since it's a user space
- * issue for a user space routing deamon to sort out. The kernel does
- * not need to be bothered with it.
- *
- * Timers:
- * t2 - Rate limit timer, min time between routing and hello messages
- * t3 - Hello timer, send hello messages when it expires
- *
- * Callbacks:
- * up() - Called to initialize device, return value can veto use of
- * device with DECnet.
- * down() - Called to turn device off when it goes down
- * timer3() - Called once for each ifaddr when timer 3 goes off
- *
- * sysctl - Hook for sysctl things
- *
- */
-struct dn_dev_parms {
- int type; /* ARPHRD_xxx */
- int mode; /* Broadcast, Unicast, Mulitpoint */
-#define DN_DEV_BCAST 1
-#define DN_DEV_UCAST 2
-#define DN_DEV_MPOINT 4
- int state; /* Initial state */
- int forwarding; /* 0=EndNode, 1=L1Router, 2=L2Router */
- unsigned long t2; /* Default value of t2 */
- unsigned long t3; /* Default value of t3 */
- int priority; /* Priority to be a router */
- char *name; /* Name for sysctl */
- int (*up)(struct net_device *);
- void (*down)(struct net_device *);
- void (*timer3)(struct net_device *, struct dn_ifaddr *ifa);
- void *sysctl;
-};
-
-
-struct dn_dev {
- struct dn_ifaddr __rcu *ifa_list;
- struct net_device *dev;
- struct dn_dev_parms parms;
- char use_long;
- struct timer_list timer;
- unsigned long t3;
- struct neigh_parms *neigh_parms;
- __u8 addr[ETH_ALEN];
- struct neighbour *router; /* Default router on circuit */
- struct neighbour *peer; /* Peer on pointopoint links */
- unsigned long uptime; /* Time device went up in jiffies */
-};
-
-struct dn_short_packet {
- __u8 msgflg;
- __le16 dstnode;
- __le16 srcnode;
- __u8 forward;
-} __packed;
-
-struct dn_long_packet {
- __u8 msgflg;
- __u8 d_area;
- __u8 d_subarea;
- __u8 d_id[6];
- __u8 s_area;
- __u8 s_subarea;
- __u8 s_id[6];
- __u8 nl2;
- __u8 visit_ct;
- __u8 s_class;
- __u8 pt;
-} __packed;
-
-/*------------------------- DRP - Routing messages ---------------------*/
-
-struct endnode_hello_message {
- __u8 msgflg;
- __u8 tiver[3];
- __u8 id[6];
- __u8 iinfo;
- __le16 blksize;
- __u8 area;
- __u8 seed[8];
- __u8 neighbor[6];
- __le16 timer;
- __u8 mpd;
- __u8 datalen;
- __u8 data[2];
-} __packed;
-
-struct rtnode_hello_message {
- __u8 msgflg;
- __u8 tiver[3];
- __u8 id[6];
- __u8 iinfo;
- __le16 blksize;
- __u8 priority;
- __u8 area;
- __le16 timer;
- __u8 mpd;
-} __packed;
-
-
-void dn_dev_init(void);
-void dn_dev_cleanup(void);
-
-int dn_dev_ioctl(unsigned int cmd, void __user *arg);
-
-void dn_dev_devices_off(void);
-void dn_dev_devices_on(void);
-
-void dn_dev_init_pkt(struct sk_buff *skb);
-void dn_dev_veri_pkt(struct sk_buff *skb);
-void dn_dev_hello(struct sk_buff *skb);
-
-void dn_dev_up(struct net_device *);
-void dn_dev_down(struct net_device *);
-
-int dn_dev_set_default(struct net_device *dev, int force);
-struct net_device *dn_dev_get_default(void);
-int dn_dev_bind_default(__le16 *addr);
-
-int register_dnaddr_notifier(struct notifier_block *nb);
-int unregister_dnaddr_notifier(struct notifier_block *nb);
-
-static inline int dn_dev_islocal(struct net_device *dev, __le16 addr)
-{
- struct dn_dev *dn_db;
- struct dn_ifaddr *ifa;
- int res = 0;
-
- rcu_read_lock();
- dn_db = rcu_dereference(dev->dn_ptr);
- if (dn_db == NULL) {
- printk(KERN_DEBUG "dn_dev_islocal: Called for non DECnet device\n");
- goto out;
- }
-
- for (ifa = rcu_dereference(dn_db->ifa_list);
- ifa != NULL;
- ifa = rcu_dereference(ifa->ifa_next))
- if ((addr ^ ifa->ifa_local) == 0) {
- res = 1;
- break;
- }
-out:
- rcu_read_unlock();
- return res;
-}
-
-#endif /* _NET_DN_DEV_H */
diff --git a/include/net/dn_fib.h b/include/net/dn_fib.h
deleted file mode 100644
index 6dd2213c5eb2..000000000000
--- a/include/net/dn_fib.h
+++ /dev/null
@@ -1,167 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-#ifndef _NET_DN_FIB_H
-#define _NET_DN_FIB_H
-
-#include <linux/netlink.h>
-#include <linux/refcount.h>
-
-extern const struct nla_policy rtm_dn_policy[];
-
-struct dn_fib_res {
- struct fib_rule *r;
- struct dn_fib_info *fi;
- unsigned char prefixlen;
- unsigned char nh_sel;
- unsigned char type;
- unsigned char scope;
-};
-
-struct dn_fib_nh {
- struct net_device *nh_dev;
- unsigned int nh_flags;
- unsigned char nh_scope;
- int nh_weight;
- int nh_power;
- int nh_oif;
- __le16 nh_gw;
-};
-
-struct dn_fib_info {
- struct dn_fib_info *fib_next;
- struct dn_fib_info *fib_prev;
- int fib_treeref;
- refcount_t fib_clntref;
- int fib_dead;
- unsigned int fib_flags;
- int fib_protocol;
- __le16 fib_prefsrc;
- __u32 fib_priority;
- __u32 fib_metrics[RTAX_MAX];
- int fib_nhs;
- int fib_power;
- struct dn_fib_nh fib_nh[0];
-#define dn_fib_dev fib_nh[0].nh_dev
-};
-
-
-#define DN_FIB_RES_RESET(res) ((res).nh_sel = 0)
-#define DN_FIB_RES_NH(res) ((res).fi->fib_nh[(res).nh_sel])
-
-#define DN_FIB_RES_PREFSRC(res) ((res).fi->fib_prefsrc ? : __dn_fib_res_prefsrc(&res))
-#define DN_FIB_RES_GW(res) (DN_FIB_RES_NH(res).nh_gw)
-#define DN_FIB_RES_DEV(res) (DN_FIB_RES_NH(res).nh_dev)
-#define DN_FIB_RES_OIF(res) (DN_FIB_RES_NH(res).nh_oif)
-
-typedef struct {
- __le16 datum;
-} dn_fib_key_t;
-
-typedef struct {
- __le16 datum;
-} dn_fib_hash_t;
-
-typedef struct {
- __u16 datum;
-} dn_fib_idx_t;
-
-struct dn_fib_node {
- struct dn_fib_node *fn_next;
- struct dn_fib_info *fn_info;
-#define DN_FIB_INFO(f) ((f)->fn_info)
- dn_fib_key_t fn_key;
- u8 fn_type;
- u8 fn_scope;
- u8 fn_state;
-};
-
-
-struct dn_fib_table {
- struct hlist_node hlist;
- u32 n;
-
- int (*insert)(struct dn_fib_table *t, struct rtmsg *r,
- struct nlattr *attrs[], struct nlmsghdr *n,
- struct netlink_skb_parms *req);
- int (*delete)(struct dn_fib_table *t, struct rtmsg *r,
- struct nlattr *attrs[], struct nlmsghdr *n,
- struct netlink_skb_parms *req);
- int (*lookup)(struct dn_fib_table *t, const struct flowidn *fld,
- struct dn_fib_res *res);
- int (*flush)(struct dn_fib_table *t);
- int (*dump)(struct dn_fib_table *t, struct sk_buff *skb, struct netlink_callback *cb);
-
- unsigned char data[0];
-};
-
-#ifdef CONFIG_DECNET_ROUTER
-/*
- * dn_fib.c
- */
-void dn_fib_init(void);
-void dn_fib_cleanup(void);
-
-int dn_fib_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg);
-struct dn_fib_info *dn_fib_create_info(const struct rtmsg *r,
- struct nlattr *attrs[],
- const struct nlmsghdr *nlh, int *errp);
-int dn_fib_semantic_match(int type, struct dn_fib_info *fi,
- const struct flowidn *fld, struct dn_fib_res *res);
-void dn_fib_release_info(struct dn_fib_info *fi);
-void dn_fib_flush(void);
-void dn_fib_select_multipath(const struct flowidn *fld, struct dn_fib_res *res);
-
-/*
- * dn_tables.c
- */
-struct dn_fib_table *dn_fib_get_table(u32 n, int creat);
-struct dn_fib_table *dn_fib_empty_table(void);
-void dn_fib_table_init(void);
-void dn_fib_table_cleanup(void);
-
-/*
- * dn_rules.c
- */
-void dn_fib_rules_init(void);
-void dn_fib_rules_cleanup(void);
-unsigned int dnet_addr_type(__le16 addr);
-int dn_fib_lookup(struct flowidn *fld, struct dn_fib_res *res);
-
-int dn_fib_dump(struct sk_buff *skb, struct netlink_callback *cb);
-
-void dn_fib_free_info(struct dn_fib_info *fi);
-
-static inline void dn_fib_info_put(struct dn_fib_info *fi)
-{
- if (refcount_dec_and_test(&fi->fib_clntref))
- dn_fib_free_info(fi);
-}
-
-static inline void dn_fib_res_put(struct dn_fib_res *res)
-{
- if (res->fi)
- dn_fib_info_put(res->fi);
- if (res->r)
- fib_rule_put(res->r);
-}
-
-#else /* Endnode */
-
-#define dn_fib_init() do { } while(0)
-#define dn_fib_cleanup() do { } while(0)
-
-#define dn_fib_lookup(fl, res) (-ESRCH)
-#define dn_fib_info_put(fi) do { } while(0)
-#define dn_fib_select_multipath(fl, res) do { } while(0)
-#define dn_fib_rules_policy(saddr,res,flags) (0)
-#define dn_fib_res_put(res) do { } while(0)
-
-#endif /* CONFIG_DECNET_ROUTER */
-
-static inline __le16 dnet_make_mask(int n)
-{
- if (n)
- return cpu_to_le16(~((1 << (16 - n)) - 1));
- return cpu_to_le16(0);
-}
-
-#endif /* _NET_DN_FIB_H */
diff --git a/include/net/dn_neigh.h b/include/net/dn_neigh.h
deleted file mode 100644
index 2e3e7793973a..000000000000
--- a/include/net/dn_neigh.h
+++ /dev/null
@@ -1,30 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-#ifndef _NET_DN_NEIGH_H
-#define _NET_DN_NEIGH_H
-
-/*
- * The position of the first two fields of
- * this structure are critical - SJW
- */
-struct dn_neigh {
- struct neighbour n;
- __le16 addr;
- unsigned long flags;
-#define DN_NDFLAG_R1 0x0001 /* Router L1 */
-#define DN_NDFLAG_R2 0x0002 /* Router L2 */
-#define DN_NDFLAG_P3 0x0004 /* Phase III Node */
- unsigned long blksize;
- __u8 priority;
-};
-
-void dn_neigh_init(void);
-void dn_neigh_cleanup(void);
-int dn_neigh_router_hello(struct net *net, struct sock *sk, struct sk_buff *skb);
-int dn_neigh_endnode_hello(struct net *net, struct sock *sk, struct sk_buff *skb);
-void dn_neigh_pointopoint_hello(struct sk_buff *skb);
-int dn_neigh_elist(struct net_device *dev, unsigned char *ptr, int n);
-int dn_to_neigh_output(struct net *net, struct sock *sk, struct sk_buff *skb);
-
-extern struct neigh_table dn_neigh_table;
-
-#endif /* _NET_DN_NEIGH_H */
diff --git a/include/net/dn_nsp.h b/include/net/dn_nsp.h
deleted file mode 100644
index f83932b864a9..000000000000
--- a/include/net/dn_nsp.h
+++ /dev/null
@@ -1,195 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0-or-later */
-#ifndef _NET_DN_NSP_H
-#define _NET_DN_NSP_H
-/******************************************************************************
- (c) 1995-1998 E.M. Serrat emserrat@geocities.com
-
-*******************************************************************************/
-/* dn_nsp.c functions prototyping */
-
-void dn_nsp_send_data_ack(struct sock *sk);
-void dn_nsp_send_oth_ack(struct sock *sk);
-void dn_send_conn_ack(struct sock *sk);
-void dn_send_conn_conf(struct sock *sk, gfp_t gfp);
-void dn_nsp_send_disc(struct sock *sk, unsigned char type,
- unsigned short reason, gfp_t gfp);
-void dn_nsp_return_disc(struct sk_buff *skb, unsigned char type,
- unsigned short reason);
-void dn_nsp_send_link(struct sock *sk, unsigned char lsflags, char fcval);
-void dn_nsp_send_conninit(struct sock *sk, unsigned char flags);
-
-void dn_nsp_output(struct sock *sk);
-int dn_nsp_check_xmit_queue(struct sock *sk, struct sk_buff *skb,
- struct sk_buff_head *q, unsigned short acknum);
-void dn_nsp_queue_xmit(struct sock *sk, struct sk_buff *skb, gfp_t gfp,
- int oob);
-unsigned long dn_nsp_persist(struct sock *sk);
-int dn_nsp_xmit_timeout(struct sock *sk);
-
-int dn_nsp_rx(struct sk_buff *);
-int dn_nsp_backlog_rcv(struct sock *sk, struct sk_buff *skb);
-
-struct sk_buff *dn_alloc_skb(struct sock *sk, int size, gfp_t pri);
-struct sk_buff *dn_alloc_send_skb(struct sock *sk, size_t *size, int noblock,
- long timeo, int *err);
-
-#define NSP_REASON_OK 0 /* No error */
-#define NSP_REASON_NR 1 /* No resources */
-#define NSP_REASON_UN 2 /* Unrecognised node name */
-#define NSP_REASON_SD 3 /* Node shutting down */
-#define NSP_REASON_ID 4 /* Invalid destination end user */
-#define NSP_REASON_ER 5 /* End user lacks resources */
-#define NSP_REASON_OB 6 /* Object too busy */
-#define NSP_REASON_US 7 /* Unspecified error */
-#define NSP_REASON_TP 8 /* Third-Party abort */
-#define NSP_REASON_EA 9 /* End user has aborted the link */
-#define NSP_REASON_IF 10 /* Invalid node name format */
-#define NSP_REASON_LS 11 /* Local node shutdown */
-#define NSP_REASON_LL 32 /* Node lacks logical-link resources */
-#define NSP_REASON_LE 33 /* End user lacks logical-link resources */
-#define NSP_REASON_UR 34 /* Unacceptable RQSTRID or PASSWORD field */
-#define NSP_REASON_UA 36 /* Unacceptable ACCOUNT field */
-#define NSP_REASON_TM 38 /* End user timed out logical link */
-#define NSP_REASON_NU 39 /* Node unreachable */
-#define NSP_REASON_NL 41 /* No-link message */
-#define NSP_REASON_DC 42 /* Disconnect confirm */
-#define NSP_REASON_IO 43 /* Image data field overflow */
-
-#define NSP_DISCINIT 0x38
-#define NSP_DISCCONF 0x48
-
-/*------------------------- NSP - messages ------------------------------*/
-/* Data Messages */
-/*---------------*/
-
-/* Data Messages (data segment/interrupt/link service) */
-
-struct nsp_data_seg_msg {
- __u8 msgflg;
- __le16 dstaddr;
- __le16 srcaddr;
-} __packed;
-
-struct nsp_data_opt_msg {
- __le16 acknum;
- __le16 segnum;
- __le16 lsflgs;
-} __packed;
-
-struct nsp_data_opt_msg1 {
- __le16 acknum;
- __le16 segnum;
-} __packed;
-
-
-/* Acknowledgment Message (data/other data) */
-struct nsp_data_ack_msg {
- __u8 msgflg;
- __le16 dstaddr;
- __le16 srcaddr;
- __le16 acknum;
-} __packed;
-
-/* Connect Acknowledgment Message */
-struct nsp_conn_ack_msg {
- __u8 msgflg;
- __le16 dstaddr;
-} __packed;
-
-
-/* Connect Initiate/Retransmit Initiate/Connect Confirm */
-struct nsp_conn_init_msg {
- __u8 msgflg;
-#define NSP_CI 0x18 /* Connect Initiate */
-#define NSP_RCI 0x68 /* Retrans. Conn Init */
- __le16 dstaddr;
- __le16 srcaddr;
- __u8 services;
-#define NSP_FC_NONE 0x00 /* Flow Control None */
-#define NSP_FC_SRC 0x04 /* Seg Req. Count */
-#define NSP_FC_SCMC 0x08 /* Sess. Control Mess */
-#define NSP_FC_MASK 0x0c /* FC type mask */
- __u8 info;
- __le16 segsize;
-} __packed;
-
-/* Disconnect Initiate/Disconnect Confirm */
-struct nsp_disconn_init_msg {
- __u8 msgflg;
- __le16 dstaddr;
- __le16 srcaddr;
- __le16 reason;
-} __packed;
-
-
-
-struct srcobj_fmt {
- __u8 format;
- __u8 task;
- __le16 grpcode;
- __le16 usrcode;
- __u8 dlen;
-} __packed;
-
-/*
- * A collection of functions for manipulating the sequence
- * numbers used in NSP. Similar in operation to the functions
- * of the same name in TCP.
- */
-static __inline__ int dn_before(__u16 seq1, __u16 seq2)
-{
- seq1 &= 0x0fff;
- seq2 &= 0x0fff;
-
- return (int)((seq1 - seq2) & 0x0fff) > 2048;
-}
-
-
-static __inline__ int dn_after(__u16 seq1, __u16 seq2)
-{
- seq1 &= 0x0fff;
- seq2 &= 0x0fff;
-
- return (int)((seq2 - seq1) & 0x0fff) > 2048;
-}
-
-static __inline__ int dn_equal(__u16 seq1, __u16 seq2)
-{
- return ((seq1 ^ seq2) & 0x0fff) == 0;
-}
-
-static __inline__ int dn_before_or_equal(__u16 seq1, __u16 seq2)
-{
- return (dn_before(seq1, seq2) || dn_equal(seq1, seq2));
-}
-
-static __inline__ void seq_add(__u16 *seq, __u16 off)
-{
- (*seq) += off;
- (*seq) &= 0x0fff;
-}
-
-static __inline__ int seq_next(__u16 seq1, __u16 seq2)
-{
- return dn_equal(seq1 + 1, seq2);
-}
-
-/*
- * Can we delay the ack ?
- */
-static __inline__ int sendack(__u16 seq)
-{
- return (int)((seq & 0x1000) ? 0 : 1);
-}
-
-/*
- * Is socket congested ?
- */
-static __inline__ int dn_congested(struct sock *sk)
-{
- return atomic_read(&sk->sk_rmem_alloc) > (sk->sk_rcvbuf >> 1);
-}
-
-#define DN_MAX_NSP_DATA_HEADER (11)
-
-#endif /* _NET_DN_NSP_H */
diff --git a/include/net/dn_route.h b/include/net/dn_route.h
deleted file mode 100644
index 6f1e94ac0bdf..000000000000
--- a/include/net/dn_route.h
+++ /dev/null
@@ -1,115 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0-or-later */
-#ifndef _NET_DN_ROUTE_H
-#define _NET_DN_ROUTE_H
-
-/******************************************************************************
- (c) 1995-1998 E.M. Serrat emserrat@geocities.com
-
-*******************************************************************************/
-
-struct sk_buff *dn_alloc_skb(struct sock *sk, int size, gfp_t pri);
-int dn_route_output_sock(struct dst_entry __rcu **pprt, struct flowidn *,
- struct sock *sk, int flags);
-int dn_cache_dump(struct sk_buff *skb, struct netlink_callback *cb);
-void dn_rt_cache_flush(int delay);
-int dn_route_rcv(struct sk_buff *skb, struct net_device *dev,
- struct packet_type *pt, struct net_device *orig_dev);
-
-/* Masks for flags field */
-#define DN_RT_F_PID 0x07 /* Mask for packet type */
-#define DN_RT_F_PF 0x80 /* Padding Follows */
-#define DN_RT_F_VER 0x40 /* Version =0 discard packet if ==1 */
-#define DN_RT_F_IE 0x20 /* Intra Ethernet, Reserved in short pkt */
-#define DN_RT_F_RTS 0x10 /* Packet is being returned to sender */
-#define DN_RT_F_RQR 0x08 /* Return packet to sender upon non-delivery */
-
-/* Mask for types of routing packets */
-#define DN_RT_PKT_MSK 0x06
-/* Types of routing packets */
-#define DN_RT_PKT_SHORT 0x02 /* Short routing packet */
-#define DN_RT_PKT_LONG 0x06 /* Long routing packet */
-
-/* Mask for control/routing selection */
-#define DN_RT_PKT_CNTL 0x01 /* Set to 1 if a control packet */
-/* Types of control packets */
-#define DN_RT_CNTL_MSK 0x0f /* Mask for control packets */
-#define DN_RT_PKT_INIT 0x01 /* Initialisation packet */
-#define DN_RT_PKT_VERI 0x03 /* Verification Message */
-#define DN_RT_PKT_HELO 0x05 /* Hello and Test Message */
-#define DN_RT_PKT_L1RT 0x07 /* Level 1 Routing Message */
-#define DN_RT_PKT_L2RT 0x09 /* Level 2 Routing Message */
-#define DN_RT_PKT_ERTH 0x0b /* Ethernet Router Hello */
-#define DN_RT_PKT_EEDH 0x0d /* Ethernet EndNode Hello */
-
-/* Values for info field in hello message */
-#define DN_RT_INFO_TYPE 0x03 /* Type mask */
-#define DN_RT_INFO_L1RT 0x02 /* L1 Router */
-#define DN_RT_INFO_L2RT 0x01 /* L2 Router */
-#define DN_RT_INFO_ENDN 0x03 /* EndNode */
-#define DN_RT_INFO_VERI 0x04 /* Verification Reqd. */
-#define DN_RT_INFO_RJCT 0x08 /* Reject Flag, Reserved */
-#define DN_RT_INFO_VFLD 0x10 /* Verification Failed, Reserved */
-#define DN_RT_INFO_NOML 0x20 /* No Multicast traffic accepted */
-#define DN_RT_INFO_BLKR 0x40 /* Blocking Requested */
-
-/*
- * The fl structure is what we used to look up the route.
- * The rt_saddr & rt_daddr entries are the same as key.saddr & key.daddr
- * except for local input routes, where the rt_saddr = fl.fld_dst and
- * rt_daddr = fl.fld_src to allow the route to be used for returning
- * packets to the originating host.
- */
-struct dn_route {
- struct dst_entry dst;
- struct dn_route __rcu *dn_next;
-
- struct neighbour *n;
-
- struct flowidn fld;
-
- __le16 rt_saddr;
- __le16 rt_daddr;
- __le16 rt_gateway;
- __le16 rt_local_src; /* Source used for forwarding packets */
- __le16 rt_src_map;
- __le16 rt_dst_map;
-
- unsigned int rt_flags;
- unsigned int rt_type;
-};
-
-static inline bool dn_is_input_route(struct dn_route *rt)
-{
- return rt->fld.flowidn_iif != 0;
-}
-
-static inline bool dn_is_output_route(struct dn_route *rt)
-{
- return rt->fld.flowidn_iif == 0;
-}
-
-void dn_route_init(void);
-void dn_route_cleanup(void);
-
-#include <net/sock.h>
-#include <linux/if_arp.h>
-
-static inline void dn_rt_send(struct sk_buff *skb)
-{
- dev_queue_xmit(skb);
-}
-
-static inline void dn_rt_finish_output(struct sk_buff *skb, char *dst, char *src)
-{
- struct net_device *dev = skb->dev;
-
- if ((dev->type != ARPHRD_ETHER) && (dev->type != ARPHRD_LOOPBACK))
- dst = NULL;
-
- if (dev_hard_header(skb, dev, ETH_P_DNA_RT, dst, src, skb->len) >= 0)
- dn_rt_send(skb);
- else
- kfree_skb(skb);
-}
-
-#endif /* _NET_DN_ROUTE_H */
diff --git a/include/net/drop_monitor.h b/include/net/drop_monitor.h
deleted file mode 100644
index 2ab668461463..000000000000
--- a/include/net/drop_monitor.h
+++ /dev/null
@@ -1,33 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0-only */
-
-#ifndef _NET_DROP_MONITOR_H_
-#define _NET_DROP_MONITOR_H_
-
-#include <linux/ktime.h>
-#include <linux/netdevice.h>
-#include <linux/skbuff.h>
-
-/**
- * struct net_dm_hw_metadata - Hardware-supplied packet metadata.
- * @trap_group_name: Hardware trap group name.
- * @trap_name: Hardware trap name.
- * @input_dev: Input netdevice.
- */
-struct net_dm_hw_metadata {
- const char *trap_group_name;
- const char *trap_name;
- struct net_device *input_dev;
-};
-
-#if IS_ENABLED(CONFIG_NET_DROP_MONITOR)
-void net_dm_hw_report(struct sk_buff *skb,
- const struct net_dm_hw_metadata *hw_metadata);
-#else
-static inline void
-net_dm_hw_report(struct sk_buff *skb,
- const struct net_dm_hw_metadata *hw_metadata)
-{
-}
-#endif
-
-#endif /* _NET_DROP_MONITOR_H_ */
diff --git a/include/net/dropreason.h b/include/net/dropreason.h
new file mode 100644
index 000000000000..c1cbcdbaf149
--- /dev/null
+++ b/include/net/dropreason.h
@@ -0,0 +1,323 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+
+#ifndef _LINUX_DROPREASON_H
+#define _LINUX_DROPREASON_H
+
+#define DEFINE_DROP_REASON(FN, FNe) \
+ FN(NOT_SPECIFIED) \
+ FN(NO_SOCKET) \
+ FN(PKT_TOO_SMALL) \
+ FN(TCP_CSUM) \
+ FN(SOCKET_FILTER) \
+ FN(UDP_CSUM) \
+ FN(NETFILTER_DROP) \
+ FN(OTHERHOST) \
+ FN(IP_CSUM) \
+ FN(IP_INHDR) \
+ FN(IP_RPFILTER) \
+ FN(UNICAST_IN_L2_MULTICAST) \
+ FN(XFRM_POLICY) \
+ FN(IP_NOPROTO) \
+ FN(SOCKET_RCVBUFF) \
+ FN(PROTO_MEM) \
+ FN(TCP_MD5NOTFOUND) \
+ FN(TCP_MD5UNEXPECTED) \
+ FN(TCP_MD5FAILURE) \
+ FN(SOCKET_BACKLOG) \
+ FN(TCP_FLAGS) \
+ FN(TCP_ZEROWINDOW) \
+ FN(TCP_OLD_DATA) \
+ FN(TCP_OVERWINDOW) \
+ FN(TCP_OFOMERGE) \
+ FN(TCP_RFC7323_PAWS) \
+ FN(TCP_INVALID_SEQUENCE) \
+ FN(TCP_RESET) \
+ FN(TCP_INVALID_SYN) \
+ FN(TCP_CLOSE) \
+ FN(TCP_FASTOPEN) \
+ FN(TCP_OLD_ACK) \
+ FN(TCP_TOO_OLD_ACK) \
+ FN(TCP_ACK_UNSENT_DATA) \
+ FN(TCP_OFO_QUEUE_PRUNE) \
+ FN(TCP_OFO_DROP) \
+ FN(IP_OUTNOROUTES) \
+ FN(BPF_CGROUP_EGRESS) \
+ FN(IPV6DISABLED) \
+ FN(NEIGH_CREATEFAIL) \
+ FN(NEIGH_FAILED) \
+ FN(NEIGH_QUEUEFULL) \
+ FN(NEIGH_DEAD) \
+ FN(TC_EGRESS) \
+ FN(QDISC_DROP) \
+ FN(CPU_BACKLOG) \
+ FN(XDP) \
+ FN(TC_INGRESS) \
+ FN(UNHANDLED_PROTO) \
+ FN(SKB_CSUM) \
+ FN(SKB_GSO_SEG) \
+ FN(SKB_UCOPY_FAULT) \
+ FN(DEV_HDR) \
+ FN(DEV_READY) \
+ FN(FULL_RING) \
+ FN(NOMEM) \
+ FN(HDR_TRUNC) \
+ FN(TAP_FILTER) \
+ FN(TAP_TXFILTER) \
+ FN(ICMP_CSUM) \
+ FN(INVALID_PROTO) \
+ FN(IP_INADDRERRORS) \
+ FN(IP_INNOROUTES) \
+ FN(PKT_TOO_BIG) \
+ FNe(MAX)
+
+/**
+ * enum skb_drop_reason - the reasons of skb drops
+ *
+ * The reason of skb drop, which is used in kfree_skb_reason().
+ */
+enum skb_drop_reason {
+ /**
+ * @SKB_NOT_DROPPED_YET: skb is not dropped yet (used for no-drop case)
+ */
+ SKB_NOT_DROPPED_YET = 0,
+ /** @SKB_DROP_REASON_NOT_SPECIFIED: drop reason is not specified */
+ SKB_DROP_REASON_NOT_SPECIFIED,
+ /** @SKB_DROP_REASON_NO_SOCKET: socket not found */
+ SKB_DROP_REASON_NO_SOCKET,
+ /** @SKB_DROP_REASON_PKT_TOO_SMALL: packet size is too small */
+ SKB_DROP_REASON_PKT_TOO_SMALL,
+ /** @SKB_DROP_REASON_TCP_CSUM: TCP checksum error */
+ SKB_DROP_REASON_TCP_CSUM,
+ /** @SKB_DROP_REASON_SOCKET_FILTER: dropped by socket filter */
+ SKB_DROP_REASON_SOCKET_FILTER,
+ /** @SKB_DROP_REASON_UDP_CSUM: UDP checksum error */
+ SKB_DROP_REASON_UDP_CSUM,
+ /** @SKB_DROP_REASON_NETFILTER_DROP: dropped by netfilter */
+ SKB_DROP_REASON_NETFILTER_DROP,
+ /**
+ * @SKB_DROP_REASON_OTHERHOST: packet don't belong to current host
+ * (interface is in promisc mode)
+ */
+ SKB_DROP_REASON_OTHERHOST,
+ /** @SKB_DROP_REASON_IP_CSUM: IP checksum error */
+ SKB_DROP_REASON_IP_CSUM,
+ /**
+ * @SKB_DROP_REASON_IP_INHDR: there is something wrong with IP header (see
+ * IPSTATS_MIB_INHDRERRORS)
+ */
+ SKB_DROP_REASON_IP_INHDR,
+ /**
+ * @SKB_DROP_REASON_IP_RPFILTER: IP rpfilter validate failed. see the
+ * document for rp_filter in ip-sysctl.rst for more information
+ */
+ SKB_DROP_REASON_IP_RPFILTER,
+ /**
+ * @SKB_DROP_REASON_UNICAST_IN_L2_MULTICAST: destination address of L2 is
+ * multicast, but L3 is unicast.
+ */
+ SKB_DROP_REASON_UNICAST_IN_L2_MULTICAST,
+ /** @SKB_DROP_REASON_XFRM_POLICY: xfrm policy check failed */
+ SKB_DROP_REASON_XFRM_POLICY,
+ /** @SKB_DROP_REASON_IP_NOPROTO: no support for IP protocol */
+ SKB_DROP_REASON_IP_NOPROTO,
+ /** @SKB_DROP_REASON_SOCKET_RCVBUFF: socket receive buff is full */
+ SKB_DROP_REASON_SOCKET_RCVBUFF,
+ /**
+ * @SKB_DROP_REASON_PROTO_MEM: proto memory limition, such as udp packet
+ * drop out of udp_memory_allocated.
+ */
+ SKB_DROP_REASON_PROTO_MEM,
+ /**
+ * @SKB_DROP_REASON_TCP_MD5NOTFOUND: no MD5 hash and one expected,
+ * corresponding to LINUX_MIB_TCPMD5NOTFOUND
+ */
+ SKB_DROP_REASON_TCP_MD5NOTFOUND,
+ /**
+ * @SKB_DROP_REASON_TCP_MD5UNEXPECTED: MD5 hash and we're not expecting
+ * one, corresponding to LINUX_MIB_TCPMD5UNEXPECTED
+ */
+ SKB_DROP_REASON_TCP_MD5UNEXPECTED,
+ /**
+ * @SKB_DROP_REASON_TCP_MD5FAILURE: MD5 hash and its wrong, corresponding
+ * to LINUX_MIB_TCPMD5FAILURE
+ */
+ SKB_DROP_REASON_TCP_MD5FAILURE,
+ /**
+ * @SKB_DROP_REASON_SOCKET_BACKLOG: failed to add skb to socket backlog (
+ * see LINUX_MIB_TCPBACKLOGDROP)
+ */
+ SKB_DROP_REASON_SOCKET_BACKLOG,
+ /** @SKB_DROP_REASON_TCP_FLAGS: TCP flags invalid */
+ SKB_DROP_REASON_TCP_FLAGS,
+ /**
+ * @SKB_DROP_REASON_TCP_ZEROWINDOW: TCP receive window size is zero,
+ * see LINUX_MIB_TCPZEROWINDOWDROP
+ */
+ SKB_DROP_REASON_TCP_ZEROWINDOW,
+ /**
+ * @SKB_DROP_REASON_TCP_OLD_DATA: the TCP data reveived is already
+ * received before (spurious retrans may happened), see
+ * LINUX_MIB_DELAYEDACKLOST
+ */
+ SKB_DROP_REASON_TCP_OLD_DATA,
+ /**
+ * @SKB_DROP_REASON_TCP_OVERWINDOW: the TCP data is out of window,
+ * the seq of the first byte exceed the right edges of receive
+ * window
+ */
+ SKB_DROP_REASON_TCP_OVERWINDOW,
+ /**
+ * @SKB_DROP_REASON_TCP_OFOMERGE: the data of skb is already in the ofo
+ * queue, corresponding to LINUX_MIB_TCPOFOMERGE
+ */
+ SKB_DROP_REASON_TCP_OFOMERGE,
+ /**
+ * @SKB_DROP_REASON_TCP_RFC7323_PAWS: PAWS check, corresponding to
+ * LINUX_MIB_PAWSESTABREJECTED
+ */
+ SKB_DROP_REASON_TCP_RFC7323_PAWS,
+ /** @SKB_DROP_REASON_TCP_INVALID_SEQUENCE: Not acceptable SEQ field */
+ SKB_DROP_REASON_TCP_INVALID_SEQUENCE,
+ /** @SKB_DROP_REASON_TCP_RESET: Invalid RST packet */
+ SKB_DROP_REASON_TCP_RESET,
+ /**
+ * @SKB_DROP_REASON_TCP_INVALID_SYN: Incoming packet has unexpected
+ * SYN flag
+ */
+ SKB_DROP_REASON_TCP_INVALID_SYN,
+ /** @SKB_DROP_REASON_TCP_CLOSE: TCP socket in CLOSE state */
+ SKB_DROP_REASON_TCP_CLOSE,
+ /** @SKB_DROP_REASON_TCP_FASTOPEN: dropped by FASTOPEN request socket */
+ SKB_DROP_REASON_TCP_FASTOPEN,
+ /** @SKB_DROP_REASON_TCP_OLD_ACK: TCP ACK is old, but in window */
+ SKB_DROP_REASON_TCP_OLD_ACK,
+ /** @SKB_DROP_REASON_TCP_TOO_OLD_ACK: TCP ACK is too old */
+ SKB_DROP_REASON_TCP_TOO_OLD_ACK,
+ /**
+ * @SKB_DROP_REASON_TCP_ACK_UNSENT_DATA: TCP ACK for data we haven't
+ * sent yet
+ */
+ SKB_DROP_REASON_TCP_ACK_UNSENT_DATA,
+ /** @SKB_DROP_REASON_TCP_OFO_QUEUE_PRUNE: pruned from TCP OFO queue */
+ SKB_DROP_REASON_TCP_OFO_QUEUE_PRUNE,
+ /** @SKB_DROP_REASON_TCP_OFO_DROP: data already in receive queue */
+ SKB_DROP_REASON_TCP_OFO_DROP,
+ /** @SKB_DROP_REASON_IP_OUTNOROUTES: route lookup failed */
+ SKB_DROP_REASON_IP_OUTNOROUTES,
+ /**
+ * @SKB_DROP_REASON_BPF_CGROUP_EGRESS: dropped by BPF_PROG_TYPE_CGROUP_SKB
+ * eBPF program
+ */
+ SKB_DROP_REASON_BPF_CGROUP_EGRESS,
+ /** @SKB_DROP_REASON_IPV6DISABLED: IPv6 is disabled on the device */
+ SKB_DROP_REASON_IPV6DISABLED,
+ /** @SKB_DROP_REASON_NEIGH_CREATEFAIL: failed to create neigh entry */
+ SKB_DROP_REASON_NEIGH_CREATEFAIL,
+ /** @SKB_DROP_REASON_NEIGH_FAILED: neigh entry in failed state */
+ SKB_DROP_REASON_NEIGH_FAILED,
+ /** @SKB_DROP_REASON_NEIGH_QUEUEFULL: arp_queue for neigh entry is full */
+ SKB_DROP_REASON_NEIGH_QUEUEFULL,
+ /** @SKB_DROP_REASON_NEIGH_DEAD: neigh entry is dead */
+ SKB_DROP_REASON_NEIGH_DEAD,
+ /** @SKB_DROP_REASON_TC_EGRESS: dropped in TC egress HOOK */
+ SKB_DROP_REASON_TC_EGRESS,
+ /**
+ * @SKB_DROP_REASON_QDISC_DROP: dropped by qdisc when packet outputting (
+ * failed to enqueue to current qdisc)
+ */
+ SKB_DROP_REASON_QDISC_DROP,
+ /**
+ * @SKB_DROP_REASON_CPU_BACKLOG: failed to enqueue the skb to the per CPU
+ * backlog queue. This can be caused by backlog queue full (see
+ * netdev_max_backlog in net.rst) or RPS flow limit
+ */
+ SKB_DROP_REASON_CPU_BACKLOG,
+ /** @SKB_DROP_REASON_XDP: dropped by XDP in input path */
+ SKB_DROP_REASON_XDP,
+ /** @SKB_DROP_REASON_TC_INGRESS: dropped in TC ingress HOOK */
+ SKB_DROP_REASON_TC_INGRESS,
+ /** @SKB_DROP_REASON_UNHANDLED_PROTO: protocol not implemented or not supported */
+ SKB_DROP_REASON_UNHANDLED_PROTO,
+ /** @SKB_DROP_REASON_SKB_CSUM: sk_buff checksum computation error */
+ SKB_DROP_REASON_SKB_CSUM,
+ /** @SKB_DROP_REASON_SKB_GSO_SEG: gso segmentation error */
+ SKB_DROP_REASON_SKB_GSO_SEG,
+ /**
+ * @SKB_DROP_REASON_SKB_UCOPY_FAULT: failed to copy data from user space,
+ * e.g., via zerocopy_sg_from_iter() or skb_orphan_frags_rx()
+ */
+ SKB_DROP_REASON_SKB_UCOPY_FAULT,
+ /** @SKB_DROP_REASON_DEV_HDR: device driver specific header/metadata is invalid */
+ SKB_DROP_REASON_DEV_HDR,
+ /**
+ * @SKB_DROP_REASON_DEV_READY: the device is not ready to xmit/recv due to
+ * any of its data structure that is not up/ready/initialized,
+ * e.g., the IFF_UP is not set, or driver specific tun->tfiles[txq]
+ * is not initialized
+ */
+ SKB_DROP_REASON_DEV_READY,
+ /** @SKB_DROP_REASON_FULL_RING: ring buffer is full */
+ SKB_DROP_REASON_FULL_RING,
+ /** @SKB_DROP_REASON_NOMEM: error due to OOM */
+ SKB_DROP_REASON_NOMEM,
+ /**
+ * @SKB_DROP_REASON_HDR_TRUNC: failed to trunc/extract the header from
+ * networking data, e.g., failed to pull the protocol header from
+ * frags via pskb_may_pull()
+ */
+ SKB_DROP_REASON_HDR_TRUNC,
+ /**
+ * @SKB_DROP_REASON_TAP_FILTER: dropped by (ebpf) filter directly attached
+ * to tun/tap, e.g., via TUNSETFILTEREBPF
+ */
+ SKB_DROP_REASON_TAP_FILTER,
+ /**
+ * @SKB_DROP_REASON_TAP_TXFILTER: dropped by tx filter implemented at
+ * tun/tap, e.g., check_filter()
+ */
+ SKB_DROP_REASON_TAP_TXFILTER,
+ /** @SKB_DROP_REASON_ICMP_CSUM: ICMP checksum error */
+ SKB_DROP_REASON_ICMP_CSUM,
+ /**
+ * @SKB_DROP_REASON_INVALID_PROTO: the packet doesn't follow RFC 2211,
+ * such as a broadcasts ICMP_TIMESTAMP
+ */
+ SKB_DROP_REASON_INVALID_PROTO,
+ /**
+ * @SKB_DROP_REASON_IP_INADDRERRORS: host unreachable, corresponding to
+ * IPSTATS_MIB_INADDRERRORS
+ */
+ SKB_DROP_REASON_IP_INADDRERRORS,
+ /**
+ * @SKB_DROP_REASON_IP_INNOROUTES: network unreachable, corresponding to
+ * IPSTATS_MIB_INADDRERRORS
+ */
+ SKB_DROP_REASON_IP_INNOROUTES,
+ /**
+ * @SKB_DROP_REASON_PKT_TOO_BIG: packet size is too big (maybe exceed the
+ * MTU)
+ */
+ SKB_DROP_REASON_PKT_TOO_BIG,
+ /**
+ * @SKB_DROP_REASON_MAX: the maximum of drop reason, which shouldn't be
+ * used as a real 'reason'
+ */
+ SKB_DROP_REASON_MAX,
+};
+
+#define SKB_DR_INIT(name, reason) \
+ enum skb_drop_reason name = SKB_DROP_REASON_##reason
+#define SKB_DR(name) \
+ SKB_DR_INIT(name, NOT_SPECIFIED)
+#define SKB_DR_SET(name, reason) \
+ (name = SKB_DROP_REASON_##reason)
+#define SKB_DR_OR(name, reason) \
+ do { \
+ if (name == SKB_DROP_REASON_NOT_SPECIFIED || \
+ name == SKB_NOT_DROPPED_YET) \
+ SKB_DR_SET(name, reason); \
+ } while (0)
+
+extern const char * const drop_reasons[];
+
+#endif
diff --git a/include/net/dsa.h b/include/net/dsa.h
index 63495e3443ac..ee369670e20e 100644
--- a/include/net/dsa.h
+++ b/include/net/dsa.h
@@ -44,10 +44,22 @@ struct phylink_link_state;
#define DSA_TAG_PROTO_KSZ8795_VALUE 14
#define DSA_TAG_PROTO_OCELOT_VALUE 15
#define DSA_TAG_PROTO_AR9331_VALUE 16
+#define DSA_TAG_PROTO_RTL4_A_VALUE 17
+#define DSA_TAG_PROTO_HELLCREEK_VALUE 18
+#define DSA_TAG_PROTO_XRS700X_VALUE 19
+#define DSA_TAG_PROTO_OCELOT_8021Q_VALUE 20
+#define DSA_TAG_PROTO_SEVILLE_VALUE 21
+#define DSA_TAG_PROTO_BRCM_LEGACY_VALUE 22
+#define DSA_TAG_PROTO_SJA1110_VALUE 23
+#define DSA_TAG_PROTO_RTL8_4_VALUE 24
+#define DSA_TAG_PROTO_RTL8_4T_VALUE 25
+#define DSA_TAG_PROTO_RZN1_A5PSW_VALUE 26
+#define DSA_TAG_PROTO_LAN937X_VALUE 27
enum dsa_tag_protocol {
DSA_TAG_PROTO_NONE = DSA_TAG_PROTO_NONE_VALUE,
DSA_TAG_PROTO_BRCM = DSA_TAG_PROTO_BRCM_VALUE,
+ DSA_TAG_PROTO_BRCM_LEGACY = DSA_TAG_PROTO_BRCM_LEGACY_VALUE,
DSA_TAG_PROTO_BRCM_PREPEND = DSA_TAG_PROTO_BRCM_PREPEND_VALUE,
DSA_TAG_PROTO_DSA = DSA_TAG_PROTO_DSA_VALUE,
DSA_TAG_PROTO_EDSA = DSA_TAG_PROTO_EDSA_VALUE,
@@ -63,48 +75,67 @@ enum dsa_tag_protocol {
DSA_TAG_PROTO_KSZ8795 = DSA_TAG_PROTO_KSZ8795_VALUE,
DSA_TAG_PROTO_OCELOT = DSA_TAG_PROTO_OCELOT_VALUE,
DSA_TAG_PROTO_AR9331 = DSA_TAG_PROTO_AR9331_VALUE,
+ DSA_TAG_PROTO_RTL4_A = DSA_TAG_PROTO_RTL4_A_VALUE,
+ DSA_TAG_PROTO_HELLCREEK = DSA_TAG_PROTO_HELLCREEK_VALUE,
+ DSA_TAG_PROTO_XRS700X = DSA_TAG_PROTO_XRS700X_VALUE,
+ DSA_TAG_PROTO_OCELOT_8021Q = DSA_TAG_PROTO_OCELOT_8021Q_VALUE,
+ DSA_TAG_PROTO_SEVILLE = DSA_TAG_PROTO_SEVILLE_VALUE,
+ DSA_TAG_PROTO_SJA1110 = DSA_TAG_PROTO_SJA1110_VALUE,
+ DSA_TAG_PROTO_RTL8_4 = DSA_TAG_PROTO_RTL8_4_VALUE,
+ DSA_TAG_PROTO_RTL8_4T = DSA_TAG_PROTO_RTL8_4T_VALUE,
+ DSA_TAG_PROTO_RZN1_A5PSW = DSA_TAG_PROTO_RZN1_A5PSW_VALUE,
+ DSA_TAG_PROTO_LAN937X = DSA_TAG_PROTO_LAN937X_VALUE,
};
-struct packet_type;
struct dsa_switch;
struct dsa_device_ops {
struct sk_buff *(*xmit)(struct sk_buff *skb, struct net_device *dev);
- struct sk_buff *(*rcv)(struct sk_buff *skb, struct net_device *dev,
- struct packet_type *pt);
- int (*flow_dissect)(const struct sk_buff *skb, __be16 *proto,
- int *offset);
- /* Used to determine which traffic should match the DSA filter in
- * eth_type_trans, and which, if any, should bypass it and be processed
- * as regular on the master net device.
- */
- bool (*filter)(const struct sk_buff *skb, struct net_device *dev);
- unsigned int overhead;
+ struct sk_buff *(*rcv)(struct sk_buff *skb, struct net_device *dev);
+ void (*flow_dissect)(const struct sk_buff *skb, __be16 *proto,
+ int *offset);
+ int (*connect)(struct dsa_switch *ds);
+ void (*disconnect)(struct dsa_switch *ds);
+ unsigned int needed_headroom;
+ unsigned int needed_tailroom;
const char *name;
enum dsa_tag_protocol proto;
+ /* Some tagging protocols either mangle or shift the destination MAC
+ * address, in which case the DSA master would drop packets on ingress
+ * if what it understands out of the destination MAC address is not in
+ * its RX filter.
+ */
+ bool promisc_on_master;
+};
+
+/* This structure defines the control interfaces that are overlayed by the
+ * DSA layer on top of the DSA CPU/management net_device instance. This is
+ * used by the core net_device layer while calling various net_device_ops
+ * function pointers.
+ */
+struct dsa_netdevice_ops {
+ int (*ndo_eth_ioctl)(struct net_device *dev, struct ifreq *ifr,
+ int cmd);
};
#define DSA_TAG_DRIVER_ALIAS "dsa_tag-"
#define MODULE_ALIAS_DSA_TAG_DRIVER(__proto) \
MODULE_ALIAS(DSA_TAG_DRIVER_ALIAS __stringify(__proto##_VALUE))
-struct dsa_skb_cb {
- struct sk_buff *clone;
-};
-
-struct __dsa_skb_cb {
- struct dsa_skb_cb cb;
- u8 priv[48 - sizeof(struct dsa_skb_cb)];
+struct dsa_lag {
+ struct net_device *dev;
+ unsigned int id;
+ struct mutex fdb_lock;
+ struct list_head fdbs;
+ refcount_t refcount;
};
-#define DSA_SKB_CB(skb) ((struct dsa_skb_cb *)((skb)->cb))
-
-#define DSA_SKB_CB_PRIV(skb) \
- ((void *)(skb)->cb + offsetof(struct __dsa_skb_cb, priv))
-
struct dsa_switch_tree {
struct list_head list;
+ /* List of switch ports */
+ struct list_head ports;
+
/* Notifier chain for switch-wide events */
struct raw_notifier_head nh;
@@ -114,6 +145,19 @@ struct dsa_switch_tree {
/* Number of switches attached to this tree */
struct kref refcount;
+ /* Maps offloaded LAG netdevs to a zero-based linear ID for
+ * drivers that need it.
+ */
+ struct dsa_lag **lags;
+
+ /* Tagging protocol operations */
+ const struct dsa_device_ops *tag_ops;
+
+ /* Default tagging protocol preferred by the switches in this
+ * tree.
+ */
+ enum dsa_tag_protocol default_proto;
+
/* Has this tree been applied to the hardware? */
bool setup;
@@ -123,16 +167,55 @@ struct dsa_switch_tree {
*/
struct dsa_platform_data *pd;
- /* List of switch ports */
- struct list_head ports;
-
/* List of DSA links composing the routing table */
struct list_head rtable;
+
+ /* Length of "lags" array */
+ unsigned int lags_len;
+
+ /* Track the largest switch index within a tree */
+ unsigned int last_switch;
};
-/* TC matchall action types, only mirroring for now */
+/* LAG IDs are one-based, the dst->lags array is zero-based */
+#define dsa_lags_foreach_id(_id, _dst) \
+ for ((_id) = 1; (_id) <= (_dst)->lags_len; (_id)++) \
+ if ((_dst)->lags[(_id) - 1])
+
+#define dsa_lag_foreach_port(_dp, _dst, _lag) \
+ list_for_each_entry((_dp), &(_dst)->ports, list) \
+ if (dsa_port_offloads_lag((_dp), (_lag)))
+
+#define dsa_hsr_foreach_port(_dp, _ds, _hsr) \
+ list_for_each_entry((_dp), &(_ds)->dst->ports, list) \
+ if ((_dp)->ds == (_ds) && (_dp)->hsr_dev == (_hsr))
+
+static inline struct dsa_lag *dsa_lag_by_id(struct dsa_switch_tree *dst,
+ unsigned int id)
+{
+ /* DSA LAG IDs are one-based, dst->lags is zero-based */
+ return dst->lags[id - 1];
+}
+
+static inline int dsa_lag_id(struct dsa_switch_tree *dst,
+ struct net_device *lag_dev)
+{
+ unsigned int id;
+
+ dsa_lags_foreach_id(id, dst) {
+ struct dsa_lag *lag = dsa_lag_by_id(dst, id);
+
+ if (lag->dev == lag_dev)
+ return lag->id;
+ }
+
+ return -ENODEV;
+}
+
+/* TC matchall action types */
enum dsa_port_mall_action_type {
DSA_PORT_MALL_MIRROR,
+ DSA_PORT_MALL_POLICER,
};
/* TC mirroring entry */
@@ -141,6 +224,12 @@ struct dsa_mall_mirror_tc_entry {
bool ingress;
};
+/* TC port policer entry */
+struct dsa_mall_policer_tc_entry {
+ u32 burst;
+ u64 rate_bytes_per_sec;
+};
+
/* TC matchall entry */
struct dsa_mall_tc_entry {
struct list_head list;
@@ -148,9 +237,16 @@ struct dsa_mall_tc_entry {
enum dsa_port_mall_action_type type;
union {
struct dsa_mall_mirror_tc_entry mirror;
+ struct dsa_mall_policer_tc_entry policer;
};
};
+struct dsa_bridge {
+ struct net_device *dev;
+ unsigned int num;
+ bool tx_fwd_offload;
+ refcount_t refcount;
+};
struct dsa_port {
/* A CPU port is physically connected to a master device.
@@ -161,14 +257,18 @@ struct dsa_port {
struct net_device *slave;
};
- /* CPU port tagging operations used by master or slave devices */
+ /* Copy of the tagging protocol operations, for quicker access
+ * in the data path. Valid only for the CPU ports.
+ */
const struct dsa_device_ops *tag_ops;
/* Copies for faster access in master receive hot path */
struct dsa_switch_tree *dst;
- struct sk_buff *(*rcv)(struct sk_buff *skb, struct net_device *dev,
- struct packet_type *pt);
- bool (*filter)(const struct sk_buff *skb, struct net_device *dev);
+ struct sk_buff *(*rcv)(struct sk_buff *skb, struct net_device *dev);
+
+ struct dsa_switch *ds;
+
+ unsigned int index;
enum {
DSA_PORT_TYPE_UNUSED = 0,
@@ -177,29 +277,45 @@ struct dsa_port {
DSA_PORT_TYPE_USER,
} type;
- struct dsa_switch *ds;
- unsigned int index;
const char *name;
struct dsa_port *cpu_dp;
- const char *mac;
+ u8 mac[ETH_ALEN];
+
+ u8 stp_state;
+
+ /* Warning: the following bit fields are not atomic, and updating them
+ * can only be done from code paths where concurrency is not possible
+ * (probe time or under rtnl_lock).
+ */
+ u8 vlan_filtering:1;
+
+ /* Managed by DSA on user ports and by drivers on CPU and DSA ports */
+ u8 learning:1;
+
+ u8 lag_tx_enabled:1;
+
+ /* Master state bits, valid only on CPU ports */
+ u8 master_admin_up:1;
+ u8 master_oper_up:1;
+
+ /* Valid only on user ports */
+ u8 cpu_port_in_lag:1;
+
+ u8 setup:1;
+
struct device_node *dn;
unsigned int ageing_time;
- bool vlan_filtering;
- u8 stp_state;
- struct net_device *bridge_dev;
+
+ struct dsa_bridge *bridge;
struct devlink_port devlink_port;
struct phylink *pl;
struct phylink_config pl_config;
+ struct dsa_lag *lag;
+ struct net_device *hsr_dev;
struct list_head list;
/*
- * Give the switch driver somewhere to hang its per-port private data
- * structures (accessible from the tagger).
- */
- void *priv;
-
- /*
* Original copy of the master netdev ethtool_ops
*/
const struct ethtool_ops *orig_ethtool_ops;
@@ -207,9 +323,18 @@ struct dsa_port {
/*
* Original copy of the master netdev net_device_ops
*/
- const struct net_device_ops *orig_ndo_ops;
+ const struct dsa_netdevice_ops *netdev_ops;
- bool setup;
+ /* List of MAC addresses that must be forwarded on this port.
+ * These are only valid on CPU ports and DSA links.
+ */
+ struct mutex addr_lists_lock;
+ struct list_head fdbs;
+ struct list_head mdbs;
+
+ /* List of VLANs that CPU and DSA ports are members of. */
+ struct mutex vlans_lock;
+ struct list_head vlans;
};
/* TODO: ideally DSA ports would have a single dp->link_dp member,
@@ -223,9 +348,37 @@ struct dsa_link {
struct list_head list;
};
-struct dsa_switch {
- bool setup;
+enum dsa_db_type {
+ DSA_DB_PORT,
+ DSA_DB_LAG,
+ DSA_DB_BRIDGE,
+};
+
+struct dsa_db {
+ enum dsa_db_type type;
+
+ union {
+ const struct dsa_port *dp;
+ struct dsa_lag lag;
+ struct dsa_bridge bridge;
+ };
+};
+
+struct dsa_mac_addr {
+ unsigned char addr[ETH_ALEN];
+ u16 vid;
+ refcount_t refcount;
+ struct list_head list;
+ struct dsa_db db;
+};
+
+struct dsa_vlan {
+ u16 vid;
+ refcount_t refcount;
+ struct list_head list;
+};
+struct dsa_switch {
struct device *dev;
/*
@@ -234,6 +387,59 @@ struct dsa_switch {
struct dsa_switch_tree *dst;
unsigned int index;
+ /* Warning: the following bit fields are not atomic, and updating them
+ * can only be done from code paths where concurrency is not possible
+ * (probe time or under rtnl_lock).
+ */
+ u32 setup:1;
+
+ /* Disallow bridge core from requesting different VLAN awareness
+ * settings on ports if not hardware-supported
+ */
+ u32 vlan_filtering_is_global:1;
+
+ /* Keep VLAN filtering enabled on ports not offloading any upper */
+ u32 needs_standalone_vlan_filtering:1;
+
+ /* Pass .port_vlan_add and .port_vlan_del to drivers even for bridges
+ * that have vlan_filtering=0. All drivers should ideally set this (and
+ * then the option would get removed), but it is unknown whether this
+ * would break things or not.
+ */
+ u32 configure_vlan_while_not_filtering:1;
+
+ /* If the switch driver always programs the CPU port as egress tagged
+ * despite the VLAN configuration indicating otherwise, then setting
+ * @untag_bridge_pvid will force the DSA receive path to pop the
+ * bridge's default_pvid VLAN tagged frames to offer a consistent
+ * behavior between a vlan_filtering=0 and vlan_filtering=1 bridge
+ * device.
+ */
+ u32 untag_bridge_pvid:1;
+
+ /* Let DSA manage the FDB entries towards the
+ * CPU, based on the software bridge database.
+ */
+ u32 assisted_learning_on_cpu_port:1;
+
+ /* In case vlan_filtering_is_global is set, the VLAN awareness state
+ * should be retrieved from here and not from the per-port settings.
+ */
+ u32 vlan_filtering:1;
+
+ /* For switches that only have the MRU configurable. To ensure the
+ * configured MTU is not exceeded, normalization of MRU on all bridged
+ * interfaces is needed.
+ */
+ u32 mtu_enforcement_ingress:1;
+
+ /* Drivers that isolate the FDBs of multiple bridges must set this
+ * to true to receive the bridge as an argument in .port_fdb_{add,del}
+ * and .port_mdb_{add,del}. Otherwise, the bridge.num will always be
+ * passed as zero.
+ */
+ u32 fdb_isolation:1;
+
/* Listener for switch fabric events */
struct notifier_block nb;
@@ -243,6 +449,8 @@ struct dsa_switch {
*/
void *priv;
+ void *tagger_data;
+
/*
* Configuration data for this switch.
*/
@@ -263,28 +471,31 @@ struct dsa_switch {
unsigned int ageing_time_min;
unsigned int ageing_time_max;
+ /* Storage for drivers using tag_8021q */
+ struct dsa_8021q_context *tag_8021q_ctx;
+
/* devlink used to represent this switch device */
struct devlink *devlink;
/* Number of switch port queues */
unsigned int num_tx_queues;
- /* Disallow bridge core from requesting different VLAN awareness
- * settings on ports if not hardware-supported
- */
- bool vlan_filtering_is_global;
-
- /* In case vlan_filtering_is_global is set, the VLAN awareness state
- * should be retrieved from here and not from the per-port settings.
+ /* Drivers that benefit from having an ID associated with each
+ * offloaded LAG should set this to the maximum number of
+ * supported IDs. DSA will then maintain a mapping of _at
+ * least_ these many IDs, accessible to drivers via
+ * dsa_lag_id().
*/
- bool vlan_filtering;
+ unsigned int num_lag_ids;
- /* MAC PCS does not provide link state change interrupt, and requires
- * polling. Flag passed on to PHYLINK.
+ /* Drivers that support bridge forwarding offload or FDB isolation
+ * should set this to the maximum number of bridges spanning the same
+ * switch tree (or all trees, in the case of cross-tree bridging
+ * support) that can be offloaded.
*/
- bool pcs_poll;
+ unsigned int max_num_bridges;
- size_t num_ports;
+ unsigned int num_ports;
};
static inline struct dsa_port *dsa_to_port(struct dsa_switch *ds, int p)
@@ -299,6 +510,32 @@ static inline struct dsa_port *dsa_to_port(struct dsa_switch *ds, int p)
return NULL;
}
+static inline bool dsa_port_is_dsa(struct dsa_port *port)
+{
+ return port->type == DSA_PORT_TYPE_DSA;
+}
+
+static inline bool dsa_port_is_cpu(struct dsa_port *port)
+{
+ return port->type == DSA_PORT_TYPE_CPU;
+}
+
+static inline bool dsa_port_is_user(struct dsa_port *dp)
+{
+ return dp->type == DSA_PORT_TYPE_USER;
+}
+
+static inline bool dsa_port_is_unused(struct dsa_port *dp)
+{
+ return dp->type == DSA_PORT_TYPE_UNUSED;
+}
+
+static inline bool dsa_port_master_is_operational(struct dsa_port *dp)
+{
+ return dsa_port_is_cpu(dp) && dp->master_admin_up &&
+ dp->master_oper_up;
+}
+
static inline bool dsa_is_unused_port(struct dsa_switch *ds, int p)
{
return dsa_to_port(ds, p)->type == DSA_PORT_TYPE_UNUSED;
@@ -319,14 +556,64 @@ static inline bool dsa_is_user_port(struct dsa_switch *ds, int p)
return dsa_to_port(ds, p)->type == DSA_PORT_TYPE_USER;
}
+#define dsa_tree_for_each_user_port(_dp, _dst) \
+ list_for_each_entry((_dp), &(_dst)->ports, list) \
+ if (dsa_port_is_user((_dp)))
+
+#define dsa_tree_for_each_user_port_continue_reverse(_dp, _dst) \
+ list_for_each_entry_continue_reverse((_dp), &(_dst)->ports, list) \
+ if (dsa_port_is_user((_dp)))
+
+#define dsa_tree_for_each_cpu_port(_dp, _dst) \
+ list_for_each_entry((_dp), &(_dst)->ports, list) \
+ if (dsa_port_is_cpu((_dp)))
+
+#define dsa_switch_for_each_port(_dp, _ds) \
+ list_for_each_entry((_dp), &(_ds)->dst->ports, list) \
+ if ((_dp)->ds == (_ds))
+
+#define dsa_switch_for_each_port_safe(_dp, _next, _ds) \
+ list_for_each_entry_safe((_dp), (_next), &(_ds)->dst->ports, list) \
+ if ((_dp)->ds == (_ds))
+
+#define dsa_switch_for_each_port_continue_reverse(_dp, _ds) \
+ list_for_each_entry_continue_reverse((_dp), &(_ds)->dst->ports, list) \
+ if ((_dp)->ds == (_ds))
+
+#define dsa_switch_for_each_available_port(_dp, _ds) \
+ dsa_switch_for_each_port((_dp), (_ds)) \
+ if (!dsa_port_is_unused((_dp)))
+
+#define dsa_switch_for_each_user_port(_dp, _ds) \
+ dsa_switch_for_each_port((_dp), (_ds)) \
+ if (dsa_port_is_user((_dp)))
+
+#define dsa_switch_for_each_cpu_port(_dp, _ds) \
+ dsa_switch_for_each_port((_dp), (_ds)) \
+ if (dsa_port_is_cpu((_dp)))
+
+#define dsa_switch_for_each_cpu_port_continue_reverse(_dp, _ds) \
+ dsa_switch_for_each_port_continue_reverse((_dp), (_ds)) \
+ if (dsa_port_is_cpu((_dp)))
+
static inline u32 dsa_user_ports(struct dsa_switch *ds)
{
+ struct dsa_port *dp;
u32 mask = 0;
- int p;
- for (p = 0; p < ds->num_ports; p++)
- if (dsa_is_user_port(ds, p))
- mask |= BIT(p);
+ dsa_switch_for_each_user_port(dp, ds)
+ mask |= BIT(dp->index);
+
+ return mask;
+}
+
+static inline u32 dsa_cpu_ports(struct dsa_switch *ds)
+{
+ struct dsa_port *cpu_dp;
+ u32 mask = 0;
+
+ dsa_switch_for_each_cpu_port(cpu_dp, ds)
+ mask |= BIT(cpu_dp->index);
return mask;
}
@@ -366,6 +653,50 @@ static inline unsigned int dsa_upstream_port(struct dsa_switch *ds, int port)
return dsa_towards_port(ds, cpu_dp->ds->index, cpu_dp->index);
}
+/* Return true if this is the local port used to reach the CPU port */
+static inline bool dsa_is_upstream_port(struct dsa_switch *ds, int port)
+{
+ if (dsa_is_unused_port(ds, port))
+ return false;
+
+ return port == dsa_upstream_port(ds, port);
+}
+
+/* Return true if this is a DSA port leading away from the CPU */
+static inline bool dsa_is_downstream_port(struct dsa_switch *ds, int port)
+{
+ return dsa_is_dsa_port(ds, port) && !dsa_is_upstream_port(ds, port);
+}
+
+/* Return the local port used to reach the CPU port */
+static inline unsigned int dsa_switch_upstream_port(struct dsa_switch *ds)
+{
+ struct dsa_port *dp;
+
+ dsa_switch_for_each_available_port(dp, ds) {
+ return dsa_upstream_port(ds, dp->index);
+ }
+
+ return ds->num_ports;
+}
+
+/* Return true if @upstream_ds is an upstream switch of @downstream_ds, meaning
+ * that the routing port from @downstream_ds to @upstream_ds is also the port
+ * which @downstream_ds uses to reach its dedicated CPU.
+ */
+static inline bool dsa_switch_is_upstream_of(struct dsa_switch *upstream_ds,
+ struct dsa_switch *downstream_ds)
+{
+ int routing_port;
+
+ if (upstream_ds == downstream_ds)
+ return true;
+
+ routing_port = dsa_routing_port(downstream_ds, upstream_ds->index);
+
+ return dsa_is_upstream_port(downstream_ds, routing_port);
+}
+
static inline bool dsa_port_is_vlan_filtering(const struct dsa_port *dp)
{
const struct dsa_switch *ds = dp->ds;
@@ -376,15 +707,157 @@ static inline bool dsa_port_is_vlan_filtering(const struct dsa_port *dp)
return dp->vlan_filtering;
}
+static inline unsigned int dsa_port_lag_id_get(struct dsa_port *dp)
+{
+ return dp->lag ? dp->lag->id : 0;
+}
+
+static inline struct net_device *dsa_port_lag_dev_get(struct dsa_port *dp)
+{
+ return dp->lag ? dp->lag->dev : NULL;
+}
+
+static inline bool dsa_port_offloads_lag(struct dsa_port *dp,
+ const struct dsa_lag *lag)
+{
+ return dsa_port_lag_dev_get(dp) == lag->dev;
+}
+
+static inline struct net_device *dsa_port_to_master(const struct dsa_port *dp)
+{
+ if (dp->cpu_port_in_lag)
+ return dsa_port_lag_dev_get(dp->cpu_dp);
+
+ return dp->cpu_dp->master;
+}
+
+static inline
+struct net_device *dsa_port_to_bridge_port(const struct dsa_port *dp)
+{
+ if (!dp->bridge)
+ return NULL;
+
+ if (dp->lag)
+ return dp->lag->dev;
+ else if (dp->hsr_dev)
+ return dp->hsr_dev;
+
+ return dp->slave;
+}
+
+static inline struct net_device *
+dsa_port_bridge_dev_get(const struct dsa_port *dp)
+{
+ return dp->bridge ? dp->bridge->dev : NULL;
+}
+
+static inline unsigned int dsa_port_bridge_num_get(struct dsa_port *dp)
+{
+ return dp->bridge ? dp->bridge->num : 0;
+}
+
+static inline bool dsa_port_bridge_same(const struct dsa_port *a,
+ const struct dsa_port *b)
+{
+ struct net_device *br_a = dsa_port_bridge_dev_get(a);
+ struct net_device *br_b = dsa_port_bridge_dev_get(b);
+
+ /* Standalone ports are not in the same bridge with one another */
+ return (!br_a || !br_b) ? false : (br_a == br_b);
+}
+
+static inline bool dsa_port_offloads_bridge_port(struct dsa_port *dp,
+ const struct net_device *dev)
+{
+ return dsa_port_to_bridge_port(dp) == dev;
+}
+
+static inline bool
+dsa_port_offloads_bridge_dev(struct dsa_port *dp,
+ const struct net_device *bridge_dev)
+{
+ /* DSA ports connected to a bridge, and event was emitted
+ * for the bridge.
+ */
+ return dsa_port_bridge_dev_get(dp) == bridge_dev;
+}
+
+static inline bool dsa_port_offloads_bridge(struct dsa_port *dp,
+ const struct dsa_bridge *bridge)
+{
+ return dsa_port_bridge_dev_get(dp) == bridge->dev;
+}
+
+/* Returns true if any port of this tree offloads the given net_device */
+static inline bool dsa_tree_offloads_bridge_port(struct dsa_switch_tree *dst,
+ const struct net_device *dev)
+{
+ struct dsa_port *dp;
+
+ list_for_each_entry(dp, &dst->ports, list)
+ if (dsa_port_offloads_bridge_port(dp, dev))
+ return true;
+
+ return false;
+}
+
+/* Returns true if any port of this tree offloads the given bridge */
+static inline bool
+dsa_tree_offloads_bridge_dev(struct dsa_switch_tree *dst,
+ const struct net_device *bridge_dev)
+{
+ struct dsa_port *dp;
+
+ list_for_each_entry(dp, &dst->ports, list)
+ if (dsa_port_offloads_bridge_dev(dp, bridge_dev))
+ return true;
+
+ return false;
+}
+
+static inline bool dsa_port_tree_same(const struct dsa_port *a,
+ const struct dsa_port *b)
+{
+ return a->ds->dst == b->ds->dst;
+}
+
typedef int dsa_fdb_dump_cb_t(const unsigned char *addr, u16 vid,
bool is_static, void *data);
struct dsa_switch_ops {
+ /*
+ * Tagging protocol helpers called for the CPU ports and DSA links.
+ * @get_tag_protocol retrieves the initial tagging protocol and is
+ * mandatory. Switches which can operate using multiple tagging
+ * protocols should implement @change_tag_protocol and report in
+ * @get_tag_protocol the tagger in current use.
+ */
enum dsa_tag_protocol (*get_tag_protocol)(struct dsa_switch *ds,
int port,
enum dsa_tag_protocol mprot);
+ int (*change_tag_protocol)(struct dsa_switch *ds,
+ enum dsa_tag_protocol proto);
+ /*
+ * Method for switch drivers to connect to the tagging protocol driver
+ * in current use. The switch driver can provide handlers for certain
+ * types of packets for switch management.
+ */
+ int (*connect_tag_protocol)(struct dsa_switch *ds,
+ enum dsa_tag_protocol proto);
+
+ int (*port_change_master)(struct dsa_switch *ds, int port,
+ struct net_device *master,
+ struct netlink_ext_ack *extack);
+ /* Optional switch-wide initialization and destruction methods */
int (*setup)(struct dsa_switch *ds);
void (*teardown)(struct dsa_switch *ds);
+
+ /* Per-port initialization and destruction methods. Mandatory if the
+ * driver registers devlink port regions, optional otherwise.
+ */
+ int (*port_setup)(struct dsa_switch *ds, int port);
+ void (*port_teardown)(struct dsa_switch *ds, int port);
+
u32 (*get_phy_flags)(struct dsa_switch *ds, int port);
/*
@@ -405,9 +878,14 @@ struct dsa_switch_ops {
/*
* PHYLINK integration
*/
+ void (*phylink_get_caps)(struct dsa_switch *ds, int port,
+ struct phylink_config *config);
void (*phylink_validate)(struct dsa_switch *ds, int port,
unsigned long *supported,
struct phylink_link_state *state);
+ struct phylink_pcs *(*phylink_mac_select_pcs)(struct dsa_switch *ds,
+ int port,
+ phy_interface_t iface);
int (*phylink_mac_link_state)(struct dsa_switch *ds, int port,
struct phylink_link_state *state);
void (*phylink_mac_config)(struct dsa_switch *ds, int port,
@@ -420,11 +898,13 @@ struct dsa_switch_ops {
void (*phylink_mac_link_up)(struct dsa_switch *ds, int port,
unsigned int mode,
phy_interface_t interface,
- struct phy_device *phydev);
+ struct phy_device *phydev,
+ int speed, int duplex,
+ bool tx_pause, bool rx_pause);
void (*phylink_fixed_state)(struct dsa_switch *ds, int port,
struct phylink_link_state *state);
/*
- * ethtool hardware statistics.
+ * Port statistics counters.
*/
void (*get_strings)(struct dsa_switch *ds, int port,
u32 stringset, uint8_t *data);
@@ -433,6 +913,21 @@ struct dsa_switch_ops {
int (*get_sset_count)(struct dsa_switch *ds, int port, int sset);
void (*get_ethtool_phy_stats)(struct dsa_switch *ds,
int port, uint64_t *data);
+ void (*get_eth_phy_stats)(struct dsa_switch *ds, int port,
+ struct ethtool_eth_phy_stats *phy_stats);
+ void (*get_eth_mac_stats)(struct dsa_switch *ds, int port,
+ struct ethtool_eth_mac_stats *mac_stats);
+ void (*get_eth_ctrl_stats)(struct dsa_switch *ds, int port,
+ struct ethtool_eth_ctrl_stats *ctrl_stats);
+ void (*get_rmon_stats)(struct dsa_switch *ds, int port,
+ struct ethtool_rmon_stats *rmon_stats,
+ const struct ethtool_rmon_hist_range **ranges);
+ void (*get_stats64)(struct dsa_switch *ds, int port,
+ struct rtnl_link_stats64 *s);
+ void (*get_pause_stats)(struct dsa_switch *ds, int port,
+ struct ethtool_pause_stats *pause_stats);
+ void (*self_test)(struct dsa_switch *ds, int port,
+ struct ethtool_test *etest, u64 *data);
/*
* ethtool Wake-on-LAN
@@ -449,6 +944,18 @@ struct dsa_switch_ops {
struct ethtool_ts_info *ts);
/*
+ * DCB ops
+ */
+ int (*port_get_default_prio)(struct dsa_switch *ds, int port);
+ int (*port_set_default_prio)(struct dsa_switch *ds, int port,
+ u8 prio);
+ int (*port_get_dscp_prio)(struct dsa_switch *ds, int port, u8 dscp);
+ int (*port_add_dscp_prio)(struct dsa_switch *ds, int port, u8 dscp,
+ u8 prio);
+ int (*port_del_dscp_prio)(struct dsa_switch *ds, int port, u8 dscp,
+ u8 prio);
+
+ /*
* Suspend and resume
*/
int (*suspend)(struct dsa_switch *ds);
@@ -484,49 +991,77 @@ struct dsa_switch_ops {
struct ethtool_regs *regs, void *p);
/*
+ * Upper device tracking.
+ */
+ int (*port_prechangeupper)(struct dsa_switch *ds, int port,
+ struct netdev_notifier_changeupper_info *info);
+
+ /*
* Bridge integration
*/
int (*set_ageing_time)(struct dsa_switch *ds, unsigned int msecs);
int (*port_bridge_join)(struct dsa_switch *ds, int port,
- struct net_device *bridge);
+ struct dsa_bridge bridge,
+ bool *tx_fwd_offload,
+ struct netlink_ext_ack *extack);
void (*port_bridge_leave)(struct dsa_switch *ds, int port,
- struct net_device *bridge);
+ struct dsa_bridge bridge);
void (*port_stp_state_set)(struct dsa_switch *ds, int port,
u8 state);
+ int (*port_mst_state_set)(struct dsa_switch *ds, int port,
+ const struct switchdev_mst_state *state);
void (*port_fast_age)(struct dsa_switch *ds, int port);
- int (*port_egress_floods)(struct dsa_switch *ds, int port,
- bool unicast, bool multicast);
+ int (*port_vlan_fast_age)(struct dsa_switch *ds, int port, u16 vid);
+ int (*port_pre_bridge_flags)(struct dsa_switch *ds, int port,
+ struct switchdev_brport_flags flags,
+ struct netlink_ext_ack *extack);
+ int (*port_bridge_flags)(struct dsa_switch *ds, int port,
+ struct switchdev_brport_flags flags,
+ struct netlink_ext_ack *extack);
+ void (*port_set_host_flood)(struct dsa_switch *ds, int port,
+ bool uc, bool mc);
/*
* VLAN support
*/
int (*port_vlan_filtering)(struct dsa_switch *ds, int port,
- bool vlan_filtering);
- int (*port_vlan_prepare)(struct dsa_switch *ds, int port,
- const struct switchdev_obj_port_vlan *vlan);
- void (*port_vlan_add)(struct dsa_switch *ds, int port,
- const struct switchdev_obj_port_vlan *vlan);
+ bool vlan_filtering,
+ struct netlink_ext_ack *extack);
+ int (*port_vlan_add)(struct dsa_switch *ds, int port,
+ const struct switchdev_obj_port_vlan *vlan,
+ struct netlink_ext_ack *extack);
int (*port_vlan_del)(struct dsa_switch *ds, int port,
const struct switchdev_obj_port_vlan *vlan);
+ int (*vlan_msti_set)(struct dsa_switch *ds, struct dsa_bridge bridge,
+ const struct switchdev_vlan_msti *msti);
+
/*
* Forwarding database
*/
int (*port_fdb_add)(struct dsa_switch *ds, int port,
- const unsigned char *addr, u16 vid);
+ const unsigned char *addr, u16 vid,
+ struct dsa_db db);
int (*port_fdb_del)(struct dsa_switch *ds, int port,
- const unsigned char *addr, u16 vid);
+ const unsigned char *addr, u16 vid,
+ struct dsa_db db);
int (*port_fdb_dump)(struct dsa_switch *ds, int port,
dsa_fdb_dump_cb_t *cb, void *data);
+ int (*lag_fdb_add)(struct dsa_switch *ds, struct dsa_lag lag,
+ const unsigned char *addr, u16 vid,
+ struct dsa_db db);
+ int (*lag_fdb_del)(struct dsa_switch *ds, struct dsa_lag lag,
+ const unsigned char *addr, u16 vid,
+ struct dsa_db db);
/*
* Multicast database
*/
- int (*port_mdb_prepare)(struct dsa_switch *ds, int port,
- const struct switchdev_obj_port_mdb *mdb);
- void (*port_mdb_add)(struct dsa_switch *ds, int port,
- const struct switchdev_obj_port_mdb *mdb);
+ int (*port_mdb_add)(struct dsa_switch *ds, int port,
+ const struct switchdev_obj_port_mdb *mdb,
+ struct dsa_db db);
int (*port_mdb_del)(struct dsa_switch *ds, int port,
- const struct switchdev_obj_port_mdb *mdb);
+ const struct switchdev_obj_port_mdb *mdb,
+ struct dsa_db db);
/*
* RXNFC
*/
@@ -538,21 +1073,41 @@ struct dsa_switch_ops {
/*
* TC integration
*/
+ int (*cls_flower_add)(struct dsa_switch *ds, int port,
+ struct flow_cls_offload *cls, bool ingress);
+ int (*cls_flower_del)(struct dsa_switch *ds, int port,
+ struct flow_cls_offload *cls, bool ingress);
+ int (*cls_flower_stats)(struct dsa_switch *ds, int port,
+ struct flow_cls_offload *cls, bool ingress);
int (*port_mirror_add)(struct dsa_switch *ds, int port,
struct dsa_mall_mirror_tc_entry *mirror,
- bool ingress);
+ bool ingress, struct netlink_ext_ack *extack);
void (*port_mirror_del)(struct dsa_switch *ds, int port,
struct dsa_mall_mirror_tc_entry *mirror);
+ int (*port_policer_add)(struct dsa_switch *ds, int port,
+ struct dsa_mall_policer_tc_entry *policer);
+ void (*port_policer_del)(struct dsa_switch *ds, int port);
int (*port_setup_tc)(struct dsa_switch *ds, int port,
enum tc_setup_type type, void *type_data);
/*
* Cross-chip operations
*/
- int (*crosschip_bridge_join)(struct dsa_switch *ds, int sw_index,
- int port, struct net_device *br);
- void (*crosschip_bridge_leave)(struct dsa_switch *ds, int sw_index,
- int port, struct net_device *br);
+ int (*crosschip_bridge_join)(struct dsa_switch *ds, int tree_index,
+ int sw_index, int port,
+ struct dsa_bridge bridge,
+ struct netlink_ext_ack *extack);
+ void (*crosschip_bridge_leave)(struct dsa_switch *ds, int tree_index,
+ int sw_index, int port,
+ struct dsa_bridge bridge);
+ int (*crosschip_lag_change)(struct dsa_switch *ds, int sw_index,
+ int port);
+ int (*crosschip_lag_join)(struct dsa_switch *ds, int sw_index,
+ int port, struct dsa_lag lag,
+ struct netdev_lag_upper_info *info,
+ struct netlink_ext_ack *extack);
+ int (*crosschip_lag_leave)(struct dsa_switch *ds, int sw_index,
+ int port, struct dsa_lag lag);
/*
* PTP functionality
@@ -561,16 +1116,108 @@ struct dsa_switch_ops {
struct ifreq *ifr);
int (*port_hwtstamp_set)(struct dsa_switch *ds, int port,
struct ifreq *ifr);
- bool (*port_txtstamp)(struct dsa_switch *ds, int port,
- struct sk_buff *clone, unsigned int type);
+ void (*port_txtstamp)(struct dsa_switch *ds, int port,
+ struct sk_buff *skb);
bool (*port_rxtstamp)(struct dsa_switch *ds, int port,
struct sk_buff *skb, unsigned int type);
- /* Devlink parameters */
+ /* Devlink parameters, etc */
int (*devlink_param_get)(struct dsa_switch *ds, u32 id,
struct devlink_param_gset_ctx *ctx);
int (*devlink_param_set)(struct dsa_switch *ds, u32 id,
struct devlink_param_gset_ctx *ctx);
+ int (*devlink_info_get)(struct dsa_switch *ds,
+ struct devlink_info_req *req,
+ struct netlink_ext_ack *extack);
+ int (*devlink_sb_pool_get)(struct dsa_switch *ds,
+ unsigned int sb_index, u16 pool_index,
+ struct devlink_sb_pool_info *pool_info);
+ int (*devlink_sb_pool_set)(struct dsa_switch *ds, unsigned int sb_index,
+ u16 pool_index, u32 size,
+ enum devlink_sb_threshold_type threshold_type,
+ struct netlink_ext_ack *extack);
+ int (*devlink_sb_port_pool_get)(struct dsa_switch *ds, int port,
+ unsigned int sb_index, u16 pool_index,
+ u32 *p_threshold);
+ int (*devlink_sb_port_pool_set)(struct dsa_switch *ds, int port,
+ unsigned int sb_index, u16 pool_index,
+ u32 threshold,
+ struct netlink_ext_ack *extack);
+ int (*devlink_sb_tc_pool_bind_get)(struct dsa_switch *ds, int port,
+ unsigned int sb_index, u16 tc_index,
+ enum devlink_sb_pool_type pool_type,
+ u16 *p_pool_index, u32 *p_threshold);
+ int (*devlink_sb_tc_pool_bind_set)(struct dsa_switch *ds, int port,
+ unsigned int sb_index, u16 tc_index,
+ enum devlink_sb_pool_type pool_type,
+ u16 pool_index, u32 threshold,
+ struct netlink_ext_ack *extack);
+ int (*devlink_sb_occ_snapshot)(struct dsa_switch *ds,
+ unsigned int sb_index);
+ int (*devlink_sb_occ_max_clear)(struct dsa_switch *ds,
+ unsigned int sb_index);
+ int (*devlink_sb_occ_port_pool_get)(struct dsa_switch *ds, int port,
+ unsigned int sb_index, u16 pool_index,
+ u32 *p_cur, u32 *p_max);
+ int (*devlink_sb_occ_tc_port_bind_get)(struct dsa_switch *ds, int port,
+ unsigned int sb_index, u16 tc_index,
+ enum devlink_sb_pool_type pool_type,
+ u32 *p_cur, u32 *p_max);
+
+ /*
+ * MTU change functionality. Switches can also adjust their MRU through
+ * this method. By MTU, one understands the SDU (L2 payload) length.
+ * If the switch needs to account for the DSA tag on the CPU port, this
+ * method needs to do so privately.
+ */
+ int (*port_change_mtu)(struct dsa_switch *ds, int port,
+ int new_mtu);
+ int (*port_max_mtu)(struct dsa_switch *ds, int port);
+
+ /*
+ * LAG integration
+ */
+ int (*port_lag_change)(struct dsa_switch *ds, int port);
+ int (*port_lag_join)(struct dsa_switch *ds, int port,
+ struct dsa_lag lag,
+ struct netdev_lag_upper_info *info,
+ struct netlink_ext_ack *extack);
+ int (*port_lag_leave)(struct dsa_switch *ds, int port,
+ struct dsa_lag lag);
+
+ /*
+ * HSR integration
+ */
+ int (*port_hsr_join)(struct dsa_switch *ds, int port,
+ struct net_device *hsr);
+ int (*port_hsr_leave)(struct dsa_switch *ds, int port,
+ struct net_device *hsr);
+
+ /*
+ * MRP integration
+ */
+ int (*port_mrp_add)(struct dsa_switch *ds, int port,
+ const struct switchdev_obj_mrp *mrp);
+ int (*port_mrp_del)(struct dsa_switch *ds, int port,
+ const struct switchdev_obj_mrp *mrp);
+ int (*port_mrp_add_ring_role)(struct dsa_switch *ds, int port,
+ const struct switchdev_obj_ring_role_mrp *mrp);
+ int (*port_mrp_del_ring_role)(struct dsa_switch *ds, int port,
+ const struct switchdev_obj_ring_role_mrp *mrp);
+
+ /*
+ * tag_8021q operations
+ */
+ int (*tag_8021q_vlan_add)(struct dsa_switch *ds, int port, u16 vid,
+ u16 flags);
+ int (*tag_8021q_vlan_del)(struct dsa_switch *ds, int port, u16 vid);
+
+ /*
+ * DSA master tracking operations
+ */
+ void (*master_state_change)(struct dsa_switch *ds,
+ const struct net_device *master,
+ bool operational);
};
#define DSA_DEVLINK_PARAM_DRIVER(_id, _name, _type, _cmodes) \
@@ -602,11 +1249,44 @@ void dsa_devlink_resource_occ_get_register(struct dsa_switch *ds,
void *occ_get_priv);
void dsa_devlink_resource_occ_get_unregister(struct dsa_switch *ds,
u64 resource_id);
+struct devlink_region *
+dsa_devlink_region_create(struct dsa_switch *ds,
+ const struct devlink_region_ops *ops,
+ u32 region_max_snapshots, u64 region_size);
+struct devlink_region *
+dsa_devlink_port_region_create(struct dsa_switch *ds,
+ int port,
+ const struct devlink_port_region_ops *ops,
+ u32 region_max_snapshots, u64 region_size);
+void dsa_devlink_region_destroy(struct devlink_region *region);
+
+struct dsa_port *dsa_port_from_netdev(struct net_device *netdev);
struct dsa_devlink_priv {
struct dsa_switch *ds;
};
+static inline struct dsa_switch *dsa_devlink_to_ds(struct devlink *dl)
+{
+ struct dsa_devlink_priv *dl_priv = devlink_priv(dl);
+
+ return dl_priv->ds;
+}
+
+static inline
+struct dsa_switch *dsa_devlink_port_to_ds(struct devlink_port *port)
+{
+ struct devlink *dl = port->devlink;
+ struct dsa_devlink_priv *dl_priv = devlink_priv(dl);
+
+ return dl_priv->ds;
+}
+
+static inline int dsa_devlink_port_to_port(struct devlink_port *port)
+{
+ return port->index;
+}
+
struct dsa_switch_driver {
struct list_head list;
const struct dsa_switch_ops *ops;
@@ -614,8 +1294,15 @@ struct dsa_switch_driver {
struct net_device *dsa_dev_to_net_device(struct device *dev);
+bool dsa_fdb_present_in_other_db(struct dsa_switch *ds, int port,
+ const unsigned char *addr, u16 vid,
+ struct dsa_db db);
+bool dsa_mdb_present_in_other_db(struct dsa_switch *ds, int port,
+ const struct switchdev_obj_port_mdb *mdb,
+ struct dsa_db db);
+
/* Keep inline for faster access in hot path */
-static inline bool netdev_uses_dsa(struct net_device *dev)
+static inline bool netdev_uses_dsa(const struct net_device *dev)
{
#if IS_ENABLED(CONFIG_NET_DSA)
return dev->dsa_ptr && dev->dsa_ptr->rcv;
@@ -623,86 +1310,97 @@ static inline bool netdev_uses_dsa(struct net_device *dev)
return false;
}
-static inline bool dsa_can_decode(const struct sk_buff *skb,
- struct net_device *dev)
+/* All DSA tags that push the EtherType to the right (basically all except tail
+ * tags, which don't break dissection) can be treated the same from the
+ * perspective of the flow dissector.
+ *
+ * We need to return:
+ * - offset: the (B - A) difference between:
+ * A. the position of the real EtherType and
+ * B. the current skb->data (aka ETH_HLEN bytes into the frame, aka 2 bytes
+ * after the normal EtherType was supposed to be)
+ * The offset in bytes is exactly equal to the tagger overhead (and half of
+ * that, in __be16 shorts).
+ *
+ * - proto: the value of the real EtherType.
+ */
+static inline void dsa_tag_generic_flow_dissect(const struct sk_buff *skb,
+ __be16 *proto, int *offset)
{
#if IS_ENABLED(CONFIG_NET_DSA)
- return !dev->dsa_ptr->filter || dev->dsa_ptr->filter(skb, dev);
+ const struct dsa_device_ops *ops = skb->dev->dsa_ptr->tag_ops;
+ int tag_len = ops->needed_headroom;
+
+ *offset = tag_len;
+ *proto = ((__be16 *)skb->data)[(tag_len / 2) - 1];
#endif
- return false;
}
-void dsa_unregister_switch(struct dsa_switch *ds);
-int dsa_register_switch(struct dsa_switch *ds);
-#ifdef CONFIG_PM_SLEEP
-int dsa_switch_suspend(struct dsa_switch *ds);
-int dsa_switch_resume(struct dsa_switch *ds);
-#else
-static inline int dsa_switch_suspend(struct dsa_switch *ds)
-{
- return 0;
-}
-static inline int dsa_switch_resume(struct dsa_switch *ds)
+#if IS_ENABLED(CONFIG_NET_DSA)
+static inline int __dsa_netdevice_ops_check(struct net_device *dev)
{
+ int err = -EOPNOTSUPP;
+
+ if (!dev->dsa_ptr)
+ return err;
+
+ if (!dev->dsa_ptr->netdev_ops)
+ return err;
+
return 0;
}
-#endif /* CONFIG_PM_SLEEP */
-enum dsa_notifier_type {
- DSA_PORT_REGISTER,
- DSA_PORT_UNREGISTER,
-};
+static inline int dsa_ndo_eth_ioctl(struct net_device *dev, struct ifreq *ifr,
+ int cmd)
+{
+ const struct dsa_netdevice_ops *ops;
+ int err;
-struct dsa_notifier_info {
- struct net_device *dev;
-};
+ err = __dsa_netdevice_ops_check(dev);
+ if (err)
+ return err;
-struct dsa_notifier_register_info {
- struct dsa_notifier_info info; /* must be first */
- struct net_device *master;
- unsigned int port_number;
- unsigned int switch_number;
-};
+ ops = dev->dsa_ptr->netdev_ops;
-static inline struct net_device *
-dsa_notifier_info_to_dev(const struct dsa_notifier_info *info)
+ return ops->ndo_eth_ioctl(dev, ifr, cmd);
+}
+#else
+static inline int dsa_ndo_eth_ioctl(struct net_device *dev, struct ifreq *ifr,
+ int cmd)
{
- return info->dev;
+ return -EOPNOTSUPP;
}
+#endif
-#if IS_ENABLED(CONFIG_NET_DSA)
-int register_dsa_notifier(struct notifier_block *nb);
-int unregister_dsa_notifier(struct notifier_block *nb);
-int call_dsa_notifiers(unsigned long val, struct net_device *dev,
- struct dsa_notifier_info *info);
+void dsa_unregister_switch(struct dsa_switch *ds);
+int dsa_register_switch(struct dsa_switch *ds);
+void dsa_switch_shutdown(struct dsa_switch *ds);
+struct dsa_switch *dsa_switch_find(int tree_index, int sw_index);
+void dsa_flush_workqueue(void);
+#ifdef CONFIG_PM_SLEEP
+int dsa_switch_suspend(struct dsa_switch *ds);
+int dsa_switch_resume(struct dsa_switch *ds);
#else
-static inline int register_dsa_notifier(struct notifier_block *nb)
+static inline int dsa_switch_suspend(struct dsa_switch *ds)
{
return 0;
}
-
-static inline int unregister_dsa_notifier(struct notifier_block *nb)
+static inline int dsa_switch_resume(struct dsa_switch *ds)
{
return 0;
}
+#endif /* CONFIG_PM_SLEEP */
-static inline int call_dsa_notifiers(unsigned long val, struct net_device *dev,
- struct dsa_notifier_info *info)
+#if IS_ENABLED(CONFIG_NET_DSA)
+bool dsa_slave_dev_check(const struct net_device *dev);
+#else
+static inline bool dsa_slave_dev_check(const struct net_device *dev)
{
- return NOTIFY_DONE;
+ return false;
}
#endif
-/* Broadcom tag specific helpers to insert and extract queue/port number */
-#define BRCM_TAG_SET_PORT_QUEUE(p, q) ((p) << 8 | q)
-#define BRCM_TAG_GET_PORT(v) ((v) >> 8)
-#define BRCM_TAG_GET_QUEUE(v) ((v) & 0xff)
-
-
netdev_tx_t dsa_enqueue_skb(struct sk_buff *skb, struct net_device *dev);
-int dsa_port_get_phy_strings(struct dsa_port *dp, uint8_t *data);
-int dsa_port_get_ethtool_phy_stats(struct dsa_port *dp, uint64_t *data);
-int dsa_port_get_phy_sset_count(struct dsa_port *dp);
void dsa_port_phylink_mac_change(struct dsa_switch *ds, int port, bool up);
struct dsa_tag_driver {
@@ -735,7 +1433,7 @@ module_exit(dsa_tag_driver_module_exit)
/**
* module_dsa_tag_drivers() - Helper macro for registering DSA tag
* drivers
- * @__ops_array: Array of tag driver strucutres
+ * @__ops_array: Array of tag driver structures
*
* Helper macro for DSA tag drivers which do not do anything special
* in module init/exit. Each module may only use this macro once, and
diff --git a/include/net/dst.h b/include/net/dst.h
index 3448cf865ede..00b479ce6b99 100644
--- a/include/net/dst.h
+++ b/include/net/dst.h
@@ -18,6 +18,7 @@
#include <linux/refcount.h>
#include <net/neighbour.h>
#include <asm/processor.h>
+#include <linux/indirect_call_wrapper.h>
struct sk_buff;
@@ -35,7 +36,6 @@ struct dst_entry {
int (*output)(struct net *net, struct sock *sk, struct sk_buff *skb);
unsigned short flags;
-#define DST_HOST 0x0001
#define DST_NOXFRM 0x0002
#define DST_NOPOLICY 0x0004
#define DST_NOCOUNT 0x0008
@@ -77,6 +77,7 @@ struct dst_entry {
#ifndef CONFIG_64BIT
atomic_t __refcnt; /* 32-bit offset 64 */
#endif
+ netdevice_tracker dev_tracker;
};
struct dst_metrics {
@@ -194,9 +195,11 @@ dst_feature(const struct dst_entry *dst, u32 feature)
return dst_metric(dst, RTAX_FEATURES) & feature;
}
+INDIRECT_CALLABLE_DECLARE(unsigned int ip6_mtu(const struct dst_entry *));
+INDIRECT_CALLABLE_DECLARE(unsigned int ipv4_mtu(const struct dst_entry *));
static inline u32 dst_mtu(const struct dst_entry *dst)
{
- return dst->ops->mtu(dst);
+ return INDIRECT_CALL_INET(dst->ops->mtu, ip6_mtu, ipv4_mtu, dst);
}
/* RTT metrics are stored in milliseconds for user ABI, but used as jiffies */
@@ -215,7 +218,7 @@ dst_allfrag(const struct dst_entry *dst)
static inline int
dst_metric_locked(const struct dst_entry *dst, int metric)
{
- return dst_metric(dst, RTAX_LOCK) & (1<<metric);
+ return dst_metric(dst, RTAX_LOCK) & (1 << metric);
}
static inline void dst_hold(struct dst_entry *dst)
@@ -236,12 +239,6 @@ static inline void dst_use_noref(struct dst_entry *dst, unsigned long time)
}
}
-static inline void dst_hold_and_use(struct dst_entry *dst, unsigned long time)
-{
- dst_hold(dst);
- dst_use_noref(dst, time);
-}
-
static inline struct dst_entry *dst_clone(struct dst_entry *dst)
{
if (dst)
@@ -275,6 +272,7 @@ static inline void skb_dst_drop(struct sk_buff *skb)
static inline void __skb_dst_copy(struct sk_buff *nskb, unsigned long refdst)
{
+ nskb->slow_gro |= !!refdst;
nskb->_skb_refdst = refdst;
if (!(nskb->_skb_refdst & SKB_DST_NOREF))
dst_clone(skb_dst(nskb));
@@ -314,6 +312,7 @@ static inline bool skb_dst_force(struct sk_buff *skb)
dst = NULL;
skb->_skb_refdst = (unsigned long)dst;
+ skb->slow_gro |= !!dst;
}
return skb->_skb_refdst != 0UL;
@@ -401,7 +400,13 @@ static inline struct neighbour *dst_neigh_lookup(const struct dst_entry *dst, co
static inline struct neighbour *dst_neigh_lookup_skb(const struct dst_entry *dst,
struct sk_buff *skb)
{
- struct neighbour *n = dst->ops->neigh_lookup(dst, skb, NULL);
+ struct neighbour *n;
+
+ if (WARN_ON_ONCE(!dst->ops->neigh_lookup))
+ return NULL;
+
+ n = dst->ops->neigh_lookup(dst, skb, NULL);
+
return IS_ERR(n) ? NULL : n;
}
@@ -430,22 +435,36 @@ static inline void dst_set_expires(struct dst_entry *dst, int timeout)
dst->expires = expires;
}
+INDIRECT_CALLABLE_DECLARE(int ip6_output(struct net *, struct sock *,
+ struct sk_buff *));
+INDIRECT_CALLABLE_DECLARE(int ip_output(struct net *, struct sock *,
+ struct sk_buff *));
/* Output packet to network from transport. */
static inline int dst_output(struct net *net, struct sock *sk, struct sk_buff *skb)
{
- return skb_dst(skb)->output(net, sk, skb);
+ return INDIRECT_CALL_INET(skb_dst(skb)->output,
+ ip6_output, ip_output,
+ net, sk, skb);
}
+INDIRECT_CALLABLE_DECLARE(int ip6_input(struct sk_buff *));
+INDIRECT_CALLABLE_DECLARE(int ip_local_deliver(struct sk_buff *));
/* Input packet from network to transport. */
static inline int dst_input(struct sk_buff *skb)
{
- return skb_dst(skb)->input(skb);
+ return INDIRECT_CALL_INET(skb_dst(skb)->input,
+ ip6_input, ip_local_deliver, skb);
}
+INDIRECT_CALLABLE_DECLARE(struct dst_entry *ip6_dst_check(struct dst_entry *,
+ u32));
+INDIRECT_CALLABLE_DECLARE(struct dst_entry *ipv4_dst_check(struct dst_entry *,
+ u32));
static inline struct dst_entry *dst_check(struct dst_entry *dst, u32 cookie)
{
if (dst->obsolete)
- dst = dst->ops->check(dst, cookie);
+ dst = INDIRECT_CALL_INET(dst->ops->check, ip6_dst_check,
+ ipv4_dst_check, dst, cookie);
return dst;
}
@@ -528,14 +547,15 @@ static inline void skb_dst_update_pmtu_no_confirm(struct sk_buff *skb, u32 mtu)
dst->ops->update_pmtu(dst, NULL, skb, mtu, false);
}
-static inline void skb_tunnel_check_pmtu(struct sk_buff *skb,
- struct dst_entry *encap_dst,
- int headroom)
-{
- u32 encap_mtu = dst_mtu(encap_dst);
-
- if (skb->len > encap_mtu - headroom)
- skb_dst_update_pmtu_no_confirm(skb, encap_mtu - headroom);
-}
+struct dst_entry *dst_blackhole_check(struct dst_entry *dst, u32 cookie);
+void dst_blackhole_update_pmtu(struct dst_entry *dst, struct sock *sk,
+ struct sk_buff *skb, u32 mtu, bool confirm_neigh);
+void dst_blackhole_redirect(struct dst_entry *dst, struct sock *sk,
+ struct sk_buff *skb);
+u32 *dst_blackhole_cow_metrics(struct dst_entry *dst, unsigned long old);
+struct neighbour *dst_blackhole_neigh_lookup(const struct dst_entry *dst,
+ struct sk_buff *skb,
+ const void *daddr);
+unsigned int dst_blackhole_mtu(const struct dst_entry *dst);
#endif /* _NET_DST_H */
diff --git a/include/net/dst_cache.h b/include/net/dst_cache.h
index 67634675e919..df6622a5fe98 100644
--- a/include/net/dst_cache.h
+++ b/include/net/dst_cache.h
@@ -80,6 +80,17 @@ static inline void dst_cache_reset(struct dst_cache *dst_cache)
}
/**
+ * dst_cache_reset_now - invalidate the cache contents immediately
+ * @dst_cache: the cache
+ *
+ * The caller must be sure there are no concurrent users, as this frees
+ * all dst_cache users immediately, rather than waiting for the next
+ * per-cpu usage like dst_cache_reset does. Most callers should use the
+ * higher speed lazily-freed dst_cache_reset function instead.
+ */
+void dst_cache_reset_now(struct dst_cache *dst_cache);
+
+/**
* dst_cache_init - initialize the cache, allocating the required storage
* @dst_cache: the cache
* @gfp: allocation flags
diff --git a/include/net/dst_metadata.h b/include/net/dst_metadata.h
index 56cb3c38569a..a454cf4327fe 100644
--- a/include/net/dst_metadata.h
+++ b/include/net/dst_metadata.h
@@ -4,11 +4,14 @@
#include <linux/skbuff.h>
#include <net/ip_tunnels.h>
+#include <net/macsec.h>
#include <net/dst.h>
enum metadata_type {
METADATA_IP_TUNNEL,
METADATA_HW_PORT_MUX,
+ METADATA_MACSEC,
+ METADATA_XFRM,
};
struct hw_port_info {
@@ -16,12 +19,23 @@ struct hw_port_info {
u32 port_id;
};
+struct macsec_info {
+ sci_t sci;
+};
+
+struct xfrm_md_info {
+ u32 if_id;
+ int link;
+};
+
struct metadata_dst {
struct dst_entry dst;
enum metadata_type type;
union {
struct ip_tunnel_info tun_info;
struct hw_port_info port_info;
+ struct macsec_info macsec_info;
+ struct xfrm_md_info xfrm_info;
} u;
};
@@ -45,12 +59,35 @@ skb_tunnel_info(const struct sk_buff *skb)
return &md_dst->u.tun_info;
dst = skb_dst(skb);
- if (dst && dst->lwtstate)
+ if (dst && dst->lwtstate &&
+ (dst->lwtstate->type == LWTUNNEL_ENCAP_IP ||
+ dst->lwtstate->type == LWTUNNEL_ENCAP_IP6))
return lwt_tun_info(dst->lwtstate);
return NULL;
}
+static inline struct xfrm_md_info *lwt_xfrm_info(struct lwtunnel_state *lwt)
+{
+ return (struct xfrm_md_info *)lwt->data;
+}
+
+static inline struct xfrm_md_info *skb_xfrm_md_info(const struct sk_buff *skb)
+{
+ struct metadata_dst *md_dst = skb_metadata_dst(skb);
+ struct dst_entry *dst;
+
+ if (md_dst && md_dst->type == METADATA_XFRM)
+ return &md_dst->u.xfrm_info;
+
+ dst = skb_dst(skb);
+ if (dst && dst->lwtstate &&
+ dst->lwtstate->type == LWTUNNEL_ENCAP_XFRM)
+ return lwt_xfrm_info(dst->lwtstate);
+
+ return NULL;
+}
+
static inline bool skb_valid_dst(const struct sk_buff *skb)
{
struct dst_entry *dst = skb_dst(skb);
@@ -80,6 +117,12 @@ static inline int skb_metadata_dst_cmp(const struct sk_buff *skb_a,
return memcmp(&a->u.tun_info, &b->u.tun_info,
sizeof(a->u.tun_info) +
a->u.tun_info.options_len);
+ case METADATA_MACSEC:
+ return memcmp(&a->u.macsec_info, &b->u.macsec_info,
+ sizeof(a->u.macsec_info));
+ case METADATA_XFRM:
+ return memcmp(&a->u.xfrm_info, &b->u.xfrm_info,
+ sizeof(a->u.xfrm_info));
default:
return 1;
}
@@ -121,8 +164,20 @@ static inline struct metadata_dst *tun_dst_unclone(struct sk_buff *skb)
memcpy(&new_md->u.tun_info, &md_dst->u.tun_info,
sizeof(struct ip_tunnel_info) + md_size);
+#ifdef CONFIG_DST_CACHE
+ /* Unclone the dst cache if there is one */
+ if (new_md->u.tun_info.dst_cache.cache) {
+ int ret;
+
+ ret = dst_cache_init(&new_md->u.tun_info.dst_cache, GFP_ATOMIC);
+ if (ret) {
+ metadata_dst_free(new_md);
+ return ERR_PTR(ret);
+ }
+ }
+#endif
+
skb_dst_drop(skb);
- dst_hold(&new_md->dst);
skb_dst_set(skb, &new_md->dst);
return new_md;
}
diff --git a/include/net/dst_ops.h b/include/net/dst_ops.h
index 443863c7b8da..88ff7bb2bb9b 100644
--- a/include/net/dst_ops.h
+++ b/include/net/dst_ops.h
@@ -53,9 +53,11 @@ static inline int dst_entries_get_slow(struct dst_ops *dst)
return percpu_counter_sum_positive(&dst->pcpuc_entries);
}
+#define DST_PERCPU_COUNTER_BATCH 32
static inline void dst_entries_add(struct dst_ops *dst, int val)
{
- percpu_counter_add(&dst->pcpuc_entries, val);
+ percpu_counter_add_batch(&dst->pcpuc_entries, val,
+ DST_PERCPU_COUNTER_BATCH);
}
static inline int dst_entries_init(struct dst_ops *dst)
diff --git a/include/net/erspan.h b/include/net/erspan.h
index b39643ef4c95..6cb4cbd6a48f 100644
--- a/include/net/erspan.h
+++ b/include/net/erspan.h
@@ -2,7 +2,19 @@
#define __LINUX_ERSPAN_H
/*
- * GRE header for ERSPAN encapsulation (8 octets [34:41]) -- 8 bytes
+ * GRE header for ERSPAN type I encapsulation (4 octets [34:37])
+ * 0 1 2 3
+ * 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1
+ * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ * |0|0|0|0|0|00000|000000000|00000| Protocol Type for ERSPAN |
+ * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ *
+ * The Type I ERSPAN frame format is based on the barebones IP + GRE
+ * encapsulation (as described above) on top of the raw mirrored frame.
+ * There is no extra ERSPAN header.
+ *
+ *
+ * GRE header for ERSPAN type II and II encapsulation (8 octets [34:41])
* 0 1 2 3
* 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1
* +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
@@ -43,9 +55,12 @@
* | Platform Specific Info |
* +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
*
- * GRE proto ERSPAN type II = 0x88BE, type III = 0x22EB
+ * GRE proto ERSPAN type I/II = 0x88BE, type III = 0x22EB
*/
+#include <linux/ip.h>
+#include <linux/ipv6.h>
+#include <linux/skbuff.h>
#include <uapi/linux/erspan.h>
#define ERSPAN_VERSION 0x1 /* ERSPAN type II */
@@ -139,6 +154,9 @@ static inline u8 get_hwid(const struct erspan_md2 *md2)
static inline int erspan_hdr_len(int version)
{
+ if (version == 0)
+ return 0;
+
return sizeof(struct erspan_base_hdr) +
(version == 1 ? ERSPAN_V1_MDSIZE : ERSPAN_V2_MDSIZE);
}
diff --git a/include/net/esp.h b/include/net/esp.h
index 117652eb6ea3..322950727dd0 100644
--- a/include/net/esp.h
+++ b/include/net/esp.h
@@ -5,12 +5,29 @@
#include <linux/skbuff.h>
struct ip_esp_hdr;
+struct xfrm_state;
static inline struct ip_esp_hdr *ip_esp_hdr(const struct sk_buff *skb)
{
return (struct ip_esp_hdr *)skb_transport_header(skb);
}
+static inline void esp_output_fill_trailer(u8 *tail, int tfclen, int plen, __u8 proto)
+{
+ /* Fill padding... */
+ if (tfclen) {
+ memset(tail, 0, tfclen);
+ tail += tfclen;
+ }
+ do {
+ int i;
+ for (i = 0; i < plen - 2; i++)
+ tail[i] = i + 1;
+ } while (0);
+ tail[plen - 2] = plen - 2;
+ tail[plen - 1] = proto;
+}
+
struct esp_info {
struct ip_esp_hdr *esph;
__be64 seqno;
diff --git a/include/net/espintcp.h b/include/net/espintcp.h
index dd7026a00066..0335bbd76552 100644
--- a/include/net/espintcp.h
+++ b/include/net/espintcp.h
@@ -25,6 +25,7 @@ struct espintcp_ctx {
struct espintcp_msg partial;
void (*saved_data_ready)(struct sock *sk);
void (*saved_write_space)(struct sock *sk);
+ void (*saved_destruct)(struct sock *sk);
struct work_struct work;
bool tx_running;
};
diff --git a/include/net/ethoc.h b/include/net/ethoc.h
index 78519ed42ab4..73810f3ca492 100644
--- a/include/net/ethoc.h
+++ b/include/net/ethoc.h
@@ -10,6 +10,9 @@
#ifndef LINUX_NET_ETHOC_H
#define LINUX_NET_ETHOC_H 1
+#include <linux/if.h>
+#include <linux/types.h>
+
struct ethoc_platform_data {
u8 hwaddr[IFHWADDRLEN];
s8 phy_id;
diff --git a/include/net/failover.h b/include/net/failover.h
index bb15438f39c7..f2b42b4b9cd6 100644
--- a/include/net/failover.h
+++ b/include/net/failover.h
@@ -25,6 +25,7 @@ struct failover_ops {
struct failover {
struct list_head list;
struct net_device __rcu *failover_dev;
+ netdevice_tracker dev_tracker;
struct failover_ops __rcu *ops;
};
diff --git a/include/net/fib_rules.h b/include/net/fib_rules.h
index a259050f84af..82da359bca03 100644
--- a/include/net/fib_rules.h
+++ b/include/net/fib_rules.h
@@ -10,6 +10,7 @@
#include <net/flow.h>
#include <net/rtnetlink.h>
#include <net/fib_notifier.h>
+#include <linux/indirect_call_wrapper.h>
struct fib_kuid_range {
kuid_t start;
@@ -68,7 +69,7 @@ struct fib_rules_ops {
int (*action)(struct fib_rule *,
struct flowi *, int,
struct fib_lookup_arg *);
- bool (*suppress)(struct fib_rule *,
+ bool (*suppress)(struct fib_rule *, int,
struct fib_lookup_arg *);
int (*match)(struct fib_rule *,
struct flowi *, int);
@@ -90,7 +91,6 @@ struct fib_rules_ops {
void (*flush_cache)(struct fib_rules_ops *ops);
int nlgroup;
- const struct nla_policy *policy;
struct list_head rules_list;
struct module *owner;
struct net *fro_net;
@@ -102,26 +102,6 @@ struct fib_rule_notifier_info {
struct fib_rule *rule;
};
-#define FRA_GENERIC_POLICY \
- [FRA_UNSPEC] = { .strict_start_type = FRA_DPORT_RANGE + 1 }, \
- [FRA_IIFNAME] = { .type = NLA_STRING, .len = IFNAMSIZ - 1 }, \
- [FRA_OIFNAME] = { .type = NLA_STRING, .len = IFNAMSIZ - 1 }, \
- [FRA_PRIORITY] = { .type = NLA_U32 }, \
- [FRA_FWMARK] = { .type = NLA_U32 }, \
- [FRA_TUN_ID] = { .type = NLA_U64 }, \
- [FRA_FWMASK] = { .type = NLA_U32 }, \
- [FRA_TABLE] = { .type = NLA_U32 }, \
- [FRA_SUPPRESS_PREFIXLEN] = { .type = NLA_U32 }, \
- [FRA_SUPPRESS_IFGROUP] = { .type = NLA_U32 }, \
- [FRA_GOTO] = { .type = NLA_U32 }, \
- [FRA_L3MDEV] = { .type = NLA_U8 }, \
- [FRA_UID_RANGE] = { .len = sizeof(struct fib_rule_uid_range) }, \
- [FRA_PROTOCOL] = { .type = NLA_U8 }, \
- [FRA_IP_PROTO] = { .type = NLA_U8 }, \
- [FRA_SPORT_RANGE] = { .len = sizeof(struct fib_rule_port_range) }, \
- [FRA_DPORT_RANGE] = { .len = sizeof(struct fib_rule_port_range) }
-
-
static inline void fib_rule_get(struct fib_rule *rule)
{
refcount_inc(&rule->refcnt);
@@ -203,4 +183,23 @@ int fib_nl_newrule(struct sk_buff *skb, struct nlmsghdr *nlh,
struct netlink_ext_ack *extack);
int fib_nl_delrule(struct sk_buff *skb, struct nlmsghdr *nlh,
struct netlink_ext_ack *extack);
+
+INDIRECT_CALLABLE_DECLARE(int fib6_rule_match(struct fib_rule *rule,
+ struct flowi *fl, int flags));
+INDIRECT_CALLABLE_DECLARE(int fib4_rule_match(struct fib_rule *rule,
+ struct flowi *fl, int flags));
+
+INDIRECT_CALLABLE_DECLARE(int fib6_rule_action(struct fib_rule *rule,
+ struct flowi *flp, int flags,
+ struct fib_lookup_arg *arg));
+INDIRECT_CALLABLE_DECLARE(int fib4_rule_action(struct fib_rule *rule,
+ struct flowi *flp, int flags,
+ struct fib_lookup_arg *arg));
+
+INDIRECT_CALLABLE_DECLARE(bool fib6_rule_suppress(struct fib_rule *rule,
+ int flags,
+ struct fib_lookup_arg *arg));
+INDIRECT_CALLABLE_DECLARE(bool fib4_rule_suppress(struct fib_rule *rule,
+ int flags,
+ struct fib_lookup_arg *arg));
#endif
diff --git a/include/net/firewire.h b/include/net/firewire.h
index 299e5df38552..8fbff8d77865 100644
--- a/include/net/firewire.h
+++ b/include/net/firewire.h
@@ -2,6 +2,8 @@
#ifndef _NET_FIREWIRE_H
#define _NET_FIREWIRE_H
+#include <linux/types.h>
+
/* Pseudo L2 address */
#define FWNET_ALEN 16
union fwnet_hwaddr {
@@ -11,8 +13,7 @@ union fwnet_hwaddr {
__be64 uniq_id; /* EUI-64 */
u8 max_rec; /* max packet size */
u8 sspd; /* max speed */
- __be16 fifo_hi; /* hi 16bits of FIFO addr */
- __be32 fifo_lo; /* lo 32bits of FIFO addr */
+ u8 fifo[6]; /* FIFO addr */
} __packed uc;
};
diff --git a/include/net/flow.h b/include/net/flow.h
index a50fb77a0b27..2f0da4f0318b 100644
--- a/include/net/flow.h
+++ b/include/net/flow.h
@@ -29,6 +29,7 @@ struct flowi_tunnel {
struct flowi_common {
int flowic_oif;
int flowic_iif;
+ int flowic_l3mdev;
__u32 flowic_mark;
__u8 flowic_tos;
__u8 flowic_scope;
@@ -36,7 +37,6 @@ struct flowi_common {
__u8 flowic_flags;
#define FLOWI_FLAG_ANYSRC 0x01
#define FLOWI_FLAG_KNOWN_NH 0x02
-#define FLOWI_FLAG_SKIP_NH_OIF 0x04
__u32 flowic_secid;
kuid_t flowic_uid;
struct flowi_tunnel flowic_tun_key;
@@ -54,12 +54,6 @@ union flowi_uli {
__u8 code;
} icmpt;
- struct {
- __le16 dport;
- __le16 sport;
- } dnports;
-
- __be32 spi;
__be32 gre_key;
struct {
@@ -71,6 +65,7 @@ struct flowi4 {
struct flowi_common __fl_common;
#define flowi4_oif __fl_common.flowic_oif
#define flowi4_iif __fl_common.flowic_iif
+#define flowi4_l3mdev __fl_common.flowic_l3mdev
#define flowi4_mark __fl_common.flowic_mark
#define flowi4_tos __fl_common.flowic_tos
#define flowi4_scope __fl_common.flowic_scope
@@ -90,7 +85,6 @@ struct flowi4 {
#define fl4_dport uli.ports.dport
#define fl4_icmp_type uli.icmpt.type
#define fl4_icmp_code uli.icmpt.code
-#define fl4_ipsec_spi uli.spi
#define fl4_mh_type uli.mht.type
#define fl4_gre_key uli.gre_key
} __attribute__((__aligned__(BITS_PER_LONG/8)));
@@ -104,6 +98,7 @@ static inline void flowi4_init_output(struct flowi4 *fl4, int oif,
{
fl4->flowi4_oif = oif;
fl4->flowi4_iif = LOOPBACK_IFINDEX;
+ fl4->flowi4_l3mdev = 0;
fl4->flowi4_mark = mark;
fl4->flowi4_tos = tos;
fl4->flowi4_scope = scope;
@@ -116,6 +111,7 @@ static inline void flowi4_init_output(struct flowi4 *fl4, int oif,
fl4->saddr = saddr;
fl4->fl4_dport = dport;
fl4->fl4_sport = sport;
+ fl4->flowi4_multipath_hash = 0;
}
/* Reset some input parameters after previous lookup */
@@ -133,6 +129,7 @@ struct flowi6 {
struct flowi_common __fl_common;
#define flowi6_oif __fl_common.flowic_oif
#define flowi6_iif __fl_common.flowic_iif
+#define flowi6_l3mdev __fl_common.flowic_l3mdev
#define flowi6_mark __fl_common.flowic_mark
#define flowi6_scope __fl_common.flowic_scope
#define flowi6_proto __fl_common.flowic_proto
@@ -149,36 +146,20 @@ struct flowi6 {
#define fl6_dport uli.ports.dport
#define fl6_icmp_type uli.icmpt.type
#define fl6_icmp_code uli.icmpt.code
-#define fl6_ipsec_spi uli.spi
#define fl6_mh_type uli.mht.type
#define fl6_gre_key uli.gre_key
__u32 mp_hash;
} __attribute__((__aligned__(BITS_PER_LONG/8)));
-struct flowidn {
- struct flowi_common __fl_common;
-#define flowidn_oif __fl_common.flowic_oif
-#define flowidn_iif __fl_common.flowic_iif
-#define flowidn_mark __fl_common.flowic_mark
-#define flowidn_scope __fl_common.flowic_scope
-#define flowidn_proto __fl_common.flowic_proto
-#define flowidn_flags __fl_common.flowic_flags
- __le16 daddr;
- __le16 saddr;
- union flowi_uli uli;
-#define fld_sport uli.ports.sport
-#define fld_dport uli.ports.dport
-} __attribute__((__aligned__(BITS_PER_LONG/8)));
-
struct flowi {
union {
struct flowi_common __fl_common;
struct flowi4 ip4;
struct flowi6 ip6;
- struct flowidn dn;
} u;
#define flowi_oif u.__fl_common.flowic_oif
#define flowi_iif u.__fl_common.flowic_iif
+#define flowi_l3mdev u.__fl_common.flowic_l3mdev
#define flowi_mark u.__fl_common.flowic_mark
#define flowi_tos u.__fl_common.flowic_tos
#define flowi_scope u.__fl_common.flowic_scope
@@ -194,32 +175,19 @@ static inline struct flowi *flowi4_to_flowi(struct flowi4 *fl4)
return container_of(fl4, struct flowi, u.ip4);
}
-static inline struct flowi *flowi6_to_flowi(struct flowi6 *fl6)
+static inline struct flowi_common *flowi4_to_flowi_common(struct flowi4 *fl4)
{
- return container_of(fl6, struct flowi, u.ip6);
+ return &(fl4->__fl_common);
}
-static inline struct flowi *flowidn_to_flowi(struct flowidn *fldn)
+static inline struct flowi *flowi6_to_flowi(struct flowi6 *fl6)
{
- return container_of(fldn, struct flowi, u.dn);
+ return container_of(fl6, struct flowi, u.ip6);
}
-typedef unsigned long flow_compare_t;
-
-static inline unsigned int flow_key_size(u16 family)
+static inline struct flowi_common *flowi6_to_flowi_common(struct flowi6 *fl6)
{
- switch (family) {
- case AF_INET:
- BUILD_BUG_ON(sizeof(struct flowi4) % sizeof(flow_compare_t));
- return sizeof(struct flowi4) / sizeof(flow_compare_t);
- case AF_INET6:
- BUILD_BUG_ON(sizeof(struct flowi6) % sizeof(flow_compare_t));
- return sizeof(struct flowi6) / sizeof(flow_compare_t);
- case AF_DECnet:
- BUILD_BUG_ON(sizeof(struct flowidn) % sizeof(flow_compare_t));
- return sizeof(struct flowidn) / sizeof(flow_compare_t);
- }
- return 0;
+ return &(fl6->__fl_common);
}
__u32 __get_hash_from_flowi6(const struct flowi6 *fl6, struct flow_keys *keys);
diff --git a/include/net/flow_dissector.h b/include/net/flow_dissector.h
index 628383915827..5ccf52ef8809 100644
--- a/include/net/flow_dissector.h
+++ b/include/net/flow_dissector.h
@@ -8,6 +8,8 @@
#include <linux/string.h>
#include <uapi/linux/if_ether.h>
+struct bpf_prog;
+struct net;
struct sk_buff;
/**
@@ -57,15 +59,29 @@ struct flow_dissector_key_vlan {
__be16 vlan_tci;
};
__be16 vlan_tpid;
+ __be16 vlan_eth_type;
+ u16 padding;
};
-struct flow_dissector_key_mpls {
+struct flow_dissector_mpls_lse {
u32 mpls_ttl:8,
mpls_bos:1,
mpls_tc:3,
mpls_label:20;
};
+#define FLOW_DIS_MPLS_MAX 7
+struct flow_dissector_key_mpls {
+ struct flow_dissector_mpls_lse ls[FLOW_DIS_MPLS_MAX]; /* Label Stack */
+ u8 used_lses; /* One bit set for each Label Stack Entry in use */
+};
+
+static inline void dissector_set_mpls_lse(struct flow_dissector_key_mpls *mpls,
+ int lse_index)
+{
+ mpls->used_lses |= 1 << lse_index;
+}
+
#define FLOW_DIS_TUN_OPTS_MAX 255
/**
* struct flow_dissector_key_enc_opts:
@@ -163,6 +179,22 @@ struct flow_dissector_key_ports {
};
/**
+ * struct flow_dissector_key_ports_range
+ * @tp: port number from packet
+ * @tp_min: min port number in range
+ * @tp_max: max port number in range
+ */
+struct flow_dissector_key_ports_range {
+ union {
+ struct flow_dissector_key_ports tp;
+ struct {
+ struct flow_dissector_key_ports tp_min;
+ struct flow_dissector_key_ports tp_max;
+ };
+ };
+};
+
+/**
* flow_dissector_key_icmp:
* type: ICMP type
* code: ICMP code
@@ -229,6 +261,42 @@ struct flow_dissector_key_ct {
u32 ct_labels[4];
};
+/**
+ * struct flow_dissector_key_hash:
+ * @hash: hash value
+ */
+struct flow_dissector_key_hash {
+ u32 hash;
+};
+
+/**
+ * struct flow_dissector_key_num_of_vlans:
+ * @num_of_vlans: num_of_vlans value
+ */
+struct flow_dissector_key_num_of_vlans {
+ u8 num_of_vlans;
+};
+
+/**
+ * struct flow_dissector_key_pppoe:
+ * @session_id: pppoe session id
+ * @ppp_proto: ppp protocol
+ * @type: pppoe eth type
+ */
+struct flow_dissector_key_pppoe {
+ __be16 session_id;
+ __be16 ppp_proto;
+ __be16 type;
+};
+
+/**
+ * struct flow_dissector_key_l2tpv3:
+ * @session_id: identifier for a l2tp session
+ */
+struct flow_dissector_key_l2tpv3 {
+ __be32 session_id;
+};
+
enum flow_dissector_key_id {
FLOW_DISSECTOR_KEY_CONTROL, /* struct flow_dissector_key_control */
FLOW_DISSECTOR_KEY_BASIC, /* struct flow_dissector_key_basic */
@@ -257,6 +325,10 @@ enum flow_dissector_key_id {
FLOW_DISSECTOR_KEY_ENC_OPTS, /* struct flow_dissector_key_enc_opts */
FLOW_DISSECTOR_KEY_META, /* struct flow_dissector_key_meta */
FLOW_DISSECTOR_KEY_CT, /* struct flow_dissector_key_ct */
+ FLOW_DISSECTOR_KEY_HASH, /* struct flow_dissector_key_hash */
+ FLOW_DISSECTOR_KEY_NUM_OF_VLANS, /* struct flow_dissector_key_num_of_vlans */
+ FLOW_DISSECTOR_KEY_PPPOE, /* struct flow_dissector_key_pppoe */
+ FLOW_DISSECTOR_KEY_L2TPV3, /* struct flow_dissector_key_l2tpv3 */
FLOW_DISSECTOR_KEY_MAX,
};
@@ -264,6 +336,7 @@ enum flow_dissector_key_id {
#define FLOW_DISSECTOR_F_PARSE_1ST_FRAG BIT(0)
#define FLOW_DISSECTOR_F_STOP_AT_FLOW_LABEL BIT(1)
#define FLOW_DISSECTOR_F_STOP_AT_ENCAP BIT(2)
+#define FLOW_DISSECTOR_F_STOP_BEFORE_ENCAP BIT(3)
struct flow_dissector_key {
enum flow_dissector_key_id key_id;
@@ -327,7 +400,7 @@ static inline bool flow_keys_have_l4(const struct flow_keys *keys)
u32 flow_hash_from_keys(struct flow_keys *keys);
void skb_flow_get_icmp_tci(const struct sk_buff *skb,
struct flow_dissector_key_icmp *key_icmp,
- void *data, int thoff, int hlen);
+ const void *data, int thoff, int hlen);
static inline bool dissector_uses_key(const struct flow_dissector *flow_dissector,
enum flow_dissector_key_id key_id)
@@ -345,8 +418,8 @@ static inline void *skb_flow_dissector_target(struct flow_dissector *flow_dissec
struct bpf_flow_dissector {
struct bpf_flow_keys *flow_keys;
const struct sk_buff *skb;
- void *data;
- void *data_end;
+ const void *data;
+ const void *data_end;
};
static inline void
@@ -357,4 +430,9 @@ flow_dissector_init_keys(struct flow_dissector_key_control *key_control,
memset(key_basic, 0, sizeof(*key_basic));
}
+#ifdef CONFIG_BPF_SYSCALL
+int flow_dissector_bpf_prog_attach_check(struct net *net,
+ struct bpf_prog *prog);
+#endif /* CONFIG_BPF_SYSCALL */
+
#endif
diff --git a/include/net/flow_offload.h b/include/net/flow_offload.h
index c6f7bd22db60..e343f9f8363e 100644
--- a/include/net/flow_offload.h
+++ b/include/net/flow_offload.h
@@ -3,8 +3,8 @@
#include <linux/kernel.h>
#include <linux/list.h>
+#include <linux/netlink.h>
#include <net/flow_dissector.h>
-#include <linux/rhashtable.h>
struct flow_match {
struct flow_dissector *dissector;
@@ -48,6 +48,10 @@ struct flow_match_ports {
struct flow_dissector_key_ports *key, *mask;
};
+struct flow_match_ports_range {
+ struct flow_dissector_key_ports_range *key, *mask;
+};
+
struct flow_match_icmp {
struct flow_dissector_key_icmp *key, *mask;
};
@@ -68,6 +72,18 @@ struct flow_match_enc_opts {
struct flow_dissector_key_enc_opts *key, *mask;
};
+struct flow_match_ct {
+ struct flow_dissector_key_ct *key, *mask;
+};
+
+struct flow_match_pppoe {
+ struct flow_dissector_key_pppoe *key, *mask;
+};
+
+struct flow_match_l2tpv3 {
+ struct flow_dissector_key_l2tpv3 *key, *mask;
+};
+
struct flow_rule;
void flow_rule_match_meta(const struct flow_rule *rule,
@@ -90,6 +106,8 @@ void flow_rule_match_ip(const struct flow_rule *rule,
struct flow_match_ip *out);
void flow_rule_match_ports(const struct flow_rule *rule,
struct flow_match_ports *out);
+void flow_rule_match_ports_range(const struct flow_rule *rule,
+ struct flow_match_ports_range *out);
void flow_rule_match_tcp(const struct flow_rule *rule,
struct flow_match_tcp *out);
void flow_rule_match_icmp(const struct flow_rule *rule,
@@ -110,6 +128,12 @@ void flow_rule_match_enc_keyid(const struct flow_rule *rule,
struct flow_match_enc_keyid *out);
void flow_rule_match_enc_opts(const struct flow_rule *rule,
struct flow_match_enc_opts *out);
+void flow_rule_match_ct(const struct flow_rule *rule,
+ struct flow_match_ct *out);
+void flow_rule_match_pppoe(const struct flow_rule *rule,
+ struct flow_match_pppoe *out);
+void flow_rule_match_l2tpv3(const struct flow_rule *rule,
+ struct flow_match_l2tpv3 *out);
enum flow_action_id {
FLOW_ACTION_ACCEPT = 0,
@@ -130,14 +154,23 @@ enum flow_action_id {
FLOW_ACTION_CSUM,
FLOW_ACTION_MARK,
FLOW_ACTION_PTYPE,
+ FLOW_ACTION_PRIORITY,
FLOW_ACTION_WAKE,
FLOW_ACTION_QUEUE,
FLOW_ACTION_SAMPLE,
FLOW_ACTION_POLICE,
FLOW_ACTION_CT,
+ FLOW_ACTION_CT_METADATA,
FLOW_ACTION_MPLS_PUSH,
FLOW_ACTION_MPLS_POP,
FLOW_ACTION_MPLS_MANGLE,
+ FLOW_ACTION_GATE,
+ FLOW_ACTION_PPPOE_PUSH,
+ FLOW_ACTION_JUMP,
+ FLOW_ACTION_PIPE,
+ FLOW_ACTION_VLAN_PUSH_ETH,
+ FLOW_ACTION_VLAN_POP_ETH,
+ FLOW_ACTION_CONTINUE,
NUM_FLOW_ACTIONS,
};
@@ -154,10 +187,41 @@ enum flow_action_mangle_base {
FLOW_ACT_MANGLE_HDR_TYPE_UDP,
};
+enum flow_action_hw_stats_bit {
+ FLOW_ACTION_HW_STATS_IMMEDIATE_BIT,
+ FLOW_ACTION_HW_STATS_DELAYED_BIT,
+ FLOW_ACTION_HW_STATS_DISABLED_BIT,
+
+ FLOW_ACTION_HW_STATS_NUM_BITS
+};
+
+enum flow_action_hw_stats {
+ FLOW_ACTION_HW_STATS_IMMEDIATE =
+ BIT(FLOW_ACTION_HW_STATS_IMMEDIATE_BIT),
+ FLOW_ACTION_HW_STATS_DELAYED = BIT(FLOW_ACTION_HW_STATS_DELAYED_BIT),
+ FLOW_ACTION_HW_STATS_ANY = FLOW_ACTION_HW_STATS_IMMEDIATE |
+ FLOW_ACTION_HW_STATS_DELAYED,
+ FLOW_ACTION_HW_STATS_DISABLED =
+ BIT(FLOW_ACTION_HW_STATS_DISABLED_BIT),
+ FLOW_ACTION_HW_STATS_DONT_CARE = BIT(FLOW_ACTION_HW_STATS_NUM_BITS) - 1,
+};
+
typedef void (*action_destr)(void *priv);
+struct flow_action_cookie {
+ u32 cookie_len;
+ u8 cookie[];
+};
+
+struct flow_action_cookie *flow_action_cookie_create(void *data,
+ unsigned int len,
+ gfp_t gfp);
+void flow_action_cookie_destroy(struct flow_action_cookie *cookie);
+
struct flow_action_entry {
enum flow_action_id id;
+ u32 hw_index;
+ enum flow_action_hw_stats hw_stats;
action_destr destructor;
void *destructor_priv;
union {
@@ -168,7 +232,12 @@ struct flow_action_entry {
__be16 proto;
u8 prio;
} vlan;
- struct { /* FLOW_ACTION_PACKET_EDIT */
+ struct { /* FLOW_ACTION_VLAN_PUSH_ETH */
+ unsigned char dst[ETH_ALEN];
+ unsigned char src[ETH_ALEN];
+ } vlan_push_eth;
+ struct { /* FLOW_ACTION_MANGLE */
+ /* FLOW_ACTION_ADD */
enum flow_action_mangle_base htype;
u32 offset;
u32 mask;
@@ -178,6 +247,7 @@ struct flow_action_entry {
u32 csum_flags; /* FLOW_ACTION_CSUM */
u32 mark; /* FLOW_ACTION_MARK */
u16 ptype; /* FLOW_ACTION_PTYPE */
+ u32 priority; /* FLOW_ACTION_PRIORITY */
struct { /* FLOW_ACTION_QUEUE */
u32 ctx;
u32 index;
@@ -190,13 +260,30 @@ struct flow_action_entry {
bool truncate;
} sample;
struct { /* FLOW_ACTION_POLICE */
- s64 burst;
+ u32 burst;
u64 rate_bytes_ps;
+ u64 peakrate_bytes_ps;
+ u32 avrate;
+ u16 overhead;
+ u64 burst_pkt;
+ u64 rate_pkt_ps;
+ u32 mtu;
+ struct {
+ enum flow_action_id act_id;
+ u32 extval;
+ } exceed, notexceed;
} police;
struct { /* FLOW_ACTION_CT */
int action;
u16 zone;
+ struct nf_flowtable *flow_table;
} ct;
+ struct {
+ unsigned long cookie;
+ u32 mark;
+ u32 labels[4];
+ bool orig_dir;
+ } ct_metadata;
struct { /* FLOW_ACTION_MPLS_PUSH */
u32 label;
__be16 proto;
@@ -213,12 +300,24 @@ struct flow_action_entry {
u8 bos;
u8 ttl;
} mpls_mangle;
+ struct {
+ s32 prio;
+ u64 basetime;
+ u64 cycletime;
+ u64 cycletimeext;
+ u32 num_entries;
+ struct action_gate_entry *entries;
+ } gate;
+ struct { /* FLOW_ACTION_PPPOE_PUSH */
+ u16 sid;
+ } pppoe;
};
+ struct flow_action_cookie *cookie; /* user defined action cookie */
};
struct flow_action {
unsigned int num_entries;
- struct flow_action_entry entries[0];
+ struct flow_action_entry entries[];
};
static inline bool flow_action_has_entries(const struct flow_action *action)
@@ -227,7 +326,7 @@ static inline bool flow_action_has_entries(const struct flow_action *action)
}
/**
- * flow_action_has_one_action() - check if exactly one action is present
+ * flow_offload_has_one_action() - check if exactly one action is present
* @action: tc filter flow offload action
*
* Returns true if exactly one action is present.
@@ -237,8 +336,89 @@ static inline bool flow_offload_has_one_action(const struct flow_action *action)
return action->num_entries == 1;
}
+static inline bool flow_action_is_last_entry(const struct flow_action *action,
+ const struct flow_action_entry *entry)
+{
+ return entry == &action->entries[action->num_entries - 1];
+}
+
#define flow_action_for_each(__i, __act, __actions) \
- for (__i = 0, __act = &(__actions)->entries[0]; __i < (__actions)->num_entries; __act = &(__actions)->entries[++__i])
+ for (__i = 0, __act = &(__actions)->entries[0]; \
+ __i < (__actions)->num_entries; \
+ __act = &(__actions)->entries[++__i])
+
+static inline bool
+flow_action_mixed_hw_stats_check(const struct flow_action *action,
+ struct netlink_ext_ack *extack)
+{
+ const struct flow_action_entry *action_entry;
+ u8 last_hw_stats;
+ int i;
+
+ if (flow_offload_has_one_action(action))
+ return true;
+
+ flow_action_for_each(i, action_entry, action) {
+ if (i && action_entry->hw_stats != last_hw_stats) {
+ NL_SET_ERR_MSG_MOD(extack, "Mixing HW stats types for actions is not supported");
+ return false;
+ }
+ last_hw_stats = action_entry->hw_stats;
+ }
+ return true;
+}
+
+static inline const struct flow_action_entry *
+flow_action_first_entry_get(const struct flow_action *action)
+{
+ WARN_ON(!flow_action_has_entries(action));
+ return &action->entries[0];
+}
+
+static inline bool
+__flow_action_hw_stats_check(const struct flow_action *action,
+ struct netlink_ext_ack *extack,
+ bool check_allow_bit,
+ enum flow_action_hw_stats_bit allow_bit)
+{
+ const struct flow_action_entry *action_entry;
+
+ if (!flow_action_has_entries(action))
+ return true;
+ if (!flow_action_mixed_hw_stats_check(action, extack))
+ return false;
+
+ action_entry = flow_action_first_entry_get(action);
+
+ /* Zero is not a legal value for hw_stats, catch anyone passing it */
+ WARN_ON_ONCE(!action_entry->hw_stats);
+
+ if (!check_allow_bit &&
+ ~action_entry->hw_stats & FLOW_ACTION_HW_STATS_ANY) {
+ NL_SET_ERR_MSG_MOD(extack, "Driver supports only default HW stats type \"any\"");
+ return false;
+ } else if (check_allow_bit &&
+ !(action_entry->hw_stats & BIT(allow_bit))) {
+ NL_SET_ERR_MSG_MOD(extack, "Driver does not support selected HW stats type");
+ return false;
+ }
+ return true;
+}
+
+static inline bool
+flow_action_hw_stats_check(const struct flow_action *action,
+ struct netlink_ext_ack *extack,
+ enum flow_action_hw_stats_bit allow_bit)
+{
+ return __flow_action_hw_stats_check(action, extack, true, allow_bit);
+}
+
+static inline bool
+flow_action_basic_hw_stats_check(const struct flow_action *action,
+ struct netlink_ext_ack *extack)
+{
+ return __flow_action_hw_stats_check(action, extack, false, 0);
+}
struct flow_rule {
struct flow_match match;
@@ -256,15 +436,28 @@ static inline bool flow_rule_match_key(const struct flow_rule *rule,
struct flow_stats {
u64 pkts;
u64 bytes;
+ u64 drops;
u64 lastused;
+ enum flow_action_hw_stats used_hw_stats;
+ bool used_hw_stats_valid;
};
static inline void flow_stats_update(struct flow_stats *flow_stats,
- u64 bytes, u64 pkts, u64 lastused)
+ u64 bytes, u64 pkts,
+ u64 drops, u64 lastused,
+ enum flow_action_hw_stats used_hw_stats)
{
flow_stats->pkts += pkts;
flow_stats->bytes += bytes;
+ flow_stats->drops += drops;
flow_stats->lastused = max_t(u64, flow_stats->lastused, lastused);
+
+ /* The driver should pass value with a maximum of one bit set.
+ * Passing FLOW_ACTION_HW_STATS_ANY is invalid.
+ */
+ WARN_ON(used_hw_stats == FLOW_ACTION_HW_STATS_ANY);
+ flow_stats->used_hw_stats |= used_hw_stats;
+ flow_stats->used_hw_stats_valid = true;
}
enum flow_block_command {
@@ -276,6 +469,8 @@ enum flow_block_binder_type {
FLOW_BLOCK_BINDER_TYPE_UNSPEC,
FLOW_BLOCK_BINDER_TYPE_CLSACT_INGRESS,
FLOW_BLOCK_BINDER_TYPE_CLSACT_EGRESS,
+ FLOW_BLOCK_BINDER_TYPE_RED_EARLY_DROP,
+ FLOW_BLOCK_BINDER_TYPE_RED_MARK,
};
struct flow_block {
@@ -294,12 +489,26 @@ struct flow_block_offload {
struct list_head cb_list;
struct list_head *driver_block_list;
struct netlink_ext_ack *extack;
+ struct Qdisc *sch;
+ struct list_head *cb_list_head;
};
enum tc_setup_type;
typedef int flow_setup_cb_t(enum tc_setup_type type, void *type_data,
void *cb_priv);
+struct flow_block_cb;
+
+struct flow_block_indr {
+ struct list_head list;
+ struct net_device *dev;
+ struct Qdisc *sch;
+ enum flow_block_binder_type binder_type;
+ void *data;
+ void *cb_priv;
+ void (*cleanup)(struct flow_block_cb *block_cb);
+};
+
struct flow_block_cb {
struct list_head driver_list;
struct list_head list;
@@ -307,12 +516,21 @@ struct flow_block_cb {
void *cb_ident;
void *cb_priv;
void (*release)(void *cb_priv);
+ struct flow_block_indr indr;
unsigned int refcnt;
};
struct flow_block_cb *flow_block_cb_alloc(flow_setup_cb_t *cb,
void *cb_ident, void *cb_priv,
void (*release)(void *cb_priv));
+struct flow_block_cb *flow_indr_block_cb_alloc(flow_setup_cb_t *cb,
+ void *cb_ident, void *cb_priv,
+ void (*release)(void *cb_priv),
+ struct flow_block_offload *bo,
+ struct net_device *dev,
+ struct Qdisc *sch, void *data,
+ void *indr_cb_priv,
+ void (*cleanup)(struct flow_block_cb *block_cb));
void flow_block_cb_free(struct flow_block_cb *block_cb);
struct flow_block_cb *flow_block_cb_lookup(struct flow_block *block,
@@ -334,6 +552,13 @@ static inline void flow_block_cb_remove(struct flow_block_cb *block_cb,
list_move(&block_cb->list, &offload->cb_list);
}
+static inline void flow_indr_block_cb_remove(struct flow_block_cb *block_cb,
+ struct flow_block_offload *offload)
+{
+ list_del(&block_cb->indr.list);
+ list_move(&block_cb->list, &offload->cb_list);
+}
+
bool flow_block_cb_is_busy(flow_setup_cb_t *cb, void *cb_ident,
struct list_head *driver_block_list);
@@ -366,6 +591,23 @@ struct flow_cls_offload {
u32 classid;
};
+enum offload_act_command {
+ FLOW_ACT_REPLACE,
+ FLOW_ACT_DESTROY,
+ FLOW_ACT_STATS,
+};
+
+struct flow_offload_action {
+ struct netlink_ext_ack *extack; /* NULL in FLOW_ACT_STATS process*/
+ enum offload_act_command command;
+ enum flow_action_id id;
+ u32 index;
+ struct flow_stats stats;
+ struct flow_action action;
+};
+
+struct flow_offload_action *offload_action_alloc(unsigned int num_actions);
+
static inline struct flow_rule *
flow_cls_offload_flow_rule(struct flow_cls_offload *flow_cmd)
{
@@ -377,39 +619,18 @@ static inline void flow_block_init(struct flow_block *flow_block)
INIT_LIST_HEAD(&flow_block->cb_list);
}
-typedef int flow_indr_block_bind_cb_t(struct net_device *dev, void *cb_priv,
- enum tc_setup_type type, void *type_data);
-
-typedef void flow_indr_block_cmd_t(struct net_device *dev,
- flow_indr_block_bind_cb_t *cb, void *cb_priv,
- enum flow_block_command command);
-
-struct flow_indr_block_entry {
- flow_indr_block_cmd_t *cb;
- struct list_head list;
-};
-
-void flow_indr_add_block_cb(struct flow_indr_block_entry *entry);
-
-void flow_indr_del_block_cb(struct flow_indr_block_entry *entry);
-
-int __flow_indr_block_cb_register(struct net_device *dev, void *cb_priv,
- flow_indr_block_bind_cb_t *cb,
- void *cb_ident);
-
-void __flow_indr_block_cb_unregister(struct net_device *dev,
- flow_indr_block_bind_cb_t *cb,
- void *cb_ident);
-
-int flow_indr_block_cb_register(struct net_device *dev, void *cb_priv,
- flow_indr_block_bind_cb_t *cb, void *cb_ident);
-
-void flow_indr_block_cb_unregister(struct net_device *dev,
- flow_indr_block_bind_cb_t *cb,
- void *cb_ident);
-
-void flow_indr_block_call(struct net_device *dev,
- struct flow_block_offload *bo,
- enum flow_block_command command);
+typedef int flow_indr_block_bind_cb_t(struct net_device *dev, struct Qdisc *sch, void *cb_priv,
+ enum tc_setup_type type, void *type_data,
+ void *data,
+ void (*cleanup)(struct flow_block_cb *block_cb));
+
+int flow_indr_dev_register(flow_indr_block_bind_cb_t *cb, void *cb_priv);
+void flow_indr_dev_unregister(flow_indr_block_bind_cb_t *cb, void *cb_priv,
+ void (*release)(void *cb_priv));
+int flow_indr_dev_setup_offload(struct net_device *dev, struct Qdisc *sch,
+ enum tc_setup_type type, void *data,
+ struct flow_block_offload *bo,
+ void (*cleanup)(struct flow_block_cb *block_cb));
+bool flow_indr_dev_exists(void);
#endif /* _NET_FLOW_OFFLOAD_H */
diff --git a/include/net/fq.h b/include/net/fq.h
index 2ad85e683041..07b5aff6ec58 100644
--- a/include/net/fq.h
+++ b/include/net/fq.h
@@ -7,6 +7,10 @@
#ifndef __NET_SCHED_FQ_H
#define __NET_SCHED_FQ_H
+#include <linux/skbuff.h>
+#include <linux/spinlock.h>
+#include <linux/types.h>
+
struct fq_tin;
/**
@@ -19,8 +23,6 @@ struct fq_tin;
* @flowchain: can be linked to fq_tin's new_flows or old_flows. Used for DRR++
* (deficit round robin) based round robin queuing similar to the one
* found in net/sched/sch_fq_codel.c
- * @backlogchain: can be linked to other fq_flow and fq. Used to keep track of
- * fat flows and efficient head-dropping if packet limit is reached
* @queue: sk_buff queue to hold packets
* @backlog: number of bytes pending in the queue. The number of packets can be
* found in @queue.qlen
@@ -29,7 +31,6 @@ struct fq_tin;
struct fq_flow {
struct fq_tin *tin;
struct list_head flowchain;
- struct list_head backlogchain;
struct sk_buff_head queue;
u32 backlog;
int deficit;
@@ -47,6 +48,8 @@ struct fq_flow {
struct fq_tin {
struct list_head new_flows;
struct list_head old_flows;
+ struct list_head tin_list;
+ struct fq_flow default_flow;
u32 backlog_bytes;
u32 backlog_packets;
u32 overlimit;
@@ -59,17 +62,16 @@ struct fq_tin {
/**
* struct fq - main container for fair queuing purposes
*
- * @backlogs: linked to fq_flows. Used to maintain fat flows for efficient
- * head-dropping when @backlog reaches @limit
* @limit: max number of packets that can be queued across all flows
* @backlog: number of packets queued across all flows
*/
struct fq {
struct fq_flow *flows;
- struct list_head backlogs;
+ unsigned long *flows_bitmap;
+
+ struct list_head tin_backlog;
spinlock_t lock;
u32 flows_cnt;
- siphash_key_t perturbation;
u32 limit;
u32 memory_limit;
u32 memory_usage;
diff --git a/include/net/fq_impl.h b/include/net/fq_impl.h
index 38a9a3d1222b..524b510f1c68 100644
--- a/include/net/fq_impl.h
+++ b/include/net/fq_impl.h
@@ -11,35 +11,37 @@
/* functions that are embedded into includer */
-static void fq_adjust_removal(struct fq *fq,
- struct fq_flow *flow,
- struct sk_buff *skb)
+
+static void
+__fq_adjust_removal(struct fq *fq, struct fq_flow *flow, unsigned int packets,
+ unsigned int bytes, unsigned int truesize)
{
struct fq_tin *tin = flow->tin;
+ int idx;
- tin->backlog_bytes -= skb->len;
- tin->backlog_packets--;
- flow->backlog -= skb->len;
- fq->backlog--;
- fq->memory_usage -= skb->truesize;
-}
+ tin->backlog_bytes -= bytes;
+ tin->backlog_packets -= packets;
+ flow->backlog -= bytes;
+ fq->backlog -= packets;
+ fq->memory_usage -= truesize;
-static void fq_rejigger_backlog(struct fq *fq, struct fq_flow *flow)
-{
- struct fq_flow *i;
+ if (flow->backlog)
+ return;
- if (flow->backlog == 0) {
- list_del_init(&flow->backlogchain);
- } else {
- i = flow;
+ if (flow == &tin->default_flow) {
+ list_del_init(&tin->tin_list);
+ return;
+ }
- list_for_each_entry_continue(i, &fq->backlogs, backlogchain)
- if (i->backlog < flow->backlog)
- break;
+ idx = flow - fq->flows;
+ __clear_bit(idx, fq->flows_bitmap);
+}
- list_move_tail(&flow->backlogchain,
- &i->backlogchain);
- }
+static void fq_adjust_removal(struct fq *fq,
+ struct fq_flow *flow,
+ struct sk_buff *skb)
+{
+ __fq_adjust_removal(fq, flow, 1, skb->len, skb->truesize);
}
static struct sk_buff *fq_flow_dequeue(struct fq *fq,
@@ -54,11 +56,37 @@ static struct sk_buff *fq_flow_dequeue(struct fq *fq,
return NULL;
fq_adjust_removal(fq, flow, skb);
- fq_rejigger_backlog(fq, flow);
return skb;
}
+static int fq_flow_drop(struct fq *fq, struct fq_flow *flow,
+ fq_skb_free_t free_func)
+{
+ unsigned int packets = 0, bytes = 0, truesize = 0;
+ struct fq_tin *tin = flow->tin;
+ struct sk_buff *skb;
+ int pending;
+
+ lockdep_assert_held(&fq->lock);
+
+ pending = min_t(int, 32, skb_queue_len(&flow->queue) / 2);
+ do {
+ skb = __skb_dequeue(&flow->queue);
+ if (!skb)
+ break;
+
+ packets++;
+ bytes += skb->len;
+ truesize += skb->truesize;
+ free_func(fq, tin, flow, skb);
+ } while (packets < pending);
+
+ __fq_adjust_removal(fq, flow, packets, bytes, truesize);
+
+ return packets;
+}
+
static struct sk_buff *fq_tin_dequeue(struct fq *fq,
struct fq_tin *tin,
fq_tin_dequeue_t dequeue_func)
@@ -108,15 +136,14 @@ begin:
static u32 fq_flow_idx(struct fq *fq, struct sk_buff *skb)
{
- u32 hash = skb_get_hash_perturb(skb, &fq->perturbation);
+ u32 hash = skb_get_hash(skb);
return reciprocal_scale(hash, fq->flows_cnt);
}
static struct fq_flow *fq_flow_classify(struct fq *fq,
struct fq_tin *tin, u32 idx,
- struct sk_buff *skb,
- fq_flow_get_default_t get_default_func)
+ struct sk_buff *skb)
{
struct fq_flow *flow;
@@ -124,7 +151,7 @@ static struct fq_flow *fq_flow_classify(struct fq *fq,
flow = &fq->flows[idx];
if (flow->tin && flow->tin != tin) {
- flow = get_default_func(fq, tin, idx, skb);
+ flow = &tin->default_flow;
tin->collisions++;
fq->collisions++;
}
@@ -135,36 +162,56 @@ static struct fq_flow *fq_flow_classify(struct fq *fq,
return flow;
}
-static void fq_recalc_backlog(struct fq *fq,
- struct fq_tin *tin,
- struct fq_flow *flow)
+static struct fq_flow *fq_find_fattest_flow(struct fq *fq)
{
- struct fq_flow *i;
+ struct fq_tin *tin;
+ struct fq_flow *flow = NULL;
+ u32 len = 0;
+ int i;
- if (list_empty(&flow->backlogchain))
- list_add_tail(&flow->backlogchain, &fq->backlogs);
+ for_each_set_bit(i, fq->flows_bitmap, fq->flows_cnt) {
+ struct fq_flow *cur = &fq->flows[i];
+ unsigned int cur_len;
- i = flow;
- list_for_each_entry_continue_reverse(i, &fq->backlogs,
- backlogchain)
- if (i->backlog > flow->backlog)
- break;
+ cur_len = cur->backlog;
+ if (cur_len <= len)
+ continue;
+
+ flow = cur;
+ len = cur_len;
+ }
+
+ list_for_each_entry(tin, &fq->tin_backlog, tin_list) {
+ unsigned int cur_len = tin->default_flow.backlog;
- list_move(&flow->backlogchain, &i->backlogchain);
+ if (cur_len <= len)
+ continue;
+
+ flow = &tin->default_flow;
+ len = cur_len;
+ }
+
+ return flow;
}
static void fq_tin_enqueue(struct fq *fq,
struct fq_tin *tin, u32 idx,
struct sk_buff *skb,
- fq_skb_free_t free_func,
- fq_flow_get_default_t get_default_func)
+ fq_skb_free_t free_func)
{
struct fq_flow *flow;
bool oom;
lockdep_assert_held(&fq->lock);
- flow = fq_flow_classify(fq, tin, idx, skb, get_default_func);
+ flow = fq_flow_classify(fq, tin, idx, skb);
+
+ if (!flow->backlog) {
+ if (flow != &tin->default_flow)
+ __set_bit(idx, fq->flows_bitmap);
+ else if (list_empty(&tin->tin_list))
+ list_add(&tin->tin_list, &fq->tin_backlog);
+ }
flow->tin = tin;
flow->backlog += skb->len;
@@ -173,8 +220,6 @@ static void fq_tin_enqueue(struct fq *fq,
fq->memory_usage += skb->truesize;
fq->backlog++;
- fq_recalc_backlog(fq, tin, flow);
-
if (list_empty(&flow->flowchain)) {
flow->deficit = fq->quantum;
list_add_tail(&flow->flowchain,
@@ -184,18 +229,13 @@ static void fq_tin_enqueue(struct fq *fq,
__skb_queue_tail(&flow->queue, skb);
oom = (fq->memory_usage > fq->memory_limit);
while (fq->backlog > fq->limit || oom) {
- flow = list_first_entry_or_null(&fq->backlogs,
- struct fq_flow,
- backlogchain);
+ flow = fq_find_fattest_flow(fq);
if (!flow)
return;
- skb = fq_flow_dequeue(fq, flow);
- if (!skb)
+ if (!fq_flow_drop(fq, flow, free_func))
return;
- free_func(fq, flow->tin, flow, skb);
-
flow->tin->overlimit++;
fq->overlimit++;
if (oom) {
@@ -224,8 +264,6 @@ static void fq_flow_filter(struct fq *fq,
fq_adjust_removal(fq, flow, skb);
free_func(fq, tin, flow, skb);
}
-
- fq_rejigger_backlog(fq, flow);
}
static void fq_tin_filter(struct fq *fq,
@@ -248,16 +286,18 @@ static void fq_flow_reset(struct fq *fq,
struct fq_flow *flow,
fq_skb_free_t free_func)
{
+ struct fq_tin *tin = flow->tin;
struct sk_buff *skb;
while ((skb = fq_flow_dequeue(fq, flow)))
- free_func(fq, flow->tin, flow, skb);
+ free_func(fq, tin, flow, skb);
- if (!list_empty(&flow->flowchain))
+ if (!list_empty(&flow->flowchain)) {
list_del_init(&flow->flowchain);
-
- if (!list_empty(&flow->backlogchain))
- list_del_init(&flow->backlogchain);
+ if (list_empty(&tin->new_flows) &&
+ list_empty(&tin->old_flows))
+ list_del_init(&tin->tin_list);
+ }
flow->tin = NULL;
@@ -283,6 +323,7 @@ static void fq_tin_reset(struct fq *fq,
fq_flow_reset(fq, flow, free_func);
}
+ WARN_ON_ONCE(!list_empty(&tin->tin_list));
WARN_ON_ONCE(tin->backlog_bytes);
WARN_ON_ONCE(tin->backlog_packets);
}
@@ -290,7 +331,6 @@ static void fq_tin_reset(struct fq *fq,
static void fq_flow_init(struct fq_flow *flow)
{
INIT_LIST_HEAD(&flow->flowchain);
- INIT_LIST_HEAD(&flow->backlogchain);
__skb_queue_head_init(&flow->queue);
}
@@ -298,6 +338,8 @@ static void fq_tin_init(struct fq_tin *tin)
{
INIT_LIST_HEAD(&tin->new_flows);
INIT_LIST_HEAD(&tin->old_flows);
+ INIT_LIST_HEAD(&tin->tin_list);
+ fq_flow_init(&tin->default_flow);
}
static int fq_init(struct fq *fq, int flows_cnt)
@@ -305,10 +347,9 @@ static int fq_init(struct fq *fq, int flows_cnt)
int i;
memset(fq, 0, sizeof(fq[0]));
- INIT_LIST_HEAD(&fq->backlogs);
spin_lock_init(&fq->lock);
+ INIT_LIST_HEAD(&fq->tin_backlog);
fq->flows_cnt = max_t(u32, flows_cnt, 1);
- get_random_bytes(&fq->perturbation, sizeof(fq->perturbation));
fq->quantum = 300;
fq->limit = 8192;
fq->memory_limit = 16 << 20; /* 16 MBytes */
@@ -317,6 +358,13 @@ static int fq_init(struct fq *fq, int flows_cnt)
if (!fq->flows)
return -ENOMEM;
+ fq->flows_bitmap = bitmap_zalloc(fq->flows_cnt, GFP_KERNEL);
+ if (!fq->flows_bitmap) {
+ kvfree(fq->flows);
+ fq->flows = NULL;
+ return -ENOMEM;
+ }
+
for (i = 0; i < fq->flows_cnt; i++)
fq_flow_init(&fq->flows[i]);
@@ -333,6 +381,9 @@ static void fq_reset(struct fq *fq,
kvfree(fq->flows);
fq->flows = NULL;
+
+ bitmap_free(fq->flows_bitmap);
+ fq->flows_bitmap = NULL;
}
#endif
diff --git a/include/net/garp.h b/include/net/garp.h
index 4d9a0c6a2e5f..59a07b171def 100644
--- a/include/net/garp.h
+++ b/include/net/garp.h
@@ -2,6 +2,8 @@
#ifndef _NET_GARP_H
#define _NET_GARP_H
+#include <linux/if_ether.h>
+#include <linux/types.h>
#include <net/stp.h>
#define GARP_PROTOCOL_ID 0x1
diff --git a/include/net/gen_stats.h b/include/net/gen_stats.h
index 1424e02cef90..7aa2b8e1fb29 100644
--- a/include/net/gen_stats.h
+++ b/include/net/gen_stats.h
@@ -7,14 +7,17 @@
#include <linux/rtnetlink.h>
#include <linux/pkt_sched.h>
-/* Note: this used to be in include/uapi/linux/gen_stats.h */
-struct gnet_stats_basic_packed {
- __u64 bytes;
- __u64 packets;
-};
-
-struct gnet_stats_basic_cpu {
- struct gnet_stats_basic_packed bstats;
+/* Throughput stats.
+ * Must be initialized beforehand with gnet_stats_basic_sync_init().
+ *
+ * If no reads can ever occur parallel to writes (e.g. stack-allocated
+ * bstats), then the internal stat values can be written to and read
+ * from directly. Otherwise, use _bstats_set/update() for writes and
+ * gnet_stats_add_basic() for reads.
+ */
+struct gnet_stats_basic_sync {
+ u64_stats_t bytes;
+ u64_stats_t packets;
struct u64_stats_sync syncp;
} __aligned(2 * sizeof(u64));
@@ -34,6 +37,7 @@ struct gnet_dump {
struct tc_stats tc_stats;
};
+void gnet_stats_basic_sync_init(struct gnet_stats_basic_sync *b);
int gnet_stats_start_copy(struct sk_buff *skb, int type, spinlock_t *lock,
struct gnet_dump *d, int padattr);
@@ -42,41 +46,38 @@ int gnet_stats_start_copy_compat(struct sk_buff *skb, int type,
spinlock_t *lock, struct gnet_dump *d,
int padattr);
-int gnet_stats_copy_basic(const seqcount_t *running,
- struct gnet_dump *d,
- struct gnet_stats_basic_cpu __percpu *cpu,
- struct gnet_stats_basic_packed *b);
-void __gnet_stats_copy_basic(const seqcount_t *running,
- struct gnet_stats_basic_packed *bstats,
- struct gnet_stats_basic_cpu __percpu *cpu,
- struct gnet_stats_basic_packed *b);
-int gnet_stats_copy_basic_hw(const seqcount_t *running,
- struct gnet_dump *d,
- struct gnet_stats_basic_cpu __percpu *cpu,
- struct gnet_stats_basic_packed *b);
+int gnet_stats_copy_basic(struct gnet_dump *d,
+ struct gnet_stats_basic_sync __percpu *cpu,
+ struct gnet_stats_basic_sync *b, bool running);
+void gnet_stats_add_basic(struct gnet_stats_basic_sync *bstats,
+ struct gnet_stats_basic_sync __percpu *cpu,
+ struct gnet_stats_basic_sync *b, bool running);
+int gnet_stats_copy_basic_hw(struct gnet_dump *d,
+ struct gnet_stats_basic_sync __percpu *cpu,
+ struct gnet_stats_basic_sync *b, bool running);
int gnet_stats_copy_rate_est(struct gnet_dump *d,
struct net_rate_estimator __rcu **ptr);
int gnet_stats_copy_queue(struct gnet_dump *d,
struct gnet_stats_queue __percpu *cpu_q,
struct gnet_stats_queue *q, __u32 qlen);
-void __gnet_stats_copy_queue(struct gnet_stats_queue *qstats,
- const struct gnet_stats_queue __percpu *cpu_q,
- const struct gnet_stats_queue *q, __u32 qlen);
+void gnet_stats_add_queue(struct gnet_stats_queue *qstats,
+ const struct gnet_stats_queue __percpu *cpu_q,
+ const struct gnet_stats_queue *q);
int gnet_stats_copy_app(struct gnet_dump *d, void *st, int len);
int gnet_stats_finish_copy(struct gnet_dump *d);
-int gen_new_estimator(struct gnet_stats_basic_packed *bstats,
- struct gnet_stats_basic_cpu __percpu *cpu_bstats,
+int gen_new_estimator(struct gnet_stats_basic_sync *bstats,
+ struct gnet_stats_basic_sync __percpu *cpu_bstats,
struct net_rate_estimator __rcu **rate_est,
spinlock_t *lock,
- seqcount_t *running, struct nlattr *opt);
+ bool running, struct nlattr *opt);
void gen_kill_estimator(struct net_rate_estimator __rcu **ptr);
-int gen_replace_estimator(struct gnet_stats_basic_packed *bstats,
- struct gnet_stats_basic_cpu __percpu *cpu_bstats,
+int gen_replace_estimator(struct gnet_stats_basic_sync *bstats,
+ struct gnet_stats_basic_sync __percpu *cpu_bstats,
struct net_rate_estimator __rcu **ptr,
spinlock_t *lock,
- seqcount_t *running, struct nlattr *opt);
+ bool running, struct nlattr *opt);
bool gen_estimator_active(struct net_rate_estimator __rcu **ptr);
bool gen_estimator_read(struct net_rate_estimator __rcu **ptr,
struct gnet_stats_rate_est64 *sample);
diff --git a/include/net/genetlink.h b/include/net/genetlink.h
index 74950663bb00..9f97f73615b6 100644
--- a/include/net/genetlink.h
+++ b/include/net/genetlink.h
@@ -11,9 +11,11 @@
/**
* struct genl_multicast_group - generic netlink multicast group
* @name: name of the multicast group, names are per-family
+ * @flags: GENL_* flags (%GENL_ADMIN_PERM or %GENL_UNS_ADMIN_PERM)
*/
struct genl_multicast_group {
char name[GENL_NAMSIZ];
+ u8 flags;
};
struct genl_ops;
@@ -35,19 +37,25 @@ struct genl_info;
* do additional, common, filtering and return an error
* @post_doit: called after an operation's doit callback, it may
* undo operations done by pre_doit, for example release locks
- * @mcast_bind: a socket bound to the given multicast group (which
- * is given as the offset into the groups array)
- * @mcast_unbind: a socket was unbound from the given multicast group.
- * Note that unbind() will not be called symmetrically if the
- * generic netlink family is removed while there are still open
- * sockets.
- * @attrbuf: buffer to store parsed attributes (private)
+ * @module: pointer to the owning module (set to THIS_MODULE)
* @mcgrps: multicast groups used by this family
* @n_mcgrps: number of multicast groups
+ * @resv_start_op: first operation for which reserved fields of the header
+ * can be validated and policies are required (see below);
+ * new families should leave this field at zero
* @mcgrp_offset: starting number of multicast group IDs in this family
* (private)
* @ops: the operations supported by this family
* @n_ops: number of operations supported by this family
+ * @small_ops: the small-struct operations supported by this family
+ * @n_small_ops: number of small-struct operations supported by this family
+ *
+ * Attribute policies (the combination of @policy and @maxattr fields)
+ * can be attached at the family level or at the operation level.
+ * If both are present the per-operation policy takes precedence.
+ * For operations before @resv_start_op lack of policy means that the core
+ * will perform no attribute parsing or validation. For newer operations
+ * if policy is not provided core will reject all TLV attributes.
*/
struct genl_family {
int id; /* private */
@@ -55,8 +63,13 @@ struct genl_family {
char name[GENL_NAMSIZ];
unsigned int version;
unsigned int maxattr;
- bool netnsok;
- bool parallel_ops;
+ unsigned int mcgrp_offset; /* private */
+ u8 netnsok:1;
+ u8 parallel_ops:1;
+ u8 n_ops;
+ u8 n_small_ops;
+ u8 n_mcgrps;
+ u8 resv_start_op;
const struct nla_policy *policy;
int (*pre_doit)(const struct genl_ops *ops,
struct sk_buff *skb,
@@ -64,14 +77,9 @@ struct genl_family {
void (*post_doit)(const struct genl_ops *ops,
struct sk_buff *skb,
struct genl_info *info);
- int (*mcast_bind)(struct net *net, int group);
- void (*mcast_unbind)(struct net *net, int group);
- struct nlattr ** attrbuf; /* private */
const struct genl_ops * ops;
+ const struct genl_small_ops *small_ops;
const struct genl_multicast_group *mcgrps;
- unsigned int n_ops;
- unsigned int n_mcgrps;
- unsigned int mcgrp_offset; /* private */
struct module *module;
};
@@ -111,13 +119,12 @@ static inline void genl_info_net_set(struct genl_info *info, struct net *net)
#define GENL_SET_ERR_MSG(info, msg) NL_SET_ERR_MSG((info)->extack, msg)
-static inline int genl_err_attr(struct genl_info *info, int err,
- const struct nlattr *attr)
-{
- info->extack->bad_attr = attr;
-
- return err;
-}
+/* Report that a root attribute is missing */
+#define GENL_REQ_ATTR_CHECK(info, attr) ({ \
+ struct genl_info *__info = (info); \
+ \
+ NL_REQ_ATTR_CHECK(__info->extack, NULL, __info->attrs, (attr)); \
+})
enum genl_validate_flags {
GENL_DONT_VALIDATE_STRICT = BIT(0),
@@ -126,28 +133,34 @@ enum genl_validate_flags {
};
/**
- * struct genl_info - info that is available during dumpit op call
- * @family: generic netlink family - for internal genl code usage
- * @ops: generic netlink ops - for internal genl code usage
- * @attrs: netlink attributes
+ * struct genl_small_ops - generic netlink operations (small version)
+ * @cmd: command identifier
+ * @internal_flags: flags used by the family
+ * @flags: GENL_* flags (%GENL_ADMIN_PERM or %GENL_UNS_ADMIN_PERM)
+ * @validate: validation flags from enum genl_validate_flags
+ * @doit: standard command callback
+ * @dumpit: callback for dumpers
+ *
+ * This is a cut-down version of struct genl_ops for users who don't need
+ * most of the ancillary infra and want to save space.
*/
-struct genl_dumpit_info {
- const struct genl_family *family;
- const struct genl_ops *ops;
- struct nlattr **attrs;
+struct genl_small_ops {
+ int (*doit)(struct sk_buff *skb, struct genl_info *info);
+ int (*dumpit)(struct sk_buff *skb, struct netlink_callback *cb);
+ u8 cmd;
+ u8 internal_flags;
+ u8 flags;
+ u8 validate;
};
-static inline const struct genl_dumpit_info *
-genl_dumpit_info(struct netlink_callback *cb)
-{
- return cb->data;
-}
-
/**
* struct genl_ops - generic netlink operations
* @cmd: command identifier
* @internal_flags: flags used by the family
- * @flags: flags
+ * @flags: GENL_* flags (%GENL_ADMIN_PERM or %GENL_UNS_ADMIN_PERM)
+ * @maxattr: maximum number of attributes supported
+ * @policy: netlink policy (takes precedence over family policy)
+ * @validate: validation flags from enum genl_validate_flags
* @doit: standard command callback
* @start: start callback for dumps
* @dumpit: callback for dumpers
@@ -160,12 +173,32 @@ struct genl_ops {
int (*dumpit)(struct sk_buff *skb,
struct netlink_callback *cb);
int (*done)(struct netlink_callback *cb);
+ const struct nla_policy *policy;
+ unsigned int maxattr;
u8 cmd;
u8 internal_flags;
u8 flags;
u8 validate;
};
+/**
+ * struct genl_dumpit_info - info that is available during dumpit op call
+ * @family: generic netlink family - for internal genl code usage
+ * @op: generic netlink ops - for internal genl code usage
+ * @attrs: netlink attributes
+ */
+struct genl_dumpit_info {
+ const struct genl_family *family;
+ struct genl_ops op;
+ struct nlattr **attrs;
+};
+
+static inline const struct genl_dumpit_info *
+genl_dumpit_info(struct netlink_callback *cb)
+{
+ return cb->data;
+}
+
int genl_register_family(struct genl_family *family);
int genl_unregister_family(const struct genl_family *family);
void genl_notify(const struct genl_family *family, struct sk_buff *skb,
@@ -330,6 +363,7 @@ int genlmsg_multicast_allns(const struct genl_family *family,
/**
* genlmsg_unicast - unicast a netlink message
+ * @net: network namespace to look up @portid in
* @skb: netlink message as socket buffer
* @portid: netlink portid of the destination socket
*/
@@ -349,7 +383,7 @@ static inline int genlmsg_reply(struct sk_buff *skb, struct genl_info *info)
}
/**
- * gennlmsg_data - head of message payload
+ * genlmsg_data - head of message payload
* @gnlh: genetlink message header
*/
static inline void *genlmsg_data(const struct genlmsghdr *gnlh)
diff --git a/include/net/gre.h b/include/net/gre.h
index b60f212c16c6..4e209708b754 100644
--- a/include/net/gre.h
+++ b/include/net/gre.h
@@ -106,17 +106,6 @@ static inline __be16 gre_tnl_flags_to_gre_flags(__be16 tflags)
return flags;
}
-static inline __sum16 gre_checksum(struct sk_buff *skb)
-{
- __wsum csum;
-
- if (skb->ip_summed == CHECKSUM_PARTIAL)
- csum = lco_csum(skb);
- else
- csum = skb_checksum(skb, 0, skb->len, 0);
- return csum_fold(csum);
-}
-
static inline void gre_build_header(struct sk_buff *skb, int hdr_len,
__be16 flags, __be16 proto,
__be32 key, __be32 seq)
@@ -146,7 +135,13 @@ static inline void gre_build_header(struct sk_buff *skb, int hdr_len,
!(skb_shinfo(skb)->gso_type &
(SKB_GSO_GRE | SKB_GSO_GRE_CSUM))) {
*ptr = 0;
- *(__sum16 *)ptr = gre_checksum(skb);
+ if (skb->ip_summed == CHECKSUM_PARTIAL) {
+ *(__sum16 *)ptr = csum_fold(lco_csum(skb));
+ } else {
+ skb->ip_summed = CHECKSUM_PARTIAL;
+ skb->csum_start = skb_transport_header(skb) - skb->head;
+ skb->csum_offset = sizeof(*greh);
+ }
}
}
}
diff --git a/include/net/gro.h b/include/net/gro.h
new file mode 100644
index 000000000000..a4fab706240d
--- /dev/null
+++ b/include/net/gro.h
@@ -0,0 +1,450 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+
+#ifndef _NET_IPV6_GRO_H
+#define _NET_IPV6_GRO_H
+
+#include <linux/indirect_call_wrapper.h>
+#include <linux/ip.h>
+#include <linux/ipv6.h>
+#include <net/ip6_checksum.h>
+#include <linux/skbuff.h>
+#include <net/udp.h>
+
+struct napi_gro_cb {
+ /* Virtual address of skb_shinfo(skb)->frags[0].page + offset. */
+ void *frag0;
+
+ /* Length of frag0. */
+ unsigned int frag0_len;
+
+ /* This indicates where we are processing relative to skb->data. */
+ int data_offset;
+
+ /* This is non-zero if the packet cannot be merged with the new skb. */
+ u16 flush;
+
+ /* Save the IP ID here and check when we get to the transport layer */
+ u16 flush_id;
+
+ /* Number of segments aggregated. */
+ u16 count;
+
+ /* Used in ipv6_gro_receive() and foo-over-udp */
+ u16 proto;
+
+ /* jiffies when first packet was created/queued */
+ unsigned long age;
+
+/* Used in napi_gro_cb::free */
+#define NAPI_GRO_FREE 1
+#define NAPI_GRO_FREE_STOLEN_HEAD 2
+ /* portion of the cb set to zero at every gro iteration */
+ struct_group(zeroed,
+
+ /* Start offset for remote checksum offload */
+ u16 gro_remcsum_start;
+
+ /* This is non-zero if the packet may be of the same flow. */
+ u8 same_flow:1;
+
+ /* Used in tunnel GRO receive */
+ u8 encap_mark:1;
+
+ /* GRO checksum is valid */
+ u8 csum_valid:1;
+
+ /* Number of checksums via CHECKSUM_UNNECESSARY */
+ u8 csum_cnt:3;
+
+ /* Free the skb? */
+ u8 free:2;
+
+ /* Used in foo-over-udp, set in udp[46]_gro_receive */
+ u8 is_ipv6:1;
+
+ /* Used in GRE, set in fou/gue_gro_receive */
+ u8 is_fou:1;
+
+ /* Used to determine if flush_id can be ignored */
+ u8 is_atomic:1;
+
+ /* Number of gro_receive callbacks this packet already went through */
+ u8 recursion_counter:4;
+
+ /* GRO is done by frag_list pointer chaining. */
+ u8 is_flist:1;
+ );
+
+ /* used to support CHECKSUM_COMPLETE for tunneling protocols */
+ __wsum csum;
+
+ /* used in skb_gro_receive() slow path */
+ struct sk_buff *last;
+};
+
+#define NAPI_GRO_CB(skb) ((struct napi_gro_cb *)(skb)->cb)
+
+#define GRO_RECURSION_LIMIT 15
+static inline int gro_recursion_inc_test(struct sk_buff *skb)
+{
+ return ++NAPI_GRO_CB(skb)->recursion_counter == GRO_RECURSION_LIMIT;
+}
+
+typedef struct sk_buff *(*gro_receive_t)(struct list_head *, struct sk_buff *);
+static inline struct sk_buff *call_gro_receive(gro_receive_t cb,
+ struct list_head *head,
+ struct sk_buff *skb)
+{
+ if (unlikely(gro_recursion_inc_test(skb))) {
+ NAPI_GRO_CB(skb)->flush |= 1;
+ return NULL;
+ }
+
+ return cb(head, skb);
+}
+
+typedef struct sk_buff *(*gro_receive_sk_t)(struct sock *, struct list_head *,
+ struct sk_buff *);
+static inline struct sk_buff *call_gro_receive_sk(gro_receive_sk_t cb,
+ struct sock *sk,
+ struct list_head *head,
+ struct sk_buff *skb)
+{
+ if (unlikely(gro_recursion_inc_test(skb))) {
+ NAPI_GRO_CB(skb)->flush |= 1;
+ return NULL;
+ }
+
+ return cb(sk, head, skb);
+}
+
+static inline unsigned int skb_gro_offset(const struct sk_buff *skb)
+{
+ return NAPI_GRO_CB(skb)->data_offset;
+}
+
+static inline unsigned int skb_gro_len(const struct sk_buff *skb)
+{
+ return skb->len - NAPI_GRO_CB(skb)->data_offset;
+}
+
+static inline void skb_gro_pull(struct sk_buff *skb, unsigned int len)
+{
+ NAPI_GRO_CB(skb)->data_offset += len;
+}
+
+static inline void *skb_gro_header_fast(struct sk_buff *skb,
+ unsigned int offset)
+{
+ return NAPI_GRO_CB(skb)->frag0 + offset;
+}
+
+static inline int skb_gro_header_hard(struct sk_buff *skb, unsigned int hlen)
+{
+ return NAPI_GRO_CB(skb)->frag0_len < hlen;
+}
+
+static inline void skb_gro_frag0_invalidate(struct sk_buff *skb)
+{
+ NAPI_GRO_CB(skb)->frag0 = NULL;
+ NAPI_GRO_CB(skb)->frag0_len = 0;
+}
+
+static inline void *skb_gro_header_slow(struct sk_buff *skb, unsigned int hlen,
+ unsigned int offset)
+{
+ if (!pskb_may_pull(skb, hlen))
+ return NULL;
+
+ skb_gro_frag0_invalidate(skb);
+ return skb->data + offset;
+}
+
+static inline void *skb_gro_header(struct sk_buff *skb,
+ unsigned int hlen, unsigned int offset)
+{
+ void *ptr;
+
+ ptr = skb_gro_header_fast(skb, offset);
+ if (skb_gro_header_hard(skb, hlen))
+ ptr = skb_gro_header_slow(skb, hlen, offset);
+ return ptr;
+}
+
+static inline void *skb_gro_network_header(struct sk_buff *skb)
+{
+ return (NAPI_GRO_CB(skb)->frag0 ?: skb->data) +
+ skb_network_offset(skb);
+}
+
+static inline __wsum inet_gro_compute_pseudo(struct sk_buff *skb, int proto)
+{
+ const struct iphdr *iph = skb_gro_network_header(skb);
+
+ return csum_tcpudp_nofold(iph->saddr, iph->daddr,
+ skb_gro_len(skb), proto, 0);
+}
+
+static inline void skb_gro_postpull_rcsum(struct sk_buff *skb,
+ const void *start, unsigned int len)
+{
+ if (NAPI_GRO_CB(skb)->csum_valid)
+ NAPI_GRO_CB(skb)->csum = wsum_negate(csum_partial(start, len,
+ wsum_negate(NAPI_GRO_CB(skb)->csum)));
+}
+
+/* GRO checksum functions. These are logical equivalents of the normal
+ * checksum functions (in skbuff.h) except that they operate on the GRO
+ * offsets and fields in sk_buff.
+ */
+
+__sum16 __skb_gro_checksum_complete(struct sk_buff *skb);
+
+static inline bool skb_at_gro_remcsum_start(struct sk_buff *skb)
+{
+ return (NAPI_GRO_CB(skb)->gro_remcsum_start == skb_gro_offset(skb));
+}
+
+static inline bool __skb_gro_checksum_validate_needed(struct sk_buff *skb,
+ bool zero_okay,
+ __sum16 check)
+{
+ return ((skb->ip_summed != CHECKSUM_PARTIAL ||
+ skb_checksum_start_offset(skb) <
+ skb_gro_offset(skb)) &&
+ !skb_at_gro_remcsum_start(skb) &&
+ NAPI_GRO_CB(skb)->csum_cnt == 0 &&
+ (!zero_okay || check));
+}
+
+static inline __sum16 __skb_gro_checksum_validate_complete(struct sk_buff *skb,
+ __wsum psum)
+{
+ if (NAPI_GRO_CB(skb)->csum_valid &&
+ !csum_fold(csum_add(psum, NAPI_GRO_CB(skb)->csum)))
+ return 0;
+
+ NAPI_GRO_CB(skb)->csum = psum;
+
+ return __skb_gro_checksum_complete(skb);
+}
+
+static inline void skb_gro_incr_csum_unnecessary(struct sk_buff *skb)
+{
+ if (NAPI_GRO_CB(skb)->csum_cnt > 0) {
+ /* Consume a checksum from CHECKSUM_UNNECESSARY */
+ NAPI_GRO_CB(skb)->csum_cnt--;
+ } else {
+ /* Update skb for CHECKSUM_UNNECESSARY and csum_level when we
+ * verified a new top level checksum or an encapsulated one
+ * during GRO. This saves work if we fallback to normal path.
+ */
+ __skb_incr_checksum_unnecessary(skb);
+ }
+}
+
+#define __skb_gro_checksum_validate(skb, proto, zero_okay, check, \
+ compute_pseudo) \
+({ \
+ __sum16 __ret = 0; \
+ if (__skb_gro_checksum_validate_needed(skb, zero_okay, check)) \
+ __ret = __skb_gro_checksum_validate_complete(skb, \
+ compute_pseudo(skb, proto)); \
+ if (!__ret) \
+ skb_gro_incr_csum_unnecessary(skb); \
+ __ret; \
+})
+
+#define skb_gro_checksum_validate(skb, proto, compute_pseudo) \
+ __skb_gro_checksum_validate(skb, proto, false, 0, compute_pseudo)
+
+#define skb_gro_checksum_validate_zero_check(skb, proto, check, \
+ compute_pseudo) \
+ __skb_gro_checksum_validate(skb, proto, true, check, compute_pseudo)
+
+#define skb_gro_checksum_simple_validate(skb) \
+ __skb_gro_checksum_validate(skb, 0, false, 0, null_compute_pseudo)
+
+static inline bool __skb_gro_checksum_convert_check(struct sk_buff *skb)
+{
+ return (NAPI_GRO_CB(skb)->csum_cnt == 0 &&
+ !NAPI_GRO_CB(skb)->csum_valid);
+}
+
+static inline void __skb_gro_checksum_convert(struct sk_buff *skb,
+ __wsum pseudo)
+{
+ NAPI_GRO_CB(skb)->csum = ~pseudo;
+ NAPI_GRO_CB(skb)->csum_valid = 1;
+}
+
+#define skb_gro_checksum_try_convert(skb, proto, compute_pseudo) \
+do { \
+ if (__skb_gro_checksum_convert_check(skb)) \
+ __skb_gro_checksum_convert(skb, \
+ compute_pseudo(skb, proto)); \
+} while (0)
+
+struct gro_remcsum {
+ int offset;
+ __wsum delta;
+};
+
+static inline void skb_gro_remcsum_init(struct gro_remcsum *grc)
+{
+ grc->offset = 0;
+ grc->delta = 0;
+}
+
+static inline void *skb_gro_remcsum_process(struct sk_buff *skb, void *ptr,
+ unsigned int off, size_t hdrlen,
+ int start, int offset,
+ struct gro_remcsum *grc,
+ bool nopartial)
+{
+ __wsum delta;
+ size_t plen = hdrlen + max_t(size_t, offset + sizeof(u16), start);
+
+ BUG_ON(!NAPI_GRO_CB(skb)->csum_valid);
+
+ if (!nopartial) {
+ NAPI_GRO_CB(skb)->gro_remcsum_start = off + hdrlen + start;
+ return ptr;
+ }
+
+ ptr = skb_gro_header(skb, off + plen, off);
+ if (!ptr)
+ return NULL;
+
+ delta = remcsum_adjust(ptr + hdrlen, NAPI_GRO_CB(skb)->csum,
+ start, offset);
+
+ /* Adjust skb->csum since we changed the packet */
+ NAPI_GRO_CB(skb)->csum = csum_add(NAPI_GRO_CB(skb)->csum, delta);
+
+ grc->offset = off + hdrlen + offset;
+ grc->delta = delta;
+
+ return ptr;
+}
+
+static inline void skb_gro_remcsum_cleanup(struct sk_buff *skb,
+ struct gro_remcsum *grc)
+{
+ void *ptr;
+ size_t plen = grc->offset + sizeof(u16);
+
+ if (!grc->delta)
+ return;
+
+ ptr = skb_gro_header(skb, plen, grc->offset);
+ if (!ptr)
+ return;
+
+ remcsum_unadjust((__sum16 *)ptr, grc->delta);
+}
+
+#ifdef CONFIG_XFRM_OFFLOAD
+static inline void skb_gro_flush_final(struct sk_buff *skb, struct sk_buff *pp, int flush)
+{
+ if (PTR_ERR(pp) != -EINPROGRESS)
+ NAPI_GRO_CB(skb)->flush |= flush;
+}
+static inline void skb_gro_flush_final_remcsum(struct sk_buff *skb,
+ struct sk_buff *pp,
+ int flush,
+ struct gro_remcsum *grc)
+{
+ if (PTR_ERR(pp) != -EINPROGRESS) {
+ NAPI_GRO_CB(skb)->flush |= flush;
+ skb_gro_remcsum_cleanup(skb, grc);
+ skb->remcsum_offload = 0;
+ }
+}
+#else
+static inline void skb_gro_flush_final(struct sk_buff *skb, struct sk_buff *pp, int flush)
+{
+ NAPI_GRO_CB(skb)->flush |= flush;
+}
+static inline void skb_gro_flush_final_remcsum(struct sk_buff *skb,
+ struct sk_buff *pp,
+ int flush,
+ struct gro_remcsum *grc)
+{
+ NAPI_GRO_CB(skb)->flush |= flush;
+ skb_gro_remcsum_cleanup(skb, grc);
+ skb->remcsum_offload = 0;
+}
+#endif
+
+INDIRECT_CALLABLE_DECLARE(struct sk_buff *ipv6_gro_receive(struct list_head *,
+ struct sk_buff *));
+INDIRECT_CALLABLE_DECLARE(int ipv6_gro_complete(struct sk_buff *, int));
+INDIRECT_CALLABLE_DECLARE(struct sk_buff *inet_gro_receive(struct list_head *,
+ struct sk_buff *));
+INDIRECT_CALLABLE_DECLARE(int inet_gro_complete(struct sk_buff *, int));
+
+INDIRECT_CALLABLE_DECLARE(struct sk_buff *udp4_gro_receive(struct list_head *,
+ struct sk_buff *));
+INDIRECT_CALLABLE_DECLARE(int udp4_gro_complete(struct sk_buff *, int));
+
+INDIRECT_CALLABLE_DECLARE(struct sk_buff *udp6_gro_receive(struct list_head *,
+ struct sk_buff *));
+INDIRECT_CALLABLE_DECLARE(int udp6_gro_complete(struct sk_buff *, int));
+
+#define indirect_call_gro_receive_inet(cb, f2, f1, head, skb) \
+({ \
+ unlikely(gro_recursion_inc_test(skb)) ? \
+ NAPI_GRO_CB(skb)->flush |= 1, NULL : \
+ INDIRECT_CALL_INET(cb, f2, f1, head, skb); \
+})
+
+struct sk_buff *udp_gro_receive(struct list_head *head, struct sk_buff *skb,
+ struct udphdr *uh, struct sock *sk);
+int udp_gro_complete(struct sk_buff *skb, int nhoff, udp_lookup_t lookup);
+
+static inline struct udphdr *udp_gro_udphdr(struct sk_buff *skb)
+{
+ struct udphdr *uh;
+ unsigned int hlen, off;
+
+ off = skb_gro_offset(skb);
+ hlen = off + sizeof(*uh);
+ uh = skb_gro_header(skb, hlen, off);
+
+ return uh;
+}
+
+static inline __wsum ip6_gro_compute_pseudo(struct sk_buff *skb, int proto)
+{
+ const struct ipv6hdr *iph = skb_gro_network_header(skb);
+
+ return ~csum_unfold(csum_ipv6_magic(&iph->saddr, &iph->daddr,
+ skb_gro_len(skb), proto, 0));
+}
+
+int skb_gro_receive(struct sk_buff *p, struct sk_buff *skb);
+
+/* Pass the currently batched GRO_NORMAL SKBs up to the stack. */
+static inline void gro_normal_list(struct napi_struct *napi)
+{
+ if (!napi->rx_count)
+ return;
+ netif_receive_skb_list_internal(&napi->rx_list);
+ INIT_LIST_HEAD(&napi->rx_list);
+ napi->rx_count = 0;
+}
+
+/* Queue one GRO_NORMAL SKB up for list processing. If batch size exceeded,
+ * pass the whole batch up to the stack.
+ */
+static inline void gro_normal_one(struct napi_struct *napi, struct sk_buff *skb, int segs)
+{
+ list_add_tail(&skb->list, &napi->rx_list);
+ napi->rx_count += segs;
+ if (napi->rx_count >= READ_ONCE(gro_normal_batch))
+ gro_normal_list(napi);
+}
+
+
+#endif /* _NET_IPV6_GRO_H */
diff --git a/include/net/gtp.h b/include/net/gtp.h
index 0e16ebb2a82d..2a503f035d18 100644
--- a/include/net/gtp.h
+++ b/include/net/gtp.h
@@ -2,13 +2,22 @@
#ifndef _GTP_H_
#define _GTP_H_
+#include <linux/netdevice.h>
+#include <linux/types.h>
+#include <net/rtnetlink.h>
+
/* General GTP protocol related definitions. */
#define GTP0_PORT 3386
#define GTP1U_PORT 2152
+/* GTP messages types */
+#define GTP_ECHO_REQ 1 /* Echo Request */
+#define GTP_ECHO_RSP 2 /* Echo Response */
#define GTP_TPDU 255
+#define GTPIE_RECOVERY 14
+
struct gtp0_header { /* According to GSM TS 09.60. */
__u8 flags;
__u8 type;
@@ -27,6 +36,43 @@ struct gtp1_header { /* According to 3GPP TS 29.060. */
__be32 tid;
} __attribute__ ((packed));
+struct gtp1_header_long { /* According to 3GPP TS 29.060. */
+ __u8 flags;
+ __u8 type;
+ __be16 length;
+ __be32 tid;
+ __be16 seq;
+ __u8 npdu;
+ __u8 next;
+} __packed;
+
+/* GTP Information Element */
+struct gtp_ie {
+ __u8 tag;
+ __u8 val;
+} __packed;
+
+struct gtp0_packet {
+ struct gtp0_header gtp0_h;
+ struct gtp_ie ie;
+} __packed;
+
+struct gtp1u_packet {
+ struct gtp1_header_long gtp1u_h;
+ struct gtp_ie ie;
+} __packed;
+
+struct gtp_pdu_session_info { /* According to 3GPP TS 38.415. */
+ u8 pdu_type;
+ u8 qfi;
+};
+
+static inline bool netif_is_gtp(const struct net_device *dev)
+{
+ return dev->rtnl_link_ops &&
+ !strcmp(dev->rtnl_link_ops->kind, "gtp");
+}
+
#define GTP1_F_NPDU 0x01
#define GTP1_F_SEQ 0x02
#define GTP1_F_EXTHDR 0x04
diff --git a/include/net/gue.h b/include/net/gue.h
index 3a6595bfa641..dfca298bec9c 100644
--- a/include/net/gue.h
+++ b/include/net/gue.h
@@ -21,7 +21,7 @@
* | |
* +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
*
- * C bit indicates contol message when set, data message when unset.
+ * C bit indicates control message when set, data message when unset.
* For a control message, proto/ctype is interpreted as a type of
* control message. For data messages, proto/ctype is the IP protocol
* of the next header.
@@ -30,6 +30,9 @@
* may refer to options placed after this field.
*/
+#include <asm/byteorder.h>
+#include <linux/types.h>
+
struct guehdr {
union {
struct {
diff --git a/include/net/hwbm.h b/include/net/hwbm.h
index c81444611a22..aa495decec35 100644
--- a/include/net/hwbm.h
+++ b/include/net/hwbm.h
@@ -2,6 +2,8 @@
#ifndef _HWBM_H
#define _HWBM_H
+#include <linux/mutex.h>
+
struct hwbm_pool {
/* Capacity of the pool */
int size;
diff --git a/include/net/icmp.h b/include/net/icmp.h
index 9ac2d2672a93..caddf4a59ad1 100644
--- a/include/net/icmp.h
+++ b/include/net/icmp.h
@@ -46,12 +46,17 @@ static inline void icmp_send(struct sk_buff *skb_in, int type, int code, __be32
#if IS_ENABLED(CONFIG_NF_NAT)
void icmp_ndo_send(struct sk_buff *skb_in, int type, int code, __be32 info);
#else
-#define icmp_ndo_send icmp_send
+static inline void icmp_ndo_send(struct sk_buff *skb_in, int type, int code, __be32 info)
+{
+ struct ip_options opts = { 0 };
+ __icmp_send(skb_in, type, code, info, &opts);
+}
#endif
int icmp_rcv(struct sk_buff *skb);
int icmp_err(struct sk_buff *skb, u32 info);
int icmp_init(void);
void icmp_out_count(struct net *net, unsigned char type);
+bool icmp_build_probe(struct sk_buff *skb, struct icmphdr *icmphdr);
#endif /* _ICMP_H */
diff --git a/include/net/ieee80211_radiotap.h b/include/net/ieee80211_radiotap.h
index 459d355f6506..598f53d2a3a0 100644
--- a/include/net/ieee80211_radiotap.h
+++ b/include/net/ieee80211_radiotap.h
@@ -1,6 +1,6 @@
/*
* Copyright (c) 2017 Intel Deutschland GmbH
- * Copyright (c) 2018-2019 Intel Corporation
+ * Copyright (c) 2018-2019, 2021 Intel Corporation
*
* Permission to use, copy, modify, and/or distribute this software for any
* purpose with or without fee is hereby granted, provided that the above
@@ -43,6 +43,11 @@ struct ieee80211_radiotap_header {
* @it_present: (first) present word
*/
__le32 it_present;
+
+ /**
+ * @it_optional: all remaining presence bitmaps
+ */
+ __le32 it_optional[];
} __packed;
/* version is always 0 */
@@ -117,6 +122,8 @@ enum ieee80211_radiotap_tx_flags {
IEEE80211_RADIOTAP_F_TX_CTS = 0x0002,
IEEE80211_RADIOTAP_F_TX_RTS = 0x0004,
IEEE80211_RADIOTAP_F_TX_NOACK = 0x0008,
+ IEEE80211_RADIOTAP_F_TX_NOSEQNO = 0x0010,
+ IEEE80211_RADIOTAP_F_TX_ORDER = 0x0020,
};
/* for IEEE80211_RADIOTAP_MCS "have" flags */
@@ -358,7 +365,7 @@ enum ieee80211_radiotap_zero_len_psdu_type {
*/
static inline u16 ieee80211_get_radiotap_len(const char *data)
{
- struct ieee80211_radiotap_header *hdr = (void *)data;
+ const struct ieee80211_radiotap_header *hdr = (const void *)data;
return get_unaligned_le16(&hdr->it_len);
}
diff --git a/include/net/ieee802154_netdev.h b/include/net/ieee802154_netdev.h
index d0d188c3294b..03b64bf876a4 100644
--- a/include/net/ieee802154_netdev.h
+++ b/include/net/ieee802154_netdev.h
@@ -15,6 +15,22 @@
#ifndef IEEE802154_NETDEVICE_H
#define IEEE802154_NETDEVICE_H
+#define IEEE802154_REQUIRED_SIZE(struct_type, member) \
+ (offsetof(typeof(struct_type), member) + \
+ sizeof(((typeof(struct_type) *)(NULL))->member))
+
+#define IEEE802154_ADDR_OFFSET \
+ offsetof(typeof(struct sockaddr_ieee802154), addr)
+
+#define IEEE802154_MIN_NAMELEN (IEEE802154_ADDR_OFFSET + \
+ IEEE802154_REQUIRED_SIZE(struct ieee802154_addr_sa, addr_type))
+
+#define IEEE802154_NAMELEN_SHORT (IEEE802154_ADDR_OFFSET + \
+ IEEE802154_REQUIRED_SIZE(struct ieee802154_addr_sa, short_addr))
+
+#define IEEE802154_NAMELEN_LONG (IEEE802154_ADDR_OFFSET + \
+ IEEE802154_REQUIRED_SIZE(struct ieee802154_addr_sa, hwaddr))
+
#include <net/af_ieee802154.h>
#include <linux/netdevice.h>
#include <linux/skbuff.h>
@@ -165,6 +181,33 @@ static inline void ieee802154_devaddr_to_raw(void *raw, __le64 addr)
memcpy(raw, &temp, IEEE802154_ADDR_LEN);
}
+static inline int
+ieee802154_sockaddr_check_size(struct sockaddr_ieee802154 *daddr, int len)
+{
+ struct ieee802154_addr_sa *sa;
+ int ret = 0;
+
+ sa = &daddr->addr;
+ if (len < IEEE802154_MIN_NAMELEN)
+ return -EINVAL;
+ switch (sa->addr_type) {
+ case IEEE802154_ADDR_NONE:
+ break;
+ case IEEE802154_ADDR_SHORT:
+ if (len < IEEE802154_NAMELEN_SHORT)
+ ret = -EINVAL;
+ break;
+ case IEEE802154_ADDR_LONG:
+ if (len < IEEE802154_NAMELEN_LONG)
+ ret = -EINVAL;
+ break;
+ default:
+ ret = -EINVAL;
+ break;
+ }
+ return ret;
+}
+
static inline void ieee802154_addr_from_sa(struct ieee802154_addr *a,
const struct ieee802154_addr_sa *sa)
{
diff --git a/include/net/if_inet6.h b/include/net/if_inet6.h
index a01981d7108f..c8490729b4ae 100644
--- a/include/net/if_inet6.h
+++ b/include/net/if_inet6.h
@@ -64,6 +64,14 @@ struct inet6_ifaddr {
struct hlist_node addr_lst;
struct list_head if_list;
+ /*
+ * Used to safely traverse idev->addr_list in process context
+ * if the idev->lock needed to protect idev->addr_list cannot be held.
+ * In that case, add the items to this list temporarily and iterate
+ * without holding idev->lock.
+ * See addrconf_ifdown and dev_forward_change.
+ */
+ struct list_head if_list_aux;
struct list_head tmp_list;
struct inet6_ifaddr *ifpub;
@@ -71,6 +79,8 @@ struct inet6_ifaddr {
bool tokenized;
+ u8 ifa_proto;
+
struct rcu_head rcu;
struct in6_addr peer_addr;
};
@@ -78,12 +88,10 @@ struct inet6_ifaddr {
struct ip6_sf_socklist {
unsigned int sl_max;
unsigned int sl_count;
- struct in6_addr sl_addr[0];
+ struct rcu_head rcu;
+ struct in6_addr sl_addr[];
};
-#define IP6_SFLSIZE(count) (sizeof(struct ip6_sf_socklist) + \
- (count) * sizeof(struct in6_addr))
-
#define IP6_SFBLOCK 10 /* allocate this many at once */
struct ipv6_mc_socklist {
@@ -91,18 +99,18 @@ struct ipv6_mc_socklist {
int ifindex;
unsigned int sfmode; /* MCAST_{INCLUDE,EXCLUDE} */
struct ipv6_mc_socklist __rcu *next;
- rwlock_t sflock;
- struct ip6_sf_socklist *sflist;
+ struct ip6_sf_socklist __rcu *sflist;
struct rcu_head rcu;
};
struct ip6_sf_list {
- struct ip6_sf_list *sf_next;
+ struct ip6_sf_list __rcu *sf_next;
struct in6_addr sf_addr;
unsigned long sf_count[2]; /* include/exclude counts */
unsigned char sf_gsresp; /* include in g & s response? */
unsigned char sf_oldin; /* change state */
unsigned char sf_crcount; /* retrans. left to send */
+ struct rcu_head rcu;
};
#define MAF_TIMER_RUNNING 0x01
@@ -114,19 +122,19 @@ struct ip6_sf_list {
struct ifmcaddr6 {
struct in6_addr mca_addr;
struct inet6_dev *idev;
- struct ifmcaddr6 *next;
- struct ip6_sf_list *mca_sources;
- struct ip6_sf_list *mca_tomb;
+ struct ifmcaddr6 __rcu *next;
+ struct ip6_sf_list __rcu *mca_sources;
+ struct ip6_sf_list __rcu *mca_tomb;
unsigned int mca_sfmode;
unsigned char mca_crcount;
unsigned long mca_sfcount[2];
- struct timer_list mca_timer;
+ struct delayed_work mca_work;
unsigned int mca_flags;
int mca_users;
refcount_t mca_refcnt;
- spinlock_t mca_lock;
unsigned long mca_cstamp;
unsigned long mca_tstamp;
+ struct rcu_head rcu;
};
/* Anycast stuff */
@@ -162,12 +170,12 @@ struct ipv6_devstat {
struct inet6_dev {
struct net_device *dev;
+ netdevice_tracker dev_tracker;
struct list_head addr_list;
- struct ifmcaddr6 *mc_list;
- struct ifmcaddr6 *mc_tomb;
- spinlock_t mc_lock;
+ struct ifmcaddr6 __rcu *mc_list;
+ struct ifmcaddr6 __rcu *mc_tomb;
unsigned char mc_qrv; /* Query Robustness Variable */
unsigned char mc_gq_running;
@@ -179,9 +187,18 @@ struct inet6_dev {
unsigned long mc_qri; /* Query Response Interval */
unsigned long mc_maxdelay;
- struct timer_list mc_gq_timer; /* general query timer */
- struct timer_list mc_ifc_timer; /* interface change timer */
- struct timer_list mc_dad_timer; /* dad complete mc timer */
+ struct delayed_work mc_gq_work; /* general query work */
+ struct delayed_work mc_ifc_work; /* interface change work */
+ struct delayed_work mc_dad_work; /* dad complete mc work */
+ struct delayed_work mc_query_work; /* mld query work */
+ struct delayed_work mc_report_work; /* mld report work */
+
+ struct sk_buff_head mc_query_queue; /* mld query queue */
+ struct sk_buff_head mc_report_queue; /* mld report queue */
+
+ spinlock_t mc_query_lock; /* mld query queue lock */
+ spinlock_t mc_report_lock; /* mld query report lock */
+ struct mutex mc_lock; /* mld global lock */
struct ifacaddr6 *ac_list;
rwlock_t lock;
@@ -190,7 +207,6 @@ struct inet6_dev {
int dead;
u32 desync_factor;
- u8 rndid[8];
struct list_head tempaddr_list;
struct in6_addr token;
@@ -205,6 +221,8 @@ struct inet6_dev {
unsigned long tstamp; /* ipv6InterfaceTable update timestamp */
struct rcu_head rcu;
+
+ unsigned int ra_mtu;
};
static inline void ipv6_eth_mc_map(const struct in6_addr *addr, char *buf)
diff --git a/include/net/ila.h b/include/net/ila.h
index f98dcd5791b0..73ebe5eab272 100644
--- a/include/net/ila.h
+++ b/include/net/ila.h
@@ -8,6 +8,8 @@
#ifndef _NET_ILA_H
#define _NET_ILA_H
+struct sk_buff;
+
int ila_xlat_outgoing(struct sk_buff *skb);
int ila_xlat_incoming(struct sk_buff *skb);
diff --git a/include/net/inet6_connection_sock.h b/include/net/inet6_connection_sock.h
index 7392f959a405..025bd8d3c769 100644
--- a/include/net/inet6_connection_sock.h
+++ b/include/net/inet6_connection_sock.h
@@ -11,6 +11,8 @@
#include <linux/types.h>
+struct flowi;
+struct flowi6;
struct request_sock;
struct sk_buff;
struct sock;
diff --git a/include/net/inet6_hashtables.h b/include/net/inet6_hashtables.h
index fe96bf247aac..56f1286583d3 100644
--- a/include/net/inet6_hashtables.h
+++ b/include/net/inet6_hashtables.h
@@ -85,9 +85,8 @@ static inline struct sock *__inet6_lookup_skb(struct inet_hashinfo *hashinfo,
int iif, int sdif,
bool *refcounted)
{
- struct sock *sk = skb_steal_sock(skb);
+ struct sock *sk = skb_steal_sock(skb, refcounted);
- *refcounted = true;
if (sk)
return sk;
@@ -104,15 +103,24 @@ struct sock *inet6_lookup(struct net *net, struct inet_hashinfo *hashinfo,
const int dif);
int inet6_hash(struct sock *sk);
-#endif /* IS_ENABLED(CONFIG_IPV6) */
-#define INET6_MATCH(__sk, __net, __saddr, __daddr, __ports, __dif, __sdif) \
- (((__sk)->sk_portpair == (__ports)) && \
- ((__sk)->sk_family == AF_INET6) && \
- ipv6_addr_equal(&(__sk)->sk_v6_daddr, (__saddr)) && \
- ipv6_addr_equal(&(__sk)->sk_v6_rcv_saddr, (__daddr)) && \
- (((__sk)->sk_bound_dev_if == (__dif)) || \
- ((__sk)->sk_bound_dev_if == (__sdif))) && \
- net_eq(sock_net(__sk), (__net)))
+static inline bool inet6_match(struct net *net, const struct sock *sk,
+ const struct in6_addr *saddr,
+ const struct in6_addr *daddr,
+ const __portpair ports,
+ const int dif, const int sdif)
+{
+ if (!net_eq(sock_net(sk), net) ||
+ sk->sk_family != AF_INET6 ||
+ sk->sk_portpair != ports ||
+ !ipv6_addr_equal(&sk->sk_v6_daddr, saddr) ||
+ !ipv6_addr_equal(&sk->sk_v6_rcv_saddr, daddr))
+ return false;
+
+ /* READ_ONCE() paired with WRITE_ONCE() in sock_bindtoindex_locked() */
+ return inet_sk_bound_dev_eq(net, READ_ONCE(sk->sk_bound_dev_if), dif,
+ sdif);
+}
+#endif /* IS_ENABLED(CONFIG_IPV6) */
#endif /* _INET6_HASHTABLES_H */
diff --git a/include/net/inet_common.h b/include/net/inet_common.h
index ae2ba897675c..cec453c18f1d 100644
--- a/include/net/inet_common.h
+++ b/include/net/inet_common.h
@@ -3,6 +3,10 @@
#define _INET_COMMON_H
#include <linux/indirect_call_wrapper.h>
+#include <linux/net.h>
+#include <linux/netdev_features.h>
+#include <linux/types.h>
+#include <net/sock.h>
extern const struct proto_ops inet_stream_ops;
extern const struct proto_ops inet_dgram_ops;
@@ -12,6 +16,8 @@ extern const struct proto_ops inet_dgram_ops;
*/
struct msghdr;
+struct net;
+struct page;
struct sock;
struct sockaddr;
struct socket;
@@ -35,8 +41,16 @@ int inet_shutdown(struct socket *sock, int how);
int inet_listen(struct socket *sock, int backlog);
void inet_sock_destruct(struct sock *sk);
int inet_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len);
+/* Don't allocate port at this moment, defer to connect. */
+#define BIND_FORCE_ADDRESS_NO_PORT (1 << 0)
+/* Grab and release socket lock. */
+#define BIND_WITH_LOCK (1 << 1)
+/* Called from BPF program. */
+#define BIND_FROM_BPF (1 << 2)
+/* Skip CAP_NET_BIND_SERVICE check. */
+#define BIND_NO_CAP_NET_BIND_SERVICE (1 << 3)
int __inet_bind(struct sock *sk, struct sockaddr *uaddr, int addr_len,
- bool force_bind_address_no_port, bool with_lock);
+ u32 flags);
int inet_getname(struct socket *sock, struct sockaddr *uaddr,
int peer);
int inet_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg);
diff --git a/include/net/inet_connection_sock.h b/include/net/inet_connection_sock.h
index 895546058a20..c2b15f7e5516 100644
--- a/include/net/inet_connection_sock.h
+++ b/include/net/inet_connection_sock.h
@@ -16,6 +16,7 @@
#include <linux/timer.h>
#include <linux/poll.h>
#include <linux/kernel.h>
+#include <linux/sockptr.h>
#include <net/inet_sock.h>
#include <net/request_sock.h>
@@ -24,6 +25,7 @@
#undef INET_CSK_CLEAR_TIMERS
struct inet_bind_bucket;
+struct inet_bind2_bucket;
struct tcp_congestion_ops;
/*
@@ -45,17 +47,9 @@ struct inet_connection_sock_af_ops {
u16 net_frag_header_len;
u16 sockaddr_len;
int (*setsockopt)(struct sock *sk, int level, int optname,
- char __user *optval, unsigned int optlen);
+ sockptr_t optval, unsigned int optlen);
int (*getsockopt)(struct sock *sk, int level, int optname,
char __user *optval, int __user *optlen);
-#ifdef CONFIG_COMPAT
- int (*compat_setsockopt)(struct sock *sk,
- int level, int optname,
- char __user *optval, unsigned int optlen);
- int (*compat_getsockopt)(struct sock *sk,
- int level, int optname,
- char __user *optval, int __user *optlen);
-#endif
void (*addr2sockaddr)(struct sock *sk, struct sockaddr *);
void (*mtu_reduced)(struct sock *sk);
};
@@ -64,6 +58,7 @@ struct inet_connection_sock_af_ops {
*
* @icsk_accept_queue: FIFO of established children
* @icsk_bind_hash: Bind node
+ * @icsk_bind2_hash: Bind node in the bhash2 table
* @icsk_timeout: Timeout
* @icsk_retransmit_timer: Resend (no ack)
* @icsk_rto: Retransmit timeout
@@ -73,7 +68,6 @@ struct inet_connection_sock_af_ops {
* @icsk_ulp_ops Pluggable ULP control hook
* @icsk_ulp_data ULP private data
* @icsk_clean_acked Clean acked data hook
- * @icsk_listen_portaddr_node hash to the portaddr listener hashtable
* @icsk_ca_state: Congestion control state
* @icsk_retransmits: Number of unrecovered [RTO] timeouts
* @icsk_pending: Scheduled timer event
@@ -83,25 +77,30 @@ struct inet_connection_sock_af_ops {
* @icsk_ext_hdr_len: Network protocol overhead (IP/IPv6 options)
* @icsk_ack: Delayed ACK control data
* @icsk_mtup; MTU probing control data
+ * @icsk_probes_tstamp: Probe timestamp (cleared by non-zero window ack)
+ * @icsk_user_timeout: TCP_USER_TIMEOUT value
*/
struct inet_connection_sock {
/* inet_sock has to be the first member! */
struct inet_sock icsk_inet;
struct request_sock_queue icsk_accept_queue;
struct inet_bind_bucket *icsk_bind_hash;
+ struct inet_bind2_bucket *icsk_bind2_hash;
unsigned long icsk_timeout;
struct timer_list icsk_retransmit_timer;
struct timer_list icsk_delack_timer;
__u32 icsk_rto;
+ __u32 icsk_rto_min;
+ __u32 icsk_delack_max;
__u32 icsk_pmtu_cookie;
const struct tcp_congestion_ops *icsk_ca_ops;
const struct inet_connection_sock_af_ops *icsk_af_ops;
const struct tcp_ulp_ops *icsk_ulp_ops;
void __rcu *icsk_ulp_data;
void (*icsk_clean_acked)(struct sock *sk, u32 acked_seq);
- struct hlist_node icsk_listen_portaddr_node;
unsigned int (*icsk_sync_mss)(struct sock *sk, u32 pmtu);
- __u8 icsk_ca_state:6,
+ __u8 icsk_ca_state:5,
+ icsk_ca_initialized:1,
icsk_ca_setsockopt:1,
icsk_ca_dst_locked:1;
__u8 icsk_retransmits;
@@ -114,7 +113,7 @@ struct inet_connection_sock {
__u8 pending; /* ACK is pending */
__u8 quick; /* Scheduled number of quick acks */
__u8 pingpong; /* The session is interactive */
- __u8 blocked; /* Delayed ACK was blocked by socket lock */
+ __u8 retry; /* Number of attempts */
__u32 ato; /* Predicted tick of soft clock */
unsigned long timeout; /* Currently scheduled timeout */
__u32 lrcvtime; /* timestamp of last received data packet */
@@ -122,27 +121,27 @@ struct inet_connection_sock {
__u16 rcv_mss; /* MSS used for delayed ACK decisions */
} icsk_ack;
struct {
- int enabled;
-
/* Range of MTUs to search */
int search_high;
int search_low;
/* Information on the current probe. */
- int probe_size;
+ u32 probe_size:31,
+ /* Is the MTUP feature enabled for this connection? */
+ enabled:1;
u32 probe_timestamp;
} icsk_mtup;
+ u32 icsk_probes_tstamp;
u32 icsk_user_timeout;
u64 icsk_ca_priv[104 / sizeof(u64)];
-#define ICSK_CA_PRIV_SIZE (13 * sizeof(u64))
+#define ICSK_CA_PRIV_SIZE sizeof_field(struct inet_connection_sock, icsk_ca_priv)
};
#define ICSK_TIME_RETRANS 1 /* Retransmit timer */
#define ICSK_TIME_DACK 2 /* Delayed ack timer */
#define ICSK_TIME_PROBE0 3 /* Zero window probe timer */
-#define ICSK_TIME_EARLY_RETRANS 4 /* Early retransmit timer */
#define ICSK_TIME_LOSS_PROBE 5 /* Tail loss probe timer */
#define ICSK_TIME_REO_TIMEOUT 6 /* Reordering timer */
@@ -202,7 +201,8 @@ static inline void inet_csk_clear_xmit_timer(struct sock *sk, const int what)
sk_stop_timer(sk, &icsk->icsk_retransmit_timer);
#endif
} else if (what == ICSK_TIME_DACK) {
- icsk->icsk_ack.blocked = icsk->icsk_ack.pending = 0;
+ icsk->icsk_ack.pending = 0;
+ icsk->icsk_ack.retry = 0;
#ifdef INET_CSK_CLEAR_TIMERS
sk_stop_timer(sk, &icsk->icsk_delack_timer);
#endif
@@ -227,8 +227,7 @@ static inline void inet_csk_reset_xmit_timer(struct sock *sk, const int what,
}
if (what == ICSK_TIME_RETRANS || what == ICSK_TIME_PROBE0 ||
- what == ICSK_TIME_EARLY_RETRANS || what == ICSK_TIME_LOSS_PROBE ||
- what == ICSK_TIME_REO_TIMEOUT) {
+ what == ICSK_TIME_LOSS_PROBE || what == ICSK_TIME_REO_TIMEOUT) {
icsk->icsk_pending = what;
icsk->icsk_timeout = jiffies + when;
sk_reset_timer(sk, &icsk->icsk_retransmit_timer, icsk->icsk_timeout);
@@ -284,9 +283,24 @@ static inline int inet_csk_reqsk_queue_is_full(const struct sock *sk)
return inet_csk_reqsk_queue_len(sk) >= sk->sk_max_ack_backlog;
}
-void inet_csk_reqsk_queue_drop(struct sock *sk, struct request_sock *req);
+bool inet_csk_reqsk_queue_drop(struct sock *sk, struct request_sock *req);
void inet_csk_reqsk_queue_drop_and_put(struct sock *sk, struct request_sock *req);
+static inline unsigned long
+reqsk_timeout(struct request_sock *req, unsigned long max_timeout)
+{
+ u64 timeout = (u64)req->timeout << req->num_timeout;
+
+ return (unsigned long)min_t(u64, timeout, max_timeout);
+}
+
+static inline void inet_csk_prepare_for_destroy_sock(struct sock *sk)
+{
+ /* The below has to be done to allow calling inet_csk_destroy_sock */
+ sock_set_flag(sk, SOCK_DEAD);
+ this_cpu_inc(*sk->sk_prot->orphan_count);
+}
+
void inet_csk_destroy_sock(struct sock *sk);
void inet_csk_prepare_forced_close(struct sock *sk);
@@ -299,19 +313,18 @@ static inline __poll_t inet_csk_listen_poll(const struct sock *sk)
(EPOLLIN | EPOLLRDNORM) : 0;
}
-int inet_csk_listen_start(struct sock *sk, int backlog);
+int inet_csk_listen_start(struct sock *sk);
void inet_csk_listen_stop(struct sock *sk);
void inet_csk_addr2sockaddr(struct sock *sk, struct sockaddr *uaddr);
-int inet_csk_compat_getsockopt(struct sock *sk, int level, int optname,
- char __user *optval, int __user *optlen);
-int inet_csk_compat_setsockopt(struct sock *sk, int level, int optname,
- char __user *optval, unsigned int optlen);
+/* update the fast reuse flag when adding a socket */
+void inet_csk_update_fastreuse(struct inet_bind_bucket *tb,
+ struct sock *sk);
struct dst_entry *inet_csk_update_pmtu(struct sock *sk, u32 mtu);
-#define TCP_PINGPONG_THRESH 3
+#define TCP_PINGPONG_THRESH 1
static inline void inet_csk_enter_pingpong_mode(struct sock *sk)
{
@@ -328,11 +341,9 @@ static inline bool inet_csk_in_pingpong_mode(struct sock *sk)
return inet_csk(sk)->icsk_ack.pingpong >= TCP_PINGPONG_THRESH;
}
-static inline void inet_csk_inc_pingpong_cnt(struct sock *sk)
+static inline bool inet_csk_has_ulp(struct sock *sk)
{
- struct inet_connection_sock *icsk = inet_csk(sk);
-
- if (icsk->icsk_ack.pingpong < U8_MAX)
- icsk->icsk_ack.pingpong++;
+ return inet_sk(sk)->is_icsk && !!inet_csk(sk)->icsk_ulp_ops;
}
+
#endif /* _INET_CONNECTION_SOCK_H */
diff --git a/include/net/inet_dscp.h b/include/net/inet_dscp.h
new file mode 100644
index 000000000000..72f250dffada
--- /dev/null
+++ b/include/net/inet_dscp.h
@@ -0,0 +1,57 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+/*
+ * inet_dscp.h: helpers for handling differentiated services codepoints (DSCP)
+ *
+ * DSCP is defined in RFC 2474:
+ *
+ * 0 1 2 3 4 5 6 7
+ * +---+---+---+---+---+---+---+---+
+ * | DSCP | CU |
+ * +---+---+---+---+---+---+---+---+
+ *
+ * DSCP: differentiated services codepoint
+ * CU: currently unused
+ *
+ * The whole DSCP + CU bits form the DS field.
+ * The DS field is also commonly called TOS or Traffic Class (for IPv6).
+ *
+ * Note: the CU bits are now used for Explicit Congestion Notification
+ * (RFC 3168).
+ */
+
+#ifndef _INET_DSCP_H
+#define _INET_DSCP_H
+
+#include <linux/types.h>
+
+/* Special type for storing DSCP values.
+ *
+ * A dscp_t variable stores a DS field with the CU (ECN) bits cleared.
+ * Using dscp_t allows to strictly separate DSCP and ECN bits, thus avoiding
+ * bugs where ECN bits are erroneously taken into account during FIB lookups
+ * or policy routing.
+ *
+ * Note: to get the real DSCP value contained in a dscp_t variable one would
+ * have to do a bit shift after calling inet_dscp_to_dsfield(). We could have
+ * a helper for that, but there's currently no users.
+ */
+typedef u8 __bitwise dscp_t;
+
+#define INET_DSCP_MASK 0xfc
+
+static inline dscp_t inet_dsfield_to_dscp(__u8 dsfield)
+{
+ return (__force dscp_t)(dsfield & INET_DSCP_MASK);
+}
+
+static inline __u8 inet_dscp_to_dsfield(dscp_t dscp)
+{
+ return (__force __u8)dscp;
+}
+
+static inline bool inet_validate_dscp(__u8 val)
+{
+ return !(val & ~INET_DSCP_MASK);
+}
+
+#endif /* _INET_DSCP_H */
diff --git a/include/net/inet_ecn.h b/include/net/inet_ecn.h
index c8e2bebd8d93..ea32393464a2 100644
--- a/include/net/inet_ecn.h
+++ b/include/net/inet_ecn.h
@@ -4,9 +4,11 @@
#include <linux/ip.h>
#include <linux/skbuff.h>
+#include <linux/if_vlan.h>
#include <net/inet_sock.h>
#include <net/dsfield.h>
+#include <net/checksum.h>
enum {
INET_ECN_NOT_ECT = 0,
@@ -74,8 +76,8 @@ static inline void INET_ECN_dontxmit(struct sock *sk)
static inline int IP_ECN_set_ce(struct iphdr *iph)
{
- u32 check = (__force u32)iph->check;
u32 ecn = (iph->tos + 1) & INET_ECN_MASK;
+ __be16 check_add;
/*
* After the last operation we have (in binary):
@@ -92,13 +94,24 @@ static inline int IP_ECN_set_ce(struct iphdr *iph)
* INET_ECN_ECT_1 => check += htons(0xFFFD)
* INET_ECN_ECT_0 => check += htons(0xFFFE)
*/
- check += (__force u16)htons(0xFFFB) + (__force u16)htons(ecn);
+ check_add = (__force __be16)((__force u16)htons(0xFFFB) +
+ (__force u16)htons(ecn));
- iph->check = (__force __sum16)(check + (check>=0xFFFF));
+ iph->check = csum16_add(iph->check, check_add);
iph->tos |= INET_ECN_CE;
return 1;
}
+static inline int IP_ECN_set_ect1(struct iphdr *iph)
+{
+ if ((iph->tos & INET_ECN_MASK) != INET_ECN_ECT_0)
+ return 0;
+
+ iph->check = csum16_add(iph->check, htons(0x1));
+ iph->tos ^= INET_ECN_MASK;
+ return 1;
+}
+
static inline void IP_ECN_clear(struct iphdr *iph)
{
iph->tos &= ~INET_ECN_MASK;
@@ -134,6 +147,22 @@ static inline int IP6_ECN_set_ce(struct sk_buff *skb, struct ipv6hdr *iph)
return 1;
}
+static inline int IP6_ECN_set_ect1(struct sk_buff *skb, struct ipv6hdr *iph)
+{
+ __be32 from, to;
+
+ if ((ipv6_get_dsfield(iph) & INET_ECN_MASK) != INET_ECN_ECT_0)
+ return 0;
+
+ from = *(__be32 *)iph;
+ to = from ^ htonl(INET_ECN_MASK << 20);
+ *(__be32 *)iph = to;
+ if (skb->ip_summed == CHECKSUM_COMPLETE)
+ skb->csum = csum_add(csum_sub(skb->csum, (__force __wsum)from),
+ (__force __wsum)to);
+ return 1;
+}
+
static inline void ipv6_copy_dscp(unsigned int dscp, struct ipv6hdr *inner)
{
dscp &= ~INET_ECN_MASK;
@@ -142,7 +171,7 @@ static inline void ipv6_copy_dscp(unsigned int dscp, struct ipv6hdr *inner)
static inline int INET_ECN_set_ce(struct sk_buff *skb)
{
- switch (skb->protocol) {
+ switch (skb_protocol(skb, true)) {
case cpu_to_be16(ETH_P_IP):
if (skb_network_header(skb) + sizeof(struct iphdr) <=
skb_tail_pointer(skb))
@@ -159,6 +188,42 @@ static inline int INET_ECN_set_ce(struct sk_buff *skb)
return 0;
}
+static inline int skb_get_dsfield(struct sk_buff *skb)
+{
+ switch (skb_protocol(skb, true)) {
+ case cpu_to_be16(ETH_P_IP):
+ if (!pskb_network_may_pull(skb, sizeof(struct iphdr)))
+ break;
+ return ipv4_get_dsfield(ip_hdr(skb));
+
+ case cpu_to_be16(ETH_P_IPV6):
+ if (!pskb_network_may_pull(skb, sizeof(struct ipv6hdr)))
+ break;
+ return ipv6_get_dsfield(ipv6_hdr(skb));
+ }
+
+ return -1;
+}
+
+static inline int INET_ECN_set_ect1(struct sk_buff *skb)
+{
+ switch (skb_protocol(skb, true)) {
+ case cpu_to_be16(ETH_P_IP):
+ if (skb_network_header(skb) + sizeof(struct iphdr) <=
+ skb_tail_pointer(skb))
+ return IP_ECN_set_ect1(ip_hdr(skb));
+ break;
+
+ case cpu_to_be16(ETH_P_IPV6):
+ if (skb_network_header(skb) + sizeof(struct ipv6hdr) <=
+ skb_tail_pointer(skb))
+ return IP6_ECN_set_ect1(skb, ipv6_hdr(skb));
+ break;
+ }
+
+ return 0;
+}
+
/*
* RFC 6040 4.2
* To decapsulate the inner header at the tunnel egress, a compliant
@@ -208,8 +273,12 @@ static inline int INET_ECN_decapsulate(struct sk_buff *skb,
int rc;
rc = __INET_ECN_decapsulate(outer, inner, &set_ce);
- if (!rc && set_ce)
- INET_ECN_set_ce(skb);
+ if (!rc) {
+ if (set_ce)
+ INET_ECN_set_ce(skb);
+ else if ((outer & INET_ECN_MASK) == INET_ECN_ECT_1)
+ INET_ECN_set_ect1(skb);
+ }
return rc;
}
@@ -219,12 +288,16 @@ static inline int IP_ECN_decapsulate(const struct iphdr *oiph,
{
__u8 inner;
- if (skb->protocol == htons(ETH_P_IP))
+ switch (skb_protocol(skb, true)) {
+ case htons(ETH_P_IP):
inner = ip_hdr(skb)->tos;
- else if (skb->protocol == htons(ETH_P_IPV6))
+ break;
+ case htons(ETH_P_IPV6):
inner = ipv6_get_dsfield(ipv6_hdr(skb));
- else
+ break;
+ default:
return 0;
+ }
return INET_ECN_decapsulate(skb, oiph->tos, inner);
}
@@ -234,12 +307,16 @@ static inline int IP6_ECN_decapsulate(const struct ipv6hdr *oipv6h,
{
__u8 inner;
- if (skb->protocol == htons(ETH_P_IP))
+ switch (skb_protocol(skb, true)) {
+ case htons(ETH_P_IP):
inner = ip_hdr(skb)->tos;
- else if (skb->protocol == htons(ETH_P_IPV6))
+ break;
+ case htons(ETH_P_IPV6):
inner = ipv6_get_dsfield(ipv6_hdr(skb));
- else
+ break;
+ default:
return 0;
+ }
return INET_ECN_decapsulate(skb, ipv6_get_dsfield(oipv6h), inner);
}
diff --git a/include/net/inet_frag.h b/include/net/inet_frag.h
index bac79e817776..0b0876610553 100644
--- a/include/net/inet_frag.h
+++ b/include/net/inet_frag.h
@@ -4,6 +4,9 @@
#include <linux/rhashtable-types.h>
#include <linux/completion.h>
+#include <linux/in6.h>
+#include <linux/rbtree_types.h>
+#include <linux/refcount.h>
/* Per netns frag queues directory */
struct fqdir {
@@ -21,6 +24,7 @@ struct fqdir {
/* Keep atomic mem on separate cachelines in structs that include it */
atomic_long_t mem ____cacheline_aligned_in_smp;
struct work_struct destroy_work;
+ struct llist_node free_list;
};
/**
@@ -69,6 +73,7 @@ struct frag_v6_compare_key {
* @stamp: timestamp of the last received fragment
* @len: total length of the original datagram
* @meat: length of received fragments so far
+ * @mono_delivery_time: stamp has a mono delivery time (EDT)
* @flags: fragment queue flags
* @max_size: maximum received fragment size
* @fqdir: pointer to struct fqdir
@@ -89,6 +94,7 @@ struct inet_frag_queue {
ktime_t stamp;
int len;
int meat;
+ u8 mono_delivery_time;
__u8 flags;
u16 max_size;
struct fqdir *fqdir;
@@ -116,8 +122,15 @@ int fqdir_init(struct fqdir **fqdirp, struct inet_frags *f, struct net *net);
static inline void fqdir_pre_exit(struct fqdir *fqdir)
{
- fqdir->high_thresh = 0; /* prevent creation of new frags */
- fqdir->dead = true;
+ /* Prevent creation of new frags.
+ * Pairs with READ_ONCE() in inet_frag_find().
+ */
+ WRITE_ONCE(fqdir->high_thresh, 0);
+
+ /* Pairs with READ_ONCE() in inet_frag_kill(), ip_expire()
+ * and ip6frag_expire_frag_queue().
+ */
+ WRITE_ONCE(fqdir->dead, true);
}
void fqdir_exit(struct fqdir *fqdir);
diff --git a/include/net/inet_hashtables.h b/include/net/inet_hashtables.h
index d0019d3395cf..3af1e927247d 100644
--- a/include/net/inet_hashtables.h
+++ b/include/net/inet_hashtables.h
@@ -23,6 +23,7 @@
#include <net/inet_connection_sock.h>
#include <net/inet_sock.h>
+#include <net/ip.h>
#include <net/sock.h>
#include <net/route.h>
#include <net/tcp_states.h>
@@ -90,7 +91,31 @@ struct inet_bind_bucket {
struct hlist_head owners;
};
-static inline struct net *ib_net(struct inet_bind_bucket *ib)
+struct inet_bind2_bucket {
+ possible_net_t ib_net;
+ int l3mdev;
+ unsigned short port;
+#if IS_ENABLED(CONFIG_IPV6)
+ unsigned short family;
+#endif
+ union {
+#if IS_ENABLED(CONFIG_IPV6)
+ struct in6_addr v6_rcv_saddr;
+#endif
+ __be32 rcv_saddr;
+ };
+ /* Node in the bhash2 inet_bind_hashbucket chain */
+ struct hlist_node node;
+ /* List of sockets hashed to this bucket */
+ struct hlist_head owners;
+};
+
+static inline struct net *ib_net(const struct inet_bind_bucket *ib)
+{
+ return read_pnet(&ib->ib_net);
+}
+
+static inline struct net *ib2_net(const struct inet_bind2_bucket *ib)
{
return read_pnet(&ib->ib_net);
}
@@ -111,11 +136,7 @@ struct inet_bind_hashbucket {
#define LISTENING_NULLS_BASE (1U << 29)
struct inet_listen_hashbucket {
spinlock_t lock;
- unsigned int count;
- union {
- struct hlist_head head;
- struct hlist_nulls_head nulls_head;
- };
+ struct hlist_nulls_head nulls_head;
};
/* This is for listening sockets, thus all sockets which possess wildcards. */
@@ -137,31 +158,32 @@ struct inet_hashinfo {
* TCP hash as well as the others for fast bind/connect.
*/
struct kmem_cache *bind_bucket_cachep;
+ /* This bind table is hashed by local port */
struct inet_bind_hashbucket *bhash;
+ struct kmem_cache *bind2_bucket_cachep;
+ /* This bind table is hashed by local port and sk->sk_rcv_saddr (ipv4)
+ * or sk->sk_v6_rcv_saddr (ipv6). This 2nd bind table is used
+ * primarily for expediting bind conflict resolution.
+ */
+ struct inet_bind_hashbucket *bhash2;
unsigned int bhash_size;
/* The 2nd listener table hashed by local port and address */
unsigned int lhash2_mask;
struct inet_listen_hashbucket *lhash2;
- /* All the above members are written once at bootup and
- * never written again _or_ are predominantly read-access.
- *
- * Now align to a new cache line as all the following members
- * might be often dirty.
- */
- /* All sockets in TCP_LISTEN state will be in listening_hash.
- * This is the only table where wildcard'd TCP sockets can
- * exist. listening_hash is only hashed by local port number.
- * If lhash2 is initialized, the same socket will also be hashed
- * to lhash2 by port and address.
- */
- struct inet_listen_hashbucket listening_hash[INET_LHTABLE_SIZE]
- ____cacheline_aligned_in_smp;
+ bool pernet;
};
-#define inet_lhash2_for_each_icsk_rcu(__icsk, list) \
- hlist_for_each_entry_rcu(__icsk, list, icsk_listen_portaddr_node)
+static inline struct inet_hashinfo *tcp_or_dccp_get_hashinfo(const struct sock *sk)
+{
+#if IS_ENABLED(CONFIG_IP_DCCP)
+ return sk->sk_prot->h.hashinfo ? :
+ sock_net(sk)->ipv4.tcp_death_row.hashinfo;
+#else
+ return sock_net(sk)->ipv4.tcp_death_row.hashinfo;
+#endif
+}
static inline struct inet_listen_hashbucket *
inet_lhash2_bucket(struct inet_hashinfo *h, u32 hash)
@@ -185,22 +207,21 @@ static inline spinlock_t *inet_ehash_lockp(
int inet_ehash_locks_alloc(struct inet_hashinfo *hashinfo);
+static inline void inet_hashinfo2_free_mod(struct inet_hashinfo *h)
+{
+ kfree(h->lhash2);
+ h->lhash2 = NULL;
+}
+
static inline void inet_ehash_locks_free(struct inet_hashinfo *hashinfo)
{
kvfree(hashinfo->ehash_locks);
hashinfo->ehash_locks = NULL;
}
-static inline bool inet_sk_bound_dev_eq(struct net *net, int bound_dev_if,
- int dif, int sdif)
-{
-#if IS_ENABLED(CONFIG_NET_L3_MASTER_DEV)
- return inet_bound_dev_eq(!!net->ipv4.sysctl_tcp_l3mdev_accept,
- bound_dev_if, dif, sdif);
-#else
- return inet_bound_dev_eq(true, bound_dev_if, dif, sdif);
-#endif
-}
+struct inet_hashinfo *inet_pernet_hashinfo_alloc(struct inet_hashinfo *hashinfo,
+ unsigned int ehash_entries);
+void inet_pernet_hashinfo_free(struct inet_hashinfo *hashinfo);
struct inet_bind_bucket *
inet_bind_bucket_create(struct kmem_cache *cachep, struct net *net,
@@ -209,40 +230,76 @@ inet_bind_bucket_create(struct kmem_cache *cachep, struct net *net,
void inet_bind_bucket_destroy(struct kmem_cache *cachep,
struct inet_bind_bucket *tb);
+bool inet_bind_bucket_match(const struct inet_bind_bucket *tb,
+ const struct net *net, unsigned short port,
+ int l3mdev);
+
+struct inet_bind2_bucket *
+inet_bind2_bucket_create(struct kmem_cache *cachep, struct net *net,
+ struct inet_bind_hashbucket *head,
+ unsigned short port, int l3mdev,
+ const struct sock *sk);
+
+void inet_bind2_bucket_destroy(struct kmem_cache *cachep,
+ struct inet_bind2_bucket *tb);
+
+struct inet_bind2_bucket *
+inet_bind2_bucket_find(const struct inet_bind_hashbucket *head,
+ const struct net *net,
+ unsigned short port, int l3mdev,
+ const struct sock *sk);
+
+bool inet_bind2_bucket_match_addr_any(const struct inet_bind2_bucket *tb,
+ const struct net *net, unsigned short port,
+ int l3mdev, const struct sock *sk);
+
static inline u32 inet_bhashfn(const struct net *net, const __u16 lport,
const u32 bhash_size)
{
return (lport + net_hash_mix(net)) & (bhash_size - 1);
}
-void inet_bind_hash(struct sock *sk, struct inet_bind_bucket *tb,
- const unsigned short snum);
-
-/* These can have wildcards, don't try too hard. */
-static inline u32 inet_lhashfn(const struct net *net, const unsigned short num)
+static inline struct inet_bind_hashbucket *
+inet_bhashfn_portaddr(const struct inet_hashinfo *hinfo, const struct sock *sk,
+ const struct net *net, unsigned short port)
{
- return (num + net_hash_mix(net)) & (INET_LHTABLE_SIZE - 1);
-}
+ u32 hash;
-static inline int inet_sk_listen_hashfn(const struct sock *sk)
-{
- return inet_lhashfn(sock_net(sk), inet_sk(sk)->inet_num);
+#if IS_ENABLED(CONFIG_IPV6)
+ if (sk->sk_family == AF_INET6)
+ hash = ipv6_portaddr_hash(net, &sk->sk_v6_rcv_saddr, port);
+ else
+#endif
+ hash = ipv4_portaddr_hash(net, sk->sk_rcv_saddr, port);
+ return &hinfo->bhash2[hash & (hinfo->bhash_size - 1)];
}
+struct inet_bind_hashbucket *
+inet_bhash2_addr_any_hashbucket(const struct sock *sk, const struct net *net, int port);
+
+/* This should be called whenever a socket's sk_rcv_saddr (ipv4) or
+ * sk_v6_rcv_saddr (ipv6) changes after it has been binded. The socket's
+ * rcv_saddr field should already have been updated when this is called.
+ */
+int inet_bhash2_update_saddr(struct inet_bind_hashbucket *prev_saddr, struct sock *sk);
+
+void inet_bind_hash(struct sock *sk, struct inet_bind_bucket *tb,
+ struct inet_bind2_bucket *tb2, unsigned short port);
+
/* Caller must disable local BH processing. */
int __inet_inherit_port(const struct sock *sk, struct sock *child);
void inet_put_port(struct sock *sk);
-void inet_hashinfo_init(struct inet_hashinfo *h);
void inet_hashinfo2_init(struct inet_hashinfo *h, const char *name,
unsigned long numentries, int scale,
unsigned long low_limit,
unsigned long high_limit);
int inet_hashinfo2_init_mod(struct inet_hashinfo *h);
-bool inet_ehash_insert(struct sock *sk, struct sock *osk);
-bool inet_ehash_nolisten(struct sock *sk, struct sock *osk);
+bool inet_ehash_insert(struct sock *sk, struct sock *osk, bool *found_dup_sk);
+bool inet_ehash_nolisten(struct sock *sk, struct sock *osk,
+ bool *found_dup_sk);
int __inet_hash(struct sock *sk, struct sock *osk);
int inet_hash(struct sock *sk);
void inet_unhash(struct sock *sk);
@@ -282,7 +339,6 @@ static inline struct sock *inet_lookup_listener(struct net *net,
((__force __portpair)(((__u32)(__dport) << 16) | (__force __u32)(__be16)(__sport)))
#endif
-#if (BITS_PER_LONG == 64)
#ifdef __BIG_ENDIAN
#define INET_ADDR_COOKIE(__name, __saddr, __daddr) \
const __addrpair __name = (__force __addrpair) ( \
@@ -294,24 +350,20 @@ static inline struct sock *inet_lookup_listener(struct net *net,
(((__force __u64)(__be32)(__daddr)) << 32) | \
((__force __u64)(__be32)(__saddr)))
#endif /* __BIG_ENDIAN */
-#define INET_MATCH(__sk, __net, __cookie, __saddr, __daddr, __ports, __dif, __sdif) \
- (((__sk)->sk_portpair == (__ports)) && \
- ((__sk)->sk_addrpair == (__cookie)) && \
- (((__sk)->sk_bound_dev_if == (__dif)) || \
- ((__sk)->sk_bound_dev_if == (__sdif))) && \
- net_eq(sock_net(__sk), (__net)))
-#else /* 32-bit arch */
-#define INET_ADDR_COOKIE(__name, __saddr, __daddr) \
- const int __name __deprecated __attribute__((unused))
-#define INET_MATCH(__sk, __net, __cookie, __saddr, __daddr, __ports, __dif, __sdif) \
- (((__sk)->sk_portpair == (__ports)) && \
- ((__sk)->sk_daddr == (__saddr)) && \
- ((__sk)->sk_rcv_saddr == (__daddr)) && \
- (((__sk)->sk_bound_dev_if == (__dif)) || \
- ((__sk)->sk_bound_dev_if == (__sdif))) && \
- net_eq(sock_net(__sk), (__net)))
-#endif /* 64-bit arch */
+static inline bool inet_match(struct net *net, const struct sock *sk,
+ const __addrpair cookie, const __portpair ports,
+ int dif, int sdif)
+{
+ if (!net_eq(sock_net(sk), net) ||
+ sk->sk_portpair != ports ||
+ sk->sk_addrpair != cookie)
+ return false;
+
+ /* READ_ONCE() paired with WRITE_ONCE() in sock_bindtoindex_locked() */
+ return inet_sk_bound_dev_eq(net, READ_ONCE(sk->sk_bound_dev_if), dif,
+ sdif);
+}
/* Sockets in TCP_CLOSE state are _always_ taken out of the hash, so we need
* not check it for lookups anymore, thanks Alexey. -DaveM
@@ -379,10 +431,9 @@ static inline struct sock *__inet_lookup_skb(struct inet_hashinfo *hashinfo,
const int sdif,
bool *refcounted)
{
- struct sock *sk = skb_steal_sock(skb);
+ struct sock *sk = skb_steal_sock(skb, refcounted);
const struct iphdr *iph = ip_hdr(skb);
- *refcounted = true;
if (sk)
return sk;
@@ -413,7 +464,7 @@ static inline void sk_rcv_saddr_set(struct sock *sk, __be32 addr)
}
int __inet_hash_connect(struct inet_timewait_death_row *death_row,
- struct sock *sk, u32 port_offset,
+ struct sock *sk, u64 port_offset,
int (*check_established)(struct inet_timewait_death_row *,
struct sock *, __u16,
struct inet_timewait_sock **));
diff --git a/include/net/inet_sock.h b/include/net/inet_sock.h
index 34c4436fd18f..bf5654ce711e 100644
--- a/include/net/inet_sock.h
+++ b/include/net/inet_sock.h
@@ -52,7 +52,7 @@ struct ip_options {
unsigned char router_alert;
unsigned char cipso;
unsigned char __pad2;
- unsigned char __data[0];
+ unsigned char __data[];
};
struct ip_options_rcu {
@@ -107,7 +107,8 @@ static inline struct inet_request_sock *inet_rsk(const struct request_sock *sk)
static inline u32 inet_request_mark(const struct sock *sk, struct sk_buff *skb)
{
- if (!sk->sk_mark && sock_net(sk)->ipv4.sysctl_tcp_fwmark_accept)
+ if (!sk->sk_mark &&
+ READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_fwmark_accept))
return skb->mark;
return sk->sk_mark;
@@ -116,14 +117,15 @@ static inline u32 inet_request_mark(const struct sock *sk, struct sk_buff *skb)
static inline int inet_request_bound_dev_if(const struct sock *sk,
struct sk_buff *skb)
{
+ int bound_dev_if = READ_ONCE(sk->sk_bound_dev_if);
#ifdef CONFIG_NET_L3_MASTER_DEV
struct net *net = sock_net(sk);
- if (!sk->sk_bound_dev_if && net->ipv4.sysctl_tcp_l3mdev_accept)
+ if (!bound_dev_if && READ_ONCE(net->ipv4.sysctl_tcp_l3mdev_accept))
return l3mdev_master_ifindex_by_index(net, skb->skb_iif);
#endif
- return sk->sk_bound_dev_if;
+ return bound_dev_if;
}
static inline int inet_sk_bound_l3mdev(const struct sock *sk)
@@ -131,7 +133,7 @@ static inline int inet_sk_bound_l3mdev(const struct sock *sk)
#ifdef CONFIG_NET_L3_MASTER_DEV
struct net *net = sock_net(sk);
- if (!net->ipv4.sysctl_tcp_l3mdev_accept)
+ if (!READ_ONCE(net->ipv4.sysctl_tcp_l3mdev_accept))
return l3mdev_master_ifindex_by_index(net,
sk->sk_bound_dev_if);
#endif
@@ -147,6 +149,17 @@ static inline bool inet_bound_dev_eq(bool l3mdev_accept, int bound_dev_if,
return bound_dev_if == dif || bound_dev_if == sdif;
}
+static inline bool inet_sk_bound_dev_eq(struct net *net, int bound_dev_if,
+ int dif, int sdif)
+{
+#if IS_ENABLED(CONFIG_NET_L3_MASTER_DEV)
+ return inet_bound_dev_eq(!!READ_ONCE(net->ipv4.sysctl_tcp_l3mdev_accept),
+ bound_dev_if, dif, sdif);
+#else
+ return inet_bound_dev_eq(true, bound_dev_if, dif, sdif);
+#endif
+}
+
struct inet_cork {
unsigned int flags;
__be32 addr;
@@ -207,11 +220,10 @@ struct inet_sock {
__be32 inet_saddr;
__s16 uc_ttl;
__u16 cmsg_flags;
+ struct ip_options_rcu __rcu *inet_opt;
__be16 inet_sport;
__u16 inet_id;
- struct ip_options_rcu __rcu *inet_opt;
- int rx_dst_ifindex;
__u8 tos;
__u8 min_ttl;
__u8 mc_ttl;
@@ -225,6 +237,7 @@ struct inet_sock {
mc_all:1,
nodefrag:1;
__u8 bind_address_no_port:1,
+ recverr_rfc4884:1,
defer_connect:1; /* Indicates that fastopen_connect is set
* and cookie exists so we defer connect
* until first data frame is written
@@ -252,6 +265,11 @@ struct inet_sock {
#define IP_CMSG_CHECKSUM BIT(7)
#define IP_CMSG_RECVFRAGSIZE BIT(8)
+static inline bool sk_is_inet(struct sock *sk)
+{
+ return sk->sk_family == AF_INET || sk->sk_family == AF_INET6;
+}
+
/**
* sk_to_full_sk - Access to a full socket
* @sk: pointer to a socket
@@ -295,13 +313,6 @@ static inline void __inet_sk_copy_descendant(struct sock *sk_to,
memcpy(inet_sk(sk_to) + 1, inet_sk(sk_from) + 1,
sk_from->sk_prot->obj_size - ancestor_size);
}
-#if !(IS_ENABLED(CONFIG_IPV6))
-static inline void inet_sk_copy_descendant(struct sock *sk_to,
- const struct sock *sk_from)
-{
- __inet_sk_copy_descendant(sk_to, sk_from, sizeof(struct inet_sock));
-}
-#endif
int inet_sk_rebuild_header(struct sock *sk);
@@ -375,8 +386,20 @@ static inline bool inet_get_convert_csum(struct sock *sk)
static inline bool inet_can_nonlocal_bind(struct net *net,
struct inet_sock *inet)
{
- return net->ipv4.sysctl_ip_nonlocal_bind ||
+ return READ_ONCE(net->ipv4.sysctl_ip_nonlocal_bind) ||
inet->freebind || inet->transparent;
}
+static inline bool inet_addr_valid_or_nonlocal(struct net *net,
+ struct inet_sock *inet,
+ __be32 addr,
+ int addr_type)
+{
+ return inet_can_nonlocal_bind(net, inet) ||
+ addr == htonl(INADDR_ANY) ||
+ addr_type == RTN_LOCAL ||
+ addr_type == RTN_MULTICAST ||
+ addr_type == RTN_BROADCAST;
+}
+
#endif /* _INET_SOCK_H */
diff --git a/include/net/inet_timewait_sock.h b/include/net/inet_timewait_sock.h
index dfd919b3119e..5b47545f22d3 100644
--- a/include/net/inet_timewait_sock.h
+++ b/include/net/inet_timewait_sock.h
@@ -65,10 +65,9 @@ struct inet_timewait_sock {
/* these three are in inet_sock */
__be16 tw_sport;
/* And these are ours. */
- unsigned int tw_kill : 1,
- tw_transparent : 1,
+ unsigned int tw_transparent : 1,
tw_flowlabel : 20,
- tw_pad : 2, /* 2 bits hole */
+ tw_pad : 3, /* 3 bits hole */
tw_tos : 8;
u32 tw_txhash;
u32 tw_priority;
diff --git a/include/net/ioam6.h b/include/net/ioam6.h
new file mode 100644
index 000000000000..781d2d8b2f29
--- /dev/null
+++ b/include/net/ioam6.h
@@ -0,0 +1,68 @@
+/* SPDX-License-Identifier: GPL-2.0+ */
+/*
+ * IPv6 IOAM implementation
+ *
+ * Author:
+ * Justin Iurman <justin.iurman@uliege.be>
+ */
+
+#ifndef _NET_IOAM6_H
+#define _NET_IOAM6_H
+
+#include <linux/net.h>
+#include <linux/ipv6.h>
+#include <linux/ioam6.h>
+#include <linux/rhashtable-types.h>
+
+struct ioam6_namespace {
+ struct rhash_head head;
+ struct rcu_head rcu;
+
+ struct ioam6_schema __rcu *schema;
+
+ __be16 id;
+ __be32 data;
+ __be64 data_wide;
+};
+
+struct ioam6_schema {
+ struct rhash_head head;
+ struct rcu_head rcu;
+
+ struct ioam6_namespace __rcu *ns;
+
+ u32 id;
+ int len;
+ __be32 hdr;
+
+ u8 data[];
+};
+
+struct ioam6_pernet_data {
+ struct mutex lock;
+ struct rhashtable namespaces;
+ struct rhashtable schemas;
+};
+
+static inline struct ioam6_pernet_data *ioam6_pernet(struct net *net)
+{
+#if IS_ENABLED(CONFIG_IPV6)
+ return net->ipv6.ioam6_data;
+#else
+ return NULL;
+#endif
+}
+
+struct ioam6_namespace *ioam6_namespace(struct net *net, __be16 id);
+void ioam6_fill_trace_data(struct sk_buff *skb,
+ struct ioam6_namespace *ns,
+ struct ioam6_trace_hdr *trace,
+ bool is_input);
+
+int ioam6_init(void);
+void ioam6_exit(void);
+
+int ioam6_iptunnel_init(void);
+void ioam6_iptunnel_exit(void);
+
+#endif /* _NET_IOAM6_H */
diff --git a/include/net/ip.h b/include/net/ip.h
index 5b317c9f4470..038097c2a152 100644
--- a/include/net/ip.h
+++ b/include/net/ip.h
@@ -23,6 +23,8 @@
#include <linux/in.h>
#include <linux/skbuff.h>
#include <linux/jhash.h>
+#include <linux/sockptr.h>
+#include <linux/static_key.h>
#include <net/inet_sock.h>
#include <net/route.h>
@@ -30,6 +32,7 @@
#include <net/flow.h>
#include <net/flow_dissector.h>
#include <net/netns/hash.h>
+#include <net/lwtunnel.h>
#define IPV4_MAX_PMTU 65535U /* RFC 2675, Section 5.1 */
#define IPV4_MIN_MTU 68 /* RFC 791 */
@@ -53,6 +56,7 @@ struct inet_skb_parm {
#define IPSKB_DOREDIRECT BIT(5)
#define IPSKB_FRAG_PMTU BIT(6)
#define IPSKB_L3SLAVE BIT(7)
+#define IPSKB_NOPOLICY BIT(8)
u16 frag_max_size;
};
@@ -90,7 +94,7 @@ static inline void ipcm_init_sk(struct ipcm_cookie *ipcm,
ipcm->sockc.mark = inet->sk.sk_mark;
ipcm->sockc.tsflags = inet->sk.sk_tsflags;
- ipcm->oif = inet->sk.sk_bound_dev_if;
+ ipcm->oif = READ_ONCE(inet->sk.sk_bound_dev_if);
ipcm->addr = inet->inet_saddr;
}
@@ -98,7 +102,7 @@ static inline void ipcm_init_sk(struct ipcm_cookie *ipcm,
#define PKTINFO_SKB_CB(skb) ((struct in_pktinfo *)((skb)->cb))
/* return enslaved device index if relevant */
-static inline int inet_sdif(struct sk_buff *skb)
+static inline int inet_sdif(const struct sk_buff *skb)
{
#if IS_ENABLED(CONFIG_NET_L3_MASTER_DEV)
if (skb && ipv4_l3mdev_skb(IPCB(skb)->flags))
@@ -150,7 +154,7 @@ int igmp_mc_init(void);
int ip_build_and_send_pkt(struct sk_buff *skb, const struct sock *sk,
__be32 saddr, __be32 daddr,
- struct ip_options_rcu *opt);
+ struct ip_options_rcu *opt, u8 tos);
int ip_rcv(struct sk_buff *skb, struct net_device *dev, struct packet_type *pt,
struct net_device *orig_dev);
void ip_list_rcv(struct list_head *head, struct packet_type *pt,
@@ -231,11 +235,7 @@ struct sk_buff *ip_make_skb(struct sock *sk, struct flowi4 *fl4,
struct ipcm_cookie *ipc, struct rtable **rtp,
struct inet_cork *cork, unsigned int flags);
-static inline int ip_queue_xmit(struct sock *sk, struct sk_buff *skb,
- struct flowi *fl)
-{
- return __ip_queue_xmit(sk, skb, fl, inet_sk(sk)->tos);
-}
+int ip_queue_xmit(struct sock *sk, struct sk_buff *skb, struct flowi *fl);
static inline struct sk_buff *ip_finish_skb(struct sock *sk, struct flowi4 *fl4)
{
@@ -293,7 +293,11 @@ void ip_send_unicast_reply(struct sock *sk, struct sk_buff *skb,
#define NET_ADD_STATS(net, field, adnd) SNMP_ADD_STATS((net)->mib.net_statistics, field, adnd)
#define __NET_ADD_STATS(net, field, adnd) __SNMP_ADD_STATS((net)->mib.net_statistics, field, adnd)
-u64 snmp_get_cpu_field(void __percpu *mib, int cpu, int offct);
+static inline u64 snmp_get_cpu_field(void __percpu *mib, int cpu, int offt)
+{
+ return *(((unsigned long *)per_cpu_ptr(mib, cpu)) + offt);
+}
+
unsigned long snmp_fold_field(void __percpu *mib, int offt);
#if BITS_PER_LONG==32
u64 snmp_get_cpu_field64(void __percpu *mib, int cpu, int offct,
@@ -353,7 +357,7 @@ static inline bool sysctl_dev_name_is_allowed(const char *name)
static inline bool inet_port_requires_bind_service(struct net *net, unsigned short port)
{
- return port < net->ipv4.sysctl_ip_prot_sock;
+ return port < READ_ONCE(net->ipv4.sysctl_ip_prot_sock);
}
#else
@@ -380,7 +384,7 @@ void ipfrag_init(void);
void ip_static_sysctl_init(void);
#define IP4_REPLY_MARK(net, mark) \
- ((net)->ipv4.sysctl_fwmark_reflect ? (mark) : 0)
+ (READ_ONCE((net)->ipv4.sysctl_fwmark_reflect) ? (mark) : 0)
static inline bool ip_is_fragment(const struct iphdr *iph)
{
@@ -438,26 +442,49 @@ static inline bool ip_sk_ignore_df(const struct sock *sk)
static inline unsigned int ip_dst_mtu_maybe_forward(const struct dst_entry *dst,
bool forwarding)
{
+ const struct rtable *rt = container_of(dst, struct rtable, dst);
struct net *net = dev_net(dst->dev);
+ unsigned int mtu;
- if (net->ipv4.sysctl_ip_fwd_use_pmtu ||
+ if (READ_ONCE(net->ipv4.sysctl_ip_fwd_use_pmtu) ||
ip_mtu_locked(dst) ||
- !forwarding)
- return dst_mtu(dst);
+ !forwarding) {
+ mtu = rt->rt_pmtu;
+ if (mtu && time_before(jiffies, rt->dst.expires))
+ goto out;
+ }
+
+ /* 'forwarding = true' case should always honour route mtu */
+ mtu = dst_metric_raw(dst, RTAX_MTU);
+ if (mtu)
+ goto out;
+
+ mtu = READ_ONCE(dst->dev->mtu);
+
+ if (unlikely(ip_mtu_locked(dst))) {
+ if (rt->rt_uses_gateway && mtu > 576)
+ mtu = 576;
+ }
+
+out:
+ mtu = min_t(unsigned int, mtu, IP_MAX_MTU);
- return min(READ_ONCE(dst->dev->mtu), IP_MAX_MTU);
+ return mtu - lwtunnel_headroom(dst->lwtstate, mtu);
}
static inline unsigned int ip_skb_dst_mtu(struct sock *sk,
const struct sk_buff *skb)
{
+ unsigned int mtu;
+
if (!sk || !sk_fullsock(sk) || ip_sk_use_pmtu(sk)) {
bool forwarding = IPCB(skb)->flags & IPSKB_FORWARDED;
return ip_dst_mtu_maybe_forward(skb_dst(skb), forwarding);
}
- return min(READ_ONCE(skb_dst(skb)->dev->mtu), IP_MAX_MTU);
+ mtu = min(READ_ONCE(skb_dst(skb)->dev->mtu), IP_MAX_MTU);
+ return mtu - lwtunnel_headroom(skb_dst(skb)->lwtstate, mtu);
}
struct dst_metrics *ip_fib_metrics_init(struct net *net, struct nlattr *fc_mx,
@@ -491,7 +518,6 @@ void ip_dst_metrics_put(struct dst_entry *dst)
kfree(p);
}
-u32 ip_idents_reserve(u32 hash, int segs);
void __ip_select_ident(struct net *net, struct iphdr *iph, int segs);
static inline void ip_select_ident_segs(struct net *net, struct sk_buff *skb,
@@ -499,19 +525,18 @@ static inline void ip_select_ident_segs(struct net *net, struct sk_buff *skb,
{
struct iphdr *iph = ip_hdr(skb);
+ /* We had many attacks based on IPID, use the private
+ * generator as much as we can.
+ */
+ if (sk && inet_sk(sk)->inet_daddr) {
+ iph->id = htons(inet_sk(sk)->inet_id);
+ inet_sk(sk)->inet_id += segs;
+ return;
+ }
if ((iph->frag_off & htons(IP_DF)) && !skb->ignore_df) {
- /* This is only to work around buggy Windows95/2000
- * VJ compression implementations. If the ID field
- * does not change, they drop every other packet in
- * a TCP stream using header compression.
- */
- if (sk && inet_sk(sk)->inet_daddr) {
- iph->id = htons(inet_sk(sk)->inet_id);
- inet_sk(sk)->inet_id += segs;
- } else {
- iph->id = 0;
- }
+ iph->id = 0;
} else {
+ /* Unfortunately we need the big hammer to get a suitable IPID */
__ip_select_ident(net, iph, segs);
}
}
@@ -542,14 +567,6 @@ static inline void iph_to_flow_copy_v4addrs(struct flow_keys *flow,
flow->control.addr_type = FLOW_DISSECTOR_KEY_IPV4_ADDRS;
}
-static inline __wsum inet_gro_compute_pseudo(struct sk_buff *skb, int proto)
-{
- const struct iphdr *iph = skb_gro_network_header(skb);
-
- return csum_tcpudp_nofold(iph->saddr, iph->daddr,
- skb_gro_len(skb), proto, 0);
-}
-
/*
* Map a multicast IP onto multicast MAC for type ethernet.
*/
@@ -695,7 +712,7 @@ int ip_forward(struct sk_buff *skb);
*/
void ip_options_build(struct sk_buff *skb, struct ip_options *opt,
- __be32 daddr, struct rtable *rt, int is_frag);
+ __be32 daddr, struct rtable *rt);
int __ip_options_echo(struct net *net, struct ip_options *dopt,
struct sk_buff *skb, const struct ip_options *sopt);
@@ -711,9 +728,7 @@ int __ip_options_compile(struct net *net, struct ip_options *opt,
int ip_options_compile(struct net *net, struct ip_options *opt,
struct sk_buff *skb);
int ip_options_get(struct net *net, struct ip_options_rcu **optp,
- unsigned char *data, int optlen);
-int ip_options_get_from_user(struct net *net, struct ip_options_rcu **optp,
- unsigned char __user *data, int optlen);
+ sockptr_t data, int optlen);
void ip_options_undo(struct ip_options *opt);
void ip_forward_options(struct sk_buff *skb);
int ip_options_rcv_srr(struct sk_buff *skb, struct net_device *dev);
@@ -727,14 +742,15 @@ void ip_cmsg_recv_offset(struct msghdr *msg, struct sock *sk,
struct sk_buff *skb, int tlen, int offset);
int ip_cmsg_send(struct sock *sk, struct msghdr *msg,
struct ipcm_cookie *ipc, bool allow_ipv6);
-int ip_setsockopt(struct sock *sk, int level, int optname, char __user *optval,
+DECLARE_STATIC_KEY_FALSE(ip4_min_ttl);
+int do_ip_setsockopt(struct sock *sk, int level, int optname, sockptr_t optval,
+ unsigned int optlen);
+int ip_setsockopt(struct sock *sk, int level, int optname, sockptr_t optval,
unsigned int optlen);
+int do_ip_getsockopt(struct sock *sk, int level, int optname,
+ sockptr_t optval, sockptr_t optlen);
int ip_getsockopt(struct sock *sk, int level, int optname, char __user *optval,
int __user *optlen);
-int compat_ip_setsockopt(struct sock *sk, int level, int optname,
- char __user *optval, unsigned int optlen);
-int compat_ip_getsockopt(struct sock *sk, int level, int optname,
- char __user *optval, int __user *optlen);
int ip_ra_control(struct sock *sk, unsigned char on,
void (*destructor)(struct sock *));
@@ -765,4 +781,11 @@ static inline bool inetdev_valid_mtu(unsigned int mtu)
return likely(mtu >= IPV4_MIN_MTU);
}
+void ip_sock_set_freebind(struct sock *sk);
+int ip_sock_set_mtu_discover(struct sock *sk, int val);
+void ip_sock_set_pktinfo(struct sock *sk);
+void ip_sock_set_recverr(struct sock *sk);
+void ip_sock_set_tos(struct sock *sk, int val);
+void __ip_sock_set_tos(struct sock *sk, int val);
+
#endif /* _IP_H */
diff --git a/include/net/ip6_checksum.h b/include/net/ip6_checksum.h
index 7bec95df4f80..c8a96b888277 100644
--- a/include/net/ip6_checksum.h
+++ b/include/net/ip6_checksum.h
@@ -43,14 +43,6 @@ static inline __wsum ip6_compute_pseudo(struct sk_buff *skb, int proto)
skb->len, proto, 0));
}
-static inline __wsum ip6_gro_compute_pseudo(struct sk_buff *skb, int proto)
-{
- const struct ipv6hdr *iph = skb_gro_network_header(skb);
-
- return ~csum_unfold(csum_ipv6_magic(&iph->saddr, &iph->daddr,
- skb_gro_len(skb), proto, 0));
-}
-
static __inline__ __sum16 tcp_v6_check(int len,
const struct in6_addr *saddr,
const struct in6_addr *daddr,
@@ -65,25 +57,19 @@ static inline void __tcp_v6_send_check(struct sk_buff *skb,
{
struct tcphdr *th = tcp_hdr(skb);
- if (skb->ip_summed == CHECKSUM_PARTIAL) {
- th->check = ~tcp_v6_check(skb->len, saddr, daddr, 0);
- skb->csum_start = skb_transport_header(skb) - skb->head;
- skb->csum_offset = offsetof(struct tcphdr, check);
- } else {
- th->check = tcp_v6_check(skb->len, saddr, daddr,
- csum_partial(th, th->doff << 2,
- skb->csum));
- }
+ th->check = ~tcp_v6_check(skb->len, saddr, daddr, 0);
+ skb->csum_start = skb_transport_header(skb) - skb->head;
+ skb->csum_offset = offsetof(struct tcphdr, check);
}
-#if IS_ENABLED(CONFIG_IPV6)
-static inline void tcp_v6_send_check(struct sock *sk, struct sk_buff *skb)
+static inline void tcp_v6_gso_csum_prep(struct sk_buff *skb)
{
- struct ipv6_pinfo *np = inet6_sk(sk);
+ struct ipv6hdr *ipv6h = ipv6_hdr(skb);
+ struct tcphdr *th = tcp_hdr(skb);
- __tcp_v6_send_check(skb, &np->saddr, &sk->sk_v6_daddr);
+ ipv6h->payload_len = 0;
+ th->check = ~tcp_v6_check(0, &ipv6h->saddr, &ipv6h->daddr, 0);
}
-#endif
static inline __sum16 udp_v6_check(int len,
const struct in6_addr *saddr,
diff --git a/include/net/ip6_fib.h b/include/net/ip6_fib.h
index fd60a8ac02ee..6268963d9599 100644
--- a/include/net/ip6_fib.h
+++ b/include/net/ip6_fib.h
@@ -19,6 +19,8 @@
#include <net/netlink.h>
#include <net/inetpeer.h>
#include <net/fib_notifier.h>
+#include <linux/indirect_call_wrapper.h>
+#include <uapi/linux/bpf.h>
#ifdef CONFIG_IPV6_MULTIPLE_TABLES
#define FIB6_TABLE_HASHSZ 256
@@ -65,6 +67,7 @@ struct fib6_config {
struct nl_info fc_nlinfo;
struct nlattr *fc_encap;
u16 fc_encap_type;
+ bool fc_is_fdb;
};
struct fib6_node {
@@ -164,7 +167,7 @@ struct fib6_info {
struct fib6_node __rcu *fib6_node;
/* Multipath routes:
- * siblings is a list of fib6_info that have the the same metric/weight,
+ * siblings is a list of fib6_info that have the same metric/weight,
* destination, but not the same gateway. nsiblings is just a cache
* to speed up lookup.
*/
@@ -187,23 +190,26 @@ struct fib6_info {
u32 fib6_metric;
u8 fib6_protocol;
u8 fib6_type;
+
+ u8 offload;
+ u8 trap;
+ u8 offload_failed;
+
u8 should_flush:1,
dst_nocount:1,
dst_nopolicy:1,
- dst_host:1,
fib6_destroying:1,
- offload:1,
- trap:1,
- unused:1;
+ unused:4;
struct rcu_head rcu;
struct nexthop *nh;
- struct fib6_nh fib6_nh[0];
+ struct fib6_nh fib6_nh[];
};
struct rt6_info {
struct dst_entry dst;
struct fib6_info __rcu *from;
+ int sernum;
struct rt6key rt6i_dst;
struct rt6key rt6i_src;
@@ -264,7 +270,7 @@ static inline bool fib6_check_expired(const struct fib6_info *f6i)
return false;
}
-/* Function to safely get fn->sernum for passed in rt
+/* Function to safely get fn->fn_sernum for passed in rt
* and store result in passed in cookie.
* Return true if we can get cookie safely
* Return false if not
@@ -278,8 +284,8 @@ static inline bool fib6_get_cookie_safe(const struct fib6_info *f6i,
fn = rcu_dereference(f6i->fib6_node);
if (fn) {
- *cookie = fn->fn_sernum;
- /* pairs with smp_wmb() in fib6_update_sernum_upto_root() */
+ *cookie = READ_ONCE(fn->fn_sernum);
+ /* pairs with smp_wmb() in __fib6_update_sernum_upto_root() */
smp_rmb();
status = true;
}
@@ -292,6 +298,9 @@ static inline u32 rt6_get_cookie(const struct rt6_info *rt)
struct fib6_info *from;
u32 cookie = 0;
+ if (rt->sernum)
+ return rt->sernum;
+
rcu_read_lock();
from = rcu_dereference(rt->from);
@@ -331,13 +340,6 @@ static inline void fib6_info_release(struct fib6_info *f6i)
call_rcu(&f6i->rcu, fib6_info_destroy_rcu);
}
-static inline void fib6_info_hw_flags_set(struct fib6_info *f6i, bool offload,
- bool trap)
-{
- f6i->offload = offload;
- f6i->trap = trap;
-}
-
enum fib6_walk_state {
#ifdef CONFIG_IPV6_SUBTREES
FWS_S,
@@ -367,9 +369,8 @@ struct rt6_statistics {
__u32 fib_rt_cache; /* cached rt entries in exception table */
__u32 fib_discarded_routes; /* total number of routes delete */
- /* The following stats are not protected by any lock */
+ /* The following stat is not protected by any lock */
atomic_t fib_rt_alloc; /* total number of routes alloced */
- atomic_t fib_rt_uncache; /* rt entries in uncached list */
};
#define RTN_TL_ROOT 0x0001
@@ -486,6 +487,7 @@ int fib6_nh_init(struct net *net, struct fib6_nh *fib6_nh,
struct fib6_config *cfg, gfp_t gfp_flags,
struct netlink_ext_ack *extack);
void fib6_nh_release(struct fib6_nh *fib6_nh);
+void fib6_nh_release_dsts(struct fib6_nh *fib6_nh);
int call_fib6_entry_notifiers(struct net *net,
enum fib_event_type event_type,
@@ -540,6 +542,50 @@ static inline bool fib6_metric_locked(struct fib6_info *f6i, int metric)
{
return !!(f6i->fib6_metrics->metrics[RTAX_LOCK - 1] & (1 << metric));
}
+void fib6_info_hw_flags_set(struct net *net, struct fib6_info *f6i,
+ bool offload, bool trap, bool offload_failed);
+
+#if IS_BUILTIN(CONFIG_IPV6) && defined(CONFIG_BPF_SYSCALL)
+struct bpf_iter__ipv6_route {
+ __bpf_md_ptr(struct bpf_iter_meta *, meta);
+ __bpf_md_ptr(struct fib6_info *, rt);
+};
+#endif
+
+INDIRECT_CALLABLE_DECLARE(struct rt6_info *ip6_pol_route_output(struct net *net,
+ struct fib6_table *table,
+ struct flowi6 *fl6,
+ const struct sk_buff *skb,
+ int flags));
+INDIRECT_CALLABLE_DECLARE(struct rt6_info *ip6_pol_route_input(struct net *net,
+ struct fib6_table *table,
+ struct flowi6 *fl6,
+ const struct sk_buff *skb,
+ int flags));
+INDIRECT_CALLABLE_DECLARE(struct rt6_info *__ip6_route_redirect(struct net *net,
+ struct fib6_table *table,
+ struct flowi6 *fl6,
+ const struct sk_buff *skb,
+ int flags));
+INDIRECT_CALLABLE_DECLARE(struct rt6_info *ip6_pol_route_lookup(struct net *net,
+ struct fib6_table *table,
+ struct flowi6 *fl6,
+ const struct sk_buff *skb,
+ int flags));
+static inline struct rt6_info *pol_lookup_func(pol_lookup_t lookup,
+ struct net *net,
+ struct fib6_table *table,
+ struct flowi6 *fl6,
+ const struct sk_buff *skb,
+ int flags)
+{
+ return INDIRECT_CALL_4(lookup,
+ ip6_pol_route_output,
+ ip6_pol_route_input,
+ ip6_pol_route_lookup,
+ __ip6_route_redirect,
+ net, table, fl6, skb, flags);
+}
#ifdef CONFIG_IPV6_MULTIPLE_TABLES
static inline bool fib6_has_custom_rules(const struct net *net)
diff --git a/include/net/ip6_route.h b/include/net/ip6_route.h
index b69c16cbbf71..035d61d50a98 100644
--- a/include/net/ip6_route.h
+++ b/include/net/ip6_route.h
@@ -2,6 +2,16 @@
#ifndef _NET_IP6_ROUTE_H
#define _NET_IP6_ROUTE_H
+#include <net/addrconf.h>
+#include <net/flow.h>
+#include <net/ip6_fib.h>
+#include <net/sock.h>
+#include <net/lwtunnel.h>
+#include <linux/ip.h>
+#include <linux/ipv6.h>
+#include <linux/route.h>
+#include <net/nexthop.h>
+
struct route_info {
__u8 type;
__u8 length;
@@ -16,19 +26,9 @@ struct route_info {
reserved_h:3;
#endif
__be32 lifetime;
- __u8 prefix[0]; /* 0,8 or 16 */
+ __u8 prefix[]; /* 0,8 or 16 */
};
-#include <net/addrconf.h>
-#include <net/flow.h>
-#include <net/ip6_fib.h>
-#include <net/sock.h>
-#include <net/lwtunnel.h>
-#include <linux/ip.h>
-#include <linux/ipv6.h>
-#include <linux/route.h>
-#include <net/nexthop.h>
-
#define RT6_LOOKUP_F_IFACE 0x00000001
#define RT6_LOOKUP_F_REACHABLE 0x00000002
#define RT6_LOOKUP_F_HAS_SADDR 0x00000004
@@ -118,12 +118,13 @@ void ip6_route_init_special_entries(void);
int ip6_route_init(void);
void ip6_route_cleanup(void);
-int ipv6_route_ioctl(struct net *net, unsigned int cmd, void __user *arg);
+int ipv6_route_ioctl(struct net *net, unsigned int cmd,
+ struct in6_rtmsg *rtmsg);
int ip6_route_add(struct fib6_config *cfg, gfp_t gfp_flags,
struct netlink_ext_ack *extack);
int ip6_ins_rt(struct net *net, struct fib6_info *f6i);
-int ip6_del_rt(struct net *net, struct fib6_info *f6i);
+int ip6_del_rt(struct net *net, struct fib6_info *f6i, bool skip_notify);
void rt6_flush_exceptions(struct fib6_info *f6i);
void rt6_age_exceptions(struct fib6_info *f6i, struct fib6_gc_args *gc_args,
@@ -173,7 +174,8 @@ struct fib6_info *rt6_get_dflt_router(struct net *net,
struct net_device *dev);
struct fib6_info *rt6_add_dflt_router(struct net *net,
const struct in6_addr *gwaddr,
- struct net_device *dev, unsigned int pref);
+ struct net_device *dev, unsigned int pref,
+ u32 defrtr_usr_metric);
void rt6_purge_dflt_routers(struct net *net);
@@ -254,19 +256,27 @@ static inline bool ipv6_anycast_destination(const struct dst_entry *dst,
return rt->rt6i_flags & RTF_ANYCAST ||
(rt->rt6i_dst.plen < 127 &&
+ !(rt->rt6i_flags & (RTF_GATEWAY | RTF_NONEXTHOP)) &&
ipv6_addr_equal(&rt->rt6i_dst.addr, daddr));
}
int ip6_fragment(struct net *net, struct sock *sk, struct sk_buff *skb,
int (*output)(struct net *, struct sock *, struct sk_buff *));
-static inline int ip6_skb_dst_mtu(struct sk_buff *skb)
+static inline unsigned int ip6_skb_dst_mtu(const struct sk_buff *skb)
{
- struct ipv6_pinfo *np = skb->sk && !dev_recursion_level() ?
+ const struct ipv6_pinfo *np = skb->sk && !dev_recursion_level() ?
inet6_sk(skb->sk) : NULL;
+ const struct dst_entry *dst = skb_dst(skb);
+ unsigned int mtu;
- return (np && np->pmtudisc >= IPV6_PMTUDISC_PROBE) ?
- skb_dst(skb)->dev->mtu : dst_mtu(skb_dst(skb));
+ if (np && np->pmtudisc >= IPV6_PMTUDISC_PROBE) {
+ mtu = READ_ONCE(dst->dev->mtu);
+ mtu -= lwtunnel_headroom(dst->lwtstate, mtu);
+ } else {
+ mtu = dst_mtu(dst);
+ }
+ return mtu;
}
static inline bool ip6_sk_accept_pmtu(const struct sock *sk)
@@ -306,15 +316,16 @@ static inline bool rt6_duplicate_nexthop(struct fib6_info *a, struct fib6_info *
!lwtunnel_cmp_encap(nha->fib_nh_lws, nhb->fib_nh_lws);
}
-static inline unsigned int ip6_dst_mtu_forward(const struct dst_entry *dst)
+static inline unsigned int ip6_dst_mtu_maybe_forward(const struct dst_entry *dst,
+ bool forwarding)
{
struct inet6_dev *idev;
unsigned int mtu;
- if (dst_metric_locked(dst, RTAX_MTU)) {
+ if (!forwarding || dst_metric_locked(dst, RTAX_MTU)) {
mtu = dst_metric_raw(dst, RTAX_MTU);
if (mtu)
- return mtu;
+ goto out;
}
mtu = IPV6_MIN_MTU;
@@ -324,7 +335,8 @@ static inline unsigned int ip6_dst_mtu_forward(const struct dst_entry *dst)
mtu = idev->cnf.mtu6;
rcu_read_unlock();
- return mtu;
+out:
+ return mtu - lwtunnel_headroom(dst->lwtstate, mtu);
}
u32 ip6_mtu_from_fib6(const struct fib6_result *res,
diff --git a/include/net/ip6_tunnel.h b/include/net/ip6_tunnel.h
index 028eaea1c854..74b369bddf49 100644
--- a/include/net/ip6_tunnel.h
+++ b/include/net/ip6_tunnel.h
@@ -46,6 +46,7 @@ struct __ip6_tnl_parm {
struct ip6_tnl {
struct ip6_tnl __rcu *next; /* next tunnel in list */
struct net_device *dev; /* virtual device associated with tunnel */
+ netdevice_tracker dev_tracker;
struct net *net; /* netns for packet i/o */
struct __ip6_tnl_parm parms; /* tunnel configuration parameters */
struct flowi fl; /* flowi template for xmit */
@@ -57,7 +58,7 @@ struct ip6_tnl {
/* These fields used only by GRE */
__u32 i_seqno; /* The last seen seqno */
- __u32 o_seqno; /* The last output seqno */
+ atomic_t o_seqno; /* The last output seqno */
int hlen; /* tun_hlen + encap_hlen */
int tun_hlen; /* Precalculated header length */
int encap_hlen; /* Encap header length (FOU,GUE) */
diff --git a/include/net/ip_fib.h b/include/net/ip_fib.h
index 6a1ae49809de..a378eff827c7 100644
--- a/include/net/ip_fib.h
+++ b/include/net/ip_fib.h
@@ -17,6 +17,7 @@
#include <linux/rcupdate.h>
#include <net/fib_notifier.h>
#include <net/fib_rules.h>
+#include <net/inet_dscp.h>
#include <net/inetpeer.h>
#include <linux/percpu.h>
#include <linux/notifier.h>
@@ -24,7 +25,7 @@
struct fib_config {
u8 fc_dst_len;
- u8 fc_tos;
+ dscp_t fc_dscp;
u8 fc_protocol;
u8 fc_scope;
u8 fc_type;
@@ -79,6 +80,7 @@ struct fnhe_hash_bucket {
struct fib_nh_common {
struct net_device *nhc_dev;
+ netdevice_tracker nhc_dev_tracker;
int nhc_oif;
unsigned char nhc_scope;
u8 nhc_family;
@@ -111,6 +113,7 @@ struct fib_nh {
int nh_saddr_genid;
#define fib_nh_family nh_common.nhc_family
#define fib_nh_dev nh_common.nhc_dev
+#define fib_nh_dev_tracker nh_common.nhc_dev_tracker
#define fib_nh_oif nh_common.nhc_oif
#define fib_nh_flags nh_common.nhc_flags
#define fib_nh_lws nh_common.nhc_lwtstate
@@ -133,7 +136,7 @@ struct fib_info {
struct hlist_node fib_lhash;
struct list_head nh_list;
struct net *fib_net;
- int fib_treeref;
+ refcount_t fib_treeref;
refcount_t fib_clntref;
unsigned int fib_flags;
unsigned char fib_dead;
@@ -153,7 +156,7 @@ struct fib_info {
bool nh_updated;
struct nexthop *nh;
struct rcu_head rcu;
- struct fib_nh fib_nh[0];
+ struct fib_nh fib_nh[];
};
@@ -209,11 +212,12 @@ struct fib_rt_info {
u32 tb_id;
__be32 dst;
int dst_len;
- u8 tos;
+ dscp_t dscp;
u8 type;
u8 offload:1,
trap:1,
- unused:6;
+ offload_failed:1,
+ unused:5;
};
struct fib_entry_notifier_info {
@@ -221,7 +225,7 @@ struct fib_entry_notifier_info {
u32 dst;
int dst_len;
struct fib_info *fi;
- u8 tos;
+ dscp_t dscp;
u8 type;
u32 tb_id;
};
@@ -250,14 +254,13 @@ struct fib_table {
int tb_num_default;
struct rcu_head rcu;
unsigned long *tb_data;
- unsigned long __data[0];
+ unsigned long __data[];
};
struct fib_dump_filter {
u32 table_id;
/* filter_set is an optimization that an entry is set */
bool filter_set;
- bool dump_all_families;
bool dump_routes;
bool dump_exceptions;
unsigned char protocol;
@@ -438,7 +441,7 @@ int fib_validate_source(struct sk_buff *skb, __be32 src, __be32 dst,
#ifdef CONFIG_IP_ROUTE_CLASSID
static inline int fib_num_tclassid_users(struct net *net)
{
- return net->ipv4.fib_num_tclassid_users;
+ return atomic_read(&net->ipv4.fib_num_tclassid_users);
}
#else
static inline int fib_num_tclassid_users(struct net *net)
@@ -448,6 +451,16 @@ static inline int fib_num_tclassid_users(struct net *net)
#endif
int fib_unmerge(struct net *net);
+static inline bool nhc_l3mdev_matches_dev(const struct fib_nh_common *nhc,
+const struct net_device *dev)
+{
+ if (nhc->nhc_dev == dev ||
+ l3mdev_master_ifindex_rcu(nhc->nhc_dev) == dev->ifindex)
+ return true;
+
+ return false;
+}
+
/* Exported by fib_semantics.c */
int ip_fib_check_default(__be32 gw, struct net_device *dev);
int fib_sync_down_dev(struct net_device *dev, unsigned long event, bool force);
@@ -456,6 +469,49 @@ int fib_sync_up(struct net_device *dev, unsigned char nh_flags);
void fib_sync_mtu(struct net_device *dev, u32 orig_mtu);
void fib_nhc_update_mtu(struct fib_nh_common *nhc, u32 new, u32 orig);
+/* Fields used for sysctl_fib_multipath_hash_fields.
+ * Common to IPv4 and IPv6.
+ *
+ * Add new fields at the end. This is user API.
+ */
+#define FIB_MULTIPATH_HASH_FIELD_SRC_IP BIT(0)
+#define FIB_MULTIPATH_HASH_FIELD_DST_IP BIT(1)
+#define FIB_MULTIPATH_HASH_FIELD_IP_PROTO BIT(2)
+#define FIB_MULTIPATH_HASH_FIELD_FLOWLABEL BIT(3)
+#define FIB_MULTIPATH_HASH_FIELD_SRC_PORT BIT(4)
+#define FIB_MULTIPATH_HASH_FIELD_DST_PORT BIT(5)
+#define FIB_MULTIPATH_HASH_FIELD_INNER_SRC_IP BIT(6)
+#define FIB_MULTIPATH_HASH_FIELD_INNER_DST_IP BIT(7)
+#define FIB_MULTIPATH_HASH_FIELD_INNER_IP_PROTO BIT(8)
+#define FIB_MULTIPATH_HASH_FIELD_INNER_FLOWLABEL BIT(9)
+#define FIB_MULTIPATH_HASH_FIELD_INNER_SRC_PORT BIT(10)
+#define FIB_MULTIPATH_HASH_FIELD_INNER_DST_PORT BIT(11)
+
+#define FIB_MULTIPATH_HASH_FIELD_OUTER_MASK \
+ (FIB_MULTIPATH_HASH_FIELD_SRC_IP | \
+ FIB_MULTIPATH_HASH_FIELD_DST_IP | \
+ FIB_MULTIPATH_HASH_FIELD_IP_PROTO | \
+ FIB_MULTIPATH_HASH_FIELD_FLOWLABEL | \
+ FIB_MULTIPATH_HASH_FIELD_SRC_PORT | \
+ FIB_MULTIPATH_HASH_FIELD_DST_PORT)
+
+#define FIB_MULTIPATH_HASH_FIELD_INNER_MASK \
+ (FIB_MULTIPATH_HASH_FIELD_INNER_SRC_IP | \
+ FIB_MULTIPATH_HASH_FIELD_INNER_DST_IP | \
+ FIB_MULTIPATH_HASH_FIELD_INNER_IP_PROTO | \
+ FIB_MULTIPATH_HASH_FIELD_INNER_FLOWLABEL | \
+ FIB_MULTIPATH_HASH_FIELD_INNER_SRC_PORT | \
+ FIB_MULTIPATH_HASH_FIELD_INNER_DST_PORT)
+
+#define FIB_MULTIPATH_HASH_FIELD_ALL_MASK \
+ (FIB_MULTIPATH_HASH_FIELD_OUTER_MASK | \
+ FIB_MULTIPATH_HASH_FIELD_INNER_MASK)
+
+#define FIB_MULTIPATH_HASH_FIELD_DEFAULT_MASK \
+ (FIB_MULTIPATH_HASH_FIELD_SRC_IP | \
+ FIB_MULTIPATH_HASH_FIELD_DST_IP | \
+ FIB_MULTIPATH_HASH_FIELD_IP_PROTO)
+
#ifdef CONFIG_IP_ROUTE_MULTIPATH
int fib_multipath_hash(const struct net *net, const struct flowi4 *fl4,
const struct sk_buff *skb, struct flow_keys *flkeys);
@@ -470,8 +526,9 @@ int fib_nh_init(struct net *net, struct fib_nh *fib_nh,
struct fib_config *cfg, int nh_weight,
struct netlink_ext_ack *extack);
void fib_nh_release(struct net *net, struct fib_nh *fib_nh);
-int fib_nh_common_init(struct fib_nh_common *nhc, struct nlattr *fc_encap,
- u16 fc_encap_type, void *cfg, gfp_t gfp_flags,
+int fib_nh_common_init(struct net *net, struct fib_nh_common *nhc,
+ struct nlattr *fc_encap, u16 fc_encap_type,
+ void *cfg, gfp_t gfp_flags,
struct netlink_ext_ack *extack);
void fib_nh_common_release(struct fib_nh_common *nhc);
@@ -479,6 +536,8 @@ void fib_nh_common_release(struct fib_nh_common *nhc);
void fib_alias_hw_flags_set(struct net *net, const struct fib_rt_info *fri);
void fib_trie_init(void);
struct fib_table *fib_trie_table(u32 id, struct fib_table *alias);
+bool fib_lookup_good_nhc(const struct fib_nh_common *nhc, int fib_flags,
+ const struct flowi4 *flp);
static inline void fib_combine_itag(u32 *itag, const struct fib_result *res)
{
@@ -541,5 +600,5 @@ int ip_valid_fib_dump_req(struct net *net, const struct nlmsghdr *nlh,
int fib_nexthop_info(struct sk_buff *skb, const struct fib_nh_common *nh,
u8 rt_family, unsigned char *flags, bool skip_oif);
int fib_add_nexthop(struct sk_buff *skb, const struct fib_nh_common *nh,
- int nh_weight, u8 rt_family);
+ int nh_weight, u8 rt_family, u32 nh_tclassid);
#endif /* _NET_FIB_H */
diff --git a/include/net/ip_tunnels.h b/include/net/ip_tunnels.h
index 236503a50759..fca357679816 100644
--- a/include/net/ip_tunnels.h
+++ b/include/net/ip_tunnels.h
@@ -54,6 +54,7 @@ struct ip_tunnel_key {
__be32 label; /* Flow Label for IPv6 */
__be16 tp_src;
__be16 tp_dst;
+ __u8 flow_flags;
};
/* Flags for ip_tunnel_info mode. */
@@ -104,7 +105,10 @@ struct metadata_dst;
struct ip_tunnel {
struct ip_tunnel __rcu *next;
struct hlist_node hash_node;
+
struct net_device *dev;
+ netdevice_tracker dev_tracker;
+
struct net *net; /* netns for packet i/o */
unsigned long err_time; /* Time when the last ICMP error
@@ -113,7 +117,7 @@ struct ip_tunnel {
/* These four fields used only by GRE */
u32 i_seqno; /* The last seen seqno */
- u32 o_seqno; /* The last output seqno */
+ atomic_t o_seqno; /* The last output seqno */
int tun_hlen; /* Precalculated header length */
/* These four fields used only by ERSPAN */
@@ -240,11 +244,19 @@ static inline __be32 tunnel_id_to_key32(__be64 tun_id)
static inline void ip_tunnel_init_flow(struct flowi4 *fl4,
int proto,
__be32 daddr, __be32 saddr,
- __be32 key, __u8 tos, int oif,
- __u32 mark, __u32 tun_inner_hash)
+ __be32 key, __u8 tos,
+ struct net *net, int oif,
+ __u32 mark, __u32 tun_inner_hash,
+ __u8 flow_flags)
{
memset(fl4, 0, sizeof(*fl4));
- fl4->flowi4_oif = oif;
+
+ if (oif) {
+ fl4->flowi4_l3mdev = l3mdev_master_upper_ifindex_by_index_rcu(net, oif);
+ /* Legacy VRF/l3mdev use case */
+ fl4->flowi4_oif = fl4->flowi4_l3mdev ? 0 : oif;
+ }
+
fl4->daddr = daddr;
fl4->saddr = saddr;
fl4->flowi4_tos = tos;
@@ -252,6 +264,7 @@ static inline void ip_tunnel_init_flow(struct flowi4 *fl4,
fl4->fl4_gre_key = key;
fl4->flowi4_mark = mark;
fl4->flowi4_multipath_hash = tun_inner_hash;
+ fl4->flowi4_flags = flow_flags;
}
int ip_tunnel_init(struct net_device *dev);
@@ -269,12 +282,12 @@ void ip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev,
const struct iphdr *tnl_params, const u8 protocol);
void ip_md_tunnel_xmit(struct sk_buff *skb, struct net_device *dev,
const u8 proto, int tunnel_hlen);
-int ip_tunnel_ioctl(struct net_device *dev, struct ip_tunnel_parm *p, int cmd);
+int ip_tunnel_ctl(struct net_device *dev, struct ip_tunnel_parm *p, int cmd);
+int ip_tunnel_siocdevprivate(struct net_device *dev, struct ifreq *ifr,
+ void __user *data, int cmd);
int __ip_tunnel_change_mtu(struct net_device *dev, int new_mtu, bool strict);
int ip_tunnel_change_mtu(struct net_device *dev, int new_mtu);
-void ip_tunnel_get_stats64(struct net_device *dev,
- struct rtnl_link_stats64 *tot);
struct ip_tunnel *ip_tunnel_lookup(struct ip_tunnel_net *itn,
int link, __be16 flags,
__be32 remote, __be32 local,
@@ -289,6 +302,15 @@ int ip_tunnel_newlink(struct net_device *dev, struct nlattr *tb[],
struct ip_tunnel_parm *p, __u32 fwmark);
void ip_tunnel_setup(struct net_device *dev, unsigned int net_id);
+bool ip_tunnel_netlink_encap_parms(struct nlattr *data[],
+ struct ip_tunnel_encap *encap);
+
+void ip_tunnel_netlink_parms(struct nlattr *data[],
+ struct ip_tunnel_parm *parms);
+
+extern const struct header_ops ip_tunnel_header_ops;
+__be16 ip_tunnel_parse_protocol(const struct sk_buff *skb);
+
struct ip_tunnel_encap_ops {
size_t (*encap_hlen)(struct ip_tunnel_encap *e);
int (*build_header)(struct sk_buff *skb, struct ip_tunnel_encap *e,
@@ -374,9 +396,11 @@ static inline int ip_tunnel_encap(struct sk_buff *skb, struct ip_tunnel *t,
static inline u8 ip_tunnel_get_dsfield(const struct iphdr *iph,
const struct sk_buff *skb)
{
- if (skb->protocol == htons(ETH_P_IP))
+ __be16 payload_protocol = skb_protocol(skb, true);
+
+ if (payload_protocol == htons(ETH_P_IP))
return iph->tos;
- else if (skb->protocol == htons(ETH_P_IPV6))
+ else if (payload_protocol == htons(ETH_P_IPV6))
return ipv6_get_dsfield((const struct ipv6hdr *)iph);
else
return 0;
@@ -385,9 +409,11 @@ static inline u8 ip_tunnel_get_dsfield(const struct iphdr *iph,
static inline u8 ip_tunnel_get_ttl(const struct iphdr *iph,
const struct sk_buff *skb)
{
- if (skb->protocol == htons(ETH_P_IP))
+ __be16 payload_protocol = skb_protocol(skb, true);
+
+ if (payload_protocol == htons(ETH_P_IP))
return iph->ttl;
- else if (skb->protocol == htons(ETH_P_IPV6))
+ else if (payload_protocol == htons(ETH_P_IPV6))
return ((const struct ipv6hdr *)iph)->hop_limit;
else
return 0;
@@ -416,6 +442,8 @@ void iptunnel_xmit(struct sock *sk, struct rtable *rt, struct sk_buff *skb,
u8 tos, u8 ttl, __be16 df, bool xnet);
struct metadata_dst *iptunnel_metadata_reply(struct metadata_dst *md,
gfp_t flags);
+int skb_tunnel_check_pmtu(struct sk_buff *skb, struct dst_entry *encap_dst,
+ int headroom, bool reply);
int iptunnel_handle_offloads(struct sk_buff *skb, int gso_type_mask);
@@ -441,8 +469,8 @@ static inline void iptunnel_xmit_stats(struct net_device *dev, int pkt_len)
struct pcpu_sw_netstats *tstats = get_cpu_ptr(dev->tstats);
u64_stats_update_begin(&tstats->syncp);
- tstats->tx_bytes += pkt_len;
- tstats->tx_packets++;
+ u64_stats_add(&tstats->tx_bytes, pkt_len);
+ u64_stats_inc(&tstats->tx_packets);
u64_stats_update_end(&tstats->syncp);
put_cpu_ptr(tstats);
} else {
@@ -472,9 +500,11 @@ static inline void ip_tunnel_info_opts_set(struct ip_tunnel_info *info,
const void *from, int len,
__be16 flags)
{
- memcpy(ip_tunnel_info_opts(info), from, len);
info->options_len = len;
- info->key.tun_flags |= flags;
+ if (len > 0) {
+ memcpy(ip_tunnel_info_opts(info), from, len);
+ info->key.tun_flags |= flags;
+ }
}
static inline struct ip_tunnel_info *lwt_tun_info(struct lwtunnel_state *lwtstate)
@@ -520,7 +550,6 @@ static inline void ip_tunnel_info_opts_set(struct ip_tunnel_info *info,
__be16 flags)
{
info->options_len = 0;
- info->key.tun_flags |= flags;
}
#endif /* CONFIG_INET */
diff --git a/include/net/ip_vs.h b/include/net/ip_vs.h
index 83be2d93b407..ff1804a0c469 100644
--- a/include/net/ip_vs.h
+++ b/include/net/ip_vs.h
@@ -14,6 +14,7 @@
#include <linux/spinlock.h> /* for struct rwlock_t */
#include <linux/atomic.h> /* for struct atomic_t */
#include <linux/refcount.h> /* for struct refcount_t */
+#include <linux/workqueue.h>
#include <linux/compiler.h>
#include <linux/timer.h>
@@ -24,9 +25,6 @@
#include <linux/ip.h>
#include <linux/ipv6.h> /* for struct ipv6hdr */
#include <net/ipv6.h>
-#if IS_ENABLED(CONFIG_IP_VS_IPV6)
-#include <linux/netfilter_ipv6/ip6_tables.h>
-#endif
#if IS_ENABLED(CONFIG_NF_CONNTRACK)
#include <net/netfilter/nf_conntrack.h>
#endif
@@ -874,6 +872,7 @@ struct netns_ipvs {
struct ip_vs_stats tot_stats; /* Statistics & est. */
int num_services; /* no of virtual services */
+ int num_services6; /* IPv6 virtual services */
/* Trash for destinations */
struct list_head dest_trash;
@@ -885,6 +884,8 @@ struct netns_ipvs {
atomic_t conn_out_counter;
#ifdef CONFIG_SYSCTL
+ /* delayed work for expiring no dest connections */
+ struct delayed_work expire_nodest_conn_work;
/* 1/rate drop and drop-entry variables */
struct delayed_work defense_work; /* Work handler */
int drop_rate;
@@ -930,6 +931,7 @@ struct netns_ipvs {
int sysctl_conn_reuse_mode;
int sysctl_schedule_icmp;
int sysctl_ignore_tunneled;
+ int sysctl_run_estimation;
/* ip_vs_lblc */
int sysctl_lblc_expiration;
@@ -960,6 +962,7 @@ struct netns_ipvs {
* are not supported when synchronization is enabled.
*/
unsigned int mixed_address_family_dests;
+ unsigned int hooks_afmask; /* &1=AF_INET, &2=AF_INET6 */
};
#define DEFAULT_SYNC_THRESHOLD 3
@@ -1049,6 +1052,11 @@ static inline int sysctl_conn_reuse_mode(struct netns_ipvs *ipvs)
return ipvs->sysctl_conn_reuse_mode;
}
+static inline int sysctl_expire_nodest_conn(struct netns_ipvs *ipvs)
+{
+ return ipvs->sysctl_expire_nodest_conn;
+}
+
static inline int sysctl_schedule_icmp(struct netns_ipvs *ipvs)
{
return ipvs->sysctl_schedule_icmp;
@@ -1064,6 +1072,11 @@ static inline int sysctl_cache_bypass(struct netns_ipvs *ipvs)
return ipvs->sysctl_cache_bypass;
}
+static inline int sysctl_run_estimation(struct netns_ipvs *ipvs)
+{
+ return ipvs->sysctl_run_estimation;
+}
+
#else
static inline int sysctl_sync_threshold(struct netns_ipvs *ipvs)
@@ -1136,6 +1149,11 @@ static inline int sysctl_conn_reuse_mode(struct netns_ipvs *ipvs)
return 1;
}
+static inline int sysctl_expire_nodest_conn(struct netns_ipvs *ipvs)
+{
+ return 0;
+}
+
static inline int sysctl_schedule_icmp(struct netns_ipvs *ipvs)
{
return 0;
@@ -1151,6 +1169,11 @@ static inline int sysctl_cache_bypass(struct netns_ipvs *ipvs)
return 0;
}
+static inline int sysctl_run_estimation(struct netns_ipvs *ipvs)
+{
+ return 1;
+}
+
#endif
/* IPVS core functions
@@ -1505,6 +1528,22 @@ static inline int ip_vs_todrop(struct netns_ipvs *ipvs)
static inline int ip_vs_todrop(struct netns_ipvs *ipvs) { return 0; }
#endif
+#ifdef CONFIG_SYSCTL
+/* Enqueue delayed work for expiring no dest connections
+ * Only run when sysctl_expire_nodest=1
+ */
+static inline void ip_vs_enqueue_expire_nodest_conns(struct netns_ipvs *ipvs)
+{
+ if (sysctl_expire_nodest_conn(ipvs))
+ queue_delayed_work(system_long_wq,
+ &ipvs->expire_nodest_conn_work, 1);
+}
+
+void ip_vs_expire_nodest_conn_flush(struct netns_ipvs *ipvs);
+#else
+static inline void ip_vs_enqueue_expire_nodest_conns(struct netns_ipvs *ipvs) {}
+#endif
+
#define IP_VS_DFWD_METHOD(dest) (atomic_read(&(dest)->conn_flags) & \
IP_VS_CONN_F_FWD_MASK)
@@ -1624,18 +1663,16 @@ static inline void ip_vs_conn_drop_conntrack(struct ip_vs_conn *cp)
}
#endif /* CONFIG_IP_VS_NFCT */
-/* Really using conntrack? */
-static inline bool ip_vs_conn_uses_conntrack(struct ip_vs_conn *cp,
- struct sk_buff *skb)
+/* Using old conntrack that can not be redirected to another real server? */
+static inline bool ip_vs_conn_uses_old_conntrack(struct ip_vs_conn *cp,
+ struct sk_buff *skb)
{
#ifdef CONFIG_IP_VS_NFCT
enum ip_conntrack_info ctinfo;
struct nf_conn *ct;
- if (!(cp->flags & IP_VS_CONN_F_NFCT))
- return false;
ct = nf_ct_get(skb, &ctinfo);
- if (ct)
+ if (ct && nf_ct_is_confirmed(ct))
return true;
#endif
return false;
@@ -1670,6 +1707,9 @@ static inline void ip_vs_unregister_conntrack(struct ip_vs_service *svc)
#endif
}
+int ip_vs_register_hooks(struct netns_ipvs *ipvs, unsigned int af);
+void ip_vs_unregister_hooks(struct netns_ipvs *ipvs, unsigned int af);
+
static inline int
ip_vs_dest_conn_overhead(struct ip_vs_dest *dest)
{
@@ -1683,4 +1723,15 @@ ip_vs_dest_conn_overhead(struct ip_vs_dest *dest)
atomic_read(&dest->inactconns);
}
+#ifdef CONFIG_IP_VS_PROTO_TCP
+INDIRECT_CALLABLE_DECLARE(int
+ tcp_snat_handler(struct sk_buff *skb, struct ip_vs_protocol *pp,
+ struct ip_vs_conn *cp, struct ip_vs_iphdr *iph));
+#endif
+
+#ifdef CONFIG_IP_VS_PROTO_UDP
+INDIRECT_CALLABLE_DECLARE(int
+ udp_snat_handler(struct sk_buff *skb, struct ip_vs_protocol *pp,
+ struct ip_vs_conn *cp, struct ip_vs_iphdr *iph));
+#endif
#endif /* _NET_IP_VS_H */
diff --git a/include/net/ipcomp.h b/include/net/ipcomp.h
index fee6fc451597..8660a2a6d1fc 100644
--- a/include/net/ipcomp.h
+++ b/include/net/ipcomp.h
@@ -2,11 +2,13 @@
#ifndef _NET_IPCOMP_H
#define _NET_IPCOMP_H
+#include <linux/skbuff.h>
#include <linux/types.h>
#define IPCOMP_SCRATCH_SIZE 65400
struct crypto_comp;
+struct ip_comp_hdr;
struct ipcomp_data {
u16 threshold;
@@ -20,7 +22,7 @@ struct xfrm_state;
int ipcomp_input(struct xfrm_state *x, struct sk_buff *skb);
int ipcomp_output(struct xfrm_state *x, struct sk_buff *skb);
void ipcomp_destroy(struct xfrm_state *x);
-int ipcomp_init_state(struct xfrm_state *x);
+int ipcomp_init_state(struct xfrm_state *x, struct netlink_ext_ack *extack);
static inline struct ip_comp_hdr *ip_comp_hdr(const struct sk_buff *skb)
{
diff --git a/include/net/ipconfig.h b/include/net/ipconfig.h
index e3534299bd2a..8276897d0c2e 100644
--- a/include/net/ipconfig.h
+++ b/include/net/ipconfig.h
@@ -7,6 +7,8 @@
/* The following are initdata: */
+#include <linux/types.h>
+
extern int ic_proto_enabled; /* Protocols enabled (see IC_xxx) */
extern int ic_set_manually; /* IPconfig parameters set manually */
diff --git a/include/net/ipv6.h b/include/net/ipv6.h
index cec1a54401f2..37943ba3a73c 100644
--- a/include/net/ipv6.h
+++ b/include/net/ipv6.h
@@ -15,12 +15,14 @@
#include <linux/refcount.h>
#include <linux/jump_label_ratelimit.h>
#include <net/if_inet6.h>
-#include <net/ndisc.h>
#include <net/flow.h>
#include <net/flow_dissector.h>
+#include <net/inet_dscp.h>
#include <net/snmp.h>
#include <net/netns/hash.h>
+struct ip_tunnel_info;
+
#define SIN6_LEN_RFC2133 24
#define IPV6_MAXPLEN 65535
@@ -30,6 +32,7 @@
*/
#define NEXTHDR_HOP 0 /* Hop-by-hop option header. */
+#define NEXTHDR_IPV4 4 /* IPv4 in IPv6 */
#define NEXTHDR_TCP 6 /* TCP segment. */
#define NEXTHDR_UDP 17 /* UDP message. */
#define NEXTHDR_IPV6 41 /* IPv6 in IPv6 */
@@ -148,6 +151,17 @@ struct frag_hdr {
__be32 identification;
};
+/*
+ * Jumbo payload option, as described in RFC 2675 2.
+ */
+struct hop_jumbo_hdr {
+ u8 nexthdr;
+ u8 hdrlen;
+ u8 tlv_type; /* IPV6_TLV_JUMBO, 0xC2 */
+ u8 tlv_len; /* 4 */
+ __be32 jumbo_payload_len;
+};
+
#define IP6_MF 0x0001
#define IP6_OFFSET 0xFFF8
@@ -344,9 +358,9 @@ struct ipcm6_cookie {
struct sockcm_cookie sockc;
__s16 hlimit;
__s16 tclass;
+ __u16 gso_size;
__s8 dontfrag;
struct ipv6_txoptions *opt;
- __u16 gso_size;
};
static inline void ipcm6_init(struct ipcm6_cookie *ipc6)
@@ -390,23 +404,26 @@ static inline void txopt_put(struct ipv6_txoptions *opt)
kfree_rcu(opt, rcu);
}
+#if IS_ENABLED(CONFIG_IPV6)
struct ip6_flowlabel *__fl6_sock_lookup(struct sock *sk, __be32 label);
extern struct static_key_false_deferred ipv6_flowlabel_exclusive;
static inline struct ip6_flowlabel *fl6_sock_lookup(struct sock *sk,
__be32 label)
{
- if (static_branch_unlikely(&ipv6_flowlabel_exclusive.key))
+ if (static_branch_unlikely(&ipv6_flowlabel_exclusive.key) &&
+ READ_ONCE(sock_net(sk)->ipv6.flowlabel_has_excl))
return __fl6_sock_lookup(sk, label) ? : ERR_PTR(-ENOENT);
return NULL;
}
+#endif
struct ipv6_txoptions *fl6_merge_options(struct ipv6_txoptions *opt_space,
struct ip6_flowlabel *fl,
struct ipv6_txoptions *fopt);
void fl6_free_socklist(struct sock *sk);
-int ipv6_flowlabel_opt(struct sock *sk, char __user *optval, int optlen);
+int ipv6_flowlabel_opt(struct sock *sk, sockptr_t optval, int optlen);
int ipv6_flowlabel_opt_get(struct sock *sk, struct in6_flowlabel_req *freq,
int flags);
int ip6_flowlabel_init(void);
@@ -434,14 +451,55 @@ struct ipv6_txoptions *ipv6_renew_options(struct sock *sk,
struct ipv6_txoptions *opt,
int newtype,
struct ipv6_opt_hdr *newopt);
-struct ipv6_txoptions *ipv6_fixup_options(struct ipv6_txoptions *opt_space,
- struct ipv6_txoptions *opt);
+struct ipv6_txoptions *__ipv6_fixup_options(struct ipv6_txoptions *opt_space,
+ struct ipv6_txoptions *opt);
+
+static inline struct ipv6_txoptions *
+ipv6_fixup_options(struct ipv6_txoptions *opt_space, struct ipv6_txoptions *opt)
+{
+ if (!opt)
+ return NULL;
+ return __ipv6_fixup_options(opt_space, opt);
+}
bool ipv6_opt_accepted(const struct sock *sk, const struct sk_buff *skb,
const struct inet6_skb_parm *opt);
struct ipv6_txoptions *ipv6_update_options(struct sock *sk,
struct ipv6_txoptions *opt);
+/* This helper is specialized for BIG TCP needs.
+ * It assumes the hop_jumbo_hdr will immediately follow the IPV6 header.
+ * It assumes headers are already in skb->head.
+ * Returns 0, or IPPROTO_TCP if a BIG TCP packet is there.
+ */
+static inline int ipv6_has_hopopt_jumbo(const struct sk_buff *skb)
+{
+ const struct hop_jumbo_hdr *jhdr;
+ const struct ipv6hdr *nhdr;
+
+ if (likely(skb->len <= GRO_LEGACY_MAX_SIZE))
+ return 0;
+
+ if (skb->protocol != htons(ETH_P_IPV6))
+ return 0;
+
+ if (skb_network_offset(skb) +
+ sizeof(struct ipv6hdr) +
+ sizeof(struct hop_jumbo_hdr) > skb_headlen(skb))
+ return 0;
+
+ nhdr = ipv6_hdr(skb);
+
+ if (nhdr->nexthdr != NEXTHDR_HOP)
+ return 0;
+
+ jhdr = (const struct hop_jumbo_hdr *) (nhdr + 1);
+ if (jhdr->tlv_type != IPV6_TLV_JUMBO || jhdr->hdrlen != 0 ||
+ jhdr->nexthdr != IPPROTO_TCP)
+ return 0;
+ return jhdr->nexthdr;
+}
+
static inline bool ipv6_accept_ra(struct inet6_dev *idev)
{
/* If forwarding is enabled, RA are not accepted unless the special
@@ -908,7 +966,6 @@ static inline int ip6_default_np_autolabel(struct net *net)
}
}
#else
-static inline void ip6_set_txhash(struct sock *sk) { }
static inline __be32 ip6_make_flowlabel(struct net *net, struct sk_buff *skb,
__be32 flowlabel, bool autolabel,
struct flowi6 *fl6)
@@ -926,11 +983,19 @@ static inline int ip6_multipath_hash_policy(const struct net *net)
{
return net->ipv6.sysctl.multipath_hash_policy;
}
+static inline u32 ip6_multipath_hash_fields(const struct net *net)
+{
+ return net->ipv6.sysctl.multipath_hash_fields;
+}
#else
static inline int ip6_multipath_hash_policy(const struct net *net)
{
return 0;
}
+static inline u32 ip6_multipath_hash_fields(const struct net *net)
+{
+ return 0;
+}
#endif
/*
@@ -957,6 +1022,11 @@ static inline u8 ip6_tclass(__be32 flowinfo)
return ntohl(flowinfo & IPV6_TCLASS_MASK) >> IPV6_TCLASS_SHIFT;
}
+static inline dscp_t ip6_dscp(__be32 flowinfo)
+{
+ return inet_dsfield_to_dscp(ip6_tclass(flowinfo));
+}
+
static inline __be32 ip6_make_flowinfo(unsigned int tclass, __be32 flowlabel)
{
return htonl(tclass << IPV6_TCLASS_SHIFT) | flowlabel;
@@ -993,7 +1063,7 @@ int ip6_find_1stfragopt(struct sk_buff *skb, u8 **nexthdr);
int ip6_append_data(struct sock *sk,
int getfrag(void *from, char *to, int offset, int len,
int odd, struct sk_buff *skb),
- void *from, int length, int transhdrlen,
+ void *from, size_t length, int transhdrlen,
struct ipcm6_cookie *ipc6, struct flowi6 *fl6,
struct rt6_info *rt, unsigned int flags);
@@ -1009,8 +1079,8 @@ struct sk_buff *__ip6_make_skb(struct sock *sk, struct sk_buff_head *queue,
struct sk_buff *ip6_make_skb(struct sock *sk,
int getfrag(void *from, char *to, int offset,
int len, int odd, struct sk_buff *skb),
- void *from, int length, int transhdrlen,
- struct ipcm6_cookie *ipc6, struct flowi6 *fl6,
+ void *from, size_t length, int transhdrlen,
+ struct ipcm6_cookie *ipc6,
struct rt6_info *rt, unsigned int flags,
struct inet_cork_full *cork);
@@ -1027,6 +1097,12 @@ struct dst_entry *ip6_dst_lookup_flow(struct net *net, const struct sock *sk, st
struct dst_entry *ip6_sk_dst_lookup_flow(struct sock *sk, struct flowi6 *fl6,
const struct in6_addr *final_dst,
bool connected);
+struct dst_entry *ip6_dst_lookup_tunnel(struct sk_buff *skb,
+ struct net_device *dev,
+ struct net *net, struct socket *sock,
+ struct in6_addr *saddr,
+ const struct ip_tunnel_info *info,
+ u8 protocol, bool use_cache);
struct dst_entry *ip6_blackhole_route(struct net *net,
struct dst_entry *orig_dst);
@@ -1078,15 +1154,16 @@ struct in6_addr *fl6_update_dst(struct flowi6 *fl6,
/*
* socket options (ipv6_sockglue.c)
*/
-
-int ipv6_setsockopt(struct sock *sk, int level, int optname,
- char __user *optval, unsigned int optlen);
+DECLARE_STATIC_KEY_FALSE(ip6_min_hopcount);
+
+int do_ipv6_setsockopt(struct sock *sk, int level, int optname, sockptr_t optval,
+ unsigned int optlen);
+int ipv6_setsockopt(struct sock *sk, int level, int optname, sockptr_t optval,
+ unsigned int optlen);
+int do_ipv6_getsockopt(struct sock *sk, int level, int optname,
+ sockptr_t optval, sockptr_t optlen);
int ipv6_getsockopt(struct sock *sk, int level, int optname,
char __user *optval, int __user *optlen);
-int compat_ipv6_setsockopt(struct sock *sk, int level, int optname,
- char __user *optval, unsigned int optlen);
-int compat_ipv6_getsockopt(struct sock *sk, int level, int optname,
- char __user *optval, int __user *optlen);
int __ip6_datagram_connect(struct sock *sk, struct sockaddr *addr,
int addr_len);
@@ -1105,11 +1182,15 @@ void ipv6_icmp_error(struct sock *sk, struct sk_buff *skb, int err, __be16 port,
void ipv6_local_error(struct sock *sk, int err, struct flowi6 *fl6, u32 info);
void ipv6_local_rxpmtu(struct sock *sk, struct flowi6 *fl6, u32 mtu);
+void inet6_cleanup_sock(struct sock *sk);
+void inet6_sock_destruct(struct sock *sk);
int inet6_release(struct socket *sock);
int inet6_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len);
int inet6_getname(struct socket *sock, struct sockaddr *uaddr,
int peer);
int inet6_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg);
+int inet6_compat_ioctl(struct socket *sock, unsigned int cmd,
+ unsigned long arg);
int inet6_hash_connect(struct inet_timewait_death_row *death_row,
struct sock *sk);
@@ -1129,9 +1210,10 @@ struct group_filter;
int ip6_mc_source(int add, int omode, struct sock *sk,
struct group_source_req *pgsr);
-int ip6_mc_msfilter(struct sock *sk, struct group_filter *gsf);
+int ip6_mc_msfilter(struct sock *sk, struct group_filter *gsf,
+ struct sockaddr_storage *list);
int ip6_mc_msfget(struct sock *sk, struct group_filter *gsf,
- struct group_filter __user *optval, int __user *optlen);
+ sockptr_t optval, size_t ss_offset);
#ifdef CONFIG_PROC_FS
int ac6_proc_init(struct net *net);
@@ -1169,4 +1251,96 @@ int ipv6_sock_mc_join_ssm(struct sock *sk, int ifindex,
const struct in6_addr *addr, unsigned int mode);
int ipv6_sock_mc_drop(struct sock *sk, int ifindex,
const struct in6_addr *addr);
+
+static inline int ip6_sock_set_v6only(struct sock *sk)
+{
+ if (inet_sk(sk)->inet_num)
+ return -EINVAL;
+ lock_sock(sk);
+ sk->sk_ipv6only = true;
+ release_sock(sk);
+ return 0;
+}
+
+static inline void ip6_sock_set_recverr(struct sock *sk)
+{
+ lock_sock(sk);
+ inet6_sk(sk)->recverr = true;
+ release_sock(sk);
+}
+
+static inline int __ip6_sock_set_addr_preferences(struct sock *sk, int val)
+{
+ unsigned int pref = 0;
+ unsigned int prefmask = ~0;
+
+ /* check PUBLIC/TMP/PUBTMP_DEFAULT conflicts */
+ switch (val & (IPV6_PREFER_SRC_PUBLIC |
+ IPV6_PREFER_SRC_TMP |
+ IPV6_PREFER_SRC_PUBTMP_DEFAULT)) {
+ case IPV6_PREFER_SRC_PUBLIC:
+ pref |= IPV6_PREFER_SRC_PUBLIC;
+ prefmask &= ~(IPV6_PREFER_SRC_PUBLIC |
+ IPV6_PREFER_SRC_TMP);
+ break;
+ case IPV6_PREFER_SRC_TMP:
+ pref |= IPV6_PREFER_SRC_TMP;
+ prefmask &= ~(IPV6_PREFER_SRC_PUBLIC |
+ IPV6_PREFER_SRC_TMP);
+ break;
+ case IPV6_PREFER_SRC_PUBTMP_DEFAULT:
+ prefmask &= ~(IPV6_PREFER_SRC_PUBLIC |
+ IPV6_PREFER_SRC_TMP);
+ break;
+ case 0:
+ break;
+ default:
+ return -EINVAL;
+ }
+
+ /* check HOME/COA conflicts */
+ switch (val & (IPV6_PREFER_SRC_HOME | IPV6_PREFER_SRC_COA)) {
+ case IPV6_PREFER_SRC_HOME:
+ prefmask &= ~IPV6_PREFER_SRC_COA;
+ break;
+ case IPV6_PREFER_SRC_COA:
+ pref |= IPV6_PREFER_SRC_COA;
+ break;
+ case 0:
+ break;
+ default:
+ return -EINVAL;
+ }
+
+ /* check CGA/NONCGA conflicts */
+ switch (val & (IPV6_PREFER_SRC_CGA|IPV6_PREFER_SRC_NONCGA)) {
+ case IPV6_PREFER_SRC_CGA:
+ case IPV6_PREFER_SRC_NONCGA:
+ case 0:
+ break;
+ default:
+ return -EINVAL;
+ }
+
+ inet6_sk(sk)->srcprefs = (inet6_sk(sk)->srcprefs & prefmask) | pref;
+ return 0;
+}
+
+static inline int ip6_sock_set_addr_preferences(struct sock *sk, bool val)
+{
+ int ret;
+
+ lock_sock(sk);
+ ret = __ip6_sock_set_addr_preferences(sk, val);
+ release_sock(sk);
+ return ret;
+}
+
+static inline void ip6_sock_set_recvpktinfo(struct sock *sk)
+{
+ lock_sock(sk);
+ inet6_sk(sk)->rxopt.bits.rxinfo = true;
+ release_sock(sk);
+}
+
#endif /* _NET_IPV6_H */
diff --git a/include/net/ipv6_frag.h b/include/net/ipv6_frag.h
index a21e8b1381a1..5052c66e22d2 100644
--- a/include/net/ipv6_frag.h
+++ b/include/net/ipv6_frag.h
@@ -1,6 +1,7 @@
/* SPDX-License-Identifier: GPL-2.0 */
#ifndef _IPV6_FRAG_H
#define _IPV6_FRAG_H
+#include <linux/icmpv6.h>
#include <linux/kernel.h>
#include <net/addrconf.h>
#include <net/ipv6.h>
@@ -67,7 +68,8 @@ ip6frag_expire_frag_queue(struct net *net, struct frag_queue *fq)
struct sk_buff *head;
rcu_read_lock();
- if (fq->q.fqdir->dead)
+ /* Paired with the WRITE_ONCE() in fqdir_pre_exit(). */
+ if (READ_ONCE(fq->q.fqdir->dead))
goto out_rcu_unlock;
spin_lock(&fq->q.lock);
@@ -108,5 +110,35 @@ out_rcu_unlock:
rcu_read_unlock();
inet_frag_put(&fq->q);
}
+
+/* Check if the upper layer header is truncated in the first fragment. */
+static inline bool
+ipv6frag_thdr_truncated(struct sk_buff *skb, int start, u8 *nexthdrp)
+{
+ u8 nexthdr = *nexthdrp;
+ __be16 frag_off;
+ int offset;
+
+ offset = ipv6_skip_exthdr(skb, start, &nexthdr, &frag_off);
+ if (offset < 0 || (frag_off & htons(IP6_OFFSET)))
+ return false;
+ switch (nexthdr) {
+ case NEXTHDR_TCP:
+ offset += sizeof(struct tcphdr);
+ break;
+ case NEXTHDR_UDP:
+ offset += sizeof(struct udphdr);
+ break;
+ case NEXTHDR_ICMP:
+ offset += sizeof(struct icmp6hdr);
+ break;
+ default:
+ offset += 1;
+ }
+ if (offset > skb->len)
+ return true;
+ return false;
+}
+
#endif
#endif
diff --git a/include/net/ipv6_stubs.h b/include/net/ipv6_stubs.h
index 3e7d2c0e79ca..c48186bf4737 100644
--- a/include/net/ipv6_stubs.h
+++ b/include/net/ipv6_stubs.h
@@ -47,8 +47,9 @@ struct ipv6_stub {
struct fib6_config *cfg, gfp_t gfp_flags,
struct netlink_ext_ack *extack);
void (*fib6_nh_release)(struct fib6_nh *fib6_nh);
+ void (*fib6_nh_release_dsts)(struct fib6_nh *fib6_nh);
void (*fib6_update_sernum)(struct net *net, struct fib6_info *rt);
- int (*ip6_del_rt)(struct net *net, struct fib6_info *rt);
+ int (*ip6_del_rt)(struct net *net, struct fib6_info *rt, bool skip_notify);
void (*fib6_rt_update)(struct net *net, struct fib6_info *rt,
struct nl_info *info);
@@ -56,19 +57,34 @@ struct ipv6_stub {
void (*ndisc_send_na)(struct net_device *dev, const struct in6_addr *daddr,
const struct in6_addr *solicited_addr,
bool router, bool solicited, bool override, bool inc_opt);
+#if IS_ENABLED(CONFIG_XFRM)
+ void (*xfrm6_local_rxpmtu)(struct sk_buff *skb, u32 mtu);
+ int (*xfrm6_udp_encap_rcv)(struct sock *sk, struct sk_buff *skb);
+ int (*xfrm6_rcv_encap)(struct sk_buff *skb, int nexthdr, __be32 spi,
+ int encap_type);
+#endif
struct neigh_table *nd_tbl;
+
+ int (*ipv6_fragment)(struct net *net, struct sock *sk, struct sk_buff *skb,
+ int (*output)(struct net *, struct sock *, struct sk_buff *));
+ struct net_device *(*ipv6_dev_find)(struct net *net, const struct in6_addr *addr,
+ struct net_device *dev);
};
extern const struct ipv6_stub *ipv6_stub __read_mostly;
/* A stub used by bpf helpers. Similarly ugly as ipv6_stub */
struct ipv6_bpf_stub {
int (*inet6_bind)(struct sock *sk, struct sockaddr *uaddr, int addr_len,
- bool force_bind_address_no_port, bool with_lock);
+ u32 flags);
struct sock *(*udp6_lib_lookup)(struct net *net,
const struct in6_addr *saddr, __be16 sport,
const struct in6_addr *daddr, __be16 dport,
int dif, int sdif, struct udp_table *tbl,
struct sk_buff *skb);
+ int (*ipv6_setsockopt)(struct sock *sk, int level, int optname,
+ sockptr_t optval, unsigned int optlen);
+ int (*ipv6_getsockopt)(struct sock *sk, int level, int optname,
+ sockptr_t optval, sockptr_t optlen);
};
extern const struct ipv6_bpf_stub *ipv6_bpf_stub __read_mostly;
diff --git a/include/net/ipx.h b/include/net/ipx.h
deleted file mode 100644
index 9d1342807b59..000000000000
--- a/include/net/ipx.h
+++ /dev/null
@@ -1,171 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-#ifndef _NET_INET_IPX_H_
-#define _NET_INET_IPX_H_
-/*
- * The following information is in its entirety obtained from:
- *
- * Novell 'IPX Router Specification' Version 1.10
- * Part No. 107-000029-001
- *
- * Which is available from ftp.novell.com
- */
-
-#include <linux/netdevice.h>
-#include <net/datalink.h>
-#include <linux/ipx.h>
-#include <linux/list.h>
-#include <linux/slab.h>
-#include <linux/refcount.h>
-
-struct ipx_address {
- __be32 net;
- __u8 node[IPX_NODE_LEN];
- __be16 sock;
-};
-
-#define ipx_broadcast_node "\377\377\377\377\377\377"
-#define ipx_this_node "\0\0\0\0\0\0"
-
-#define IPX_MAX_PPROP_HOPS 8
-
-struct ipxhdr {
- __be16 ipx_checksum __packed;
-#define IPX_NO_CHECKSUM cpu_to_be16(0xFFFF)
- __be16 ipx_pktsize __packed;
- __u8 ipx_tctrl;
- __u8 ipx_type;
-#define IPX_TYPE_UNKNOWN 0x00
-#define IPX_TYPE_RIP 0x01 /* may also be 0 */
-#define IPX_TYPE_SAP 0x04 /* may also be 0 */
-#define IPX_TYPE_SPX 0x05 /* SPX protocol */
-#define IPX_TYPE_NCP 0x11 /* $lots for docs on this (SPIT) */
-#define IPX_TYPE_PPROP 0x14 /* complicated flood fill brdcast */
- struct ipx_address ipx_dest __packed;
- struct ipx_address ipx_source __packed;
-};
-
-/* From af_ipx.c */
-extern int sysctl_ipx_pprop_broadcasting;
-
-struct ipx_interface {
- /* IPX address */
- __be32 if_netnum;
- unsigned char if_node[IPX_NODE_LEN];
- refcount_t refcnt;
-
- /* physical device info */
- struct net_device *if_dev;
- struct datalink_proto *if_dlink;
- __be16 if_dlink_type;
-
- /* socket support */
- unsigned short if_sknum;
- struct hlist_head if_sklist;
- spinlock_t if_sklist_lock;
-
- /* administrative overhead */
- int if_ipx_offset;
- unsigned char if_internal;
- unsigned char if_primary;
-
- struct list_head node; /* node in ipx_interfaces list */
-};
-
-struct ipx_route {
- __be32 ir_net;
- struct ipx_interface *ir_intrfc;
- unsigned char ir_routed;
- unsigned char ir_router_node[IPX_NODE_LEN];
- struct list_head node; /* node in ipx_routes list */
- refcount_t refcnt;
-};
-
-struct ipx_cb {
- u8 ipx_tctrl;
- __be32 ipx_dest_net;
- __be32 ipx_source_net;
- struct {
- __be32 netnum;
- int index;
- } last_hop;
-};
-
-#include <net/sock.h>
-
-struct ipx_sock {
- /* struct sock has to be the first member of ipx_sock */
- struct sock sk;
- struct ipx_address dest_addr;
- struct ipx_interface *intrfc;
- __be16 port;
-#ifdef CONFIG_IPX_INTERN
- unsigned char node[IPX_NODE_LEN];
-#endif
- unsigned short type;
- /*
- * To handle special ncp connection-handling sockets for mars_nwe,
- * the connection number must be stored in the socket.
- */
- unsigned short ipx_ncp_conn;
-};
-
-static inline struct ipx_sock *ipx_sk(struct sock *sk)
-{
- return (struct ipx_sock *)sk;
-}
-
-#define IPX_SKB_CB(__skb) ((struct ipx_cb *)&((__skb)->cb[0]))
-
-#define IPX_MIN_EPHEMERAL_SOCKET 0x4000
-#define IPX_MAX_EPHEMERAL_SOCKET 0x7fff
-
-extern struct list_head ipx_routes;
-extern rwlock_t ipx_routes_lock;
-
-extern struct list_head ipx_interfaces;
-struct ipx_interface *ipx_interfaces_head(void);
-extern spinlock_t ipx_interfaces_lock;
-
-extern struct ipx_interface *ipx_primary_net;
-
-int ipx_proc_init(void);
-void ipx_proc_exit(void);
-
-const char *ipx_frame_name(__be16);
-const char *ipx_device_name(struct ipx_interface *intrfc);
-
-static __inline__ void ipxitf_hold(struct ipx_interface *intrfc)
-{
- refcount_inc(&intrfc->refcnt);
-}
-
-void ipxitf_down(struct ipx_interface *intrfc);
-struct ipx_interface *ipxitf_find_using_net(__be32 net);
-int ipxitf_send(struct ipx_interface *intrfc, struct sk_buff *skb, char *node);
-__be16 ipx_cksum(struct ipxhdr *packet, int length);
-int ipxrtr_add_route(__be32 network, struct ipx_interface *intrfc,
- unsigned char *node);
-void ipxrtr_del_routes(struct ipx_interface *intrfc);
-int ipxrtr_route_packet(struct sock *sk, struct sockaddr_ipx *usipx,
- struct msghdr *msg, size_t len, int noblock);
-int ipxrtr_route_skb(struct sk_buff *skb);
-struct ipx_route *ipxrtr_lookup(__be32 net);
-int ipxrtr_ioctl(unsigned int cmd, void __user *arg);
-
-static __inline__ void ipxitf_put(struct ipx_interface *intrfc)
-{
- if (refcount_dec_and_test(&intrfc->refcnt))
- ipxitf_down(intrfc);
-}
-
-static __inline__ void ipxrtr_hold(struct ipx_route *rt)
-{
- refcount_inc(&rt->refcnt);
-}
-
-static __inline__ void ipxrtr_put(struct ipx_route *rt)
-{
- if (refcount_dec_and_test(&rt->refcnt))
- kfree(rt);
-}
-#endif /* _NET_INET_IPX_H_ */
diff --git a/include/net/iucv/af_iucv.h b/include/net/iucv/af_iucv.h
index 14a490246be9..df85d19fbf84 100644
--- a/include/net/iucv/af_iucv.h
+++ b/include/net/iucv/af_iucv.h
@@ -112,10 +112,12 @@ enum iucv_tx_notify {
struct iucv_sock {
struct sock sk;
- char src_user_id[8];
- char src_name[8];
- char dst_user_id[8];
- char dst_name[8];
+ struct_group(init,
+ char src_user_id[8];
+ char src_name[8];
+ char dst_user_id[8];
+ char dst_name[8];
+ );
struct list_head accept_q;
spinlock_t accept_q_lock;
struct sock *parent;
@@ -128,11 +130,12 @@ struct iucv_sock {
u8 flags;
u16 msglimit;
u16 msglimit_peer;
+ atomic_t skbs_in_xmit;
atomic_t msg_sent;
atomic_t msg_recv;
atomic_t pendings;
int transport;
- void (*sk_txnotify)(struct sk_buff *skb,
+ void (*sk_txnotify)(struct sock *sk,
enum iucv_tx_notify n);
};
@@ -158,12 +161,4 @@ struct iucv_sock_list {
atomic_t autobind_name;
};
-__poll_t iucv_sock_poll(struct file *file, struct socket *sock,
- poll_table *wait);
-void iucv_sock_link(struct iucv_sock_list *l, struct sock *s);
-void iucv_sock_unlink(struct iucv_sock_list *l, struct sock *s);
-void iucv_accept_enqueue(struct sock *parent, struct sock *sk);
-void iucv_accept_unlink(struct sock *sk);
-struct sock *iucv_accept_dequeue(struct sock *parent, struct socket *newsock);
-
#endif /* __IUCV_H */
diff --git a/include/net/l3mdev.h b/include/net/l3mdev.h
index e942372b077b..031c661aa14d 100644
--- a/include/net/l3mdev.h
+++ b/include/net/l3mdev.h
@@ -10,6 +10,16 @@
#include <net/dst.h>
#include <net/fib_rules.h>
+enum l3mdev_type {
+ L3MDEV_TYPE_UNSPEC,
+ L3MDEV_TYPE_VRF,
+ __L3MDEV_TYPE_MAX
+};
+
+#define L3MDEV_TYPE_MAX (__L3MDEV_TYPE_MAX - 1)
+
+typedef int (*lookup_by_table_id_t)(struct net *net, u32 table_d);
+
/**
* struct l3mdev_ops - l3mdev operations
*
@@ -37,6 +47,15 @@ struct l3mdev_ops {
#ifdef CONFIG_NET_L3_MASTER_DEV
+int l3mdev_table_lookup_register(enum l3mdev_type l3type,
+ lookup_by_table_id_t fn);
+
+void l3mdev_table_lookup_unregister(enum l3mdev_type l3type,
+ lookup_by_table_id_t fn);
+
+int l3mdev_ifindex_lookup_by_table_id(enum l3mdev_type l3type, struct net *net,
+ u32 table_id);
+
int l3mdev_fib_rule_match(struct net *net, struct flowi *fl,
struct fib_lookup_arg *arg);
@@ -281,6 +300,26 @@ struct sk_buff *l3mdev_ip6_out(struct sock *sk, struct sk_buff *skb)
}
static inline
+int l3mdev_table_lookup_register(enum l3mdev_type l3type,
+ lookup_by_table_id_t fn)
+{
+ return -EOPNOTSUPP;
+}
+
+static inline
+void l3mdev_table_lookup_unregister(enum l3mdev_type l3type,
+ lookup_by_table_id_t fn)
+{
+}
+
+static inline
+int l3mdev_ifindex_lookup_by_table_id(enum l3mdev_type l3type, struct net *net,
+ u32 table_id)
+{
+ return -ENODEV;
+}
+
+static inline
int l3mdev_fib_rule_match(struct net *net, struct flowi *fl,
struct fib_lookup_arg *arg)
{
diff --git a/include/net/lapb.h b/include/net/lapb.h
index ccc3d1f020b0..124ee122f2c8 100644
--- a/include/net/lapb.h
+++ b/include/net/lapb.h
@@ -92,6 +92,7 @@ struct lapb_cb {
unsigned short n2, n2count;
unsigned short t1, t2;
struct timer_list t1timer, t2timer;
+ bool t1timer_running, t2timer_running;
/* Internal control information */
struct sk_buff_head write_queue;
@@ -103,6 +104,7 @@ struct lapb_cb {
struct lapb_frame frmr_data;
unsigned char frmr_type;
+ spinlock_t lock;
refcount_t refcnt;
};
diff --git a/include/net/llc.h b/include/net/llc.h
index df282d9b4017..e250dca03963 100644
--- a/include/net/llc.h
+++ b/include/net/llc.h
@@ -72,7 +72,9 @@ struct llc_sap {
static inline
struct hlist_head *llc_sk_dev_hash(struct llc_sap *sap, int ifindex)
{
- return &sap->sk_dev_hash[ifindex % LLC_SK_DEV_HASH_ENTRIES];
+ u32 bucket = hash_32(ifindex, LLC_SK_DEV_HASH_BITS);
+
+ return &sap->sk_dev_hash[bucket];
}
static inline
@@ -133,7 +135,7 @@ static inline void llc_sap_put(struct llc_sap *sap)
struct llc_sap *llc_sap_find(unsigned char sap_value);
int llc_build_and_send_ui_pkt(struct llc_sap *sap, struct sk_buff *skb,
- unsigned char *dmac, unsigned char dsap);
+ const unsigned char *dmac, unsigned char dsap);
void llc_sap_handler(struct llc_sap *sap, struct sk_buff *skb);
void llc_conn_handler(struct llc_sap *sap, struct sk_buff *skb);
diff --git a/include/net/llc_c_ac.h b/include/net/llc_c_ac.h
index e766300b3e99..3e1f76786d7b 100644
--- a/include/net/llc_c_ac.h
+++ b/include/net/llc_c_ac.h
@@ -16,6 +16,13 @@
* Connection state transition actions
* (Fb = F bit; Pb = P bit; Xb = X bit)
*/
+
+#include <linux/types.h>
+
+struct sk_buff;
+struct sock;
+struct timer_list;
+
#define LLC_CONN_AC_CLR_REMOTE_BUSY 1
#define LLC_CONN_AC_CONN_IND 2
#define LLC_CONN_AC_CONN_CONFIRM 3
diff --git a/include/net/llc_c_st.h b/include/net/llc_c_st.h
index 48f3f891b2f9..53823d61d8b6 100644
--- a/include/net/llc_c_st.h
+++ b/include/net/llc_c_st.h
@@ -11,6 +11,10 @@
*
* See the GNU General Public License for more details.
*/
+
+#include <net/llc_c_ac.h>
+#include <net/llc_c_ev.h>
+
/* Connection component state management */
/* connection states */
#define LLC_CONN_OUT_OF_SVC 0 /* prior to allocation */
diff --git a/include/net/llc_conn.h b/include/net/llc_conn.h
index ea985aa7a6c5..2c1ea3414640 100644
--- a/include/net/llc_conn.h
+++ b/include/net/llc_conn.h
@@ -38,6 +38,7 @@ struct llc_sock {
struct llc_addr laddr; /* lsap/mac pair */
struct llc_addr daddr; /* dsap/mac pair */
struct net_device *dev; /* device to send to remote */
+ netdevice_tracker dev_tracker;
u32 copied_seq; /* head of yet unread data */
u8 retry_count; /* number of retries */
u8 ack_must_be_send;
diff --git a/include/net/llc_if.h b/include/net/llc_if.h
index 8d5c543cd620..c72570a21a4f 100644
--- a/include/net/llc_if.h
+++ b/include/net/llc_if.h
@@ -62,7 +62,8 @@
#define LLC_STATUS_CONFLICT 7 /* disconnect conn */
#define LLC_STATUS_RESET_DONE 8 /* */
-int llc_establish_connection(struct sock *sk, u8 *lmac, u8 *dmac, u8 dsap);
+int llc_establish_connection(struct sock *sk, const u8 *lmac, u8 *dmac,
+ u8 dsap);
int llc_build_and_send_pkt(struct sock *sk, struct sk_buff *skb);
int llc_send_disc(struct sock *sk);
#endif /* LLC_IF_H */
diff --git a/include/net/llc_pdu.h b/include/net/llc_pdu.h
index c0f0a13ed818..49aa79c7b278 100644
--- a/include/net/llc_pdu.h
+++ b/include/net/llc_pdu.h
@@ -15,9 +15,11 @@
#include <linux/if_ether.h>
/* Lengths of frame formats */
-#define LLC_PDU_LEN_I 4 /* header and 2 control bytes */
-#define LLC_PDU_LEN_S 4
-#define LLC_PDU_LEN_U 3 /* header and 1 control byte */
+#define LLC_PDU_LEN_I 4 /* header and 2 control bytes */
+#define LLC_PDU_LEN_S 4
+#define LLC_PDU_LEN_U 3 /* header and 1 control byte */
+/* header and 1 control byte and XID info */
+#define LLC_PDU_LEN_U_XID (LLC_PDU_LEN_U + sizeof(struct llc_xid_info))
/* Known SAP addresses */
#define LLC_GLOBAL_SAP 0xFF
#define LLC_NULL_SAP 0x00 /* not network-layer visible */
@@ -50,9 +52,10 @@
#define LLC_PDU_TYPE_U_MASK 0x03 /* 8-bit control field */
#define LLC_PDU_TYPE_MASK 0x03
-#define LLC_PDU_TYPE_I 0 /* first bit */
-#define LLC_PDU_TYPE_S 1 /* first two bits */
-#define LLC_PDU_TYPE_U 3 /* first two bits */
+#define LLC_PDU_TYPE_I 0 /* first bit */
+#define LLC_PDU_TYPE_S 1 /* first two bits */
+#define LLC_PDU_TYPE_U 3 /* first two bits */
+#define LLC_PDU_TYPE_U_XID 4 /* private type for detecting XID commands */
#define LLC_PDU_TYPE_IS_I(pdu) \
((!(pdu->ctrl_1 & LLC_PDU_TYPE_I_MASK)) ? 1 : 0)
@@ -230,9 +233,18 @@ static inline struct llc_pdu_un *llc_pdu_un_hdr(struct sk_buff *skb)
static inline void llc_pdu_header_init(struct sk_buff *skb, u8 type,
u8 ssap, u8 dsap, u8 cr)
{
- const int hlen = type == LLC_PDU_TYPE_U ? 3 : 4;
+ int hlen = 4; /* default value for I and S types */
struct llc_pdu_un *pdu;
+ switch (type) {
+ case LLC_PDU_TYPE_U:
+ hlen = 3;
+ break;
+ case LLC_PDU_TYPE_U_XID:
+ hlen = 6;
+ break;
+ }
+
skb_push(skb, hlen);
skb_reset_network_header(skb);
pdu = llc_pdu_un_hdr(skb);
@@ -374,7 +386,10 @@ static inline void llc_pdu_init_as_xid_cmd(struct sk_buff *skb,
xid_info->fmt_id = LLC_XID_FMT_ID; /* 0x81 */
xid_info->type = svcs_supported;
xid_info->rw = rx_window << 1; /* size of receive window */
- skb_put(skb, sizeof(struct llc_xid_info));
+
+ /* no need to push/put since llc_pdu_header_init() has already
+ * pushed 3 + 3 bytes
+ */
}
/**
diff --git a/include/net/llc_s_ac.h b/include/net/llc_s_ac.h
index a61b98c108ee..f71790305bc9 100644
--- a/include/net/llc_s_ac.h
+++ b/include/net/llc_s_ac.h
@@ -11,6 +11,10 @@
*
* See the GNU General Public License for more details.
*/
+
+struct llc_sap;
+struct sk_buff;
+
/* SAP component actions */
#define SAP_ACT_UNITDATA_IND 1
#define SAP_ACT_SEND_UI 2
diff --git a/include/net/llc_s_ev.h b/include/net/llc_s_ev.h
index 84db3a59ed28..fb7df1d70af3 100644
--- a/include/net/llc_s_ev.h
+++ b/include/net/llc_s_ev.h
@@ -13,6 +13,7 @@
*/
#include <linux/skbuff.h>
+#include <net/llc.h>
/* Defines SAP component events */
/* Types of events (possible values in 'ev->type') */
diff --git a/include/net/llc_s_st.h b/include/net/llc_s_st.h
index c4359e203013..ed5b2fa40d32 100644
--- a/include/net/llc_s_st.h
+++ b/include/net/llc_s_st.h
@@ -12,6 +12,12 @@
* See the GNU General Public License for more details.
*/
+#include <linux/types.h>
+#include <net/llc_s_ac.h>
+#include <net/llc_s_ev.h>
+
+struct llc_sap_state_trans;
+
#define LLC_NR_SAP_STATES 2 /* size of state table */
/* structures and types */
diff --git a/include/net/lwtunnel.h b/include/net/lwtunnel.h
index 5d6c5b1fc695..6f15e6fa154e 100644
--- a/include/net/lwtunnel.h
+++ b/include/net/lwtunnel.h
@@ -30,11 +30,11 @@ struct lwtunnel_state {
int (*orig_output)(struct net *net, struct sock *sk, struct sk_buff *skb);
int (*orig_input)(struct sk_buff *);
struct rcu_head rcu;
- __u8 data[0];
+ __u8 data[];
};
struct lwtunnel_encap_ops {
- int (*build_state)(struct nlattr *encap,
+ int (*build_state)(struct net *net, struct nlattr *encap,
unsigned int family, const void *cfg,
struct lwtunnel_state **ts,
struct netlink_ext_ack *extack);
@@ -51,6 +51,9 @@ struct lwtunnel_encap_ops {
};
#ifdef CONFIG_LWTUNNEL
+
+DECLARE_STATIC_KEY_FALSE(nf_hooks_lwtunnel_enabled);
+
void lwtstate_free(struct lwtunnel_state *lws);
static inline struct lwtunnel_state *
@@ -113,7 +116,7 @@ int lwtunnel_valid_encap_type(u16 encap_type,
struct netlink_ext_ack *extack);
int lwtunnel_valid_encap_type_attr(struct nlattr *attr, int len,
struct netlink_ext_ack *extack);
-int lwtunnel_build_state(u16 encap_type,
+int lwtunnel_build_state(struct net *net, u16 encap_type,
struct nlattr *encap,
unsigned int family, const void *cfg,
struct lwtunnel_state **lws,
@@ -209,7 +212,7 @@ static inline int lwtunnel_valid_encap_type_attr(struct nlattr *attr, int len,
return 0;
}
-static inline int lwtunnel_build_state(u16 encap_type,
+static inline int lwtunnel_build_state(struct net *net, u16 encap_type,
struct nlattr *encap,
unsigned int family, const void *cfg,
struct lwtunnel_state **lws,
diff --git a/include/net/mac80211.h b/include/net/mac80211.h
index 77e6b5a83b06..ac2bad57933f 100644
--- a/include/net/mac80211.h
+++ b/include/net/mac80211.h
@@ -7,7 +7,7 @@
* Copyright 2007-2010 Johannes Berg <johannes@sipsolutions.net>
* Copyright 2013-2014 Intel Mobile Communications GmbH
* Copyright (C) 2015 - 2017 Intel Deutschland GmbH
- * Copyright (C) 2018 - 2019 Intel Corporation
+ * Copyright (C) 2018 - 2022 Intel Corporation
*/
#ifndef MAC80211_H
@@ -18,6 +18,7 @@
#include <linux/if_ether.h>
#include <linux/skbuff.h>
#include <linux/ieee80211.h>
+#include <linux/lockdep.h>
#include <net/cfg80211.h>
#include <net/codel.h>
#include <net/ieee80211_radiotap.h>
@@ -125,6 +126,22 @@
* via the usual ieee80211_tx_dequeue).
*/
+/**
+ * DOC: HW timestamping
+ *
+ * Timing Measurement and Fine Timing Measurement require accurate timestamps
+ * of the action frames TX/RX and their respective acks.
+ *
+ * To report hardware timestamps for Timing Measurement or Fine Timing
+ * Measurement frame RX, the low level driver should set the SKB's hwtstamp
+ * field to the frame RX timestamp and report the ack TX timestamp in the
+ * ieee80211_rx_status struct.
+ *
+ * Similarly, To report hardware timestamps for Timing Measurement or Fine
+ * Timing Measurement frame TX, the driver should set the SKB's hwtstamp field
+ * to the frame TX timestamp and report the ack RX timestamp in the
+ * ieee80211_tx_status struct.
+ */
struct device;
/**
@@ -230,7 +247,7 @@ struct ieee80211_chanctx_conf {
bool radar_enabled;
- u8 drv_priv[0] __aligned(sizeof(void *));
+ u8 drv_priv[] __aligned(sizeof(void *));
};
/**
@@ -261,11 +278,13 @@ enum ieee80211_chanctx_switch_mode {
* done.
*
* @vif: the vif that should be switched from old_ctx to new_ctx
+ * @link_conf: the link conf that's switching
* @old_ctx: the old context to which the vif was assigned
* @new_ctx: the new context to which the vif must be assigned
*/
struct ieee80211_vif_chanctx_switch {
struct ieee80211_vif *vif;
+ struct ieee80211_bss_conf *link_conf;
struct ieee80211_chanctx_conf *old_ctx;
struct ieee80211_chanctx_conf *new_ctx;
};
@@ -273,8 +292,8 @@ struct ieee80211_vif_chanctx_switch {
/**
* enum ieee80211_bss_change - BSS change notification flags
*
- * These flags are used with the bss_info_changed() callback
- * to indicate which BSS parameter changed.
+ * These flags are used with the bss_info_changed(), link_info_changed()
+ * and vif_cfg_changed() callbacks to indicate which parameter(s) changed.
*
* @BSS_CHANGED_ASSOC: association status changed (associated/disassociated),
* also implies a change in the AID.
@@ -316,6 +335,10 @@ struct ieee80211_vif_chanctx_switch {
* functionality changed for this BSS (AP mode).
* @BSS_CHANGED_TWT: TWT status changed
* @BSS_CHANGED_HE_OBSS_PD: OBSS Packet Detection status changed.
+ * @BSS_CHANGED_HE_BSS_COLOR: BSS Color has changed
+ * @BSS_CHANGED_FILS_DISCOVERY: FILS discovery status changed.
+ * @BSS_CHANGED_UNSOL_BCAST_PROBE_RESP: Unsolicited broadcast probe response
+ * status changed.
*
*/
enum ieee80211_bss_change {
@@ -348,6 +371,9 @@ enum ieee80211_bss_change {
BSS_CHANGED_FTM_RESPONDER = 1<<26,
BSS_CHANGED_TWT = 1<<27,
BSS_CHANGED_HE_OBSS_PD = 1<<28,
+ BSS_CHANGED_HE_BSS_COLOR = 1<<29,
+ BSS_CHANGED_FILS_DISCOVERY = 1<<30,
+ BSS_CHANGED_UNSOL_BCAST_PROBE_RESP = 1<<31,
/* when adding here, make sure to change ieee80211_reconfig */
};
@@ -489,14 +515,26 @@ struct ieee80211_ftm_responder_params {
};
/**
+ * struct ieee80211_fils_discovery - FILS discovery parameters from
+ * IEEE Std 802.11ai-2016, Annex C.3 MIB detail.
+ *
+ * @min_interval: Minimum packet interval in TUs (0 - 10000)
+ * @max_interval: Maximum packet interval in TUs (0 - 10000)
+ */
+struct ieee80211_fils_discovery {
+ u32 min_interval;
+ u32 max_interval;
+};
+
+/**
* struct ieee80211_bss_conf - holds the BSS's changing parameters
*
* This structure keeps information about a BSS (and an association
* to that BSS) that can change during the lifetime of the BSS.
*
- * @bss_color: 6-bit value to mark inter-BSS frame, if BSS supports HE
+ * @addr: (link) address used locally
+ * @link_id: link ID, or 0 for non-MLO
* @htc_trig_based_pkt_ext: default PE in 4us units, if BSS supports HE
- * @multi_sta_back_32bit: supports BA bitmap of 32-bits in Multi-STA BACK
* @uora_exists: is the UORA element advertised by AP
* @ack_enabled: indicates support to receive a multi-TID that solicits either
* ACK, BACK or both
@@ -507,11 +545,8 @@ struct ieee80211_ftm_responder_params {
* mode only, set if the AP advertises TWT responder role)
* @twt_responder: does this BSS support TWT requester (relevant for managed
* mode only, set if the AP advertises TWT responder role)
- * @assoc: association status
- * @ibss_joined: indicates whether this station is part of an IBSS
- * or not
- * @ibss_creator: indicates if a new IBSS network is being created
- * @aid: association ID number, valid only when @assoc is true
+ * @twt_protected: does this BSS support protected TWT frames
+ * @twt_broadcast: does this BSS support broadcast TWT
* @use_cts_prot: use CTS protection
* @use_short_preamble: use 802.11b short preamble
* @use_short_slot: use short slot time (only relevant for ERP)
@@ -532,6 +567,8 @@ struct ieee80211_ftm_responder_params {
* IMPORTANT: These three sync_* parameters would possibly be out of sync
* by the time the driver will use them. The synchronized view is currently
* guaranteed only in certain callbacks.
+ * Note also that this is not used with MLD associations, mac80211 doesn't
+ * know how to track beacons for all of the links for this.
* @beacon_int: beacon interval
* @assoc_capability: capabilities taken from assoc resp
* @basic_rates: bitmap of basic rates, each bit stands for an
@@ -557,23 +594,9 @@ struct ieee80211_ftm_responder_params {
* threshold event and can't be enabled simultaneously with it.
* @cqm_rssi_high: Connection quality monitor RSSI upper threshold.
* @cqm_rssi_hyst: Connection quality monitor RSSI hysteresis
- * @arp_addr_list: List of IPv4 addresses for hardware ARP filtering. The
- * may filter ARP queries targeted for other addresses than listed here.
- * The driver must allow ARP queries targeted for all address listed here
- * to pass through. An empty list implies no ARP queries need to pass.
- * @arp_addr_cnt: Number of addresses currently on the list. Note that this
- * may be larger than %IEEE80211_BSS_ARP_ADDR_LIST_LEN (the arp_addr_list
- * array size), it's up to the driver what to do in that case.
* @qos: This is a QoS-enabled BSS.
- * @idle: This interface is idle. There's also a global idle flag in the
- * hardware config which may be more appropriate depending on what
- * your driver/device needs to do.
- * @ps: power-save mode (STA only). This flag is NOT affected by
- * offchannel/dynamic_ps operations.
- * @ssid: The SSID of the current vif. Valid in AP and IBSS mode.
- * @ssid_len: Length of SSID given in @ssid.
* @hidden_ssid: The SSID of the current vif is hidden. Only valid in AP-mode.
- * @txpower: TX power in dBm
+ * @txpower: TX power in dBm. INT_MIN means not configured.
* @txpower_type: TX power adjustment used to control per packet Transmit
* Power Control (TPC) in lower driver for the current vif. In particular
* TPC is enabled if value passed in %txpower_type is
@@ -602,25 +625,47 @@ struct ieee80211_ftm_responder_params {
* nontransmitted BSSIDs
* @profile_periodicity: the least number of beacon frames need to be received
* in order to discover all the nontransmitted BSSIDs in the set.
- * @he_operation: HE operation information of the AP we are connected to
+ * @he_oper: HE operation information of the BSS (AP/Mesh) or of the AP we are
+ * connected to (STA)
* @he_obss_pd: OBSS Packet Detection parameters.
+ * @he_bss_color: BSS coloring settings, if BSS supports HE
+ * @fils_discovery: FILS discovery configuration
+ * @unsol_bcast_probe_resp_interval: Unsolicited broadcast probe response
+ * interval.
+ * @beacon_tx_rate: The configured beacon transmit rate that needs to be passed
+ * to driver when rate control is offloaded to firmware.
+ * @power_type: power type of BSS for 6 GHz
+ * @tx_pwr_env: transmit power envelope array of BSS.
+ * @tx_pwr_env_num: number of @tx_pwr_env.
+ * @pwr_reduction: power constraint of BSS.
+ * @eht_support: does this BSS support EHT
+ * @csa_active: marks whether a channel switch is going on. Internally it is
+ * write-protected by sdata_lock and local->mtx so holding either is fine
+ * for read access.
+ * @mu_mimo_owner: indicates interface owns MU-MIMO capability
+ * @chanctx_conf: The channel context this interface is assigned to, or %NULL
+ * when it is not assigned. This pointer is RCU-protected due to the TX
+ * path needing to access it; even though the netdev carrier will always
+ * be off when it is %NULL there can still be races and packets could be
+ * processed after it switches back to %NULL.
+ * @color_change_active: marks whether a color change is ongoing. Internally it is
+ * write-protected by sdata_lock and local->mtx so holding either is fine
+ * for read access.
+ * @color_change_color: the bss color that will be used after the change.
*/
struct ieee80211_bss_conf {
const u8 *bssid;
- u8 bss_color;
+ unsigned int link_id;
+ u8 addr[ETH_ALEN] __aligned(2);
u8 htc_trig_based_pkt_ext;
- bool multi_sta_back_32bit;
bool uora_exists;
- bool ack_enabled;
u8 uora_ocw_range;
u16 frame_time_rts_th;
bool he_support;
bool twt_requester;
bool twt_responder;
- /* association related data */
- bool assoc, ibss_joined;
- bool ibss_creator;
- u16 aid;
+ bool twt_protected;
+ bool twt_broadcast;
/* erp related data */
bool use_cts_prot;
bool use_short_preamble;
@@ -642,13 +687,7 @@ struct ieee80211_bss_conf {
s32 cqm_rssi_high;
struct cfg80211_chan_def chandef;
struct ieee80211_mu_group_data mu_group;
- __be32 arp_addr_list[IEEE80211_BSS_ARP_ADDR_LIST_LEN];
- int arp_addr_cnt;
bool qos;
- bool idle;
- bool ps;
- u8 ssid[IEEE80211_MAX_SSID_LEN];
- size_t ssid_len;
bool hidden_ssid;
int txpower;
enum nl80211_tx_power_setting txpower_type;
@@ -665,8 +704,27 @@ struct ieee80211_bss_conf {
u8 bssid_indicator;
bool ema_ap;
u8 profile_periodicity;
- struct ieee80211_he_operation he_operation;
+ struct {
+ u32 params;
+ u16 nss_set;
+ } he_oper;
struct ieee80211_he_obss_pd he_obss_pd;
+ struct cfg80211_he_bss_color he_bss_color;
+ struct ieee80211_fils_discovery fils_discovery;
+ u32 unsol_bcast_probe_resp_interval;
+ struct cfg80211_bitrate_mask beacon_tx_rate;
+ enum ieee80211_ap_reg_power power_type;
+ struct ieee80211_tx_pwr_env tx_pwr_env[IEEE80211_TPE_MAX_IE_COUNT];
+ u8 tx_pwr_env_num;
+ u8 pwr_reduction;
+ bool eht_support;
+
+ bool csa_active;
+ bool mu_mimo_owner;
+ struct ieee80211_chanctx_conf __rcu *chanctx_conf;
+
+ bool color_change_active;
+ u8 color_change_color;
};
/**
@@ -713,9 +771,8 @@ struct ieee80211_bss_conf {
* @IEEE80211_TX_INTFL_OFFCHAN_TX_OK: Internal to mac80211. Used to indicate
* that a frame can be transmitted while the queues are stopped for
* off-channel operation.
- * @IEEE80211_TX_INTFL_NEED_TXPROCESSING: completely internal to mac80211,
- * used to indicate that a pending frame requires TX processing before
- * it can be sent out.
+ * @IEEE80211_TX_CTL_HW_80211_ENCAP: This frame uses hardware encapsulation
+ * (header conversion)
* @IEEE80211_TX_INTFL_RETRIED: completely internal to mac80211,
* used to indicate that a frame was already retried due to PS
* @IEEE80211_TX_INTFL_DONT_ENCRYPT: completely internal to mac80211,
@@ -784,7 +841,7 @@ enum mac80211_tx_info_flags {
IEEE80211_TX_STAT_AMPDU_NO_BACK = BIT(11),
IEEE80211_TX_CTL_RATE_CTRL_PROBE = BIT(12),
IEEE80211_TX_INTFL_OFFCHAN_TX_OK = BIT(13),
- IEEE80211_TX_INTFL_NEED_TXPROCESSING = BIT(14),
+ IEEE80211_TX_CTL_HW_80211_ENCAP = BIT(14),
IEEE80211_TX_INTFL_RETRIED = BIT(15),
IEEE80211_TX_INTFL_DONT_ENCRYPT = BIT(16),
IEEE80211_TX_CTL_NO_PS_BUFFER = BIT(17),
@@ -805,6 +862,8 @@ enum mac80211_tx_info_flags {
#define IEEE80211_TX_CTL_STBC_SHIFT 23
+#define IEEE80211_TX_RC_S1G_MCS IEEE80211_TX_RC_VHT_MCS
+
/**
* enum mac80211_tx_control_flags - flags to describe transmit control
*
@@ -816,6 +875,22 @@ enum mac80211_tx_info_flags {
* @IEEE80211_TX_CTRL_AMSDU: This frame is an A-MSDU frame
* @IEEE80211_TX_CTRL_FAST_XMIT: This frame is going through the fast_xmit path
* @IEEE80211_TX_CTRL_SKIP_MPATH_LOOKUP: This frame skips mesh path lookup
+ * @IEEE80211_TX_INTCFL_NEED_TXPROCESSING: completely internal to mac80211,
+ * used to indicate that a pending frame requires TX processing before
+ * it can be sent out.
+ * @IEEE80211_TX_CTRL_NO_SEQNO: Do not overwrite the sequence number that
+ * has already been assigned to this frame.
+ * @IEEE80211_TX_CTRL_DONT_REORDER: This frame should not be reordered
+ * relative to other frames that have this flag set, independent
+ * of their QoS TID or other priority field values.
+ * @IEEE80211_TX_CTRL_MCAST_MLO_FIRST_TX: first MLO TX, used mostly internally
+ * for sequence number assignment
+ * @IEEE80211_TX_CTRL_MLO_LINK: If not @IEEE80211_LINK_UNSPECIFIED, this
+ * frame should be transmitted on the specific link. This really is
+ * only relevant for frames that do not have data present, and is
+ * also not used for 802.3 format frames. Note that even if the frame
+ * is on a specific link, address translation might still apply if
+ * it's intended for an MLD.
*
* These flags are used in tx_info->control.flags.
*/
@@ -826,6 +901,27 @@ enum mac80211_tx_control_flags {
IEEE80211_TX_CTRL_AMSDU = BIT(3),
IEEE80211_TX_CTRL_FAST_XMIT = BIT(4),
IEEE80211_TX_CTRL_SKIP_MPATH_LOOKUP = BIT(5),
+ IEEE80211_TX_INTCFL_NEED_TXPROCESSING = BIT(6),
+ IEEE80211_TX_CTRL_NO_SEQNO = BIT(7),
+ IEEE80211_TX_CTRL_DONT_REORDER = BIT(8),
+ IEEE80211_TX_CTRL_MCAST_MLO_FIRST_TX = BIT(9),
+ IEEE80211_TX_CTRL_MLO_LINK = 0xf0000000,
+};
+
+#define IEEE80211_LINK_UNSPECIFIED 0xf
+#define IEEE80211_TX_CTRL_MLO_LINK_UNSPEC \
+ u32_encode_bits(IEEE80211_LINK_UNSPECIFIED, \
+ IEEE80211_TX_CTRL_MLO_LINK)
+
+/**
+ * enum mac80211_tx_status_flags - flags to describe transmit status
+ *
+ * @IEEE80211_TX_STATUS_ACK_SIGNAL_VALID: ACK signal is valid
+ *
+ * These flags are used in tx_info->status.flags.
+ */
+enum mac80211_tx_status_flags {
+ IEEE80211_TX_STATUS_ACK_SIGNAL_VALID = BIT(0),
};
/*
@@ -964,7 +1060,9 @@ ieee80211_rate_get_vht_nss(const struct ieee80211_tx_rate *rate)
* (3) TX status information - driver tells mac80211 what happened
*
* @flags: transmit info flags, defined above
- * @band: the band to transmit on (use for checking for races)
+ * @band: the band to transmit on (use e.g. for checking for races),
+ * not valid if the interface is an MLD since we won't know which
+ * link the frame will be transmitted on
* @hw_queue: HW queue to put the frame on, skb_get_queue_mapping() gives the AC
* @ack_frame_id: internal frame ID for TX status, used internally
* @tx_time_est: TX time estimate in units of 4us, used internally
@@ -989,8 +1087,9 @@ ieee80211_rate_get_vht_nss(const struct ieee80211_tx_rate *rate)
* @status.ampdu_ack_len: AMPDU ack length
* @status.ampdu_len: AMPDU length
* @status.antenna: (legacy, kept only for iwlegacy)
- * @status.tx_time: airtime consumed for transmission
- * @status.is_valid_ack_signal: ACK signal is valid
+ * @status.tx_time: airtime consumed for transmission; note this is only
+ * used for WMM AC, not for airtime fairness
+ * @status.flags: status flags, see &enum mac80211_tx_status_flags
* @status.status_driver_data: driver use area
* @ack: union part for pure ACK data
* @ack.cookie: cookie for the ACK
@@ -1043,8 +1142,8 @@ struct ieee80211_tx_info {
u8 ampdu_len;
u8 antenna;
u16 tx_time;
- bool is_valid_ack_signal;
- void *status_driver_data[19 / sizeof(void *)];
+ u8 flags;
+ void *status_driver_data[18 / sizeof(void *)];
} status;
struct {
struct ieee80211_tx_rate driver_rates[
@@ -1075,19 +1174,46 @@ ieee80211_info_get_tx_time_est(struct ieee80211_tx_info *info)
return info->tx_time_est << 2;
}
+/***
+ * struct ieee80211_rate_status - mrr stage for status path
+ *
+ * This struct is used in struct ieee80211_tx_status to provide drivers a
+ * dynamic way to report about used rates and power levels per packet.
+ *
+ * @rate_idx The actual used rate.
+ * @try_count How often the rate was tried.
+ * @tx_power_idx An idx into the ieee80211_hw->tx_power_levels list of the
+ * corresponding wifi hardware. The idx shall point to the power level
+ * that was used when sending the packet.
+ */
+struct ieee80211_rate_status {
+ struct rate_info rate_idx;
+ u8 try_count;
+ u8 tx_power_idx;
+};
+
/**
* struct ieee80211_tx_status - extended tx status info for rate control
*
* @sta: Station that the packet was transmitted for
* @info: Basic tx status information
* @skb: Packet skb (can be NULL if not provided by the driver)
- * @rate: The TX rate that was used when sending the packet
+ * @rates: Mrr stages that were used when sending the packet
+ * @n_rates: Number of mrr stages (count of instances for @rates)
+ * @free_list: list where processed skbs are stored to be free'd by the driver
+ * @ack_hwtstamp: Hardware timestamp of the received ack in nanoseconds
+ * Only needed for Timing measurement and Fine timing measurement action
+ * frames. Only reported by devices that have timestamping enabled.
*/
struct ieee80211_tx_status {
struct ieee80211_sta *sta;
struct ieee80211_tx_info *info;
struct sk_buff *skb;
- struct rate_info *rate;
+ struct ieee80211_rate_status *rates;
+ ktime_t ack_hwtstamp;
+ u8 n_rates;
+
+ struct list_head *free_list;
};
/**
@@ -1130,9 +1256,9 @@ static inline struct ieee80211_rx_status *IEEE80211_SKB_RXCB(struct sk_buff *skb
* in the TX status but the rate control information (it does clear
* the count since you need to fill that in anyway).
*
- * NOTE: You can only use this function if you do NOT use
- * info->driver_data! Use info->rate_driver_data
- * instead if you need only the less space that allows.
+ * NOTE: While the rates array is kept intact, this will wipe all of the
+ * driver_data fields in info, so it's up to the driver to restore
+ * any fields it needs after calling this helper.
*/
static inline void
ieee80211_tx_info_clear_status(struct ieee80211_tx_info *info)
@@ -1147,12 +1273,7 @@ ieee80211_tx_info_clear_status(struct ieee80211_tx_info *info)
/* clear the rate counts */
for (i = 0; i < IEEE80211_TX_MAX_RATES; i++)
info->status.rates[i].count = 0;
-
- BUILD_BUG_ON(
- offsetof(struct ieee80211_tx_info, status.ack_signal) != 20);
- memset(&info->status.ampdu_ack_len, 0,
- sizeof(struct ieee80211_tx_info) -
- offsetof(struct ieee80211_tx_info, status.ampdu_ack_len));
+ memset_after(&info->status, 0, rates);
}
@@ -1248,6 +1369,8 @@ ieee80211_tx_info_clear_status(struct ieee80211_tx_info *info)
* the "0-length PSDU" field included there. The value for it is
* in &struct ieee80211_rx_status. Note that if this value isn't
* known the frame shouldn't be reported.
+ * @RX_FLAG_8023: the frame has an 802.3 header (decap offload performed by
+ * hardware or driver)
*/
enum mac80211_rx_flags {
RX_FLAG_MMIC_ERROR = BIT(0),
@@ -1280,6 +1403,7 @@ enum mac80211_rx_flags {
RX_FLAG_RADIOTAP_HE_MU = BIT(27),
RX_FLAG_RADIOTAP_LSIG = BIT(28),
RX_FLAG_NO_PSDU = BIT(29),
+ RX_FLAG_8023 = BIT(30),
};
/**
@@ -1324,12 +1448,16 @@ enum mac80211_rx_encoding {
* (TSF) timer when the first data symbol (MPDU) arrived at the hardware.
* @boottime_ns: CLOCK_BOOTTIME timestamp the frame was received at, this is
* needed only for beacons and probe responses that update the scan cache.
+ * @ack_tx_hwtstamp: Hardware timestamp for the ack TX in nanoseconds. Only
+ * needed for Timing measurement and Fine timing measurement action frames.
+ * Only reported by devices that have timestamping enabled.
* @device_timestamp: arbitrary timestamp for the device, mac80211 doesn't use
* it but can store it and pass it back to the driver for synchronisation
* @band: the active band when this frame was received
* @freq: frequency the radio was tuned to when receiving this frame, in MHz
* This field must be set for management frames, but isn't strictly needed
* for data (other) frames - for those it only affects radiotap reporting.
+ * @freq_offset: @freq has a positive offset of 500Khz.
* @signal: signal strength when receiving this frame, either in dBm, in dB or
* unspecified depending on the hardware capabilities flags
* @IEEE80211_HW_SIGNAL_*
@@ -1353,14 +1481,21 @@ enum mac80211_rx_encoding {
* each A-MPDU but the same for each subframe within one A-MPDU
* @ampdu_delimiter_crc: A-MPDU delimiter CRC
* @zero_length_psdu_type: radiotap type of the 0-length PSDU
+ * @link_valid: if the link which is identified by @link_id is valid. This flag
+ * is set only when connection is MLO.
+ * @link_id: id of the link used to receive the packet. This is used along with
+ * @link_valid.
*/
struct ieee80211_rx_status {
u64 mactime;
- u64 boottime_ns;
+ union {
+ u64 boottime_ns;
+ ktime_t ack_tx_hwtstamp;
+ };
u32 device_timestamp;
u32 ampdu_reference;
u32 flag;
- u16 freq;
+ u16 freq: 13, freq_offset: 1;
u8 enc_flags;
u8 encoding:2, bw:3, he_ru:3;
u8 he_gi:2, he_dcm:1;
@@ -1374,8 +1509,16 @@ struct ieee80211_rx_status {
s8 chain_signal[IEEE80211_MAX_CHAINS];
u8 ampdu_delimiter_crc;
u8 zero_length_psdu_type;
+ u8 link_valid:1, link_id:4;
};
+static inline u32
+ieee80211_rx_status_to_khz(struct ieee80211_rx_status *rx_status)
+{
+ return MHZ_TO_KHZ(rx_status->freq) +
+ (rx_status->freq_offset ? 500 : 0);
+}
+
/**
* struct ieee80211_vendor_radiotap - vendor radiotap data information
* @present: presence bitmap for this vendor namespace
@@ -1585,6 +1728,65 @@ enum ieee80211_vif_flags {
IEEE80211_VIF_GET_NOA_UPDATE = BIT(3),
};
+
+/**
+ * enum ieee80211_offload_flags - virtual interface offload flags
+ *
+ * @IEEE80211_OFFLOAD_ENCAP_ENABLED: tx encapsulation offload is enabled
+ * The driver supports sending frames passed as 802.3 frames by mac80211.
+ * It must also support sending 802.11 packets for the same interface.
+ * @IEEE80211_OFFLOAD_ENCAP_4ADDR: support 4-address mode encapsulation offload
+ * @IEEE80211_OFFLOAD_DECAP_ENABLED: rx encapsulation offload is enabled
+ * The driver supports passing received 802.11 frames as 802.3 frames to
+ * mac80211.
+ */
+
+enum ieee80211_offload_flags {
+ IEEE80211_OFFLOAD_ENCAP_ENABLED = BIT(0),
+ IEEE80211_OFFLOAD_ENCAP_4ADDR = BIT(1),
+ IEEE80211_OFFLOAD_DECAP_ENABLED = BIT(2),
+};
+
+/**
+ * struct ieee80211_vif_cfg - interface configuration
+ * @assoc: association status
+ * @ibss_joined: indicates whether this station is part of an IBSS or not
+ * @ibss_creator: indicates if a new IBSS network is being created
+ * @ps: power-save mode (STA only). This flag is NOT affected by
+ * offchannel/dynamic_ps operations.
+ * @aid: association ID number, valid only when @assoc is true
+ * @arp_addr_list: List of IPv4 addresses for hardware ARP filtering. The
+ * may filter ARP queries targeted for other addresses than listed here.
+ * The driver must allow ARP queries targeted for all address listed here
+ * to pass through. An empty list implies no ARP queries need to pass.
+ * @arp_addr_cnt: Number of addresses currently on the list. Note that this
+ * may be larger than %IEEE80211_BSS_ARP_ADDR_LIST_LEN (the arp_addr_list
+ * array size), it's up to the driver what to do in that case.
+ * @ssid: The SSID of the current vif. Valid in AP and IBSS mode.
+ * @ssid_len: Length of SSID given in @ssid.
+ * @s1g: BSS is S1G BSS (affects Association Request format).
+ * @idle: This interface is idle. There's also a global idle flag in the
+ * hardware config which may be more appropriate depending on what
+ * your driver/device needs to do.
+ * @ap_addr: AP MLD address, or BSSID for non-MLO connections
+ * (station mode only)
+ */
+struct ieee80211_vif_cfg {
+ /* association related data */
+ bool assoc, ibss_joined;
+ bool ibss_creator;
+ bool ps;
+ u16 aid;
+
+ __be32 arp_addr_list[IEEE80211_BSS_ARP_ADDR_LIST_LEN];
+ int arp_addr_cnt;
+ u8 ssid[IEEE80211_MAX_SSID_LEN];
+ size_t ssid_len;
+ bool s1g;
+ bool idle;
+ u8 ap_addr[ETH_ALEN] __aligned(2);
+};
+
/**
* struct ieee80211_vif - per-interface data
*
@@ -1592,66 +1794,83 @@ enum ieee80211_vif_flags {
* use during the life of a virtual interface.
*
* @type: type of this virtual interface
+ * @cfg: vif configuration, see &struct ieee80211_vif_cfg
* @bss_conf: BSS configuration for this interface, either our own
* or the BSS we're associated to
+ * @link_conf: in case of MLD, the per-link BSS configuration,
+ * indexed by link ID
+ * @valid_links: bitmap of valid links, or 0 for non-MLO.
+ * @active_links: The bitmap of active links, or 0 for non-MLO.
+ * The driver shouldn't change this directly, but use the
+ * API calls meant for that purpose.
* @addr: address of this interface
* @p2p: indicates whether this AP or STA interface is a p2p
* interface, i.e. a GO or p2p-sta respectively
- * @csa_active: marks whether a channel switch is going on. Internally it is
- * write-protected by sdata_lock and local->mtx so holding either is fine
- * for read access.
- * @mu_mimo_owner: indicates interface owns MU-MIMO capability
* @driver_flags: flags/capabilities the driver has for this interface,
* these need to be set (or cleared) when the interface is added
* or, if supported by the driver, the interface type is changed
* at runtime, mac80211 will never touch this field
+ * @offload_flags: hardware offload capabilities/flags for this interface.
+ * These are initialized by mac80211 before calling .add_interface,
+ * .change_interface or .update_vif_offload and updated by the driver
+ * within these ops, based on supported features or runtime change
+ * restrictions.
* @hw_queue: hardware queue for each AC
* @cab_queue: content-after-beacon (DTIM beacon really) queue, AP mode only
- * @chanctx_conf: The channel context this interface is assigned to, or %NULL
- * when it is not assigned. This pointer is RCU-protected due to the TX
- * path needing to access it; even though the netdev carrier will always
- * be off when it is %NULL there can still be races and packets could be
- * processed after it switches back to %NULL.
* @debugfs_dir: debugfs dentry, can be used by drivers to create own per
* interface debug files. Note that it will be NULL for the virtual
* monitor interface (if that is requested.)
* @probe_req_reg: probe requests should be reported to mac80211 for this
* interface.
+ * @rx_mcast_action_reg: multicast Action frames should be reported to mac80211
+ * for this interface.
* @drv_priv: data area for driver use, will always be aligned to
* sizeof(void \*).
* @txq: the multicast data TX queue (if driver uses the TXQ abstraction)
* @txqs_stopped: per AC flag to indicate that intermediate TXQs are stopped,
* protected by fq->lock.
+ * @offload_flags: 802.3 -> 802.11 enapsulation offload flags, see
+ * &enum ieee80211_offload_flags.
+ * @mbssid_tx_vif: Pointer to the transmitting interface if MBSSID is enabled.
*/
struct ieee80211_vif {
enum nl80211_iftype type;
+ struct ieee80211_vif_cfg cfg;
struct ieee80211_bss_conf bss_conf;
+ struct ieee80211_bss_conf __rcu *link_conf[IEEE80211_MLD_MAX_NUM_LINKS];
+ u16 valid_links, active_links;
u8 addr[ETH_ALEN] __aligned(2);
bool p2p;
- bool csa_active;
- bool mu_mimo_owner;
u8 cab_queue;
u8 hw_queue[IEEE80211_NUM_ACS];
struct ieee80211_txq *txq;
- struct ieee80211_chanctx_conf __rcu *chanctx_conf;
-
u32 driver_flags;
+ u32 offload_flags;
#ifdef CONFIG_MAC80211_DEBUGFS
struct dentry *debugfs_dir;
#endif
- unsigned int probe_req_reg;
+ bool probe_req_reg;
+ bool rx_mcast_action_reg;
bool txqs_stopped[IEEE80211_NUM_ACS];
+ struct ieee80211_vif *mbssid_tx_vif;
+
/* must be last */
- u8 drv_priv[0] __aligned(sizeof(void *));
+ u8 drv_priv[] __aligned(sizeof(void *));
};
+#define for_each_vif_active_link(vif, link, link_id) \
+ for (link_id = 0; link_id < ARRAY_SIZE((vif)->link_conf); link_id++) \
+ if ((!(vif)->active_links || \
+ (vif)->active_links & BIT(link_id)) && \
+ (link = rcu_dereference((vif)->link_conf[link_id])))
+
static inline bool ieee80211_vif_is_mesh(struct ieee80211_vif *vif)
{
#ifdef CONFIG_MAC80211_MESH
@@ -1679,14 +1898,24 @@ struct ieee80211_vif *wdev_to_ieee80211_vif(struct wireless_dev *wdev);
*
* This can be used by mac80211 drivers with direct cfg80211 APIs
* (like the vendor commands) that needs to get the wdev for a vif.
- *
- * Note that this function may return %NULL if the given wdev isn't
- * associated with a vif that the driver knows about (e.g. monitor
- * or AP_VLAN interfaces.)
+ * This can also be useful to get the netdev associated to a vif.
*/
struct wireless_dev *ieee80211_vif_to_wdev(struct ieee80211_vif *vif);
/**
+ * lockdep_vif_mutex_held - for lockdep checks on link poiners
+ * @vif: the interface to check
+ */
+static inline bool lockdep_vif_mutex_held(struct ieee80211_vif *vif)
+{
+ return lockdep_is_held(&ieee80211_vif_to_wdev(vif)->mtx);
+}
+
+#define link_conf_dereference_protected(vif, link_id) \
+ rcu_dereference_protected((vif)->link_conf[link_id], \
+ lockdep_vif_mutex_held(vif))
+
+/**
* enum ieee80211_key_flags - key flags
*
* These flags are used for communication about keys between the driver
@@ -1767,6 +1996,7 @@ enum ieee80211_key_flags {
* - Temporal Authenticator Rx MIC Key (64 bits)
* @icv_len: The ICV length for this key type
* @iv_len: The IV length for this key type
+ * @link_id: the link ID for MLO, or -1 for non-MLO or pairwise keys
*/
struct ieee80211_key_conf {
atomic64_t tx_pn;
@@ -1776,8 +2006,9 @@ struct ieee80211_key_conf {
u8 hw_key_idx;
s8 keyidx;
u16 flags;
+ s8 link_id;
u8 keylen;
- u8 key[0];
+ u8 key[];
};
#define IEEE80211_MAX_PN_LEN 16
@@ -1825,36 +2056,6 @@ struct ieee80211_key_seq {
};
/**
- * struct ieee80211_cipher_scheme - cipher scheme
- *
- * This structure contains a cipher scheme information defining
- * the secure packet crypto handling.
- *
- * @cipher: a cipher suite selector
- * @iftype: a cipher iftype bit mask indicating an allowed cipher usage
- * @hdr_len: a length of a security header used the cipher
- * @pn_len: a length of a packet number in the security header
- * @pn_off: an offset of pn from the beginning of the security header
- * @key_idx_off: an offset of key index byte in the security header
- * @key_idx_mask: a bit mask of key_idx bits
- * @key_idx_shift: a bit shift needed to get key_idx
- * key_idx value calculation:
- * (sec_header_base[key_idx_off] & key_idx_mask) >> key_idx_shift
- * @mic_len: a mic length in bytes
- */
-struct ieee80211_cipher_scheme {
- u32 cipher;
- u16 iftype;
- u8 hdr_len;
- u8 pn_len;
- u8 pn_off;
- u8 key_idx_off;
- u8 key_idx_mask;
- u8 key_idx_shift;
- u8 mic_len;
-};
-
-/**
* enum set_key_cmd - key command
*
* Used with the set_key() callback in &struct ieee80211_ops, this
@@ -1893,6 +2094,7 @@ enum ieee80211_sta_state {
* @IEEE80211_STA_RX_BW_80: station can receive up to 80 MHz
* @IEEE80211_STA_RX_BW_160: station can receive up to 160 MHz
* (including 80+80 MHz)
+ * @IEEE80211_STA_RX_BW_320: station can receive up to 320 MHz
*
* Implementation note: 20 must be zero to be initialized
* correctly, the values must be sorted.
@@ -1902,6 +2104,7 @@ enum ieee80211_sta_rx_bandwidth {
IEEE80211_STA_RX_BW_40,
IEEE80211_STA_RX_BW_80,
IEEE80211_STA_RX_BW_160,
+ IEEE80211_STA_RX_BW_320,
};
/**
@@ -1941,6 +2144,77 @@ struct ieee80211_sta_txpwr {
};
/**
+ * struct ieee80211_sta_aggregates - info that is aggregated from active links
+ *
+ * Used for any per-link data that needs to be aggregated and updated in the
+ * main &struct ieee80211_sta when updated or the active links change.
+ *
+ * @max_amsdu_len: indicates the maximal length of an A-MSDU in bytes.
+ * This field is always valid for packets with a VHT preamble.
+ * For packets with a HT preamble, additional limits apply:
+ *
+ * * If the skb is transmitted as part of a BA agreement, the
+ * A-MSDU maximal size is min(max_amsdu_len, 4065) bytes.
+ * * If the skb is not part of a BA agreement, the A-MSDU maximal
+ * size is min(max_amsdu_len, 7935) bytes.
+ *
+ * Both additional HT limits must be enforced by the low level
+ * driver. This is defined by the spec (IEEE 802.11-2012 section
+ * 8.3.2.2 NOTE 2).
+ * @max_rc_amsdu_len: Maximum A-MSDU size in bytes recommended by rate control.
+ * @max_tid_amsdu_len: Maximum A-MSDU size in bytes for this TID
+ */
+struct ieee80211_sta_aggregates {
+ u16 max_amsdu_len;
+
+ u16 max_rc_amsdu_len;
+ u16 max_tid_amsdu_len[IEEE80211_NUM_TIDS];
+};
+
+/**
+ * struct ieee80211_link_sta - station Link specific info
+ * All link specific info for a STA link for a non MLD STA(single)
+ * or a MLD STA(multiple entries) are stored here.
+ *
+ * @addr: MAC address of the Link STA. For non-MLO STA this is same as the addr
+ * in ieee80211_sta. For MLO Link STA this addr can be same or different
+ * from addr in ieee80211_sta (representing MLD STA addr)
+ * @link_id: the link ID for this link STA (0 for deflink)
+ * @smps_mode: current SMPS mode (off, static or dynamic)
+ * @supp_rates: Bitmap of supported rates
+ * @ht_cap: HT capabilities of this STA; restricted to our own capabilities
+ * @vht_cap: VHT capabilities of this STA; restricted to our own capabilities
+ * @he_cap: HE capabilities of this STA
+ * @he_6ghz_capa: on 6 GHz, holds the HE 6 GHz band capabilities
+ * @eht_cap: EHT capabilities of this STA
+ * @bandwidth: current bandwidth the station can receive with
+ * @rx_nss: in HT/VHT, the maximum number of spatial streams the
+ * station can receive at the moment, changed by operating mode
+ * notifications and capabilities. The value is only valid after
+ * the station moves to associated state.
+ * @txpwr: the station tx power configuration
+ *
+ */
+struct ieee80211_link_sta {
+ u8 addr[ETH_ALEN];
+ u8 link_id;
+ enum ieee80211_smps_mode smps_mode;
+
+ u32 supp_rates[NUM_NL80211_BANDS];
+ struct ieee80211_sta_ht_cap ht_cap;
+ struct ieee80211_sta_vht_cap vht_cap;
+ struct ieee80211_sta_he_cap he_cap;
+ struct ieee80211_he_6ghz_capa he_6ghz_capa;
+ struct ieee80211_sta_eht_cap eht_cap;
+
+ struct ieee80211_sta_aggregates agg;
+
+ u8 rx_nss;
+ enum ieee80211_sta_rx_bandwidth bandwidth;
+ struct ieee80211_sta_txpwr txpwr;
+};
+
+/**
* struct ieee80211_sta - station table entry
*
* A station table entry represents a station we are possibly
@@ -1949,13 +2223,11 @@ struct ieee80211_sta_txpwr {
* either be protected by rcu_read_lock() explicitly or implicitly,
* or you must take good care to not use such a pointer after a
* call to your sta_remove callback that removed it.
+ * This also represents the MLD STA in case of MLO association
+ * and holds pointers to various link STA's
*
* @addr: MAC address
* @aid: AID we assigned to the station if we're an AP
- * @supp_rates: Bitmap of supported rates (per band)
- * @ht_cap: HT capabilities of this STA; restricted to our own capabilities
- * @vht_cap: VHT capabilities of this STA; restricted to our own capabilities
- * @he_cap: HE capabilities of this STA
* @max_rx_aggregation_subframes: maximal amount of frames in a single AMPDU
* that this station is allowed to transmit to us.
* Can be modified by driver.
@@ -1967,73 +2239,81 @@ struct ieee80211_sta_txpwr {
* if wme is supported. The bits order is like in
* IEEE80211_WMM_IE_STA_QOSINFO_AC_*.
* @max_sp: max Service Period. Only valid if wme is supported.
- * @bandwidth: current bandwidth the station can receive with
- * @rx_nss: in HT/VHT, the maximum number of spatial streams the
- * station can receive at the moment, changed by operating mode
- * notifications and capabilities. The value is only valid after
- * the station moves to associated state.
- * @smps_mode: current SMPS mode (off, static or dynamic)
* @rates: rate control selection table
* @tdls: indicates whether the STA is a TDLS peer
* @tdls_initiator: indicates the STA is an initiator of the TDLS link. Only
* valid if the STA is a TDLS peer in the first place.
* @mfp: indicates whether the STA uses management frame protection or not.
+ * @mlo: indicates whether the STA is MLO station.
* @max_amsdu_subframes: indicates the maximal number of MSDUs in a single
* A-MSDU. Taken from the Extended Capabilities element. 0 means
* unlimited.
+ * @cur: currently valid data as aggregated from the active links
+ * For non MLO STA it will point to the deflink data. For MLO STA
+ * ieee80211_sta_recalc_aggregates() must be called to update it.
* @support_p2p_ps: indicates whether the STA supports P2P PS mechanism or not.
- * @max_rc_amsdu_len: Maximum A-MSDU size in bytes recommended by rate control.
- * @max_tid_amsdu_len: Maximum A-MSDU size in bytes for this TID
* @txq: per-TID data TX queues (if driver uses the TXQ abstraction); note that
* the last entry (%IEEE80211_NUM_TIDS) is used for non-data frames
+ * @deflink: This holds the default link STA information, for non MLO STA all link
+ * specific STA information is accessed through @deflink or through
+ * link[0] which points to address of @deflink. For MLO Link STA
+ * the first added link STA will point to deflink.
+ * @link: reference to Link Sta entries. For Non MLO STA, except 1st link,
+ * i.e link[0] all links would be assigned to NULL by default and
+ * would access link information via @deflink or link[0]. For MLO
+ * STA, first link STA being added will point its link pointer to
+ * @deflink address and remaining would be allocated and the address
+ * would be assigned to link[link_id] where link_id is the id assigned
+ * by the AP.
+ * @valid_links: bitmap of valid links, or 0 for non-MLO
*/
struct ieee80211_sta {
- u32 supp_rates[NUM_NL80211_BANDS];
u8 addr[ETH_ALEN];
u16 aid;
- struct ieee80211_sta_ht_cap ht_cap;
- struct ieee80211_sta_vht_cap vht_cap;
- struct ieee80211_sta_he_cap he_cap;
u16 max_rx_aggregation_subframes;
bool wme;
u8 uapsd_queues;
u8 max_sp;
- u8 rx_nss;
- enum ieee80211_sta_rx_bandwidth bandwidth;
- enum ieee80211_smps_mode smps_mode;
struct ieee80211_sta_rates __rcu *rates;
bool tdls;
bool tdls_initiator;
bool mfp;
+ bool mlo;
u8 max_amsdu_subframes;
- /**
- * @max_amsdu_len:
- * indicates the maximal length of an A-MSDU in bytes.
- * This field is always valid for packets with a VHT preamble.
- * For packets with a HT preamble, additional limits apply:
- *
- * * If the skb is transmitted as part of a BA agreement, the
- * A-MSDU maximal size is min(max_amsdu_len, 4065) bytes.
- * * If the skb is not part of a BA agreement, the A-MSDU maximal
- * size is min(max_amsdu_len, 7935) bytes.
- *
- * Both additional HT limits must be enforced by the low level
- * driver. This is defined by the spec (IEEE 802.11-2012 section
- * 8.3.2.2 NOTE 2).
- */
- u16 max_amsdu_len;
+ struct ieee80211_sta_aggregates *cur;
+
bool support_p2p_ps;
- u16 max_rc_amsdu_len;
- u16 max_tid_amsdu_len[IEEE80211_NUM_TIDS];
- struct ieee80211_sta_txpwr txpwr;
struct ieee80211_txq *txq[IEEE80211_NUM_TIDS + 1];
+ u16 valid_links;
+ struct ieee80211_link_sta deflink;
+ struct ieee80211_link_sta __rcu *link[IEEE80211_MLD_MAX_NUM_LINKS];
+
/* must be last */
- u8 drv_priv[0] __aligned(sizeof(void *));
+ u8 drv_priv[] __aligned(sizeof(void *));
};
+#ifdef CONFIG_LOCKDEP
+bool lockdep_sta_mutex_held(struct ieee80211_sta *pubsta);
+#else
+static inline bool lockdep_sta_mutex_held(struct ieee80211_sta *pubsta)
+{
+ return true;
+}
+#endif
+
+#define link_sta_dereference_protected(sta, link_id) \
+ rcu_dereference_protected((sta)->link[link_id], \
+ lockdep_sta_mutex_held(sta))
+
+#define for_each_sta_active_link(vif, sta, link_sta, link_id) \
+ for (link_id = 0; link_id < ARRAY_SIZE((sta)->link); link_id++) \
+ if ((!(vif)->active_links || \
+ (vif)->active_links & BIT(link_id)) && \
+ ((link_sta) = link_sta_dereference_protected(sta, link_id)))
+
/**
* enum sta_notify_cmd - sta notify command
*
@@ -2077,7 +2357,7 @@ struct ieee80211_txq {
u8 ac;
/* must be last */
- u8 drv_priv[0] __aligned(sizeof(void *));
+ u8 drv_priv[] __aligned(sizeof(void *));
};
/**
@@ -2301,6 +2581,24 @@ struct ieee80211_txq {
* aggregating MPDUs with the same keyid, allowing mac80211 to keep Tx
* A-MPDU sessions active while rekeying with Extended Key ID.
*
+ * @IEEE80211_HW_SUPPORTS_TX_ENCAP_OFFLOAD: Hardware supports tx encapsulation
+ * offload
+ *
+ * @IEEE80211_HW_SUPPORTS_RX_DECAP_OFFLOAD: Hardware supports rx decapsulation
+ * offload
+ *
+ * @IEEE80211_HW_SUPPORTS_CONC_MON_RX_DECAP: Hardware supports concurrent rx
+ * decapsulation offload and passing raw 802.11 frames for monitor iface.
+ * If this is supported, the driver must pass both 802.3 frames for real
+ * usage and 802.11 frames with %RX_FLAG_ONLY_MONITOR set for monitor to
+ * the stack.
+ *
+ * @IEEE80211_HW_DETECTS_COLOR_COLLISION: HW/driver has support for BSS color
+ * collision detection and doesn't need it in software.
+ *
+ * @IEEE80211_HW_MLO_MCAST_MULTI_LINK_TX: Hardware/driver handles transmitting
+ * multicast frames on all links, mac80211 should not do that.
+ *
* @NUM_IEEE80211_HW_FLAGS: number of hardware flags, used for sizing arrays
*/
enum ieee80211_hw_flags {
@@ -2353,6 +2651,11 @@ enum ieee80211_hw_flags {
IEEE80211_HW_SUPPORTS_MULTI_BSSID,
IEEE80211_HW_SUPPORTS_ONLY_HE_MULTI_BSSID,
IEEE80211_HW_AMPDU_KEYBORDER_SUPPORT,
+ IEEE80211_HW_SUPPORTS_TX_ENCAP_OFFLOAD,
+ IEEE80211_HW_SUPPORTS_RX_DECAP_OFFLOAD,
+ IEEE80211_HW_SUPPORTS_CONC_MON_RX_DECAP,
+ IEEE80211_HW_DETECTS_COLOR_COLLISION,
+ IEEE80211_HW_MLO_MCAST_MULTI_LINK_TX,
/* keep last, obviously */
NUM_IEEE80211_HW_FLAGS
@@ -2468,9 +2771,6 @@ enum ieee80211_hw_flags {
* deliver to a WMM STA during any Service Period triggered by the WMM STA.
* Use IEEE80211_WMM_IE_STA_QOSINFO_SP_* for correct values.
*
- * @n_cipher_schemes: a size of an array of cipher schemes definitions.
- * @cipher_schemes: a pointer to an array of cipher scheme definitions
- * supported by HW.
* @max_nan_de_entries: maximum number of NAN DE functions supported by the
* device.
*
@@ -2482,6 +2782,12 @@ enum ieee80211_hw_flags {
* refilling deficit of each TXQ.
*
* @max_mtu: the max mtu could be set.
+ *
+ * @tx_power_levels: a list of power levels supported by the wifi hardware.
+ * The power levels can be specified either as integer or fractions.
+ * The power level at idx 0 shall be the maximum positive power level.
+ *
+ * @max_txpwr_levels_idx: the maximum valid idx of 'tx_power_levels' list.
*/
struct ieee80211_hw {
struct ieee80211_conf conf;
@@ -2514,12 +2820,12 @@ struct ieee80211_hw {
netdev_features_t netdev_features;
u8 uapsd_queues;
u8 uapsd_max_sp_len;
- u8 n_cipher_schemes;
- const struct ieee80211_cipher_scheme *cipher_schemes;
u8 max_nan_de_entries;
u8 tx_sk_pacing_shift;
u8 weight_multiplier;
u32 max_mtu;
+ const s8 *tx_power_levels;
+ u8 max_txpwr_levels_idx;
};
static inline bool _ieee80211_hw_check(struct ieee80211_hw *hw,
@@ -2703,15 +3009,15 @@ void ieee80211_free_txskb(struct ieee80211_hw *hw, struct sk_buff *skb);
* for devices that support offload of data packets (e.g. ARP responses).
*
* Mac80211 drivers should set the @NL80211_EXT_FEATURE_CAN_REPLACE_PTK0 flag
- * when they are able to replace in-use PTK keys according to to following
+ * when they are able to replace in-use PTK keys according to the following
* requirements:
- * 1) They do not hand over frames decrypted with the old key to
- mac80211 once the call to set_key() with command %DISABLE_KEY has been
- completed when also setting @IEEE80211_KEY_FLAG_GENERATE_IV for any key,
+ * 1) They do not hand over frames decrypted with the old key to mac80211
+ once the call to set_key() with command %DISABLE_KEY has been completed,
2) either drop or continue to use the old key for any outgoing frames queued
at the time of the key deletion (including re-transmits),
3) never send out a frame queued prior to the set_key() %SET_KEY command
- encrypted with the new key and
+ encrypted with the new key when also needing
+ @IEEE80211_KEY_FLAG_GENERATE_IV and
4) never send out a frame unencrypted when it should be encrypted.
Mac80211 will not queue any new frames for a deleted key to the driver.
*/
@@ -3087,6 +3393,8 @@ void ieee80211_free_txskb(struct ieee80211_hw *hw, struct sk_buff *skb);
* @FIF_PSPOLL: pass PS Poll frames
*
* @FIF_PROBE_REQ: pass probe request frames
+ *
+ * @FIF_MCAST_ACTION: pass multicast Action frames
*/
enum ieee80211_filter_flags {
FIF_ALLMULTI = 1<<1,
@@ -3097,6 +3405,7 @@ enum ieee80211_filter_flags {
FIF_OTHER_BSS = 1<<6,
FIF_PSPOLL = 1<<7,
FIF_PROBE_REQ = 1<<8,
+ FIF_MCAST_ACTION = 1<<9,
};
/**
@@ -3113,7 +3422,10 @@ enum ieee80211_filter_flags {
* @IEEE80211_AMPDU_RX_START: start RX aggregation
* @IEEE80211_AMPDU_RX_STOP: stop RX aggregation
* @IEEE80211_AMPDU_TX_START: start TX aggregation, the driver must either
- * call ieee80211_start_tx_ba_cb_irqsafe() or return the special
+ * call ieee80211_start_tx_ba_cb_irqsafe() or
+ * call ieee80211_start_tx_ba_cb_irqsafe() with status
+ * %IEEE80211_AMPDU_TX_START_DELAY_ADDBA to delay addba after
+ * ieee80211_start_tx_ba_cb_irqsafe is called, or just return the special
* status %IEEE80211_AMPDU_TX_START_IMMEDIATE.
* @IEEE80211_AMPDU_TX_OPERATIONAL: TX aggregation has become operational
* @IEEE80211_AMPDU_TX_STOP_CONT: stop TX aggregation but continue transmitting
@@ -3139,6 +3451,7 @@ enum ieee80211_ampdu_mlme_action {
};
#define IEEE80211_AMPDU_TX_START_IMMEDIATE 1
+#define IEEE80211_AMPDU_TX_START_DELAY_ADDBA 2
/**
* struct ieee80211_ampdu_params - AMPDU action parameters
@@ -3218,7 +3531,7 @@ enum ieee80211_roc_type {
};
/**
- * enum ieee80211_reconfig_complete_type - reconfig type
+ * enum ieee80211_reconfig_type - reconfig type
*
* This enum is used by the reconfig_complete() callback to indicate what
* reconfiguration type was completed.
@@ -3234,6 +3547,21 @@ enum ieee80211_reconfig_type {
};
/**
+ * struct ieee80211_prep_tx_info - prepare TX information
+ * @duration: if non-zero, hint about the required duration,
+ * only used with the mgd_prepare_tx() method.
+ * @subtype: frame subtype (auth, (re)assoc, deauth, disassoc)
+ * @success: whether the frame exchange was successful, only
+ * used with the mgd_complete_tx() method, and then only
+ * valid for auth and (re)assoc.
+ */
+struct ieee80211_prep_tx_info {
+ u16 duration;
+ u16 subtype;
+ u8 success:1;
+};
+
+/**
* struct ieee80211_ops - callbacks from mac80211 to the driver
*
* This structure contains various callbacks that the driver may
@@ -3323,6 +3651,22 @@ enum ieee80211_reconfig_type {
* for association indication. The @changed parameter indicates which
* of the bss parameters has changed when a call is made. The callback
* can sleep.
+ * Note: this callback is called if @vif_cfg_changed or @link_info_changed
+ * are not implemented.
+ *
+ * @vif_cfg_changed: Handler for configuration requests related to interface
+ * (MLD) parameters from &struct ieee80211_vif_cfg that vary during the
+ * lifetime of the interface (e.g. assoc status, IP addresses, etc.)
+ * The @changed parameter indicates which value changed.
+ * The callback can sleep.
+ *
+ * @link_info_changed: Handler for configuration requests related to link
+ * parameters from &struct ieee80211_bss_conf that are related to an
+ * individual link. e.g. legacy/HT/VHT/... rate information.
+ * The @changed parameter indicates which value changed, and the @link_id
+ * parameter indicates the link ID. Note that the @link_id will be 0 for
+ * non-MLO connections.
+ * The callback can sleep.
*
* @prepare_multicast: Prepare for multicast filter configuration.
* This callback is optional, and its return value is passed
@@ -3448,6 +3792,10 @@ enum ieee80211_reconfig_type {
* in AP mode, this callback will not be called when the flag
* %IEEE80211_HW_AP_LINK_PS is set. Must be atomic.
*
+ * @sta_set_txpwr: Configure the station tx power. This callback set the tx
+ * power for the station.
+ * This callback can sleep.
+ *
* @sta_state: Notifies low level driver about state transition of a
* station (which can be the AP, a client, IBSS/WDS/mesh peer etc.)
* This callback is mutually exclusive with @sta_add/@sta_remove.
@@ -3641,9 +3989,13 @@ enum ieee80211_reconfig_type {
* frame in case that no beacon was heard from the AP/P2P GO.
* The callback will be called before each transmission and upon return
* mac80211 will transmit the frame right away.
- * If duration is greater than zero, mac80211 hints to the driver the
- * duration for which the operation is requested.
+ * Additional information is passed in the &struct ieee80211_prep_tx_info
+ * data. If duration there is greater than zero, mac80211 hints to the
+ * driver the duration for which the operation is requested.
* The callback is optional and can (should!) sleep.
+ * @mgd_complete_tx: Notify the driver that the response frame for a previously
+ * transmitted frame announced with @mgd_prepare_tx was received, the data
+ * is filled similarly to @mgd_prepare_tx though the duration is not used.
*
* @mgd_protect_tdls_discover: Protect a TDLS discovery session. After sending
* a TDLS discovery-request, we expect a reply to arrive on the AP's
@@ -3698,7 +4050,7 @@ enum ieee80211_reconfig_type {
* decremented, and when they reach 1 the driver must call
* ieee80211_csa_finish(). Drivers which use ieee80211_beacon_get()
* get the csa counter decremented by mac80211, but must check if it is
- * 1 using ieee80211_csa_is_complete() after the beacon has been
+ * 1 using ieee80211_beacon_counter_is_complete() after the beacon has been
* transmitted and then call ieee80211_csa_finish().
* If the CSA count starts as zero or 1, this function will not be called,
* since there won't be any time to beacon before the switch anyway.
@@ -3773,6 +4125,45 @@ enum ieee80211_reconfig_type {
*
* @start_pmsr: start peer measurement (e.g. FTM) (this call can sleep)
* @abort_pmsr: abort peer measurement (this call can sleep)
+ * @set_tid_config: Apply TID specific configurations. This callback may sleep.
+ * @reset_tid_config: Reset TID specific configuration for the peer.
+ * This callback may sleep.
+ * @update_vif_offload: Update virtual interface offload flags
+ * This callback may sleep.
+ * @sta_set_4addr: Called to notify the driver when a station starts/stops using
+ * 4-address mode
+ * @set_sar_specs: Update the SAR (TX power) settings.
+ * @sta_set_decap_offload: Called to notify the driver when a station is allowed
+ * to use rx decapsulation offload
+ * @add_twt_setup: Update hw with TWT agreement parameters received from the peer.
+ * This callback allows the hw to check if requested parameters
+ * are supported and if there is enough room for a new agreement.
+ * The hw is expected to set agreement result in the req_type field of
+ * twt structure.
+ * @twt_teardown_request: Update the hw with TWT teardown request received
+ * from the peer.
+ * @set_radar_background: Configure dedicated offchannel chain available for
+ * radar/CAC detection on some hw. This chain can't be used to transmit
+ * or receive frames and it is bounded to a running wdev.
+ * Background radar/CAC detection allows to avoid the CAC downtime
+ * switching to a different channel during CAC detection on the selected
+ * radar channel.
+ * The caller is expected to set chandef pointer to NULL in order to
+ * disable background CAC/radar detection.
+ * @net_fill_forward_path: Called from .ndo_fill_forward_path in order to
+ * resolve a path for hardware flow offloading
+ * @change_vif_links: Change the valid links on an interface, note that while
+ * removing the old link information is still valid (link_conf pointer),
+ * but may immediately disappear after the function returns. The old or
+ * new links bitmaps may be 0 if going from/to a non-MLO situation.
+ * The @old array contains pointers to the old bss_conf structures
+ * that were already removed, in case they're needed.
+ * This callback can sleep.
+ * @change_sta_links: Change the valid links of a station, similar to
+ * @change_vif_links. This callback can sleep.
+ * Note that a sta can also be inserted or removed with valid links,
+ * i.e. passed to @sta_add/@sta_state with sta->valid_links not zero.
+ * In fact, cannot change from having valid_links and not having them.
*/
struct ieee80211_ops {
void (*tx)(struct ieee80211_hw *hw,
@@ -3796,10 +4187,19 @@ struct ieee80211_ops {
void (*bss_info_changed)(struct ieee80211_hw *hw,
struct ieee80211_vif *vif,
struct ieee80211_bss_conf *info,
- u32 changed);
+ u64 changed);
+ void (*vif_cfg_changed)(struct ieee80211_hw *hw,
+ struct ieee80211_vif *vif,
+ u64 changed);
+ void (*link_info_changed)(struct ieee80211_hw *hw,
+ struct ieee80211_vif *vif,
+ struct ieee80211_bss_conf *info,
+ u64 changed);
- int (*start_ap)(struct ieee80211_hw *hw, struct ieee80211_vif *vif);
- void (*stop_ap)(struct ieee80211_hw *hw, struct ieee80211_vif *vif);
+ int (*start_ap)(struct ieee80211_hw *hw, struct ieee80211_vif *vif,
+ struct ieee80211_bss_conf *link_conf);
+ void (*stop_ap)(struct ieee80211_hw *hw, struct ieee80211_vif *vif,
+ struct ieee80211_bss_conf *link_conf);
u64 (*prepare_multicast)(struct ieee80211_hw *hw,
struct netdev_hw_addr_list *mc_list);
@@ -3882,7 +4282,8 @@ struct ieee80211_ops {
struct ieee80211_sta *sta,
struct station_info *sinfo);
int (*conf_tx)(struct ieee80211_hw *hw,
- struct ieee80211_vif *vif, u16 ac,
+ struct ieee80211_vif *vif,
+ unsigned int link_id, u16 ac,
const struct ieee80211_tx_queue_params *params);
u64 (*get_tsf)(struct ieee80211_hw *hw, struct ieee80211_vif *vif);
void (*set_tsf)(struct ieee80211_hw *hw, struct ieee80211_vif *vif,
@@ -3984,7 +4385,10 @@ struct ieee80211_ops {
void (*mgd_prepare_tx)(struct ieee80211_hw *hw,
struct ieee80211_vif *vif,
- u16 duration);
+ struct ieee80211_prep_tx_info *info);
+ void (*mgd_complete_tx)(struct ieee80211_hw *hw,
+ struct ieee80211_vif *vif,
+ struct ieee80211_prep_tx_info *info);
void (*mgd_protect_tdls_discover)(struct ieee80211_hw *hw,
struct ieee80211_vif *vif);
@@ -3998,9 +4402,11 @@ struct ieee80211_ops {
u32 changed);
int (*assign_vif_chanctx)(struct ieee80211_hw *hw,
struct ieee80211_vif *vif,
+ struct ieee80211_bss_conf *link_conf,
struct ieee80211_chanctx_conf *ctx);
void (*unassign_vif_chanctx)(struct ieee80211_hw *hw,
struct ieee80211_vif *vif,
+ struct ieee80211_bss_conf *link_conf,
struct ieee80211_chanctx_conf *ctx);
int (*switch_vif_chanctx)(struct ieee80211_hw *hw,
struct ieee80211_vif_chanctx_switch *vifs,
@@ -4077,6 +4483,42 @@ struct ieee80211_ops {
struct cfg80211_pmsr_request *request);
void (*abort_pmsr)(struct ieee80211_hw *hw, struct ieee80211_vif *vif,
struct cfg80211_pmsr_request *request);
+ int (*set_tid_config)(struct ieee80211_hw *hw,
+ struct ieee80211_vif *vif,
+ struct ieee80211_sta *sta,
+ struct cfg80211_tid_config *tid_conf);
+ int (*reset_tid_config)(struct ieee80211_hw *hw,
+ struct ieee80211_vif *vif,
+ struct ieee80211_sta *sta, u8 tids);
+ void (*update_vif_offload)(struct ieee80211_hw *hw,
+ struct ieee80211_vif *vif);
+ void (*sta_set_4addr)(struct ieee80211_hw *hw, struct ieee80211_vif *vif,
+ struct ieee80211_sta *sta, bool enabled);
+ int (*set_sar_specs)(struct ieee80211_hw *hw,
+ const struct cfg80211_sar_specs *sar);
+ void (*sta_set_decap_offload)(struct ieee80211_hw *hw,
+ struct ieee80211_vif *vif,
+ struct ieee80211_sta *sta, bool enabled);
+ void (*add_twt_setup)(struct ieee80211_hw *hw,
+ struct ieee80211_sta *sta,
+ struct ieee80211_twt_setup *twt);
+ void (*twt_teardown_request)(struct ieee80211_hw *hw,
+ struct ieee80211_sta *sta, u8 flowid);
+ int (*set_radar_background)(struct ieee80211_hw *hw,
+ struct cfg80211_chan_def *chandef);
+ int (*net_fill_forward_path)(struct ieee80211_hw *hw,
+ struct ieee80211_vif *vif,
+ struct ieee80211_sta *sta,
+ struct net_device_path_ctx *ctx,
+ struct net_device_path *path);
+ int (*change_vif_links)(struct ieee80211_hw *hw,
+ struct ieee80211_vif *vif,
+ u16 old_links, u16 new_links,
+ struct ieee80211_bss_conf *old[IEEE80211_MLD_MAX_NUM_LINKS]);
+ int (*change_sta_links)(struct ieee80211_hw *hw,
+ struct ieee80211_vif *vif,
+ struct ieee80211_sta *sta,
+ u16 old_links, u16 new_links);
};
/**
@@ -4313,6 +4755,31 @@ void ieee80211_free_hw(struct ieee80211_hw *hw);
void ieee80211_restart_hw(struct ieee80211_hw *hw);
/**
+ * ieee80211_rx_list - receive frame and store processed skbs in a list
+ *
+ * Use this function to hand received frames to mac80211. The receive
+ * buffer in @skb must start with an IEEE 802.11 header. In case of a
+ * paged @skb is used, the driver is recommended to put the ieee80211
+ * header of the frame on the linear part of the @skb to avoid memory
+ * allocation and/or memcpy by the stack.
+ *
+ * This function may not be called in IRQ context. Calls to this function
+ * for a single hardware must be synchronized against each other. Calls to
+ * this function, ieee80211_rx_ni() and ieee80211_rx_irqsafe() may not be
+ * mixed for a single hardware. Must not run concurrently with
+ * ieee80211_tx_status() or ieee80211_tx_status_ni().
+ *
+ * This function must be called with BHs disabled and RCU read lock
+ *
+ * @hw: the hardware this frame came in on
+ * @sta: the station the frame was received from, or %NULL
+ * @skb: the buffer to receive, owned by mac80211 after this call
+ * @list: the destination list
+ */
+void ieee80211_rx_list(struct ieee80211_hw *hw, struct ieee80211_sta *sta,
+ struct sk_buff *skb, struct list_head *list);
+
+/**
* ieee80211_rx_napi - receive frame from NAPI context
*
* Use this function to hand received frames to mac80211. The receive
@@ -4660,6 +5127,26 @@ void ieee80211_tx_status_irqsafe(struct ieee80211_hw *hw,
struct sk_buff *skb);
/**
+ * ieee80211_tx_status_8023 - transmit status callback for 802.3 frame format
+ *
+ * Call this function for all transmitted data frames after their transmit
+ * completion. This callback should only be called for data frames which
+ * are using driver's (or hardware's) offload capability of encap/decap
+ * 802.11 frames.
+ *
+ * This function may not be called in IRQ context. Calls to this function
+ * for a single hardware must be synchronized against each other and all
+ * calls in the same tx status family.
+ *
+ * @hw: the hardware the frame was transmitted by
+ * @vif: the interface for which the frame was transmitted
+ * @skb: the frame that was transmitted, owned by mac80211 after this call
+ */
+void ieee80211_tx_status_8023(struct ieee80211_hw *hw,
+ struct ieee80211_vif *vif,
+ struct sk_buff *skb);
+
+/**
* ieee80211_report_low_ack - report non-responding station
*
* When operating in AP-mode, call this function to report a non-responding
@@ -4670,21 +5157,23 @@ void ieee80211_tx_status_irqsafe(struct ieee80211_hw *hw,
*/
void ieee80211_report_low_ack(struct ieee80211_sta *sta, u32 num_packets);
-#define IEEE80211_MAX_CSA_COUNTERS_NUM 2
+#define IEEE80211_MAX_CNTDWN_COUNTERS_NUM 2
/**
* struct ieee80211_mutable_offsets - mutable beacon offsets
* @tim_offset: position of TIM element
* @tim_length: size of TIM element
- * @csa_counter_offs: array of IEEE80211_MAX_CSA_COUNTERS_NUM offsets
- * to CSA counters. This array can contain zero values which
+ * @cntdwn_counter_offs: array of IEEE80211_MAX_CNTDWN_COUNTERS_NUM offsets
+ * to countdown counters. This array can contain zero values which
* should be ignored.
+ * @mbssid_off: position of the multiple bssid element
*/
struct ieee80211_mutable_offsets {
u16 tim_offset;
u16 tim_length;
- u16 csa_counter_offs[IEEE80211_MAX_CSA_COUNTERS_NUM];
+ u16 cntdwn_counter_offs[IEEE80211_MAX_CNTDWN_COUNTERS_NUM];
+ u16 mbssid_off;
};
/**
@@ -4693,6 +5182,7 @@ struct ieee80211_mutable_offsets {
* @vif: &struct ieee80211_vif pointer from the add_interface callback.
* @offs: &struct ieee80211_mutable_offsets pointer to struct that will
* receive the offsets that may be updated by the driver.
+ * @link_id: the link id to which the beacon belongs (or 0 for a non-MLD AP)
*
* If the driver implements beaconing modes, it must use this function to
* obtain the beacon template.
@@ -4709,7 +5199,8 @@ struct ieee80211_mutable_offsets {
struct sk_buff *
ieee80211_beacon_get_template(struct ieee80211_hw *hw,
struct ieee80211_vif *vif,
- struct ieee80211_mutable_offsets *offs);
+ struct ieee80211_mutable_offsets *offs,
+ unsigned int link_id);
/**
* ieee80211_beacon_get_tim - beacon generation function
@@ -4720,6 +5211,7 @@ ieee80211_beacon_get_template(struct ieee80211_hw *hw,
* @tim_length: pointer to variable that will receive the TIM IE length,
* (including the ID and length bytes!).
* Set to 0 if invalid (in non-AP modes).
+ * @link_id: the link id to which the beacon belongs (or 0 for a non-MLD AP)
*
* If the driver implements beaconing modes, it must use this function to
* obtain the beacon frame.
@@ -4735,49 +5227,52 @@ ieee80211_beacon_get_template(struct ieee80211_hw *hw,
*/
struct sk_buff *ieee80211_beacon_get_tim(struct ieee80211_hw *hw,
struct ieee80211_vif *vif,
- u16 *tim_offset, u16 *tim_length);
+ u16 *tim_offset, u16 *tim_length,
+ unsigned int link_id);
/**
* ieee80211_beacon_get - beacon generation function
* @hw: pointer obtained from ieee80211_alloc_hw().
* @vif: &struct ieee80211_vif pointer from the add_interface callback.
+ * @link_id: the link id to which the beacon belongs (or 0 for a non-MLD AP)
*
* See ieee80211_beacon_get_tim().
*
* Return: See ieee80211_beacon_get_tim().
*/
static inline struct sk_buff *ieee80211_beacon_get(struct ieee80211_hw *hw,
- struct ieee80211_vif *vif)
+ struct ieee80211_vif *vif,
+ unsigned int link_id)
{
- return ieee80211_beacon_get_tim(hw, vif, NULL, NULL);
+ return ieee80211_beacon_get_tim(hw, vif, NULL, NULL, link_id);
}
/**
- * ieee80211_csa_update_counter - request mac80211 to decrement the csa counter
+ * ieee80211_beacon_update_cntdwn - request mac80211 to decrement the beacon countdown
* @vif: &struct ieee80211_vif pointer from the add_interface callback.
*
- * The csa counter should be updated after each beacon transmission.
+ * The beacon counter should be updated after each beacon transmission.
* This function is called implicitly when
* ieee80211_beacon_get/ieee80211_beacon_get_tim are called, however if the
* beacon frames are generated by the device, the driver should call this
- * function after each beacon transmission to sync mac80211's csa counters.
+ * function after each beacon transmission to sync mac80211's beacon countdown.
*
- * Return: new csa counter value
+ * Return: new countdown value
*/
-u8 ieee80211_csa_update_counter(struct ieee80211_vif *vif);
+u8 ieee80211_beacon_update_cntdwn(struct ieee80211_vif *vif);
/**
- * ieee80211_csa_set_counter - request mac80211 to set csa counter
+ * ieee80211_beacon_set_cntdwn - request mac80211 to set beacon countdown
* @vif: &struct ieee80211_vif pointer from the add_interface callback.
* @counter: the new value for the counter
*
- * The csa counter can be changed by the device, this API should be
+ * The beacon countdown can be changed by the device, this API should be
* used by the device driver to update csa counter in mac80211.
*
- * It should never be used together with ieee80211_csa_update_counter(),
+ * It should never be used together with ieee80211_beacon_update_cntdwn(),
* as it will cause a race condition around the counter value.
*/
-void ieee80211_csa_set_counter(struct ieee80211_vif *vif, u8 counter);
+void ieee80211_beacon_set_cntdwn(struct ieee80211_vif *vif, u8 counter);
/**
* ieee80211_csa_finish - notify mac80211 about channel switch
@@ -4790,13 +5285,22 @@ void ieee80211_csa_set_counter(struct ieee80211_vif *vif, u8 counter);
void ieee80211_csa_finish(struct ieee80211_vif *vif);
/**
- * ieee80211_csa_is_complete - find out if counters reached 1
+ * ieee80211_beacon_cntdwn_is_complete - find out if countdown reached 1
* @vif: &struct ieee80211_vif pointer from the add_interface callback.
*
- * This function returns whether the channel switch counters reached zero.
+ * This function returns whether the countdown reached zero.
*/
-bool ieee80211_csa_is_complete(struct ieee80211_vif *vif);
+bool ieee80211_beacon_cntdwn_is_complete(struct ieee80211_vif *vif);
+/**
+ * ieee80211_color_change_finish - notify mac80211 about color change
+ * @vif: &struct ieee80211_vif pointer from the add_interface callback.
+ *
+ * After a color change announcement was scheduled and the counter in this
+ * announcement hits 1, this function must be called by the driver to
+ * notify mac80211 that the color can be changed
+ */
+void ieee80211_color_change_finish(struct ieee80211_vif *vif);
/**
* ieee80211_proberesp_get - retrieve a Probe Response template
@@ -4834,6 +5338,9 @@ struct sk_buff *ieee80211_pspoll_get(struct ieee80211_hw *hw,
* ieee80211_nullfunc_get - retrieve a nullfunc template
* @hw: pointer obtained from ieee80211_alloc_hw().
* @vif: &struct ieee80211_vif pointer from the add_interface callback.
+ * @link_id: If the vif is an MLD, get a frame with the link addresses
+ * for the given link ID. For a link_id < 0 you get a frame with
+ * MLD addresses, however useful that might be.
* @qos_ok: QoS NDP is acceptable to the caller, this should be set
* if at all possible
*
@@ -4851,7 +5358,7 @@ struct sk_buff *ieee80211_pspoll_get(struct ieee80211_hw *hw,
*/
struct sk_buff *ieee80211_nullfunc_get(struct ieee80211_hw *hw,
struct ieee80211_vif *vif,
- bool qos_ok);
+ int link_id, bool qos_ok);
/**
* ieee80211_probereq_get - retrieve a Probe Request template
@@ -5160,6 +5667,26 @@ void ieee80211_gtk_rekey_notify(struct ieee80211_vif *vif, const u8 *bssid,
const u8 *replay_ctr, gfp_t gfp);
/**
+ * ieee80211_key_mic_failure - increment MIC failure counter for the key
+ *
+ * Note: this is really only safe if no other RX function is called
+ * at the same time.
+ *
+ * @keyconf: the key in question
+ */
+void ieee80211_key_mic_failure(struct ieee80211_key_conf *keyconf);
+
+/**
+ * ieee80211_key_replay - increment replay counter for the key
+ *
+ * Note: this is really only safe if no other RX function is called
+ * at the same time.
+ *
+ * @keyconf: the key in question
+ */
+void ieee80211_key_replay(struct ieee80211_key_conf *keyconf);
+
+/**
* ieee80211_wake_queue - wake specific queue
* @hw: pointer as obtained from ieee80211_alloc_hw().
* @queue: queue number (counted from zero).
@@ -5251,11 +5778,15 @@ void ieee80211_sched_scan_stopped(struct ieee80211_hw *hw);
* @IEEE80211_IFACE_ITER_RESUME_ALL: During resume, iterate over all
* interfaces, even if they haven't been re-added to the driver yet.
* @IEEE80211_IFACE_ITER_ACTIVE: Iterate only active interfaces (netdev is up).
+ * @IEEE80211_IFACE_SKIP_SDATA_NOT_IN_DRIVER: Skip any interfaces where SDATA
+ * is not in the driver. This may fix crashes during firmware recovery
+ * for instance.
*/
enum ieee80211_interface_iteration_flags {
IEEE80211_IFACE_ITER_NORMAL = 0,
IEEE80211_IFACE_ITER_RESUME_ALL = BIT(0),
IEEE80211_IFACE_ITER_ACTIVE = BIT(1),
+ IEEE80211_IFACE_SKIP_SDATA_NOT_IN_DRIVER = BIT(2),
};
/**
@@ -5324,23 +5855,44 @@ void ieee80211_iterate_active_interfaces_atomic(struct ieee80211_hw *hw,
void *data);
/**
- * ieee80211_iterate_active_interfaces_rtnl - iterate active interfaces
+ * ieee80211_iterate_active_interfaces_mtx - iterate active interfaces
*
* This function iterates over the interfaces associated with a given
* hardware that are currently active and calls the callback for them.
- * This version can only be used while holding the RTNL.
+ * This version can only be used while holding the wiphy mutex.
+ * The driver must not call this with a lock held that it can also take in
+ * response to callbacks from mac80211, and it must not call this within
+ * callbacks made by mac80211 - both would result in deadlocks.
*
* @hw: the hardware struct of which the interfaces should be iterated over
* @iter_flags: iteration flags, see &enum ieee80211_interface_iteration_flags
* @iterator: the iterator function to call, cannot sleep
* @data: first argument of the iterator function
*/
-void ieee80211_iterate_active_interfaces_rtnl(struct ieee80211_hw *hw,
- u32 iter_flags,
- void (*iterator)(void *data,
+void ieee80211_iterate_active_interfaces_mtx(struct ieee80211_hw *hw,
+ u32 iter_flags,
+ void (*iterator)(void *data,
u8 *mac,
struct ieee80211_vif *vif),
- void *data);
+ void *data);
+
+/**
+ * ieee80211_iterate_stations - iterate stations
+ *
+ * This function iterates over all stations associated with a given
+ * hardware that are currently uploaded to the driver and calls the callback
+ * function for them.
+ * This function allows the iterator function to sleep, when the iterator
+ * function is atomic @ieee80211_iterate_stations_atomic can be used.
+ *
+ * @hw: the hardware struct of which the interfaces should be iterated over
+ * @iterator: the iterator function to call, cannot sleep
+ * @data: first argument of the iterator function
+ */
+void ieee80211_iterate_stations(struct ieee80211_hw *hw,
+ void (*iterator)(void *data,
+ struct ieee80211_sta *sta),
+ void *data);
/**
* ieee80211_iterate_stations_atomic - iterate stations
@@ -5478,6 +6030,22 @@ struct ieee80211_sta *ieee80211_find_sta_by_ifaddr(struct ieee80211_hw *hw,
const u8 *localaddr);
/**
+ * ieee80211_find_sta_by_link_addrs - find STA by link addresses
+ * @hw: pointer as obtained from ieee80211_alloc_hw()
+ * @addr: remote station's link address
+ * @localaddr: local link address, use %NULL for any (but avoid that)
+ * @link_id: pointer to obtain the link ID if the STA is found,
+ * may be %NULL if the link ID is not needed
+ *
+ * Obtain the STA by link address, must use RCU protection.
+ */
+struct ieee80211_sta *
+ieee80211_find_sta_by_link_addrs(struct ieee80211_hw *hw,
+ const u8 *addr,
+ const u8 *localaddr,
+ unsigned int *link_id);
+
+/**
* ieee80211_sta_block_awake - block station from waking up
* @hw: the hardware
* @pubsta: the station
@@ -5553,9 +6121,22 @@ void ieee80211_sta_eosp(struct ieee80211_sta *pubsta);
void ieee80211_send_eosp_nullfunc(struct ieee80211_sta *pubsta, int tid);
/**
+ * ieee80211_sta_recalc_aggregates - recalculate aggregate data after a change
+ * @pubsta: the station
+ *
+ * Call this function after changing a per-link aggregate data as referenced in
+ * &struct ieee80211_sta_aggregates by accessing the agg field of
+ * &struct ieee80211_link_sta.
+ *
+ * With non MLO the data in deflink will be referenced directly. In that case
+ * there is no need to call this function.
+ */
+void ieee80211_sta_recalc_aggregates(struct ieee80211_sta *pubsta);
+
+/**
* ieee80211_sta_register_airtime - register airtime usage for a sta/tid
*
- * Register airtime usage for a given sta on a given tid. The driver can call
+ * Register airtime usage for a given sta on a given tid. The driver must call
* this function to notify mac80211 that a station used a certain amount of
* airtime. This information will be used by the TXQ scheduler to schedule
* stations in a way that ensures airtime fairness.
@@ -5714,6 +6295,17 @@ void ieee80211_beacon_loss(struct ieee80211_vif *vif);
void ieee80211_connection_loss(struct ieee80211_vif *vif);
/**
+ * ieee80211_disconnect - request disconnection
+ *
+ * @vif: &struct ieee80211_vif pointer from the add_interface callback.
+ * @reconnect: immediate reconnect is desired
+ *
+ * Request disconnection from the current network and, if enabled, send a
+ * hint to the higher layers that immediate reconnect is desired.
+ */
+void ieee80211_disconnect(struct ieee80211_vif *vif, bool reconnect);
+
+/**
* ieee80211_resume_disconnect - disconnect from AP after resume
*
* @vif: &struct ieee80211_vif pointer from the add_interface callback.
@@ -5737,6 +6329,16 @@ void ieee80211_connection_loss(struct ieee80211_vif *vif);
void ieee80211_resume_disconnect(struct ieee80211_vif *vif);
/**
+ * ieee80211_hw_restart_disconnect - disconnect from AP after
+ * hardware restart
+ * @vif: &struct ieee80211_vif pointer from the add_interface callback.
+ *
+ * Instructs mac80211 to disconnect from the AP after
+ * hardware restart.
+ */
+void ieee80211_hw_restart_disconnect(struct ieee80211_vif *vif);
+
+/**
* ieee80211_cqm_rssi_notify - inform a configured connection quality monitoring
* rssi threshold triggered
*
@@ -5780,15 +6382,28 @@ void ieee80211_radar_detected(struct ieee80211_hw *hw);
void ieee80211_chswitch_done(struct ieee80211_vif *vif, bool success);
/**
+ * ieee80211_channel_switch_disconnect - disconnect due to channel switch error
+ * @vif: &struct ieee80211_vif pointer from the add_interface callback.
+ * @block_tx: if %true, do not send deauth frame.
+ *
+ * Instruct mac80211 to disconnect due to a channel switch error. The channel
+ * switch can request to block the tx and so, we need to make sure we do not send
+ * a deauth frame in this case.
+ */
+void ieee80211_channel_switch_disconnect(struct ieee80211_vif *vif,
+ bool block_tx);
+
+/**
* ieee80211_request_smps - request SM PS transition
* @vif: &struct ieee80211_vif pointer from the add_interface callback.
+ * @link_id: link ID for MLO, or 0
* @smps_mode: new SM PS mode
*
* This allows the driver to request an SM PS transition in managed
* mode. This is useful when the driver has more information than
* the stack about possible interference, for example by bluetooth.
*/
-void ieee80211_request_smps(struct ieee80211_vif *vif,
+void ieee80211_request_smps(struct ieee80211_vif *vif, unsigned int link_id,
enum ieee80211_smps_mode smps_mode);
/**
@@ -5964,12 +6579,19 @@ enum rate_control_capabilities {
* otherwise the NSS difference doesn't bother us.
*/
RATE_CTRL_CAPA_VHT_EXT_NSS_BW = BIT(0),
+ /**
+ * @RATE_CTRL_CAPA_AMPDU_TRIGGER:
+ * mac80211 should start A-MPDU sessions on tx
+ */
+ RATE_CTRL_CAPA_AMPDU_TRIGGER = BIT(1),
};
struct rate_control_ops {
unsigned long capa;
const char *name;
- void *(*alloc)(struct ieee80211_hw *hw, struct dentry *debugfsdir);
+ void *(*alloc)(struct ieee80211_hw *hw);
+ void (*add_debugfs)(struct ieee80211_hw *hw, void *priv,
+ struct dentry *debugfsdir);
void (*free)(void *priv);
void *(*alloc_sta)(void *priv, struct ieee80211_sta *sta, gfp_t gfp);
@@ -6002,7 +6624,7 @@ static inline int rate_supported(struct ieee80211_sta *sta,
enum nl80211_band band,
int index)
{
- return (sta == NULL || sta->supp_rates[band] & BIT(index));
+ return (sta == NULL || sta->deflink.supp_rates[band] & BIT(index));
}
static inline s8
@@ -6113,6 +6735,7 @@ ieee80211_vif_type_p2p(struct ieee80211_vif *vif)
* ieee80211_update_mu_groups - set the VHT MU-MIMO groud data
*
* @vif: the specified virtual interface
+ * @link_id: the link ID for MLO, otherwise 0
* @membership: 64 bits array - a bit is set if station is member of the group
* @position: 2 bits per group id indicating the position in the group
*
@@ -6121,7 +6744,7 @@ ieee80211_vif_type_p2p(struct ieee80211_vif *vif)
* matching GroupId management frame.
* Calls to this function need to be serialized with RX path.
*/
-void ieee80211_update_mu_groups(struct ieee80211_vif *vif,
+void ieee80211_update_mu_groups(struct ieee80211_vif *vif, unsigned int link_id,
const u8 *membership, const u8 *position);
void ieee80211_enable_rssi_reports(struct ieee80211_vif *vif,
@@ -6169,6 +6792,20 @@ bool ieee80211_tx_prepare_skb(struct ieee80211_hw *hw,
int band, struct ieee80211_sta **sta);
/**
+ * ieee80211_parse_tx_radiotap - Sanity-check and parse the radiotap header
+ * of injected frames.
+ *
+ * To accurately parse and take into account rate and retransmission fields,
+ * you must initialize the chandef field in the ieee80211_tx_info structure
+ * of the skb before calling this function.
+ *
+ * @skb: packet injected by userspace
+ * @dev: the &struct device of this 802.11 device
+ */
+bool ieee80211_parse_tx_radiotap(struct sk_buff *skb,
+ struct net_device *dev);
+
+/**
* struct ieee80211_noa_data - holds temporary data for tracking P2P NoA state
*
* @next_tsf: TSF timestamp of the next absent state change
@@ -6216,7 +6853,7 @@ int ieee80211_parse_p2p_noa(const struct ieee80211_p2p_noa_attr *attr,
void ieee80211_update_p2p_noa(struct ieee80211_noa_data *data, u32 tsf);
/**
- * ieee80211_tdls_oper - request userspace to perform a TDLS operation
+ * ieee80211_tdls_oper_request - request userspace to perform a TDLS operation
* @vif: virtual interface
* @peer: the peer's destination address
* @oper: the requested TDLS operation
@@ -6277,7 +6914,7 @@ void ieee80211_unreserve_tid(struct ieee80211_sta *sta, u8 tid);
*
* Note that this must be called in an rcu_read_lock() critical section,
* which can only be released after the SKB was handled. Some pointers in
- * skb->cb, e.g. the key pointer, are protected by by RCU and thus the
+ * skb->cb, e.g. the key pointer, are protected by RCU and thus the
* critical section must persist not just for the duration of this call
* but for the duration of the frame handling.
* However, also note that while in the wake_tx_queue() method,
@@ -6479,5 +7116,113 @@ u32 ieee80211_calc_rx_airtime(struct ieee80211_hw *hw,
u32 ieee80211_calc_tx_airtime(struct ieee80211_hw *hw,
struct ieee80211_tx_info *info,
int len);
+/**
+ * ieee80211_set_hw_80211_encap - enable hardware encapsulation offloading.
+ *
+ * This function is used to notify mac80211 that a vif can be passed raw 802.3
+ * frames. The driver needs to then handle the 802.11 encapsulation inside the
+ * hardware or firmware.
+ *
+ * @vif: &struct ieee80211_vif pointer from the add_interface callback.
+ * @enable: indicate if the feature should be turned on or off
+ */
+bool ieee80211_set_hw_80211_encap(struct ieee80211_vif *vif, bool enable);
+
+/**
+ * ieee80211_get_fils_discovery_tmpl - Get FILS discovery template.
+ * @hw: pointer obtained from ieee80211_alloc_hw().
+ * @vif: &struct ieee80211_vif pointer from the add_interface callback.
+ *
+ * The driver is responsible for freeing the returned skb.
+ *
+ * Return: FILS discovery template. %NULL on error.
+ */
+struct sk_buff *ieee80211_get_fils_discovery_tmpl(struct ieee80211_hw *hw,
+ struct ieee80211_vif *vif);
+
+/**
+ * ieee80211_get_unsol_bcast_probe_resp_tmpl - Get unsolicited broadcast
+ * probe response template.
+ * @hw: pointer obtained from ieee80211_alloc_hw().
+ * @vif: &struct ieee80211_vif pointer from the add_interface callback.
+ *
+ * The driver is responsible for freeing the returned skb.
+ *
+ * Return: Unsolicited broadcast probe response template. %NULL on error.
+ */
+struct sk_buff *
+ieee80211_get_unsol_bcast_probe_resp_tmpl(struct ieee80211_hw *hw,
+ struct ieee80211_vif *vif);
+
+/**
+ * ieeee80211_obss_color_collision_notify - notify userland about a BSS color
+ * collision.
+ *
+ * @vif: &struct ieee80211_vif pointer from the add_interface callback.
+ * @color_bitmap: a 64 bit bitmap representing the colors that the local BSS is
+ * aware of.
+ * @gfp: allocation flags
+ */
+void
+ieeee80211_obss_color_collision_notify(struct ieee80211_vif *vif,
+ u64 color_bitmap, gfp_t gfp);
+
+/**
+ * ieee80211_is_tx_data - check if frame is a data frame
+ *
+ * The function is used to check if a frame is a data frame. Frames with
+ * hardware encapsulation enabled are data frames.
+ *
+ * @skb: the frame to be transmitted.
+ */
+static inline bool ieee80211_is_tx_data(struct sk_buff *skb)
+{
+ struct ieee80211_tx_info *info = IEEE80211_SKB_CB(skb);
+ struct ieee80211_hdr *hdr = (void *) skb->data;
+
+ return info->flags & IEEE80211_TX_CTL_HW_80211_ENCAP ||
+ ieee80211_is_data(hdr->frame_control);
+}
+
+/**
+ * ieee80211_set_active_links - set active links in client mode
+ * @vif: interface to set active links on
+ * @active_links: the new active links bitmap
+ *
+ * This changes the active links on an interface. The interface
+ * must be in client mode (in AP mode, all links are always active),
+ * and @active_links must be a subset of the vif's valid_links.
+ *
+ * If a link is switched off and another is switched on at the same
+ * time (e.g. active_links going from 0x1 to 0x10) then you will get
+ * a sequence of calls like
+ * - change_vif_links(0x11)
+ * - unassign_vif_chanctx(link_id=0)
+ * - change_sta_links(0x11) for each affected STA (the AP)
+ * (TDLS connections on now inactive links should be torn down)
+ * - remove group keys on the old link (link_id 0)
+ * - add new group keys (GTK/IGTK/BIGTK) on the new link (link_id 4)
+ * - change_sta_links(0x10) for each affected STA (the AP)
+ * - assign_vif_chanctx(link_id=4)
+ * - change_vif_links(0x10)
+ *
+ * Note: This function acquires some mac80211 locks and must not
+ * be called with any driver locks held that could cause a
+ * lock dependency inversion. Best call it without locks.
+ */
+int ieee80211_set_active_links(struct ieee80211_vif *vif, u16 active_links);
+
+/**
+ * ieee80211_set_active_links_async - asynchronously set active links
+ * @vif: interface to set active links on
+ * @active_links: the new active links bitmap
+ *
+ * See ieee80211_set_active_links() for more information, the only
+ * difference here is that the link change is triggered async and
+ * can be called in any context, but the link switch will only be
+ * completed after it returns.
+ */
+void ieee80211_set_active_links_async(struct ieee80211_vif *vif,
+ u16 active_links);
#endif /* MAC80211_H */
diff --git a/include/net/mac802154.h b/include/net/mac802154.h
index d524ffb9eb25..bdac0ddbdcdb 100644
--- a/include/net/mac802154.h
+++ b/include/net/mac802154.h
@@ -464,6 +464,12 @@ void ieee802154_rx_irqsafe(struct ieee802154_hw *hw, struct sk_buff *skb,
* ieee802154_wake_queue - wake ieee802154 queue
* @hw: pointer as obtained from ieee802154_alloc_hw().
*
+ * Tranceivers usually have either one transmit framebuffer or one framebuffer
+ * for both transmitting and receiving. Hence, the core currently only handles
+ * one frame at a time for each phy, which means we had to stop the queue to
+ * avoid new skb to come during the transmission. The queue then needs to be
+ * woken up after the operation.
+ *
* Drivers should use this function instead of netif_wake_queue.
*/
void ieee802154_wake_queue(struct ieee802154_hw *hw);
@@ -472,6 +478,12 @@ void ieee802154_wake_queue(struct ieee802154_hw *hw);
* ieee802154_stop_queue - stop ieee802154 queue
* @hw: pointer as obtained from ieee802154_alloc_hw().
*
+ * Tranceivers usually have either one transmit framebuffer or one framebuffer
+ * for both transmitting and receiving. Hence, the core currently only handles
+ * one frame at a time for each phy, which means we need to tell upper layers to
+ * stop giving us new skbs while we are busy with the transmitted one. The queue
+ * must then be stopped before transmitting.
+ *
* Drivers should use this function instead of netif_stop_queue.
*/
void ieee802154_stop_queue(struct ieee802154_hw *hw);
@@ -486,4 +498,23 @@ void ieee802154_stop_queue(struct ieee802154_hw *hw);
void ieee802154_xmit_complete(struct ieee802154_hw *hw, struct sk_buff *skb,
bool ifs_handling);
+/**
+ * ieee802154_xmit_error - offloaded frame transmission failed
+ *
+ * @hw: pointer as obtained from ieee802154_alloc_hw().
+ * @skb: buffer for transmission
+ * @reason: error code
+ */
+void ieee802154_xmit_error(struct ieee802154_hw *hw, struct sk_buff *skb,
+ int reason);
+
+/**
+ * ieee802154_xmit_hw_error - frame could not be offloaded to the transmitter
+ * because of a hardware error (bus error, timeout, etc)
+ *
+ * @hw: pointer as obtained from ieee802154_alloc_hw().
+ * @skb: buffer for transmission
+ */
+void ieee802154_xmit_hw_error(struct ieee802154_hw *hw, struct sk_buff *skb);
+
#endif /* NET_MAC802154_H */
diff --git a/include/net/macsec.h b/include/net/macsec.h
index 92e43db8b566..5b9c61c4d3a6 100644
--- a/include/net/macsec.h
+++ b/include/net/macsec.h
@@ -11,18 +11,63 @@
#include <uapi/linux/if_link.h>
#include <uapi/linux/if_macsec.h>
-typedef u64 __bitwise sci_t;
+#define MACSEC_DEFAULT_PN_LEN 4
+#define MACSEC_XPN_PN_LEN 8
#define MACSEC_NUM_AN 4 /* 2 bits for the association number */
+#define MACSEC_SCI_LEN 8
+#define MACSEC_PORT_ES (htons(0x0001))
+
+#define MACSEC_TCI_VERSION 0x80
+#define MACSEC_TCI_ES 0x40 /* end station */
+#define MACSEC_TCI_SC 0x20 /* SCI present */
+#define MACSEC_TCI_SCB 0x10 /* epon */
+#define MACSEC_TCI_E 0x08 /* encryption */
+#define MACSEC_TCI_C 0x04 /* changed text */
+#define MACSEC_AN_MASK 0x03 /* association number */
+#define MACSEC_TCI_CONFID (MACSEC_TCI_E | MACSEC_TCI_C)
+
+#define MACSEC_DEFAULT_ICV_LEN 16
+
+typedef u64 __bitwise sci_t;
+typedef u32 __bitwise ssci_t;
+
+struct metadata_dst;
+
+typedef union salt {
+ struct {
+ u32 ssci;
+ u64 pn;
+ } __packed;
+ u8 bytes[MACSEC_SALT_LEN];
+} __packed salt_t;
+
+typedef union pn {
+ struct {
+#if defined(__LITTLE_ENDIAN_BITFIELD)
+ u32 lower;
+ u32 upper;
+#elif defined(__BIG_ENDIAN_BITFIELD)
+ u32 upper;
+ u32 lower;
+#else
+#error "Please fix <asm/byteorder.h>"
+#endif
+ };
+ u64 full64;
+} pn_t;
+
/**
* struct macsec_key - SA key
* @id: user-provided key identifier
* @tfm: crypto struct, key storage
+ * @salt: salt used to generate IV in XPN cipher suites
*/
struct macsec_key {
u8 id[MACSEC_KEYID_LEN];
struct crypto_aead *tfm;
+ salt_t salt;
};
struct macsec_rx_sc_stats {
@@ -58,18 +103,34 @@ struct macsec_tx_sc_stats {
__u64 OutOctetsEncrypted;
};
+struct macsec_dev_stats {
+ __u64 OutPktsUntagged;
+ __u64 InPktsUntagged;
+ __u64 OutPktsTooLong;
+ __u64 InPktsNoTag;
+ __u64 InPktsBadTag;
+ __u64 InPktsUnknownSCI;
+ __u64 InPktsNoSCI;
+ __u64 InPktsOverrun;
+};
+
/**
* struct macsec_rx_sa - receive secure association
* @active:
* @next_pn: packet number expected for the next packet
* @lock: protects next_pn manipulations
* @key: key structure
+ * @ssci: short secure channel identifier
* @stats: per-SA stats
*/
struct macsec_rx_sa {
struct macsec_key key;
+ ssci_t ssci;
spinlock_t lock;
- u32 next_pn;
+ union {
+ pn_t next_pn_halves;
+ u64 next_pn;
+ };
refcount_t refcnt;
bool active;
struct macsec_rx_sa_stats __percpu *stats;
@@ -110,12 +171,17 @@ struct macsec_rx_sc {
* @next_pn: packet number to use for the next packet
* @lock: protects next_pn manipulations
* @key: key structure
+ * @ssci: short secure channel identifier
* @stats: per-SA stats
*/
struct macsec_tx_sa {
struct macsec_key key;
+ ssci_t ssci;
spinlock_t lock;
- u32 next_pn;
+ union {
+ pn_t next_pn_halves;
+ u64 next_pn;
+ };
refcount_t refcnt;
bool active;
struct macsec_tx_sa_stats __percpu *stats;
@@ -132,6 +198,7 @@ struct macsec_tx_sa {
* @scb: single copy broadcast flag
* @sa: array of secure associations
* @stats: stats for this TXSC
+ * @md_dst: MACsec offload metadata dst
*/
struct macsec_tx_sc {
bool active;
@@ -142,6 +209,7 @@ struct macsec_tx_sc {
bool scb;
struct macsec_tx_sa __rcu *sa[MACSEC_NUM_AN];
struct pcpu_tx_sc_stats __percpu *stats;
+ struct metadata_dst *md_dst;
};
/**
@@ -152,6 +220,7 @@ struct macsec_tx_sc {
* @key_len: length of keys used by the cipher suite
* @icv_len: length of ICV used by the cipher suite
* @validate_frames: validation mode
+ * @xpn: enable XPN for this SecY
* @operational: MAC_Operational flag
* @protect_frames: enable protection for this SecY
* @replay_protect: enable packet number checks on receive
@@ -166,6 +235,7 @@ struct macsec_secy {
u16 key_len;
u16 icv_len;
enum macsec_validation_type validate_frames;
+ bool xpn;
bool operational;
bool protect_frames;
bool replay_protect;
@@ -178,21 +248,29 @@ struct macsec_secy {
* struct macsec_context - MACsec context for hardware offloading
*/
struct macsec_context {
- struct phy_device *phydev;
+ union {
+ struct net_device *netdev;
+ struct phy_device *phydev;
+ };
enum macsec_offload offload;
struct macsec_secy *secy;
struct macsec_rx_sc *rx_sc;
struct {
unsigned char assoc_num;
- u8 key[MACSEC_KEYID_LEN];
+ u8 key[MACSEC_MAX_KEY_LEN];
union {
struct macsec_rx_sa *rx_sa;
struct macsec_tx_sa *tx_sa;
};
} sa;
-
- u8 prepare:1;
+ union {
+ struct macsec_tx_sc_stats *tx_sc_stats;
+ struct macsec_tx_sa_stats *tx_sa_stats;
+ struct macsec_rx_sc_stats *rx_sc_stats;
+ struct macsec_rx_sa_stats *rx_sa_stats;
+ struct macsec_dev_stats *dev_stats;
+ } stats;
};
/**
@@ -217,8 +295,21 @@ struct macsec_ops {
int (*mdo_add_txsa)(struct macsec_context *ctx);
int (*mdo_upd_txsa)(struct macsec_context *ctx);
int (*mdo_del_txsa)(struct macsec_context *ctx);
+ /* Statistics */
+ int (*mdo_get_dev_stats)(struct macsec_context *ctx);
+ int (*mdo_get_tx_sc_stats)(struct macsec_context *ctx);
+ int (*mdo_get_tx_sa_stats)(struct macsec_context *ctx);
+ int (*mdo_get_rx_sc_stats)(struct macsec_context *ctx);
+ int (*mdo_get_rx_sa_stats)(struct macsec_context *ctx);
};
void macsec_pn_wrapped(struct macsec_secy *secy, struct macsec_tx_sa *tx_sa);
+static inline bool macsec_send_sci(const struct macsec_secy *secy)
+{
+ const struct macsec_tx_sc *tx_sc = &secy->tx_sc;
+
+ return tx_sc->send_sci ||
+ (secy->n_rx_sc > 1 && !tx_sc->end_station && !tx_sc->scb);
+}
#endif /* _NET_MACSEC_H_ */
diff --git a/include/net/mctp.h b/include/net/mctp.h
new file mode 100644
index 000000000000..82800d521c3d
--- /dev/null
+++ b/include/net/mctp.h
@@ -0,0 +1,298 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Management Component Transport Protocol (MCTP)
+ *
+ * Copyright (c) 2021 Code Construct
+ * Copyright (c) 2021 Google
+ */
+
+#ifndef __NET_MCTP_H
+#define __NET_MCTP_H
+
+#include <linux/bits.h>
+#include <linux/mctp.h>
+#include <linux/netdevice.h>
+#include <net/net_namespace.h>
+#include <net/sock.h>
+
+/* MCTP packet definitions */
+struct mctp_hdr {
+ u8 ver;
+ u8 dest;
+ u8 src;
+ u8 flags_seq_tag;
+};
+
+#define MCTP_VER_MIN 1
+#define MCTP_VER_MAX 1
+
+/* Definitions for flags_seq_tag field */
+#define MCTP_HDR_FLAG_SOM BIT(7)
+#define MCTP_HDR_FLAG_EOM BIT(6)
+#define MCTP_HDR_FLAG_TO BIT(3)
+#define MCTP_HDR_FLAGS GENMASK(5, 3)
+#define MCTP_HDR_SEQ_SHIFT 4
+#define MCTP_HDR_SEQ_MASK GENMASK(1, 0)
+#define MCTP_HDR_TAG_SHIFT 0
+#define MCTP_HDR_TAG_MASK GENMASK(2, 0)
+
+#define MCTP_INITIAL_DEFAULT_NET 1
+
+static inline bool mctp_address_unicast(mctp_eid_t eid)
+{
+ return eid >= 8 && eid < 255;
+}
+
+static inline bool mctp_address_broadcast(mctp_eid_t eid)
+{
+ return eid == 255;
+}
+
+static inline bool mctp_address_null(mctp_eid_t eid)
+{
+ return eid == 0;
+}
+
+static inline bool mctp_address_matches(mctp_eid_t match, mctp_eid_t eid)
+{
+ return match == eid || match == MCTP_ADDR_ANY;
+}
+
+static inline struct mctp_hdr *mctp_hdr(struct sk_buff *skb)
+{
+ return (struct mctp_hdr *)skb_network_header(skb);
+}
+
+/* socket implementation */
+struct mctp_sock {
+ struct sock sk;
+
+ /* bind() params */
+ unsigned int bind_net;
+ mctp_eid_t bind_addr;
+ __u8 bind_type;
+
+ /* sendmsg()/recvmsg() uses struct sockaddr_mctp_ext */
+ bool addr_ext;
+
+ /* list of mctp_sk_key, for incoming tag lookup. updates protected
+ * by sk->net->keys_lock
+ */
+ struct hlist_head keys;
+
+ /* mechanism for expiring allocated keys; will release an allocated
+ * tag, and any netdev state for a request/response pairing
+ */
+ struct timer_list key_expiry;
+};
+
+/* Key for matching incoming packets to sockets or reassembly contexts.
+ * Packets are matched on (src,dest,tag).
+ *
+ * Lifetime / locking requirements:
+ *
+ * - individual key data (ie, the struct itself) is protected by key->lock;
+ * changes must be made with that lock held.
+ *
+ * - the lookup fields: peer_addr, local_addr and tag are set before the
+ * key is added to lookup lists, and never updated.
+ *
+ * - A ref to the key must be held (throuh key->refs) if a pointer to the
+ * key is to be accessed after key->lock is released.
+ *
+ * - a mctp_sk_key contains a reference to a struct sock; this is valid
+ * for the life of the key. On sock destruction (through unhash), the key is
+ * removed from lists (see below), and marked invalid.
+ *
+ * - these mctp_sk_keys appear on two lists:
+ * 1) the struct mctp_sock->keys list
+ * 2) the struct netns_mctp->keys list
+ *
+ * presences on these lists requires a (single) refcount to be held; both
+ * lists are updated as a single operation.
+ *
+ * Updates and lookups in either list are performed under the
+ * netns_mctp->keys lock. Lookup functions will need to lock the key and
+ * take a reference before unlocking the keys_lock. Consequently, the list's
+ * keys_lock *cannot* be acquired with the individual key->lock held.
+ *
+ * - a key may have a sk_buff attached as part of an in-progress message
+ * reassembly (->reasm_head). The reasm data is protected by the individual
+ * key->lock.
+ *
+ * - there are two destruction paths for a mctp_sk_key:
+ *
+ * - through socket unhash (see mctp_sk_unhash). This performs the list
+ * removal under keys_lock.
+ *
+ * - where a key is established to receive a reply message: after receiving
+ * the (complete) reply, or during reassembly errors. Here, we clean up
+ * the reassembly context (marking reasm_dead, to prevent another from
+ * starting), and remove the socket from the netns & socket lists.
+ *
+ * - through an expiry timeout, on a per-socket timer
+ */
+struct mctp_sk_key {
+ mctp_eid_t peer_addr;
+ mctp_eid_t local_addr; /* MCTP_ADDR_ANY for local owned tags */
+ __u8 tag; /* incoming tag match; invert TO for local */
+
+ /* we hold a ref to sk when set */
+ struct sock *sk;
+
+ /* routing lookup list */
+ struct hlist_node hlist;
+
+ /* per-socket list */
+ struct hlist_node sklist;
+
+ /* lock protects against concurrent updates to the reassembly and
+ * expiry data below.
+ */
+ spinlock_t lock;
+
+ /* Keys are referenced during the output path, which may sleep */
+ refcount_t refs;
+
+ /* incoming fragment reassembly context */
+ struct sk_buff *reasm_head;
+ struct sk_buff **reasm_tailp;
+ bool reasm_dead;
+ u8 last_seq;
+
+ /* key validity */
+ bool valid;
+
+ /* expiry timeout; valid (above) cleared on expiry */
+ unsigned long expiry;
+
+ /* free to use for device flow state tracking. Initialised to
+ * zero on initial key creation
+ */
+ unsigned long dev_flow_state;
+ struct mctp_dev *dev;
+
+ /* a tag allocated with SIOCMCTPALLOCTAG ioctl will not expire
+ * automatically on timeout or response, instead SIOCMCTPDROPTAG
+ * is used.
+ */
+ bool manual_alloc;
+};
+
+struct mctp_skb_cb {
+ unsigned int magic;
+ unsigned int net;
+ int ifindex; /* extended/direct addressing if set */
+ mctp_eid_t src;
+ unsigned char halen;
+ unsigned char haddr[MAX_ADDR_LEN];
+};
+
+/* skb control-block accessors with a little extra debugging for initial
+ * development.
+ *
+ * TODO: remove checks & mctp_skb_cb->magic; replace callers of __mctp_cb
+ * with mctp_cb().
+ *
+ * __mctp_cb() is only for the initial ingress code; we should see ->magic set
+ * at all times after this.
+ */
+static inline struct mctp_skb_cb *__mctp_cb(struct sk_buff *skb)
+{
+ struct mctp_skb_cb *cb = (void *)skb->cb;
+
+ cb->magic = 0x4d435450;
+ return cb;
+}
+
+static inline struct mctp_skb_cb *mctp_cb(struct sk_buff *skb)
+{
+ struct mctp_skb_cb *cb = (void *)skb->cb;
+
+ BUILD_BUG_ON(sizeof(struct mctp_skb_cb) > sizeof(skb->cb));
+ WARN_ON(cb->magic != 0x4d435450);
+ return (void *)(skb->cb);
+}
+
+/* If CONFIG_MCTP_FLOWS, we may add one of these as a SKB extension,
+ * indicating the flow to the device driver.
+ */
+struct mctp_flow {
+ struct mctp_sk_key *key;
+};
+
+/* Route definition.
+ *
+ * These are held in the pernet->mctp.routes list, with RCU protection for
+ * removed routes. We hold a reference to the netdev; routes need to be
+ * dropped on NETDEV_UNREGISTER events.
+ *
+ * Updates to the route table are performed under rtnl; all reads under RCU,
+ * so routes cannot be referenced over a RCU grace period. Specifically: A
+ * caller cannot block between mctp_route_lookup and mctp_route_release()
+ */
+struct mctp_route {
+ mctp_eid_t min, max;
+
+ struct mctp_dev *dev;
+ unsigned int mtu;
+ unsigned char type;
+ int (*output)(struct mctp_route *route,
+ struct sk_buff *skb);
+
+ struct list_head list;
+ refcount_t refs;
+ struct rcu_head rcu;
+};
+
+/* route interfaces */
+struct mctp_route *mctp_route_lookup(struct net *net, unsigned int dnet,
+ mctp_eid_t daddr);
+
+int mctp_local_output(struct sock *sk, struct mctp_route *rt,
+ struct sk_buff *skb, mctp_eid_t daddr, u8 req_tag);
+
+void mctp_key_unref(struct mctp_sk_key *key);
+struct mctp_sk_key *mctp_alloc_local_tag(struct mctp_sock *msk,
+ mctp_eid_t daddr, mctp_eid_t saddr,
+ bool manual, u8 *tagp);
+
+/* routing <--> device interface */
+unsigned int mctp_default_net(struct net *net);
+int mctp_default_net_set(struct net *net, unsigned int index);
+int mctp_route_add_local(struct mctp_dev *mdev, mctp_eid_t addr);
+int mctp_route_remove_local(struct mctp_dev *mdev, mctp_eid_t addr);
+void mctp_route_remove_dev(struct mctp_dev *mdev);
+
+/* neighbour definitions */
+enum mctp_neigh_source {
+ MCTP_NEIGH_STATIC,
+ MCTP_NEIGH_DISCOVER,
+};
+
+struct mctp_neigh {
+ struct mctp_dev *dev;
+ mctp_eid_t eid;
+ enum mctp_neigh_source source;
+
+ unsigned char ha[MAX_ADDR_LEN];
+
+ struct list_head list;
+ struct rcu_head rcu;
+};
+
+int mctp_neigh_init(void);
+void mctp_neigh_exit(void);
+
+// ret_hwaddr may be NULL, otherwise must have space for MAX_ADDR_LEN
+int mctp_neigh_lookup(struct mctp_dev *dev, mctp_eid_t eid,
+ void *ret_hwaddr);
+void mctp_neigh_remove_dev(struct mctp_dev *mdev);
+
+int mctp_routes_init(void);
+void mctp_routes_exit(void);
+
+void mctp_device_init(void);
+void mctp_device_exit(void);
+
+#endif /* __NET_MCTP_H */
diff --git a/include/net/mctpdevice.h b/include/net/mctpdevice.h
new file mode 100644
index 000000000000..5c0d04b5c12c
--- /dev/null
+++ b/include/net/mctpdevice.h
@@ -0,0 +1,56 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Management Component Transport Protocol (MCTP) - device
+ * definitions.
+ *
+ * Copyright (c) 2021 Code Construct
+ * Copyright (c) 2021 Google
+ */
+
+#ifndef __NET_MCTPDEVICE_H
+#define __NET_MCTPDEVICE_H
+
+#include <linux/list.h>
+#include <linux/types.h>
+#include <linux/refcount.h>
+
+struct mctp_sk_key;
+
+struct mctp_dev {
+ struct net_device *dev;
+
+ refcount_t refs;
+
+ unsigned int net;
+
+ const struct mctp_netdev_ops *ops;
+
+ /* Only modified under RTNL. Reads have addrs_lock held */
+ u8 *addrs;
+ size_t num_addrs;
+ spinlock_t addrs_lock;
+
+ struct rcu_head rcu;
+};
+
+struct mctp_netdev_ops {
+ void (*release_flow)(struct mctp_dev *dev,
+ struct mctp_sk_key *key);
+};
+
+#define MCTP_INITIAL_DEFAULT_NET 1
+
+struct mctp_dev *mctp_dev_get_rtnl(const struct net_device *dev);
+struct mctp_dev *__mctp_dev_get(const struct net_device *dev);
+
+int mctp_register_netdev(struct net_device *dev,
+ const struct mctp_netdev_ops *ops);
+void mctp_unregister_netdev(struct net_device *dev);
+
+void mctp_dev_hold(struct mctp_dev *mdev);
+void mctp_dev_put(struct mctp_dev *mdev);
+
+void mctp_dev_set_key(struct mctp_dev *dev, struct mctp_sk_key *key);
+void mctp_dev_release_key(struct mctp_dev *dev, struct mctp_sk_key *key);
+
+#endif /* __NET_MCTPDEVICE_H */
diff --git a/include/net/mip6.h b/include/net/mip6.h
index f1c28971c362..67cd7e50804c 100644
--- a/include/net/mip6.h
+++ b/include/net/mip6.h
@@ -25,7 +25,7 @@ struct ip6_mh {
__u8 ip6mh_reserved;
__u16 ip6mh_cksum;
/* Followed by type specific messages */
- __u8 data[0];
+ __u8 data[];
} __packed;
#define IP6_MH_TYPE_BRR 0 /* Binding Refresh Request */
diff --git a/include/net/mld.h b/include/net/mld.h
index b0f5b3105ef0..c07359808493 100644
--- a/include/net/mld.h
+++ b/include/net/mld.h
@@ -24,12 +24,12 @@ struct mld2_grec {
__u8 grec_auxwords;
__be16 grec_nsrcs;
struct in6_addr grec_mca;
- struct in6_addr grec_src[0];
+ struct in6_addr grec_src[];
};
struct mld2_report {
struct icmp6hdr mld2r_hdr;
- struct mld2_grec mld2r_grec[0];
+ struct mld2_grec mld2r_grec[];
};
#define mld2r_type mld2r_hdr.icmp6_type
@@ -55,7 +55,7 @@ struct mld2_query {
#endif
__u8 mld2q_qqic;
__be16 mld2q_nsrcs;
- struct in6_addr mld2q_srcs[0];
+ struct in6_addr mld2q_srcs[];
};
#define mld2q_type mld2q_hdr.icmp6_type
@@ -92,6 +92,9 @@ struct mld2_query {
#define MLD_EXP_MIN_LIMIT 32768UL
#define MLDV1_MRD_MAX_COMPAT (MLD_EXP_MIN_LIMIT - 1)
+#define MLD_MAX_QUEUE 8
+#define MLD_MAX_SKBS 32
+
static inline unsigned long mldv2_mrc(const struct mld2_query *mlh2)
{
/* RFC3810, 5.1.3. Maximum Response Code */
diff --git a/include/net/mpls.h b/include/net/mpls.h
index ccaf238e8ea7..0bb7944e7b08 100644
--- a/include/net/mpls.h
+++ b/include/net/mpls.h
@@ -8,6 +8,7 @@
#include <linux/if_ether.h>
#include <linux/netdevice.h>
+#include <linux/mpls.h>
#define MPLS_HLEN 4
@@ -25,4 +26,20 @@ static inline struct mpls_shim_hdr *mpls_hdr(const struct sk_buff *skb)
{
return (struct mpls_shim_hdr *)skb_network_header(skb);
}
+
+static inline struct mpls_shim_hdr mpls_entry_encode(u32 label,
+ unsigned int ttl,
+ unsigned int tc,
+ bool bos)
+{
+ struct mpls_shim_hdr result;
+
+ result.label_stack_entry =
+ cpu_to_be32((label << MPLS_LS_LABEL_SHIFT) |
+ (tc << MPLS_LS_TC_SHIFT) |
+ (bos ? (1 << MPLS_LS_S_SHIFT) : 0) |
+ (ttl << MPLS_LS_TTL_SHIFT));
+ return result;
+}
+
#endif
diff --git a/include/net/mpls_iptunnel.h b/include/net/mpls_iptunnel.h
index 6b4759eae158..0c71c27979fb 100644
--- a/include/net/mpls_iptunnel.h
+++ b/include/net/mpls_iptunnel.h
@@ -6,12 +6,15 @@
#ifndef _NET_MPLS_IPTUNNEL_H
#define _NET_MPLS_IPTUNNEL_H 1
+#include <linux/types.h>
+#include <net/lwtunnel.h>
+
struct mpls_iptunnel_encap {
u8 labels;
u8 ttl_propagate;
u8 default_ttl;
u8 reserved1;
- u32 label[0];
+ u32 label[];
};
static inline struct mpls_iptunnel_encap *mpls_lwtunnel_encap(struct lwtunnel_state *lwtstate)
diff --git a/include/net/mptcp.h b/include/net/mptcp.h
index c971d25431ea..412479ebf5ad 100644
--- a/include/net/mptcp.h
+++ b/include/net/mptcp.h
@@ -12,32 +12,92 @@
#include <linux/tcp.h>
#include <linux/types.h>
+struct mptcp_info;
+struct mptcp_sock;
+struct seq_file;
+
/* MPTCP sk_buff extension data */
struct mptcp_ext {
- u64 data_ack;
+ union {
+ u64 data_ack;
+ u32 data_ack32;
+ };
u64 data_seq;
u32 subflow_seq;
u16 data_len;
+ __sum16 csum;
u8 use_map:1,
dsn64:1,
data_fin:1,
use_ack:1,
ack64:1,
mpc_map:1,
- __unused:2;
- /* one byte hole */
+ frozen:1,
+ reset_transient:1;
+ u8 reset_reason:4,
+ csum_reqd:1,
+ infinite_map:1;
+};
+
+#define MPTCPOPT_HMAC_LEN 20
+#define MPTCP_RM_IDS_MAX 8
+
+struct mptcp_rm_list {
+ u8 ids[MPTCP_RM_IDS_MAX];
+ u8 nr;
+};
+
+struct mptcp_addr_info {
+ u8 id;
+ sa_family_t family;
+ __be16 port;
+ union {
+ struct in_addr addr;
+#if IS_ENABLED(CONFIG_MPTCP_IPV6)
+ struct in6_addr addr6;
+#endif
+ };
};
struct mptcp_out_options {
#if IS_ENABLED(CONFIG_MPTCP)
u16 suboptions;
- u64 sndr_key;
- u64 rcvr_key;
- struct mptcp_ext ext_copy;
+ struct mptcp_rm_list rm_list;
+ u8 join_id;
+ u8 backup;
+ u8 reset_reason:4,
+ reset_transient:1,
+ csum_reqd:1,
+ allow_join_id0:1;
+ union {
+ struct {
+ u64 sndr_key;
+ u64 rcvr_key;
+ u64 data_seq;
+ u32 subflow_seq;
+ u16 data_len;
+ __sum16 csum;
+ };
+ struct {
+ struct mptcp_addr_info addr;
+ u64 ahmac;
+ };
+ struct {
+ struct mptcp_ext ext_copy;
+ u64 fail_seq;
+ };
+ struct {
+ u32 nonce;
+ u32 token;
+ u64 thmac;
+ u8 hmac[MPTCPOPT_HMAC_LEN];
+ };
+ };
#endif
};
#ifdef CONFIG_MPTCP
+extern struct request_sock_ops mptcp_subflow_request_sock_ops;
void mptcp_init(void);
@@ -51,20 +111,25 @@ static inline bool rsk_is_mptcp(const struct request_sock *req)
return tcp_rsk(req)->is_mptcp;
}
-void mptcp_parse_option(const struct sk_buff *skb, const unsigned char *ptr,
- int opsize, struct tcp_options_received *opt_rx);
+static inline bool rsk_drop_req(const struct request_sock *req)
+{
+ return tcp_rsk(req)->is_mptcp && tcp_rsk(req)->drop_req;
+}
+
+void mptcp_space(const struct sock *ssk, int *space, int *full_space);
bool mptcp_syn_options(struct sock *sk, const struct sk_buff *skb,
unsigned int *size, struct mptcp_out_options *opts);
-void mptcp_rcv_synsent(struct sock *sk);
bool mptcp_synack_options(const struct request_sock *req, unsigned int *size,
struct mptcp_out_options *opts);
bool mptcp_established_options(struct sock *sk, struct sk_buff *skb,
unsigned int *size, unsigned int remaining,
struct mptcp_out_options *opts);
-void mptcp_incoming_options(struct sock *sk, struct sk_buff *skb,
- struct tcp_options_received *opt_rx);
+bool mptcp_incoming_options(struct sock *sk, struct sk_buff *skb);
-void mptcp_write_options(__be32 *ptr, struct mptcp_out_options *opts);
+void mptcp_write_options(struct tcphdr *th, __be32 *ptr, struct tcp_sock *tp,
+ struct mptcp_out_options *opts);
+
+void mptcp_diag_fill_info(struct mptcp_sock *msk, struct mptcp_info *info);
/* move the skb extension owership, with the assumption that 'to' is
* newly allocated
@@ -83,6 +148,19 @@ static inline void mptcp_skb_ext_move(struct sk_buff *to,
from->active_extensions = 0;
}
+static inline void mptcp_skb_ext_copy(struct sk_buff *to,
+ struct sk_buff *from)
+{
+ struct mptcp_ext *from_ext;
+
+ from_ext = skb_ext_find(from, SKB_EXT_MPTCP);
+ if (!from_ext)
+ return;
+
+ from_ext->frozen = 1;
+ skb_ext_copy(to, from);
+}
+
static inline bool mptcp_ext_matches(const struct mptcp_ext *to_ext,
const struct mptcp_ext *from_ext)
{
@@ -106,6 +184,20 @@ static inline bool mptcp_skb_can_collapse(const struct sk_buff *to,
skb_ext_find(from, SKB_EXT_MPTCP));
}
+void mptcp_seq_show(struct seq_file *seq);
+int mptcp_subflow_init_cookie_req(struct request_sock *req,
+ const struct sock *sk_listener,
+ struct sk_buff *skb);
+
+__be32 mptcp_get_reset_option(const struct sk_buff *skb);
+
+static inline __be32 mptcp_reset_option(const struct sk_buff *skb)
+{
+ if (skb_ext_exist(skb, SKB_EXT_MPTCP))
+ return mptcp_get_reset_option(skb);
+
+ return htonl(0u);
+}
#else
static inline void mptcp_init(void)
@@ -122,10 +214,9 @@ static inline bool rsk_is_mptcp(const struct request_sock *req)
return false;
}
-static inline void mptcp_parse_option(const struct sk_buff *skb,
- const unsigned char *ptr, int opsize,
- struct tcp_options_received *opt_rx)
+static inline bool rsk_drop_req(const struct request_sock *req)
{
+ return false;
}
static inline bool mptcp_syn_options(struct sock *sk, const struct sk_buff *skb,
@@ -135,10 +226,6 @@ static inline bool mptcp_syn_options(struct sock *sk, const struct sk_buff *skb,
return false;
}
-static inline void mptcp_rcv_synsent(struct sock *sk)
-{
-}
-
static inline bool mptcp_synack_options(const struct request_sock *req,
unsigned int *size,
struct mptcp_out_options *opts)
@@ -155,10 +242,10 @@ static inline bool mptcp_established_options(struct sock *sk,
return false;
}
-static inline void mptcp_incoming_options(struct sock *sk,
- struct sk_buff *skb,
- struct tcp_options_received *opt_rx)
+static inline bool mptcp_incoming_options(struct sock *sk,
+ struct sk_buff *skb)
{
+ return true;
}
static inline void mptcp_skb_ext_move(struct sk_buff *to,
@@ -166,12 +253,28 @@ static inline void mptcp_skb_ext_move(struct sk_buff *to,
{
}
+static inline void mptcp_skb_ext_copy(struct sk_buff *to,
+ struct sk_buff *from)
+{
+}
+
static inline bool mptcp_skb_can_collapse(const struct sk_buff *to,
const struct sk_buff *from)
{
return true;
}
+static inline void mptcp_space(const struct sock *ssk, int *s, int *fs) { }
+static inline void mptcp_seq_show(struct seq_file *seq) { }
+
+static inline int mptcp_subflow_init_cookie_req(struct request_sock *req,
+ const struct sock *sk_listener,
+ struct sk_buff *skb)
+{
+ return 0; /* TCP fallback */
+}
+
+static inline __be32 mptcp_reset_option(const struct sk_buff *skb) { return htonl(0u); }
#endif /* CONFIG_MPTCP */
#if IS_ENABLED(CONFIG_MPTCP_IPV6)
@@ -182,4 +285,14 @@ static inline int mptcpv6_init(void) { return 0; }
static inline void mptcpv6_handle_mapped(struct sock *sk, bool mapped) { }
#endif
+#if defined(CONFIG_MPTCP) && defined(CONFIG_BPF_SYSCALL)
+struct mptcp_sock *bpf_mptcp_sock_from_subflow(struct sock *sk);
+#else
+static inline struct mptcp_sock *bpf_mptcp_sock_from_subflow(struct sock *sk) { return NULL; }
+#endif
+
+#if !IS_ENABLED(CONFIG_MPTCP)
+struct mptcp_sock { };
+#endif
+
#endif /* __NET_MPTCP_H */
diff --git a/include/net/mrp.h b/include/net/mrp.h
index 1c308c034e1a..92cd3fb6cf9d 100644
--- a/include/net/mrp.h
+++ b/include/net/mrp.h
@@ -2,6 +2,10 @@
#ifndef _NET_MRP_H
#define _NET_MRP_H
+#include <linux/netdevice.h>
+#include <linux/skbuff.h>
+#include <linux/types.h>
+
#define MRP_END_MARK 0x0
struct mrp_pdu_hdr {
diff --git a/include/net/ncsi.h b/include/net/ncsi.h
index fbefe80361ee..08a50d9acb0a 100644
--- a/include/net/ncsi.h
+++ b/include/net/ncsi.h
@@ -2,6 +2,8 @@
#ifndef __NET_NCSI_H
#define __NET_NCSI_H
+#include <linux/types.h>
+
/*
* The NCSI device states seen from external. More NCSI device states are
* only visible internally (in net/ncsi/internal.h). When the NCSI device
diff --git a/include/net/ndisc.h b/include/net/ndisc.h
index b5ebeb3b0de0..da7eec8669ec 100644
--- a/include/net/ndisc.h
+++ b/include/net/ndisc.h
@@ -41,6 +41,7 @@ enum {
ND_OPT_DNSSL = 31, /* RFC6106 */
ND_OPT_6CO = 34, /* RFC6775 */
ND_OPT_CAPTIVE_PORTAL = 37, /* RFC7710 */
+ ND_OPT_PREF64 = 38, /* RFC8781 */
__ND_OPT_MAX
};
@@ -80,12 +81,12 @@ extern struct neigh_table nd_tbl;
struct nd_msg {
struct icmp6hdr icmph;
struct in6_addr target;
- __u8 opt[0];
+ __u8 opt[];
};
struct rs_msg {
struct icmp6hdr icmph;
- __u8 opt[0];
+ __u8 opt[];
};
struct ra_msg {
@@ -98,7 +99,7 @@ struct rd_msg {
struct icmp6hdr icmph;
struct in6_addr target;
struct in6_addr dest;
- __u8 opt[0];
+ __u8 opt[];
};
struct nd_opt_hdr {
@@ -136,7 +137,7 @@ struct ndisc_options *ndisc_parse_options(const struct net_device *dev,
u8 *opt, int opt_len,
struct ndisc_options *ndopts);
-void __ndisc_fill_addr_option(struct sk_buff *skb, int type, void *data,
+void __ndisc_fill_addr_option(struct sk_buff *skb, int type, const void *data,
int data_len, int pad);
#define NDISC_OPS_REDIRECT_DATA_SPACE 2
@@ -410,13 +411,7 @@ static inline void __ipv6_confirm_neigh(struct net_device *dev,
rcu_read_lock_bh();
n = __ipv6_neigh_lookup_noref(dev, pkey);
- if (n) {
- unsigned long now = jiffies;
-
- /* avoid dirtying neighbour */
- if (READ_ONCE(n->confirmed) != now)
- WRITE_ONCE(n->confirmed, now);
- }
+ neigh_confirm(n);
rcu_read_unlock_bh();
}
@@ -427,13 +422,7 @@ static inline void __ipv6_confirm_neigh_stub(struct net_device *dev,
rcu_read_lock_bh();
n = __ipv6_neigh_lookup_noref_stub(dev, pkey);
- if (n) {
- unsigned long now = jiffies;
-
- /* avoid dirtying neighbour */
- if (READ_ONCE(n->confirmed) != now)
- WRITE_ONCE(n->confirmed, now);
- }
+ neigh_confirm(n);
rcu_read_unlock_bh();
}
@@ -458,10 +447,15 @@ void ndisc_cleanup(void);
int ndisc_rcv(struct sk_buff *skb);
+struct sk_buff *ndisc_ns_create(struct net_device *dev, const struct in6_addr *solicit,
+ const struct in6_addr *saddr, u64 nonce);
void ndisc_send_ns(struct net_device *dev, const struct in6_addr *solicit,
const struct in6_addr *daddr, const struct in6_addr *saddr,
u64 nonce);
+void ndisc_send_skb(struct sk_buff *skb, const struct in6_addr *daddr,
+ const struct in6_addr *saddr);
+
void ndisc_send_rs(struct net_device *dev,
const struct in6_addr *saddr, const struct in6_addr *daddr);
void ndisc_send_na(struct net_device *dev, const struct in6_addr *daddr,
@@ -486,14 +480,14 @@ int igmp6_late_init(void);
void igmp6_cleanup(void);
void igmp6_late_cleanup(void);
-int igmp6_event_query(struct sk_buff *skb);
+void igmp6_event_query(struct sk_buff *skb);
-int igmp6_event_report(struct sk_buff *skb);
+void igmp6_event_report(struct sk_buff *skb);
#ifdef CONFIG_SYSCTL
int ndisc_ifinfo_sysctl_change(struct ctl_table *ctl, int write,
- void __user *buffer, size_t *lenp, loff_t *ppos);
+ void *buffer, size_t *lenp, loff_t *ppos);
int ndisc_ifinfo_sysctl_strategy(struct ctl_table *ctl,
void __user *oldval, size_t __user *oldlenp,
void __user *newval, size_t newlen);
diff --git a/include/net/neighbour.h b/include/net/neighbour.h
index 8ec77bfdc1a4..20745cf7ae1a 100644
--- a/include/net/neighbour.h
+++ b/include/net/neighbour.h
@@ -48,6 +48,7 @@ enum {
NEIGH_VAR_RETRANS_TIME,
NEIGH_VAR_BASE_REACHABLE_TIME,
NEIGH_VAR_DELAY_PROBE_TIME,
+ NEIGH_VAR_INTERVAL_PROBE_TIME_MS,
NEIGH_VAR_GC_STALETIME,
NEIGH_VAR_QUEUE_LEN_BYTES,
NEIGH_VAR_PROXY_QLEN,
@@ -70,6 +71,7 @@ enum {
struct neigh_parms {
possible_net_t net;
struct net_device *dev;
+ netdevice_tracker dev_tracker;
struct list_head list;
int (*neigh_setup)(struct neighbour *);
struct neigh_table *tbl;
@@ -81,6 +83,7 @@ struct neigh_parms {
struct rcu_head rcu_head;
int reachable_time;
+ int qlen;
int data[NEIGH_VAR_DATA_MAX];
DECLARE_BITMAP(data_state, NEIGH_VAR_DATA_MAX);
};
@@ -144,19 +147,21 @@ struct neighbour {
struct timer_list timer;
unsigned long used;
atomic_t probes;
- __u8 flags;
- __u8 nud_state;
- __u8 type;
- __u8 dead;
+ u8 nud_state;
+ u8 type;
+ u8 dead;
u8 protocol;
+ u32 flags;
seqlock_t ha_lock;
unsigned char ha[ALIGN(MAX_ADDR_LEN, sizeof(unsigned long))] __aligned(8);
struct hh_cache hh;
int (*output)(struct neighbour *, struct sk_buff *);
const struct neigh_ops *ops;
struct list_head gc_list;
+ struct list_head managed_list;
struct rcu_head rcu;
struct net_device *dev;
+ netdevice_tracker dev_tracker;
u8 primary_key[0];
} __randomize_layout;
@@ -172,9 +177,10 @@ struct pneigh_entry {
struct pneigh_entry *next;
possible_net_t net;
struct net_device *dev;
- u8 flags;
+ netdevice_tracker dev_tracker;
+ u32 flags;
u8 protocol;
- u8 key[0];
+ u8 key[];
};
/*
@@ -204,6 +210,7 @@ struct neigh_table {
int (*pconstructor)(struct pneigh_entry *);
void (*pdestructor)(struct pneigh_entry *);
void (*proxy_redo)(struct sk_buff *skb);
+ int (*is_multicast)(const void *pkey);
bool (*allow_add)(const struct net_device *dev,
struct netlink_ext_ack *extack);
char *id;
@@ -215,11 +222,13 @@ struct neigh_table {
int gc_thresh3;
unsigned long last_flush;
struct delayed_work gc_work;
+ struct delayed_work managed_work;
struct timer_list proxy_timer;
struct sk_buff_head proxy_queue;
atomic_t entries;
atomic_t gc_entries;
struct list_head gc_list;
+ struct list_head managed_list;
rwlock_t lock;
unsigned long last_rand;
struct neigh_statistics __percpu *stats;
@@ -249,20 +258,24 @@ static inline void *neighbour_priv(const struct neighbour *n)
}
/* flags for neigh_update() */
-#define NEIGH_UPDATE_F_OVERRIDE 0x00000001
-#define NEIGH_UPDATE_F_WEAK_OVERRIDE 0x00000002
-#define NEIGH_UPDATE_F_OVERRIDE_ISROUTER 0x00000004
-#define NEIGH_UPDATE_F_EXT_LEARNED 0x20000000
-#define NEIGH_UPDATE_F_ISROUTER 0x40000000
-#define NEIGH_UPDATE_F_ADMIN 0x80000000
+#define NEIGH_UPDATE_F_OVERRIDE BIT(0)
+#define NEIGH_UPDATE_F_WEAK_OVERRIDE BIT(1)
+#define NEIGH_UPDATE_F_OVERRIDE_ISROUTER BIT(2)
+#define NEIGH_UPDATE_F_USE BIT(3)
+#define NEIGH_UPDATE_F_MANAGED BIT(4)
+#define NEIGH_UPDATE_F_EXT_LEARNED BIT(5)
+#define NEIGH_UPDATE_F_ISROUTER BIT(6)
+#define NEIGH_UPDATE_F_ADMIN BIT(7)
+
+/* In-kernel representation for NDA_FLAGS_EXT flags: */
+#define NTF_OLD_MASK 0xff
+#define NTF_EXT_SHIFT 8
+#define NTF_EXT_MASK (NTF_EXT_MANAGED)
+
+#define NTF_MANAGED (NTF_EXT_MANAGED << NTF_EXT_SHIFT)
extern const struct nla_policy nda_policy[];
-static inline bool neigh_key_eq16(const struct neighbour *n, const void *pkey)
-{
- return *(const u16 *)n->primary_key == *(const u16 *)pkey;
-}
-
static inline bool neigh_key_eq32(const struct neighbour *n, const void *pkey)
{
return *(const u32 *)n->primary_key == *(const u32 *)pkey;
@@ -308,6 +321,17 @@ static inline struct neighbour *__neigh_lookup_noref(struct neigh_table *tbl,
return ___neigh_lookup_noref(tbl, tbl->key_eq, tbl->hash, pkey, dev);
}
+static inline void neigh_confirm(struct neighbour *n)
+{
+ if (n) {
+ unsigned long now = jiffies;
+
+ /* avoid dirtying neighbour */
+ if (READ_ONCE(n->confirmed) != now)
+ WRITE_ONCE(n->confirmed, now);
+ }
+}
+
void neigh_table_init(int index, struct neigh_table *tbl);
int neigh_table_clear(int index, struct neigh_table *tbl);
struct neighbour *neigh_lookup(struct neigh_table *tbl, const void *pkey,
@@ -323,7 +347,8 @@ static inline struct neighbour *neigh_create(struct neigh_table *tbl,
return __neigh_create(tbl, pkey, dev, true);
}
void neigh_destroy(struct neighbour *neigh);
-int __neigh_event_send(struct neighbour *neigh, struct sk_buff *skb);
+int __neigh_event_send(struct neighbour *neigh, struct sk_buff *skb,
+ const bool immediate_ok);
int neigh_update(struct neighbour *neigh, const u8 *lladdr, u8 new, u32 flags,
u32 nlmsg_pid);
void __neigh_set_probe_once(struct neighbour *neigh);
@@ -392,13 +417,12 @@ void *neigh_seq_next(struct seq_file *, void *, loff_t *);
void neigh_seq_stop(struct seq_file *, void *);
int neigh_proc_dointvec(struct ctl_table *ctl, int write,
- void __user *buffer, size_t *lenp, loff_t *ppos);
+ void *buffer, size_t *lenp, loff_t *ppos);
int neigh_proc_dointvec_jiffies(struct ctl_table *ctl, int write,
- void __user *buffer,
+ void *buffer,
size_t *lenp, loff_t *ppos);
int neigh_proc_dointvec_ms_jiffies(struct ctl_table *ctl, int write,
- void __user *buffer,
- size_t *lenp, loff_t *ppos);
+ void *buffer, size_t *lenp, loff_t *ppos);
int neigh_sysctl_register(struct net_device *dev, struct neigh_parms *p,
proc_handler *proc_handler);
@@ -434,17 +458,24 @@ static inline struct neighbour * neigh_clone(struct neighbour *neigh)
#define neigh_hold(n) refcount_inc(&(n)->refcnt)
-static inline int neigh_event_send(struct neighbour *neigh, struct sk_buff *skb)
+static __always_inline int neigh_event_send_probe(struct neighbour *neigh,
+ struct sk_buff *skb,
+ const bool immediate_ok)
{
unsigned long now = jiffies;
-
+
if (READ_ONCE(neigh->used) != now)
WRITE_ONCE(neigh->used, now);
- if (!(neigh->nud_state&(NUD_CONNECTED|NUD_DELAY|NUD_PROBE)))
- return __neigh_event_send(neigh, skb);
+ if (!(neigh->nud_state & (NUD_CONNECTED | NUD_DELAY | NUD_PROBE)))
+ return __neigh_event_send(neigh, skb, immediate_ok);
return 0;
}
+static inline int neigh_event_send(struct neighbour *neigh, struct sk_buff *skb)
+{
+ return neigh_event_send_probe(neigh, skb, true);
+}
+
#if IS_ENABLED(CONFIG_BRIDGE_NETFILTER)
static inline int neigh_hh_bridge(struct hh_cache *hh, struct sk_buff *skb)
{
@@ -504,10 +535,15 @@ static inline int neigh_output(struct neighbour *n, struct sk_buff *skb,
{
const struct hh_cache *hh = &n->hh;
- if ((n->nud_state & NUD_CONNECTED) && hh->hh_len && !skip_cache)
+ /* n->nud_state and hh->hh_len could be changed under us.
+ * neigh_hh_output() is taking care of the race later.
+ */
+ if (!skip_cache &&
+ (READ_ONCE(n->nud_state) & NUD_CONNECTED) &&
+ READ_ONCE(hh->hh_len))
return neigh_hh_output(hh, skb);
- else
- return n->output(n, skb);
+
+ return n->output(n, skb);
}
static inline struct neighbour *
diff --git a/include/net/net_debug.h b/include/net/net_debug.h
new file mode 100644
index 000000000000..1e74684cbbdb
--- /dev/null
+++ b/include/net/net_debug.h
@@ -0,0 +1,157 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _LINUX_NET_DEBUG_H
+#define _LINUX_NET_DEBUG_H
+
+#include <linux/bug.h>
+#include <linux/kern_levels.h>
+
+struct net_device;
+
+__printf(3, 4) __cold
+void netdev_printk(const char *level, const struct net_device *dev,
+ const char *format, ...);
+__printf(2, 3) __cold
+void netdev_emerg(const struct net_device *dev, const char *format, ...);
+__printf(2, 3) __cold
+void netdev_alert(const struct net_device *dev, const char *format, ...);
+__printf(2, 3) __cold
+void netdev_crit(const struct net_device *dev, const char *format, ...);
+__printf(2, 3) __cold
+void netdev_err(const struct net_device *dev, const char *format, ...);
+__printf(2, 3) __cold
+void netdev_warn(const struct net_device *dev, const char *format, ...);
+__printf(2, 3) __cold
+void netdev_notice(const struct net_device *dev, const char *format, ...);
+__printf(2, 3) __cold
+void netdev_info(const struct net_device *dev, const char *format, ...);
+
+#define netdev_level_once(level, dev, fmt, ...) \
+do { \
+ static bool __section(".data.once") __print_once; \
+ \
+ if (!__print_once) { \
+ __print_once = true; \
+ netdev_printk(level, dev, fmt, ##__VA_ARGS__); \
+ } \
+} while (0)
+
+#define netdev_emerg_once(dev, fmt, ...) \
+ netdev_level_once(KERN_EMERG, dev, fmt, ##__VA_ARGS__)
+#define netdev_alert_once(dev, fmt, ...) \
+ netdev_level_once(KERN_ALERT, dev, fmt, ##__VA_ARGS__)
+#define netdev_crit_once(dev, fmt, ...) \
+ netdev_level_once(KERN_CRIT, dev, fmt, ##__VA_ARGS__)
+#define netdev_err_once(dev, fmt, ...) \
+ netdev_level_once(KERN_ERR, dev, fmt, ##__VA_ARGS__)
+#define netdev_warn_once(dev, fmt, ...) \
+ netdev_level_once(KERN_WARNING, dev, fmt, ##__VA_ARGS__)
+#define netdev_notice_once(dev, fmt, ...) \
+ netdev_level_once(KERN_NOTICE, dev, fmt, ##__VA_ARGS__)
+#define netdev_info_once(dev, fmt, ...) \
+ netdev_level_once(KERN_INFO, dev, fmt, ##__VA_ARGS__)
+
+#if defined(CONFIG_DYNAMIC_DEBUG) || \
+ (defined(CONFIG_DYNAMIC_DEBUG_CORE) && defined(DYNAMIC_DEBUG_MODULE))
+#define netdev_dbg(__dev, format, args...) \
+do { \
+ dynamic_netdev_dbg(__dev, format, ##args); \
+} while (0)
+#elif defined(DEBUG)
+#define netdev_dbg(__dev, format, args...) \
+ netdev_printk(KERN_DEBUG, __dev, format, ##args)
+#else
+#define netdev_dbg(__dev, format, args...) \
+({ \
+ if (0) \
+ netdev_printk(KERN_DEBUG, __dev, format, ##args); \
+})
+#endif
+
+#if defined(VERBOSE_DEBUG)
+#define netdev_vdbg netdev_dbg
+#else
+
+#define netdev_vdbg(dev, format, args...) \
+({ \
+ if (0) \
+ netdev_printk(KERN_DEBUG, dev, format, ##args); \
+ 0; \
+})
+#endif
+
+/* netif printk helpers, similar to netdev_printk */
+
+#define netif_printk(priv, type, level, dev, fmt, args...) \
+do { \
+ if (netif_msg_##type(priv)) \
+ netdev_printk(level, (dev), fmt, ##args); \
+} while (0)
+
+#define netif_level(level, priv, type, dev, fmt, args...) \
+do { \
+ if (netif_msg_##type(priv)) \
+ netdev_##level(dev, fmt, ##args); \
+} while (0)
+
+#define netif_emerg(priv, type, dev, fmt, args...) \
+ netif_level(emerg, priv, type, dev, fmt, ##args)
+#define netif_alert(priv, type, dev, fmt, args...) \
+ netif_level(alert, priv, type, dev, fmt, ##args)
+#define netif_crit(priv, type, dev, fmt, args...) \
+ netif_level(crit, priv, type, dev, fmt, ##args)
+#define netif_err(priv, type, dev, fmt, args...) \
+ netif_level(err, priv, type, dev, fmt, ##args)
+#define netif_warn(priv, type, dev, fmt, args...) \
+ netif_level(warn, priv, type, dev, fmt, ##args)
+#define netif_notice(priv, type, dev, fmt, args...) \
+ netif_level(notice, priv, type, dev, fmt, ##args)
+#define netif_info(priv, type, dev, fmt, args...) \
+ netif_level(info, priv, type, dev, fmt, ##args)
+
+#if defined(CONFIG_DYNAMIC_DEBUG) || \
+ (defined(CONFIG_DYNAMIC_DEBUG_CORE) && defined(DYNAMIC_DEBUG_MODULE))
+#define netif_dbg(priv, type, netdev, format, args...) \
+do { \
+ if (netif_msg_##type(priv)) \
+ dynamic_netdev_dbg(netdev, format, ##args); \
+} while (0)
+#elif defined(DEBUG)
+#define netif_dbg(priv, type, dev, format, args...) \
+ netif_printk(priv, type, KERN_DEBUG, dev, format, ##args)
+#else
+#define netif_dbg(priv, type, dev, format, args...) \
+({ \
+ if (0) \
+ netif_printk(priv, type, KERN_DEBUG, dev, format, ##args); \
+ 0; \
+})
+#endif
+
+/* if @cond then downgrade to debug, else print at @level */
+#define netif_cond_dbg(priv, type, netdev, cond, level, fmt, args...) \
+ do { \
+ if (cond) \
+ netif_dbg(priv, type, netdev, fmt, ##args); \
+ else \
+ netif_ ## level(priv, type, netdev, fmt, ##args); \
+ } while (0)
+
+#if defined(VERBOSE_DEBUG)
+#define netif_vdbg netif_dbg
+#else
+#define netif_vdbg(priv, type, dev, format, args...) \
+({ \
+ if (0) \
+ netif_printk(priv, type, KERN_DEBUG, dev, format, ##args); \
+ 0; \
+})
+#endif
+
+
+#if defined(CONFIG_DEBUG_NET)
+#define DEBUG_NET_WARN_ON_ONCE(cond) (void)WARN_ON_ONCE(cond)
+#else
+#define DEBUG_NET_WARN_ON_ONCE(cond) BUILD_BUG_ON_INVALID(cond)
+#endif
+
+#endif /* _LINUX_NET_DEBUG_H */
diff --git a/include/net/net_namespace.h b/include/net/net_namespace.h
index 854d39ef1ca3..8c3587d5c308 100644
--- a/include/net/net_namespace.h
+++ b/include/net/net_namespace.h
@@ -22,17 +22,22 @@
#include <net/netns/nexthop.h>
#include <net/netns/ieee802154_6lowpan.h>
#include <net/netns/sctp.h>
-#include <net/netns/dccp.h>
#include <net/netns/netfilter.h>
-#include <net/netns/x_tables.h>
#if defined(CONFIG_NF_CONNTRACK) || defined(CONFIG_NF_CONNTRACK_MODULE)
#include <net/netns/conntrack.h>
#endif
+#if IS_ENABLED(CONFIG_NF_FLOW_TABLE)
+#include <net/netns/flow_table.h>
+#endif
#include <net/netns/nftables.h>
#include <net/netns/xfrm.h>
#include <net/netns/mpls.h>
#include <net/netns/can.h>
#include <net/netns/xdp.h>
+#include <net/netns/smc.h>
+#include <net/netns/bpf.h>
+#include <net/netns/mctp.h>
+#include <net/net_trackers.h>
#include <linux/ns_common.h>
#include <linux/idr.h>
#include <linux/skbuff.h>
@@ -59,12 +64,9 @@ struct net {
refcount_t passive; /* To decide when the network
* namespace should be freed.
*/
- refcount_t count; /* To decided when the network
- * namespace should be shut down.
- */
spinlock_t rules_mod_lock;
- unsigned int dev_unreg_count;
+ atomic_t dev_unreg_count;
unsigned int dev_base_seq; /* protected by rtnl_mutex */
int ifindex;
@@ -89,6 +91,7 @@ struct net {
struct idr netns_ids;
struct ns_common ns;
+ struct ref_tracker_dir refcnt_tracker;
struct list_head dev_base_head;
struct proc_dir_entry *proc_net;
@@ -120,7 +123,9 @@ struct net {
struct netns_core core;
struct netns_mib mib;
struct netns_packet packet;
+#if IS_ENABLED(CONFIG_UNIX)
struct netns_unix unx;
+#endif
struct netns_nexthop nexthop;
struct netns_ipv4 ipv4;
#if IS_ENABLED(CONFIG_IPV6)
@@ -132,29 +137,16 @@ struct net {
#if defined(CONFIG_IP_SCTP) || defined(CONFIG_IP_SCTP_MODULE)
struct netns_sctp sctp;
#endif
-#if defined(CONFIG_IP_DCCP) || defined(CONFIG_IP_DCCP_MODULE)
- struct netns_dccp dccp;
-#endif
#ifdef CONFIG_NETFILTER
struct netns_nf nf;
- struct netns_xt xt;
#if defined(CONFIG_NF_CONNTRACK) || defined(CONFIG_NF_CONNTRACK_MODULE)
struct netns_ct ct;
#endif
#if defined(CONFIG_NF_TABLES) || defined(CONFIG_NF_TABLES_MODULE)
struct netns_nftables nft;
#endif
-#if IS_ENABLED(CONFIG_NF_DEFRAG_IPV6)
- struct netns_nf_frag nf_frag;
- struct ctl_table_header *nf_frag_frags_hdr;
-#endif
- struct sock *nfnl;
- struct sock *nfnl_stash;
-#if IS_ENABLED(CONFIG_NETFILTER_NETLINK_ACCT)
- struct list_head nfnl_acct_list;
-#endif
-#if IS_ENABLED(CONFIG_NF_CT_NETLINK_TIMEOUT)
- struct list_head nfct_timeout_list;
+#if IS_ENABLED(CONFIG_NF_FLOW_TABLE)
+ struct netns_ft ft;
#endif
#endif
#ifdef CONFIG_WEXT_CORE
@@ -162,12 +154,16 @@ struct net {
#endif
struct net_generic __rcu *gen;
- struct bpf_prog __rcu *flow_dissector_prog;
+ /* Used to store attached BPF programs */
+ struct netns_bpf bpf;
/* Note : following structs are cache line aligned */
#ifdef CONFIG_XFRM
struct netns_xfrm xfrm;
#endif
+
+ u64 net_cookie; /* written once */
+
#if IS_ENABLED(CONFIG_IP_VS)
struct netns_ipvs *ipvs;
#endif
@@ -180,10 +176,16 @@ struct net {
#ifdef CONFIG_XDP_SOCKETS
struct netns_xdp xdp;
#endif
+#if IS_ENABLED(CONFIG_MCTP)
+ struct netns_mctp mctp;
+#endif
#if IS_ENABLED(CONFIG_CRYPTO_USER)
struct sock *crypto_nlsk;
#endif
struct sock *diag_nlsk;
+#if IS_ENABLED(CONFIG_SMC)
+ struct netns_smc smc;
+#endif
} __randomize_layout;
#include <linux/seq_file_net.h>
@@ -198,6 +200,9 @@ struct net *copy_net_ns(unsigned long flags, struct user_namespace *user_ns,
void net_ns_get_ownership(const struct net *net, kuid_t *uid, kgid_t *gid);
void net_ns_barrier(void);
+
+struct ns_common *get_net_ns(struct ns_common *ns);
+struct net *get_net_ns_by_fd(int fd);
#else /* CONFIG_NET_NS */
#include <linux/sched.h>
#include <linux/nsproxy.h>
@@ -217,13 +222,22 @@ static inline void net_ns_get_ownership(const struct net *net,
}
static inline void net_ns_barrier(void) {}
+
+static inline struct ns_common *get_net_ns(struct ns_common *ns)
+{
+ return ERR_PTR(-EINVAL);
+}
+
+static inline struct net *get_net_ns_by_fd(int fd)
+{
+ return ERR_PTR(-EINVAL);
+}
#endif /* CONFIG_NET_NS */
extern struct list_head net_namespace_list;
struct net *get_net_ns_by_pid(pid_t pid);
-struct net *get_net_ns_by_fd(int fd);
#ifdef CONFIG_SYSCTL
void ipx_register_sysctl(void);
@@ -236,9 +250,10 @@ void ipx_unregister_sysctl(void);
#ifdef CONFIG_NET_NS
void __put_net(struct net *net);
+/* Try using get_net_track() instead */
static inline struct net *get_net(struct net *net)
{
- refcount_inc(&net->count);
+ refcount_inc(&net->ns.count);
return net;
}
@@ -249,14 +264,15 @@ static inline struct net *maybe_get_net(struct net *net)
* exists. If the reference count is zero this
* function fails and returns NULL.
*/
- if (!refcount_inc_not_zero(&net->count))
+ if (!refcount_inc_not_zero(&net->ns.count))
net = NULL;
return net;
}
+/* Try using put_net_track() instead */
static inline void put_net(struct net *net)
{
- if (refcount_dec_and_test(&net->count))
+ if (refcount_dec_and_test(&net->ns.count))
__put_net(net);
}
@@ -268,7 +284,7 @@ int net_eq(const struct net *net1, const struct net *net2)
static inline int check_net(const struct net *net)
{
- return refcount_read(&net->count) != 0;
+ return refcount_read(&net->ns.count) != 0;
}
void net_drop_ns(void *);
@@ -304,6 +320,36 @@ static inline int check_net(const struct net *net)
#endif
+static inline void netns_tracker_alloc(struct net *net,
+ netns_tracker *tracker, gfp_t gfp)
+{
+#ifdef CONFIG_NET_NS_REFCNT_TRACKER
+ ref_tracker_alloc(&net->refcnt_tracker, tracker, gfp);
+#endif
+}
+
+static inline void netns_tracker_free(struct net *net,
+ netns_tracker *tracker)
+{
+#ifdef CONFIG_NET_NS_REFCNT_TRACKER
+ ref_tracker_free(&net->refcnt_tracker, tracker);
+#endif
+}
+
+static inline struct net *get_net_track(struct net *net,
+ netns_tracker *tracker, gfp_t gfp)
+{
+ get_net(net);
+ netns_tracker_alloc(net, tracker, gfp);
+ return net;
+}
+
+static inline void put_net_track(struct net *net, netns_tracker *tracker)
+{
+ netns_tracker_free(net, tracker);
+ put_net(net);
+}
+
typedef struct {
#ifdef CONFIG_NET_NS
struct net *net;
@@ -408,7 +454,6 @@ int register_pernet_device(struct pernet_operations *);
void unregister_pernet_device(struct pernet_operations *);
struct ctl_table;
-struct ctl_table_header;
#ifdef CONFIG_SYSCTL
int net_sysctl_init(void);
@@ -432,6 +477,13 @@ static inline int rt_genid_ipv4(const struct net *net)
return atomic_read(&net->ipv4.rt_genid);
}
+#if IS_ENABLED(CONFIG_IPV6)
+static inline int rt_genid_ipv6(const struct net *net)
+{
+ return atomic_read(&net->ipv6.fib6_sernum);
+}
+#endif
+
static inline void rt_genid_bump_ipv4(struct net *net)
{
atomic_inc(&net->ipv4.rt_genid);
@@ -469,4 +521,10 @@ static inline void fnhe_genid_bump(struct net *net)
atomic_inc(&net->fnhe_genid);
}
+#ifdef CONFIG_NET
+void net_ns_init(void);
+#else
+static inline void net_ns_init(void) {}
+#endif
+
#endif /* __NET_NET_NAMESPACE_H */
diff --git a/include/net/net_trackers.h b/include/net/net_trackers.h
new file mode 100644
index 000000000000..d94c76cf15a9
--- /dev/null
+++ b/include/net/net_trackers.h
@@ -0,0 +1,18 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef __NET_NET_TRACKERS_H
+#define __NET_NET_TRACKERS_H
+#include <linux/ref_tracker.h>
+
+#ifdef CONFIG_NET_DEV_REFCNT_TRACKER
+typedef struct ref_tracker *netdevice_tracker;
+#else
+typedef struct {} netdevice_tracker;
+#endif
+
+#ifdef CONFIG_NET_NS_REFCNT_TRACKER
+typedef struct ref_tracker *netns_tracker;
+#else
+typedef struct {} netns_tracker;
+#endif
+
+#endif /* __NET_NET_TRACKERS_H */
diff --git a/include/net/netevent.h b/include/net/netevent.h
index 4107016c3bb4..1be3757a8b7f 100644
--- a/include/net/netevent.h
+++ b/include/net/netevent.h
@@ -14,6 +14,7 @@
struct dst_entry;
struct neighbour;
+struct notifier_block ;
struct netevent_redirect {
struct dst_entry *old;
diff --git a/include/net/netfilter/ipv4/nf_defrag_ipv4.h b/include/net/netfilter/ipv4/nf_defrag_ipv4.h
index bcbd724cc048..7fda9ce9f694 100644
--- a/include/net/netfilter/ipv4/nf_defrag_ipv4.h
+++ b/include/net/netfilter/ipv4/nf_defrag_ipv4.h
@@ -3,6 +3,7 @@
#define _NF_DEFRAG_IPV4_H
struct net;
-int nf_defrag_ipv4_enable(struct net *);
+int nf_defrag_ipv4_enable(struct net *net);
+void nf_defrag_ipv4_disable(struct net *net);
#endif /* _NF_DEFRAG_IPV4_H */
diff --git a/include/net/netfilter/ipv4/nf_reject.h b/include/net/netfilter/ipv4/nf_reject.h
index 40e0e0623f46..c653fcb88354 100644
--- a/include/net/netfilter/ipv4/nf_reject.h
+++ b/include/net/netfilter/ipv4/nf_reject.h
@@ -8,8 +8,8 @@
#include <net/netfilter/nf_reject.h>
void nf_send_unreach(struct sk_buff *skb_in, int code, int hook);
-void nf_send_reset(struct net *net, struct sk_buff *oldskb, int hook);
-
+void nf_send_reset(struct net *net, struct sock *, struct sk_buff *oldskb,
+ int hook);
const struct tcphdr *nf_reject_ip_tcphdr_get(struct sk_buff *oldskb,
struct tcphdr *_oth, int hook);
struct iphdr *nf_reject_iphdr_put(struct sk_buff *nskb,
@@ -18,4 +18,14 @@ struct iphdr *nf_reject_iphdr_put(struct sk_buff *nskb,
void nf_reject_ip_tcphdr_put(struct sk_buff *nskb, const struct sk_buff *oldskb,
const struct tcphdr *oth);
+struct sk_buff *nf_reject_skb_v4_unreach(struct net *net,
+ struct sk_buff *oldskb,
+ const struct net_device *dev,
+ int hook, u8 code);
+struct sk_buff *nf_reject_skb_v4_tcp_reset(struct net *net,
+ struct sk_buff *oldskb,
+ const struct net_device *dev,
+ int hook);
+
+
#endif /* _IPV4_NF_REJECT_H */
diff --git a/include/net/netfilter/ipv6/nf_conntrack_ipv6.h b/include/net/netfilter/ipv6/nf_conntrack_ipv6.h
index 7b3c873f8839..e95483192d1b 100644
--- a/include/net/netfilter/ipv6/nf_conntrack_ipv6.h
+++ b/include/net/netfilter/ipv6/nf_conntrack_ipv6.h
@@ -4,7 +4,4 @@
extern const struct nf_conntrack_l4proto nf_conntrack_l4proto_icmpv6;
-#include <linux/sysctl.h>
-extern struct ctl_table nf_ct_ipv6_sysctl_table[];
-
#endif /* _NF_CONNTRACK_IPV6_H*/
diff --git a/include/net/netfilter/ipv6/nf_defrag_ipv6.h b/include/net/netfilter/ipv6/nf_defrag_ipv6.h
index 6d31cd041143..ceadf8ba25a4 100644
--- a/include/net/netfilter/ipv6/nf_defrag_ipv6.h
+++ b/include/net/netfilter/ipv6/nf_defrag_ipv6.h
@@ -5,7 +5,8 @@
#include <linux/skbuff.h>
#include <linux/types.h>
-int nf_defrag_ipv6_enable(struct net *);
+int nf_defrag_ipv6_enable(struct net *net);
+void nf_defrag_ipv6_disable(struct net *net);
int nf_ct_frag6_init(void);
void nf_ct_frag6_cleanup(void);
@@ -13,4 +14,9 @@ int nf_ct_frag6_gather(struct net *net, struct sk_buff *skb, u32 user);
struct inet_frags_ctl;
+struct nft_ct_frag6_pernet {
+ struct ctl_table_header *nf_frag_frags_hdr;
+ struct fqdir *fqdir;
+};
+
#endif /* _NF_DEFRAG_IPV6_H */
diff --git a/include/net/netfilter/ipv6/nf_reject.h b/include/net/netfilter/ipv6/nf_reject.h
index 4a3ef9ebdf6f..d729344ba644 100644
--- a/include/net/netfilter/ipv6/nf_reject.h
+++ b/include/net/netfilter/ipv6/nf_reject.h
@@ -7,9 +7,8 @@
void nf_send_unreach6(struct net *net, struct sk_buff *skb_in, unsigned char code,
unsigned int hooknum);
-
-void nf_send_reset6(struct net *net, struct sk_buff *oldskb, int hook);
-
+void nf_send_reset6(struct net *net, struct sock *sk, struct sk_buff *oldskb,
+ int hook);
const struct tcphdr *nf_reject_ip6_tcphdr_get(struct sk_buff *oldskb,
struct tcphdr *otcph,
unsigned int *otcplen, int hook);
@@ -20,4 +19,13 @@ void nf_reject_ip6_tcphdr_put(struct sk_buff *nskb,
const struct sk_buff *oldskb,
const struct tcphdr *oth, unsigned int otcplen);
+struct sk_buff *nf_reject_skb_v6_tcp_reset(struct net *net,
+ struct sk_buff *oldskb,
+ const struct net_device *dev,
+ int hook);
+struct sk_buff *nf_reject_skb_v6_unreach(struct net *net,
+ struct sk_buff *oldskb,
+ const struct net_device *dev,
+ int hook, u8 code);
+
#endif /* _IPV6_NF_REJECT_H */
diff --git a/include/net/netfilter/nf_conntrack.h b/include/net/netfilter/nf_conntrack.h
index 9f551f3b69c6..6a2019aaa464 100644
--- a/include/net/netfilter/nf_conntrack.h
+++ b/include/net/netfilter/nf_conntrack.h
@@ -43,10 +43,27 @@ union nf_conntrack_expect_proto {
/* insert expect proto private data here */
};
+struct nf_conntrack_net_ecache {
+ struct delayed_work dwork;
+ spinlock_t dying_lock;
+ struct hlist_nulls_head dying_list;
+};
+
struct nf_conntrack_net {
+ /* only used when new connection is allocated: */
+ atomic_t count;
+ unsigned int expect_count;
+
+ /* only used from work queues, configuration plane, and so on: */
unsigned int users4;
unsigned int users6;
unsigned int users_bridge;
+#ifdef CONFIG_SYSCTL
+ struct ctl_table_header *sysctl_header;
+#endif
+#ifdef CONFIG_NF_CONNTRACK_EVENTS
+ struct nf_conntrack_net_ecache ecache;
+#endif
};
#include <linux/types.h>
@@ -62,6 +79,8 @@ struct nf_conn {
* Hint, SKB address this struct and refcnt via skb->_nfct and
* helpers nf_conntrack_get() and nf_conntrack_put().
* Helper nf_ct_put() equals nf_conntrack_put() by dec refcnt,
+ * except that the latter uses internal indirection and does not
+ * result in a conntrack module dependency.
* beware nf_ct_get() is different and don't inc refcnt.
*/
struct nf_conntrack ct_general;
@@ -80,14 +99,13 @@ struct nf_conn {
/* Have we seen traffic both ways yet? (bitset) */
unsigned long status;
- u16 cpu;
possible_net_t ct_net;
#if IS_ENABLED(CONFIG_NF_NAT)
struct hlist_node nat_bysource;
#endif
/* all members below initialized via memset */
- u8 __nfct_init_offset[0];
+ struct { } __nfct_init_offset;
/* If we were expected by an expectation, this will be it */
struct nf_conn *master;
@@ -155,11 +173,13 @@ nf_ct_get(const struct sk_buff *skb, enum ip_conntrack_info *ctinfo)
return (struct nf_conn *)(nfct & NFCT_PTRMASK);
}
+void nf_ct_destroy(struct nf_conntrack *nfct);
+
/* decrement reference count on a conntrack */
static inline void nf_ct_put(struct nf_conn *ct)
{
- WARN_ON(!ct);
- nf_conntrack_put(&ct->ct_general);
+ if (ct && refcount_dec_and_test(&ct->ct_general.use))
+ nf_ct_destroy(&ct->ct_general);
}
/* Protocol module loading */
@@ -214,13 +234,16 @@ static inline bool nf_ct_kill(struct nf_conn *ct)
return nf_ct_delete(ct, 0, 0);
}
-/* Set all unconfirmed conntrack as dying */
-void nf_ct_unconfirmed_destroy(struct net *);
+struct nf_ct_iter_data {
+ struct net *net;
+ void *data;
+ u32 portid;
+ int report;
+};
/* Iterate over all conntracks: if iter returns true, it's deleted. */
-void nf_ct_iterate_cleanup_net(struct net *net,
- int (*iter)(struct nf_conn *i, void *data),
- void *data, u32 portid, int report);
+void nf_ct_iterate_cleanup_net(int (*iter)(struct nf_conn *i, void *data),
+ const struct nf_ct_iter_data *iter_data);
/* also set unconfirmed conntracks as dying. Only use in module exit path. */
void nf_ct_iterate_destroy(int (*iter)(struct nf_conn *i, void *data),
@@ -262,14 +285,14 @@ static inline bool nf_is_loopback_packet(const struct sk_buff *skb)
/* jiffies until ct expires, 0 if already expired */
static inline unsigned long nf_ct_expires(const struct nf_conn *ct)
{
- s32 timeout = ct->timeout - nfct_time_stamp;
+ s32 timeout = READ_ONCE(ct->timeout) - nfct_time_stamp;
- return timeout > 0 ? timeout : 0;
+ return max(timeout, 0);
}
static inline bool nf_ct_is_expired(const struct nf_conn *ct)
{
- return (__s32)(ct->timeout - nfct_time_stamp) <= 0;
+ return (__s32)(READ_ONCE(ct->timeout) - nfct_time_stamp) <= 0;
}
/* use after obtaining a reference count */
@@ -279,6 +302,18 @@ static inline bool nf_ct_should_gc(const struct nf_conn *ct)
!nf_ct_is_dying(ct);
}
+#define NF_CT_DAY (86400 * HZ)
+
+/* Set an arbitrary timeout large enough not to ever expire, this save
+ * us a check for the IPS_OFFLOAD_BIT from the packet path via
+ * nf_ct_is_expired().
+ */
+static inline void nf_ct_offload_timeout(struct nf_conn *ct)
+{
+ if (nf_ct_expires(ct) < NF_CT_DAY / 2)
+ WRITE_ONCE(ct->timeout, nfct_time_stamp + NF_CT_DAY);
+}
+
struct kernel_param;
int nf_conntrack_set_hashsize(const char *val, const struct kernel_param *kp);
@@ -286,7 +321,7 @@ int nf_conntrack_hash_resize(unsigned int hashsize);
extern struct hlist_nulls_head *nf_conntrack_hash;
extern unsigned int nf_conntrack_htable_size;
-extern seqcount_t nf_conntrack_generation;
+extern seqcount_spinlock_t nf_conntrack_generation;
extern unsigned int nf_conntrack_max;
/* must be called with rcu read lock held */
@@ -312,6 +347,7 @@ struct nf_conn *nf_ct_tmpl_alloc(struct net *net,
void nf_ct_tmpl_free(struct nf_conn *tmpl);
u32 nf_ct_get_id(const struct nf_conn *ct);
+u32 nf_conntrack_count(const struct net *net);
static inline void
nf_ct_set(struct sk_buff *skb, struct nf_conn *ct, enum ip_conntrack_info info)
@@ -319,6 +355,13 @@ nf_ct_set(struct sk_buff *skb, struct nf_conn *ct, enum ip_conntrack_info info)
skb_set_nfct(skb, (unsigned long)ct | info);
}
+extern unsigned int nf_conntrack_net_id;
+
+static inline struct nf_conntrack_net *nf_ct_pernet(const struct net *net)
+{
+ return net_generic(net, nf_conntrack_net_id);
+}
+
#define NF_CT_STAT_INC(net, count) __this_cpu_inc((net)->ct.stat->count)
#define NF_CT_STAT_INC_ATOMIC(net, count) this_cpu_inc((net)->ct.stat->count)
#define NF_CT_STAT_ADD_ATOMIC(net, count, v) this_cpu_add((net)->ct.stat->count, (v))
diff --git a/include/net/netfilter/nf_conntrack_acct.h b/include/net/netfilter/nf_conntrack_acct.h
index f7a060c6eb28..4b2b7f8914ea 100644
--- a/include/net/netfilter/nf_conntrack_acct.h
+++ b/include/net/netfilter/nf_conntrack_acct.h
@@ -65,9 +65,19 @@ static inline void nf_ct_set_acct(struct net *net, bool enable)
#endif
}
+void nf_ct_acct_add(struct nf_conn *ct, u32 dir, unsigned int packets,
+ unsigned int bytes);
+
+static inline void nf_ct_acct_update(struct nf_conn *ct, u32 dir,
+ unsigned int bytes)
+{
+#if IS_ENABLED(CONFIG_NF_CONNTRACK)
+ nf_ct_acct_add(ct, dir, 1, bytes);
+#endif
+}
+
void nf_conntrack_acct_pernet_init(struct net *net);
-int nf_conntrack_acct_init(void);
void nf_conntrack_acct_fini(void);
#endif /* _NF_CONNTRACK_ACCT_H */
diff --git a/include/net/netfilter/nf_conntrack_act_ct.h b/include/net/netfilter/nf_conntrack_act_ct.h
new file mode 100644
index 000000000000..078d3c52c03f
--- /dev/null
+++ b/include/net/netfilter/nf_conntrack_act_ct.h
@@ -0,0 +1,50 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+
+#ifndef _NF_CONNTRACK_ACT_CT_H
+#define _NF_CONNTRACK_ACT_CT_H
+
+#include <net/netfilter/nf_conntrack.h>
+#include <linux/netfilter/nf_conntrack_common.h>
+#include <net/netfilter/nf_conntrack_extend.h>
+
+struct nf_conn_act_ct_ext {
+ int ifindex[IP_CT_DIR_MAX];
+};
+
+static inline struct nf_conn_act_ct_ext *nf_conn_act_ct_ext_find(const struct nf_conn *ct)
+{
+#if IS_ENABLED(CONFIG_NET_ACT_CT)
+ return nf_ct_ext_find(ct, NF_CT_EXT_ACT_CT);
+#else
+ return NULL;
+#endif
+}
+
+static inline struct nf_conn_act_ct_ext *nf_conn_act_ct_ext_add(struct nf_conn *ct)
+{
+#if IS_ENABLED(CONFIG_NET_ACT_CT)
+ struct nf_conn_act_ct_ext *act_ct = nf_ct_ext_find(ct, NF_CT_EXT_ACT_CT);
+
+ if (act_ct)
+ return act_ct;
+
+ act_ct = nf_ct_ext_add(ct, NF_CT_EXT_ACT_CT, GFP_ATOMIC);
+ return act_ct;
+#else
+ return NULL;
+#endif
+}
+
+static inline void nf_conn_act_ct_ext_fill(struct sk_buff *skb, struct nf_conn *ct,
+ enum ip_conntrack_info ctinfo)
+{
+#if IS_ENABLED(CONFIG_NET_ACT_CT)
+ struct nf_conn_act_ct_ext *act_ct_ext;
+
+ act_ct_ext = nf_conn_act_ct_ext_find(ct);
+ if (dev_net(skb->dev) == &init_net && act_ct_ext)
+ act_ct_ext->ifindex[CTINFO2DIR(ctinfo)] = skb->dev->ifindex;
+#endif
+}
+
+#endif /* _NF_CONNTRACK_ACT_CT_H */
diff --git a/include/net/netfilter/nf_conntrack_bpf.h b/include/net/netfilter/nf_conntrack_bpf.h
new file mode 100644
index 000000000000..2d0da478c8e0
--- /dev/null
+++ b/include/net/netfilter/nf_conntrack_bpf.h
@@ -0,0 +1,46 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+
+#ifndef _NF_CONNTRACK_BPF_H
+#define _NF_CONNTRACK_BPF_H
+
+#include <linux/kconfig.h>
+#include <net/netfilter/nf_conntrack.h>
+
+struct nf_conn___init {
+ struct nf_conn ct;
+};
+
+#if (IS_BUILTIN(CONFIG_NF_CONNTRACK) && IS_ENABLED(CONFIG_DEBUG_INFO_BTF)) || \
+ (IS_MODULE(CONFIG_NF_CONNTRACK) && IS_ENABLED(CONFIG_DEBUG_INFO_BTF_MODULES))
+
+extern int register_nf_conntrack_bpf(void);
+extern void cleanup_nf_conntrack_bpf(void);
+
+#else
+
+static inline int register_nf_conntrack_bpf(void)
+{
+ return 0;
+}
+
+static inline void cleanup_nf_conntrack_bpf(void)
+{
+}
+
+#endif
+
+#if (IS_BUILTIN(CONFIG_NF_NAT) && IS_ENABLED(CONFIG_DEBUG_INFO_BTF)) || \
+ (IS_MODULE(CONFIG_NF_NAT) && IS_ENABLED(CONFIG_DEBUG_INFO_BTF_MODULES))
+
+extern int register_nf_nat_bpf(void);
+
+#else
+
+static inline int register_nf_nat_bpf(void)
+{
+ return 0;
+}
+
+#endif
+
+#endif /* _NF_CONNTRACK_BPF_H */
diff --git a/include/net/netfilter/nf_conntrack_core.h b/include/net/netfilter/nf_conntrack_core.h
index 09f2efea0b97..b2b9de70d9f4 100644
--- a/include/net/netfilter/nf_conntrack_core.h
+++ b/include/net/netfilter/nf_conntrack_core.h
@@ -30,7 +30,6 @@ void nf_conntrack_cleanup_net(struct net *net);
void nf_conntrack_cleanup_net_list(struct list_head *net_exit_list);
void nf_conntrack_proto_pernet_init(struct net *net);
-void nf_conntrack_proto_pernet_fini(struct net *net);
int nf_conntrack_proto_init(void);
void nf_conntrack_proto_fini(void);
@@ -59,9 +58,14 @@ static inline int nf_conntrack_confirm(struct sk_buff *skb)
int ret = NF_ACCEPT;
if (ct) {
- if (!nf_ct_is_confirmed(ct))
+ if (!nf_ct_is_confirmed(ct)) {
ret = __nf_conntrack_confirm(skb);
- if (likely(ret == NF_ACCEPT))
+
+ if (ret == NF_ACCEPT)
+ ct = (struct nf_conn *)skb_nfct(skb);
+ }
+
+ if (ret == NF_ACCEPT && nf_ct_ecache_exist(ct))
nf_ct_deliver_cached_events(ct);
}
return ret;
@@ -80,4 +84,17 @@ void nf_conntrack_lock(spinlock_t *lock);
extern spinlock_t nf_conntrack_expect_lock;
+/* ctnetlink code shared by both ctnetlink and nf_conntrack_bpf */
+
+static inline void __nf_ct_set_timeout(struct nf_conn *ct, u64 timeout)
+{
+ if (timeout > INT_MAX)
+ timeout = INT_MAX;
+ WRITE_ONCE(ct->timeout, nfct_time_stamp + (u32)timeout);
+}
+
+int __nf_ct_change_timeout(struct nf_conn *ct, u64 cta_timeout);
+void __nf_ct_change_status(struct nf_conn *ct, unsigned long on, unsigned long off);
+int nf_ct_change_status_common(struct nf_conn *ct, unsigned int status);
+
#endif /* _NF_CONNTRACK_CORE_H */
diff --git a/include/net/netfilter/nf_conntrack_count.h b/include/net/netfilter/nf_conntrack_count.h
index 9645b47fa7e4..e227d997fc71 100644
--- a/include/net/netfilter/nf_conntrack_count.h
+++ b/include/net/netfilter/nf_conntrack_count.h
@@ -10,6 +10,7 @@ struct nf_conncount_data;
struct nf_conncount_list {
spinlock_t list_lock;
+ u32 last_gc; /* jiffies at most recent gc */
struct list_head head; /* connections with the same filtering key */
unsigned int count; /* length of list */
};
diff --git a/include/net/netfilter/nf_conntrack_ecache.h b/include/net/netfilter/nf_conntrack_ecache.h
index eb81f9195e28..0c1dac318e02 100644
--- a/include/net/netfilter/nf_conntrack_ecache.h
+++ b/include/net/netfilter/nf_conntrack_ecache.h
@@ -14,17 +14,15 @@
#include <net/netfilter/nf_conntrack_extend.h>
enum nf_ct_ecache_state {
- NFCT_ECACHE_UNKNOWN, /* destroy event not sent */
NFCT_ECACHE_DESTROY_FAIL, /* tried but failed to send destroy event */
NFCT_ECACHE_DESTROY_SENT, /* sent destroy event after failure */
};
struct nf_conntrack_ecache {
unsigned long cache; /* bitops want long */
- u16 missed; /* missed events */
u16 ctmask; /* bitmask of ct events to be delivered */
u16 expmask; /* bitmask of expect events to be delivered */
- enum nf_ct_ecache_state state:8;/* ecache state */
+ u32 missed; /* missed events */
u32 portid; /* netlink portid of destroyer */
};
@@ -38,28 +36,12 @@ nf_ct_ecache_find(const struct nf_conn *ct)
#endif
}
-static inline struct nf_conntrack_ecache *
-nf_ct_ecache_ext_add(struct nf_conn *ct, u16 ctmask, u16 expmask, gfp_t gfp)
+static inline bool nf_ct_ecache_exist(const struct nf_conn *ct)
{
#ifdef CONFIG_NF_CONNTRACK_EVENTS
- struct net *net = nf_ct_net(ct);
- struct nf_conntrack_ecache *e;
-
- if (!ctmask && !expmask && net->ct.sysctl_events) {
- ctmask = ~0;
- expmask = ~0;
- }
- if (!ctmask && !expmask)
- return NULL;
-
- e = nf_ct_ext_add(ct, NF_CT_EXT_ECACHE, gfp);
- if (e) {
- e->ctmask = ctmask;
- e->expmask = expmask;
- }
- return e;
+ return nf_ct_ext_exist(ct, NF_CT_EXT_ECACHE);
#else
- return NULL;
+ return false;
#endif
}
@@ -72,19 +54,26 @@ struct nf_ct_event {
int report;
};
+struct nf_exp_event {
+ struct nf_conntrack_expect *exp;
+ u32 portid;
+ int report;
+};
+
struct nf_ct_event_notifier {
- int (*fcn)(unsigned int events, struct nf_ct_event *item);
+ int (*ct_event)(unsigned int events, const struct nf_ct_event *item);
+ int (*exp_event)(unsigned int events, const struct nf_exp_event *item);
};
-int nf_conntrack_register_notifier(struct net *net,
- struct nf_ct_event_notifier *nb);
-void nf_conntrack_unregister_notifier(struct net *net,
- struct nf_ct_event_notifier *nb);
+void nf_conntrack_register_notifier(struct net *net,
+ const struct nf_ct_event_notifier *nb);
+void nf_conntrack_unregister_notifier(struct net *net);
void nf_ct_deliver_cached_events(struct nf_conn *ct);
int nf_conntrack_eventmask_report(unsigned int eventmask, struct nf_conn *ct,
u32 portid, int report);
+bool nf_ct_ecache_ext_add(struct nf_conn *ct, u16 ctmask, u16 expmask, gfp_t gfp);
#else
static inline void nf_ct_deliver_cached_events(const struct nf_conn *ct)
@@ -99,6 +88,10 @@ static inline int nf_conntrack_eventmask_report(unsigned int eventmask,
return 0;
}
+static inline bool nf_ct_ecache_ext_add(struct nf_conn *ct, u16 ctmask, u16 expmask, gfp_t gfp)
+{
+ return false;
+}
#endif
static inline void
@@ -124,59 +117,38 @@ nf_conntrack_event_report(enum ip_conntrack_events event, struct nf_conn *ct,
u32 portid, int report)
{
#ifdef CONFIG_NF_CONNTRACK_EVENTS
- const struct net *net = nf_ct_net(ct);
-
- if (!rcu_access_pointer(net->ct.nf_conntrack_event_cb))
- return 0;
-
- return nf_conntrack_eventmask_report(1 << event, ct, portid, report);
-#else
- return 0;
+ if (nf_ct_ecache_exist(ct))
+ return nf_conntrack_eventmask_report(1 << event, ct, portid, report);
#endif
+ return 0;
}
static inline int
nf_conntrack_event(enum ip_conntrack_events event, struct nf_conn *ct)
{
#ifdef CONFIG_NF_CONNTRACK_EVENTS
- const struct net *net = nf_ct_net(ct);
-
- if (!rcu_access_pointer(net->ct.nf_conntrack_event_cb))
- return 0;
-
- return nf_conntrack_eventmask_report(1 << event, ct, 0, 0);
-#else
- return 0;
+ if (nf_ct_ecache_exist(ct))
+ return nf_conntrack_eventmask_report(1 << event, ct, 0, 0);
#endif
+ return 0;
}
#ifdef CONFIG_NF_CONNTRACK_EVENTS
-
-struct nf_exp_event {
- struct nf_conntrack_expect *exp;
- u32 portid;
- int report;
-};
-
-struct nf_exp_event_notifier {
- int (*fcn)(unsigned int events, struct nf_exp_event *item);
-};
-
-int nf_ct_expect_register_notifier(struct net *net,
- struct nf_exp_event_notifier *nb);
-void nf_ct_expect_unregister_notifier(struct net *net,
- struct nf_exp_event_notifier *nb);
-
void nf_ct_expect_event_report(enum ip_conntrack_expect_events event,
struct nf_conntrack_expect *exp,
u32 portid, int report);
+void nf_conntrack_ecache_work(struct net *net, enum nf_ct_ecache_state state);
+
void nf_conntrack_ecache_pernet_init(struct net *net);
void nf_conntrack_ecache_pernet_fini(struct net *net);
-int nf_conntrack_ecache_init(void);
-void nf_conntrack_ecache_fini(void);
+struct nf_conntrack_net_ecache *nf_conn_pernet_ecache(const struct net *net);
+static inline bool nf_conntrack_ecache_dwork_pending(const struct net *net)
+{
+ return net->ct.ecache_dwork_pending;
+}
#else /* CONFIG_NF_CONNTRACK_EVENTS */
static inline void nf_ct_expect_event_report(enum ip_conntrack_expect_events e,
@@ -186,43 +158,18 @@ static inline void nf_ct_expect_event_report(enum ip_conntrack_expect_events e,
{
}
-static inline void nf_conntrack_ecache_pernet_init(struct net *net)
+static inline void nf_conntrack_ecache_work(struct net *net,
+ enum nf_ct_ecache_state s)
{
}
-static inline void nf_conntrack_ecache_pernet_fini(struct net *net)
+static inline void nf_conntrack_ecache_pernet_init(struct net *net)
{
}
-static inline int nf_conntrack_ecache_init(void)
-{
- return 0;
-}
-
-static inline void nf_conntrack_ecache_fini(void)
+static inline void nf_conntrack_ecache_pernet_fini(struct net *net)
{
}
-
+static inline bool nf_conntrack_ecache_dwork_pending(const struct net *net) { return false; }
#endif /* CONFIG_NF_CONNTRACK_EVENTS */
-
-static inline void nf_conntrack_ecache_delayed_work(struct net *net)
-{
-#ifdef CONFIG_NF_CONNTRACK_EVENTS
- if (!delayed_work_pending(&net->ct.ecache_dwork)) {
- schedule_delayed_work(&net->ct.ecache_dwork, HZ);
- net->ct.ecache_dwork_pending = true;
- }
-#endif
-}
-
-static inline void nf_conntrack_ecache_work(struct net *net)
-{
-#ifdef CONFIG_NF_CONNTRACK_EVENTS
- if (net->ct.ecache_dwork_pending) {
- net->ct.ecache_dwork_pending = false;
- mod_delayed_work(system_wq, &net->ct.ecache_dwork, 0);
- }
-#endif
-}
-
#endif /*_NF_CONNTRACK_ECACHE_H*/
diff --git a/include/net/netfilter/nf_conntrack_extend.h b/include/net/netfilter/nf_conntrack_extend.h
index 5ae5295aa46d..0b247248b032 100644
--- a/include/net/netfilter/nf_conntrack_extend.h
+++ b/include/net/netfilter/nf_conntrack_extend.h
@@ -28,24 +28,18 @@ enum nf_ct_ext_id {
#if IS_ENABLED(CONFIG_NETFILTER_SYNPROXY)
NF_CT_EXT_SYNPROXY,
#endif
+#if IS_ENABLED(CONFIG_NET_ACT_CT)
+ NF_CT_EXT_ACT_CT,
+#endif
NF_CT_EXT_NUM,
};
-#define NF_CT_EXT_HELPER_TYPE struct nf_conn_help
-#define NF_CT_EXT_NAT_TYPE struct nf_conn_nat
-#define NF_CT_EXT_SEQADJ_TYPE struct nf_conn_seqadj
-#define NF_CT_EXT_ACCT_TYPE struct nf_conn_acct
-#define NF_CT_EXT_ECACHE_TYPE struct nf_conntrack_ecache
-#define NF_CT_EXT_TSTAMP_TYPE struct nf_conn_tstamp
-#define NF_CT_EXT_TIMEOUT_TYPE struct nf_conn_timeout
-#define NF_CT_EXT_LABELS_TYPE struct nf_conn_labels
-#define NF_CT_EXT_SYNPROXY_TYPE struct nf_conn_synproxy
-
/* Extensions: optional stuff which isn't permanently in struct. */
struct nf_ct_ext {
u8 offset[NF_CT_EXT_NUM];
u8 len;
- char data[0];
+ unsigned int gen_id;
+ char data[] __aligned(8);
};
static inline bool __nf_ct_ext_exist(const struct nf_ct_ext *ext, u8 id)
@@ -58,33 +52,28 @@ static inline bool nf_ct_ext_exist(const struct nf_conn *ct, u8 id)
return (ct->ext && __nf_ct_ext_exist(ct->ext, id));
}
-static inline void *__nf_ct_ext_find(const struct nf_conn *ct, u8 id)
+void *__nf_ct_ext_find(const struct nf_ct_ext *ext, u8 id);
+
+static inline void *nf_ct_ext_find(const struct nf_conn *ct, u8 id)
{
- if (!nf_ct_ext_exist(ct, id))
+ struct nf_ct_ext *ext = ct->ext;
+
+ if (!ext || !__nf_ct_ext_exist(ext, id))
return NULL;
+ if (unlikely(ext->gen_id))
+ return __nf_ct_ext_find(ext, id);
+
return (void *)ct->ext + ct->ext->offset[id];
}
-#define nf_ct_ext_find(ext, id) \
- ((id##_TYPE *)__nf_ct_ext_find((ext), (id)))
-
-/* Destroy all relationships */
-void nf_ct_ext_destroy(struct nf_conn *ct);
/* Add this type, returns pointer to data or NULL. */
void *nf_ct_ext_add(struct nf_conn *ct, enum nf_ct_ext_id id, gfp_t gfp);
-struct nf_ct_ext_type {
- /* Destroys relationships (can be NULL). */
- void (*destroy)(struct nf_conn *ct);
-
- enum nf_ct_ext_id id;
-
- /* Length and min alignment. */
- u8 len;
- u8 align;
-};
+/* ext genid. if ext->id != ext_genid, extensions cannot be used
+ * anymore unless conntrack has CONFIRMED bit set.
+ */
+extern atomic_t nf_conntrack_ext_genid;
+void nf_ct_ext_bump_genid(void);
-int nf_ct_extend_register(const struct nf_ct_ext_type *type);
-void nf_ct_extend_unregister(const struct nf_ct_ext_type *type);
#endif /* _NF_CONNTRACK_EXTEND_H */
diff --git a/include/net/netfilter/nf_conntrack_helper.h b/include/net/netfilter/nf_conntrack_helper.h
index 37f0fbefb060..9939c366f720 100644
--- a/include/net/netfilter/nf_conntrack_helper.h
+++ b/include/net/netfilter/nf_conntrack_helper.h
@@ -177,4 +177,5 @@ void nf_nat_helper_unregister(struct nf_conntrack_nat_helper *nat);
int nf_nat_helper_try_module_get(const char *name, u16 l3num,
u8 protonum);
void nf_nat_helper_put(struct nf_conntrack_helper *helper);
+void nf_ct_set_auto_assign_helper_warned(struct net *net);
#endif /*_NF_CONNTRACK_HELPER_H*/
diff --git a/include/net/netfilter/nf_conntrack_l4proto.h b/include/net/netfilter/nf_conntrack_l4proto.h
index 4cad1f0a327a..1f47bef51722 100644
--- a/include/net/netfilter/nf_conntrack_l4proto.h
+++ b/include/net/netfilter/nf_conntrack_l4proto.h
@@ -32,7 +32,7 @@ struct nf_conntrack_l4proto {
/* convert protoinfo to nfnetink attributes */
int (*to_nlattr)(struct sk_buff *skb, struct nlattr *nla,
- struct nf_conn *ct);
+ struct nf_conn *ct, bool destroy);
/* convert nfnetlink attributes to protoinfo */
int (*from_nlattr)(struct nlattr *tb[], struct nf_conn *ct);
@@ -42,7 +42,8 @@ struct nf_conntrack_l4proto {
/* Calculate tuple nlattr size */
unsigned int (*nlattr_tuple_size)(void);
int (*nlattr_to_tuple)(struct nlattr *tb[],
- struct nf_conntrack_tuple *t);
+ struct nf_conntrack_tuple *t,
+ u_int32_t flags);
const struct nla_policy *nla_policy;
struct {
@@ -152,27 +153,32 @@ const struct nf_conntrack_l4proto *nf_ct_l4proto_find(u8 l4proto);
int nf_ct_port_tuple_to_nlattr(struct sk_buff *skb,
const struct nf_conntrack_tuple *tuple);
int nf_ct_port_nlattr_to_tuple(struct nlattr *tb[],
- struct nf_conntrack_tuple *t);
+ struct nf_conntrack_tuple *t,
+ u_int32_t flags);
unsigned int nf_ct_port_nlattr_tuple_size(void);
extern const struct nla_policy nf_ct_port_nla_policy[];
#ifdef CONFIG_SYSCTL
-__printf(3, 4) __cold
+__printf(4, 5) __cold
void nf_ct_l4proto_log_invalid(const struct sk_buff *skb,
const struct nf_conn *ct,
+ const struct nf_hook_state *state,
const char *fmt, ...);
-__printf(5, 6) __cold
+__printf(4, 5) __cold
void nf_l4proto_log_invalid(const struct sk_buff *skb,
- struct net *net,
- u16 pf, u8 protonum,
+ const struct nf_hook_state *state,
+ u8 protonum,
const char *fmt, ...);
#else
-static inline __printf(5, 6) __cold
-void nf_l4proto_log_invalid(const struct sk_buff *skb, struct net *net,
- u16 pf, u8 protonum, const char *fmt, ...) {}
-static inline __printf(3, 4) __cold
+static inline __printf(4, 5) __cold
+void nf_l4proto_log_invalid(const struct sk_buff *skb,
+ const struct nf_hook_state *state,
+ u8 protonum,
+ const char *fmt, ...) {}
+static inline __printf(4, 5) __cold
void nf_ct_l4proto_log_invalid(const struct sk_buff *skb,
const struct nf_conn *ct,
+ const struct nf_hook_state *state,
const char *fmt, ...) { }
#endif /* CONFIG_SYSCTL */
@@ -201,6 +207,20 @@ static inline struct nf_icmp_net *nf_icmpv6_pernet(struct net *net)
{
return &net->ct.nf_ct_proto.icmpv6;
}
+
+/* Caller must check nf_ct_protonum(ct) is IPPROTO_TCP before calling. */
+static inline void nf_ct_set_tcp_be_liberal(struct nf_conn *ct)
+{
+ ct->proto.tcp.seen[0].flags |= IP_CT_TCP_FLAG_BE_LIBERAL;
+ ct->proto.tcp.seen[1].flags |= IP_CT_TCP_FLAG_BE_LIBERAL;
+}
+
+/* Caller must check nf_ct_protonum(ct) is IPPROTO_TCP before calling. */
+static inline bool nf_conntrack_tcp_established(const struct nf_conn *ct)
+{
+ return ct->proto.tcp.state == TCP_CONNTRACK_ESTABLISHED &&
+ test_bit(IPS_ASSURED_BIT, &ct->status);
+}
#endif
#ifdef CONFIG_NF_CT_PROTO_DCCP
diff --git a/include/net/netfilter/nf_conntrack_labels.h b/include/net/netfilter/nf_conntrack_labels.h
index ba916411c4e1..66bab6c60d12 100644
--- a/include/net/netfilter/nf_conntrack_labels.h
+++ b/include/net/netfilter/nf_conntrack_labels.h
@@ -17,10 +17,18 @@ struct nf_conn_labels {
unsigned long bits[NF_CT_LABELS_MAX_SIZE / sizeof(long)];
};
+/* Can't use nf_ct_ext_find(), flow dissector cannot use symbols
+ * exported by nf_conntrack module.
+ */
static inline struct nf_conn_labels *nf_ct_labels_find(const struct nf_conn *ct)
{
#ifdef CONFIG_NF_CONNTRACK_LABELS
- return nf_ct_ext_find(ct, NF_CT_EXT_LABELS);
+ struct nf_ct_ext *ext = ct->ext;
+
+ if (!ext || !__nf_ct_ext_exist(ext, NF_CT_EXT_LABELS))
+ return NULL;
+
+ return (void *)ct->ext + ct->ext->offset[NF_CT_EXT_LABELS];
#else
return NULL;
#endif
@@ -45,12 +53,9 @@ int nf_connlabels_replace(struct nf_conn *ct,
#ifdef CONFIG_NF_CONNTRACK_LABELS
int nf_conntrack_labels_init(void);
-void nf_conntrack_labels_fini(void);
int nf_connlabels_get(struct net *net, unsigned int bit);
void nf_connlabels_put(struct net *net);
#else
-static inline int nf_conntrack_labels_init(void) { return 0; }
-static inline void nf_conntrack_labels_fini(void) {}
static inline int nf_connlabels_get(struct net *net, unsigned int bit) { return 0; }
static inline void nf_connlabels_put(struct net *net) {}
#endif
diff --git a/include/net/netfilter/nf_conntrack_seqadj.h b/include/net/netfilter/nf_conntrack_seqadj.h
index 0a10b50537ae..883c414b768e 100644
--- a/include/net/netfilter/nf_conntrack_seqadj.h
+++ b/include/net/netfilter/nf_conntrack_seqadj.h
@@ -42,7 +42,4 @@ int nf_ct_seq_adjust(struct sk_buff *skb, struct nf_conn *ct,
enum ip_conntrack_info ctinfo, unsigned int protoff);
s32 nf_ct_seq_offset(const struct nf_conn *ct, enum ip_conntrack_dir, u32 seq);
-int nf_conntrack_seqadj_init(void);
-void nf_conntrack_seqadj_fini(void);
-
#endif /* _NF_CONNTRACK_SEQADJ_H */
diff --git a/include/net/netfilter/nf_conntrack_timeout.h b/include/net/netfilter/nf_conntrack_timeout.h
index 6dd72396f534..9fdaba911de6 100644
--- a/include/net/netfilter/nf_conntrack_timeout.h
+++ b/include/net/netfilter/nf_conntrack_timeout.h
@@ -14,15 +14,7 @@
struct nf_ct_timeout {
__u16 l3num;
const struct nf_conntrack_l4proto *l4proto;
- char data[0];
-};
-
-struct ctnl_timeout {
- struct list_head head;
- struct rcu_head rcu_head;
- refcount_t refcnt;
- char name[CTNL_TIMEOUT_NAME_MAX];
- struct nf_ct_timeout timeout;
+ char data[];
};
struct nf_conn_timeout {
@@ -89,23 +81,11 @@ static inline unsigned int *nf_ct_timeout_lookup(const struct nf_conn *ct)
}
#ifdef CONFIG_NF_CONNTRACK_TIMEOUT
-int nf_conntrack_timeout_init(void);
-void nf_conntrack_timeout_fini(void);
void nf_ct_untimeout(struct net *net, struct nf_ct_timeout *timeout);
int nf_ct_set_timeout(struct net *net, struct nf_conn *ct, u8 l3num, u8 l4num,
const char *timeout_name);
void nf_ct_destroy_timeout(struct nf_conn *ct);
#else
-static inline int nf_conntrack_timeout_init(void)
-{
- return 0;
-}
-
-static inline void nf_conntrack_timeout_fini(void)
-{
- return;
-}
-
static inline int nf_ct_set_timeout(struct net *net, struct nf_conn *ct,
u8 l3num, u8 l4num,
const char *timeout_name)
@@ -120,8 +100,12 @@ static inline void nf_ct_destroy_timeout(struct nf_conn *ct)
#endif /* CONFIG_NF_CONNTRACK_TIMEOUT */
#ifdef CONFIG_NF_CONNTRACK_TIMEOUT
-extern struct nf_ct_timeout *(*nf_ct_timeout_find_get_hook)(struct net *net, const char *name);
-extern void (*nf_ct_timeout_put_hook)(struct nf_ct_timeout *timeout);
+struct nf_ct_timeout_hooks {
+ struct nf_ct_timeout *(*timeout_find_get)(struct net *net, const char *name);
+ void (*timeout_put)(struct nf_ct_timeout *timeout);
+};
+
+extern const struct nf_ct_timeout_hooks __rcu *nf_ct_timeout_hook;
#endif
#endif /* _NF_CONNTRACK_TIMEOUT_H */
diff --git a/include/net/netfilter/nf_conntrack_timestamp.h b/include/net/netfilter/nf_conntrack_timestamp.h
index 820ea34b6029..57138d974a9f 100644
--- a/include/net/netfilter/nf_conntrack_timestamp.h
+++ b/include/net/netfilter/nf_conntrack_timestamp.h
@@ -40,21 +40,8 @@ struct nf_conn_tstamp *nf_ct_tstamp_ext_add(struct nf_conn *ct, gfp_t gfp)
#ifdef CONFIG_NF_CONNTRACK_TIMESTAMP
void nf_conntrack_tstamp_pernet_init(struct net *net);
-
-int nf_conntrack_tstamp_init(void);
-void nf_conntrack_tstamp_fini(void);
#else
static inline void nf_conntrack_tstamp_pernet_init(struct net *net) {}
-
-static inline int nf_conntrack_tstamp_init(void)
-{
- return 0;
-}
-
-static inline void nf_conntrack_tstamp_fini(void)
-{
- return;
-}
#endif /* CONFIG_NF_CONNTRACK_TIMESTAMP */
#endif /* _NF_CONNTRACK_TSTAMP_H */
diff --git a/include/net/netfilter/nf_flow_table.h b/include/net/netfilter/nf_flow_table.h
index e0f709d9d547..cd982f4a0f50 100644
--- a/include/net/netfilter/nf_flow_table.h
+++ b/include/net/netfilter/nf_flow_table.h
@@ -10,12 +10,45 @@
#include <linux/netfilter/nf_conntrack_tuple_common.h>
#include <net/flow_offload.h>
#include <net/dst.h>
+#include <linux/if_pppox.h>
+#include <linux/ppp_defs.h>
struct nf_flowtable;
struct nf_flow_rule;
struct flow_offload;
enum flow_offload_tuple_dir;
+struct nf_flow_key {
+ struct flow_dissector_key_meta meta;
+ struct flow_dissector_key_control control;
+ struct flow_dissector_key_control enc_control;
+ struct flow_dissector_key_basic basic;
+ struct flow_dissector_key_vlan vlan;
+ struct flow_dissector_key_vlan cvlan;
+ union {
+ struct flow_dissector_key_ipv4_addrs ipv4;
+ struct flow_dissector_key_ipv6_addrs ipv6;
+ };
+ struct flow_dissector_key_keyid enc_key_id;
+ union {
+ struct flow_dissector_key_ipv4_addrs enc_ipv4;
+ struct flow_dissector_key_ipv6_addrs enc_ipv6;
+ };
+ struct flow_dissector_key_tcp tcp;
+ struct flow_dissector_key_ports tp;
+} __aligned(BITS_PER_LONG / 8); /* Ensure that we can do comparisons as longs. */
+
+struct nf_flow_match {
+ struct flow_dissector dissector;
+ struct nf_flow_key key;
+ struct nf_flow_key mask;
+};
+
+struct nf_flow_rule {
+ struct nf_flow_match match;
+ struct flow_rule *rule;
+};
+
struct nf_flowtable_type {
struct list_head list;
int family;
@@ -33,7 +66,8 @@ struct nf_flowtable_type {
};
enum nf_flowtable_flags {
- NF_FLOWTABLE_HW_OFFLOAD = 0x1,
+ NF_FLOWTABLE_HW_OFFLOAD = 0x1, /* NFT_FLOWTABLE_HW_OFFLOAD */
+ NF_FLOWTABLE_COUNTER = 0x2, /* NFT_FLOWTABLE_COUNTER */
};
struct nf_flowtable {
@@ -44,6 +78,7 @@ struct nf_flowtable {
struct delayed_work gc_work;
unsigned int flags;
struct flow_block flow_block;
+ struct rw_semaphore flow_block_lock; /* Guards flow_block */
possible_net_t net;
};
@@ -55,9 +90,19 @@ static inline bool nf_flowtable_hw_offload(struct nf_flowtable *flowtable)
enum flow_offload_tuple_dir {
FLOW_OFFLOAD_DIR_ORIGINAL = IP_CT_DIR_ORIGINAL,
FLOW_OFFLOAD_DIR_REPLY = IP_CT_DIR_REPLY,
- FLOW_OFFLOAD_DIR_MAX = IP_CT_DIR_MAX
+};
+#define FLOW_OFFLOAD_DIR_MAX IP_CT_DIR_MAX
+
+enum flow_offload_xmit_type {
+ FLOW_OFFLOAD_XMIT_UNSPEC = 0,
+ FLOW_OFFLOAD_XMIT_NEIGH,
+ FLOW_OFFLOAD_XMIT_XFRM,
+ FLOW_OFFLOAD_XMIT_DIRECT,
+ FLOW_OFFLOAD_XMIT_TC,
};
+#define NF_FLOW_TABLE_ENCAP_MAX 2
+
struct flow_offload_tuple {
union {
struct in_addr src_v4;
@@ -76,11 +121,34 @@ struct flow_offload_tuple {
u8 l3proto;
u8 l4proto;
- u8 dir;
+ struct {
+ u16 id;
+ __be16 proto;
+ } encap[NF_FLOW_TABLE_ENCAP_MAX];
- u16 mtu;
+ /* All members above are keys for lookups, see flow_offload_hash(). */
+ struct { } __hash;
- struct dst_entry *dst_cache;
+ u8 dir:2,
+ xmit_type:3,
+ encap_num:2,
+ in_vlan_ingress:2;
+ u16 mtu;
+ union {
+ struct {
+ struct dst_entry *dst_cache;
+ u32 dst_cookie;
+ };
+ struct {
+ u32 ifidx;
+ u32 hw_ifidx;
+ u8 h_source[ETH_ALEN];
+ u8 h_dest[ETH_ALEN];
+ } out;
+ struct {
+ u32 iifidx;
+ } tc;
+ };
};
struct flow_offload_tuple_rhash {
@@ -95,7 +163,7 @@ enum nf_flow_flags {
NF_FLOW_HW,
NF_FLOW_HW_DYING,
NF_FLOW_HW_DEAD,
- NF_FLOW_HW_REFRESH,
+ NF_FLOW_HW_PENDING,
};
enum flow_offload_type {
@@ -115,6 +183,8 @@ struct flow_offload {
#define NF_FLOW_TIMEOUT (30 * HZ)
#define nf_flowtable_time_stamp (u32)jiffies
+unsigned long flow_offload_get_timeout(struct flow_offload *flow);
+
static inline __s32 nf_flow_timeout_delta(unsigned int timeout)
{
return (__s32)(timeout - nf_flowtable_time_stamp);
@@ -122,19 +192,87 @@ static inline __s32 nf_flow_timeout_delta(unsigned int timeout)
struct nf_flow_route {
struct {
- struct dst_entry *dst;
+ struct dst_entry *dst;
+ struct {
+ u32 ifindex;
+ struct {
+ u16 id;
+ __be16 proto;
+ } encap[NF_FLOW_TABLE_ENCAP_MAX];
+ u8 num_encaps:2,
+ ingress_vlans:2;
+ } in;
+ struct {
+ u32 ifindex;
+ u32 hw_ifindex;
+ u8 h_source[ETH_ALEN];
+ u8 h_dest[ETH_ALEN];
+ } out;
+ enum flow_offload_xmit_type xmit_type;
} tuple[FLOW_OFFLOAD_DIR_MAX];
};
struct flow_offload *flow_offload_alloc(struct nf_conn *ct);
void flow_offload_free(struct flow_offload *flow);
+static inline int
+nf_flow_table_offload_add_cb(struct nf_flowtable *flow_table,
+ flow_setup_cb_t *cb, void *cb_priv)
+{
+ struct flow_block *block = &flow_table->flow_block;
+ struct flow_block_cb *block_cb;
+ int err = 0;
+
+ down_write(&flow_table->flow_block_lock);
+ block_cb = flow_block_cb_lookup(block, cb, cb_priv);
+ if (block_cb) {
+ err = -EEXIST;
+ goto unlock;
+ }
+
+ block_cb = flow_block_cb_alloc(cb, cb_priv, cb_priv, NULL);
+ if (IS_ERR(block_cb)) {
+ err = PTR_ERR(block_cb);
+ goto unlock;
+ }
+
+ list_add_tail(&block_cb->list, &block->cb_list);
+
+unlock:
+ up_write(&flow_table->flow_block_lock);
+ return err;
+}
+
+static inline void
+nf_flow_table_offload_del_cb(struct nf_flowtable *flow_table,
+ flow_setup_cb_t *cb, void *cb_priv)
+{
+ struct flow_block *block = &flow_table->flow_block;
+ struct flow_block_cb *block_cb;
+
+ down_write(&flow_table->flow_block_lock);
+ block_cb = flow_block_cb_lookup(block, cb, cb_priv);
+ if (block_cb) {
+ list_del(&block_cb->list);
+ flow_block_cb_free(block_cb);
+ } else {
+ WARN_ON(true);
+ }
+ up_write(&flow_table->flow_block_lock);
+}
+
int flow_offload_route_init(struct flow_offload *flow,
const struct nf_flow_route *route);
int flow_offload_add(struct nf_flowtable *flow_table, struct flow_offload *flow);
+void flow_offload_refresh(struct nf_flowtable *flow_table,
+ struct flow_offload *flow);
+
struct flow_offload_tuple_rhash *flow_offload_lookup(struct nf_flowtable *flow_table,
struct flow_offload_tuple *tuple);
+void nf_flow_table_gc_run(struct nf_flowtable *flow_table);
+void nf_flow_table_gc_cleanup(struct nf_flowtable *flowtable,
+ struct net_device *dev);
void nf_flow_table_cleanup(struct net_device *dev);
int nf_flow_table_init(struct nf_flowtable *flow_table);
@@ -142,12 +280,12 @@ void nf_flow_table_free(struct nf_flowtable *flow_table);
void flow_offload_teardown(struct flow_offload *flow);
-int nf_flow_snat_port(const struct flow_offload *flow,
- struct sk_buff *skb, unsigned int thoff,
- u8 protocol, enum flow_offload_tuple_dir dir);
-int nf_flow_dnat_port(const struct flow_offload *flow,
- struct sk_buff *skb, unsigned int thoff,
- u8 protocol, enum flow_offload_tuple_dir dir);
+void nf_flow_snat_port(const struct flow_offload *flow,
+ struct sk_buff *skb, unsigned int thoff,
+ u8 protocol, enum flow_offload_tuple_dir dir);
+void nf_flow_dnat_port(const struct flow_offload *flow,
+ struct sk_buff *skb, unsigned int thoff,
+ u8 protocol, enum flow_offload_tuple_dir dir);
struct flow_ports {
__be16 source, dest;
@@ -169,6 +307,8 @@ void nf_flow_offload_stats(struct nf_flowtable *flowtable,
struct flow_offload *flow);
void nf_flow_table_offload_flush(struct nf_flowtable *flowtable);
+void nf_flow_table_offload_flush_cleanup(struct nf_flowtable *flowtable);
+
int nf_flow_table_offload_setup(struct nf_flowtable *flowtable,
struct net_device *dev,
enum flow_block_command cmd);
@@ -182,4 +322,41 @@ int nf_flow_rule_route_ipv6(struct net *net, const struct flow_offload *flow,
int nf_flow_table_offload_init(void);
void nf_flow_table_offload_exit(void);
+static inline __be16 nf_flow_pppoe_proto(const struct sk_buff *skb)
+{
+ __be16 proto;
+
+ proto = *((__be16 *)(skb_mac_header(skb) + ETH_HLEN +
+ sizeof(struct pppoe_hdr)));
+ switch (proto) {
+ case htons(PPP_IP):
+ return htons(ETH_P_IP);
+ case htons(PPP_IPV6):
+ return htons(ETH_P_IPV6);
+ }
+
+ return 0;
+}
+
+#define NF_FLOW_TABLE_STAT_INC(net, count) __this_cpu_inc((net)->ft.stat->count)
+#define NF_FLOW_TABLE_STAT_DEC(net, count) __this_cpu_dec((net)->ft.stat->count)
+#define NF_FLOW_TABLE_STAT_INC_ATOMIC(net, count) \
+ this_cpu_inc((net)->ft.stat->count)
+#define NF_FLOW_TABLE_STAT_DEC_ATOMIC(net, count) \
+ this_cpu_dec((net)->ft.stat->count)
+
+#ifdef CONFIG_NF_FLOW_TABLE_PROCFS
+int nf_flow_table_init_proc(struct net *net);
+void nf_flow_table_fini_proc(struct net *net);
+#else
+static inline int nf_flow_table_init_proc(struct net *net)
+{
+ return 0;
+}
+
+static inline void nf_flow_table_fini_proc(struct net *net)
+{
+}
+#endif /* CONFIG_NF_FLOW_TABLE_PROCFS */
+
#endif /* _NF_FLOW_TABLE_H */
diff --git a/include/net/netfilter/nf_hooks_lwtunnel.h b/include/net/netfilter/nf_hooks_lwtunnel.h
new file mode 100644
index 000000000000..52e27920f829
--- /dev/null
+++ b/include/net/netfilter/nf_hooks_lwtunnel.h
@@ -0,0 +1,7 @@
+#include <linux/sysctl.h>
+#include <linux/types.h>
+
+#ifdef CONFIG_SYSCTL
+int nf_hooks_lwtunnel_sysctl_handler(struct ctl_table *table, int write,
+ void *buffer, size_t *lenp, loff_t *ppos);
+#endif
diff --git a/include/net/netfilter/nf_log.h b/include/net/netfilter/nf_log.h
index 0d3920896d50..e55eedc84ed7 100644
--- a/include/net/netfilter/nf_log.h
+++ b/include/net/netfilter/nf_log.h
@@ -68,7 +68,6 @@ void nf_log_unbind_pf(struct net *net, u_int8_t pf);
int nf_logger_find_get(int pf, enum nf_log_type type);
void nf_logger_put(int pf, enum nf_log_type type);
-void nf_logger_request_module(int pf, enum nf_log_type type);
#define MODULE_ALIAS_NF_LOGGER(family, type) \
MODULE_ALIAS("nf-logger-" __stringify(family) "-" __stringify(type))
@@ -99,27 +98,4 @@ struct nf_log_buf;
struct nf_log_buf *nf_log_buf_open(void);
__printf(2, 3) int nf_log_buf_add(struct nf_log_buf *m, const char *f, ...);
void nf_log_buf_close(struct nf_log_buf *m);
-
-/* common logging functions */
-int nf_log_dump_udp_header(struct nf_log_buf *m, const struct sk_buff *skb,
- u8 proto, int fragment, unsigned int offset);
-int nf_log_dump_tcp_header(struct nf_log_buf *m, const struct sk_buff *skb,
- u8 proto, int fragment, unsigned int offset,
- unsigned int logflags);
-void nf_log_dump_sk_uid_gid(struct net *net, struct nf_log_buf *m,
- struct sock *sk);
-void nf_log_dump_packet_common(struct nf_log_buf *m, u_int8_t pf,
- unsigned int hooknum, const struct sk_buff *skb,
- const struct net_device *in,
- const struct net_device *out,
- const struct nf_loginfo *loginfo,
- const char *prefix);
-void nf_log_l2packet(struct net *net, u_int8_t pf,
- __be16 protocol,
- unsigned int hooknum,
- const struct sk_buff *skb,
- const struct net_device *in,
- const struct net_device *out,
- const struct nf_loginfo *loginfo, const char *prefix);
-
#endif /* _NF_LOG_H */
diff --git a/include/net/netfilter/nf_nat.h b/include/net/netfilter/nf_nat.h
index 0d412dd63707..e9eb01e99d2f 100644
--- a/include/net/netfilter/nf_nat.h
+++ b/include/net/netfilter/nf_nat.h
@@ -104,9 +104,7 @@ unsigned int
nf_nat_inet_fn(void *priv, struct sk_buff *skb,
const struct nf_hook_state *state);
-int nf_xfrm_me_harder(struct net *n, struct sk_buff *s, unsigned int family);
-
-static inline int nf_nat_initialized(struct nf_conn *ct,
+static inline int nf_nat_initialized(const struct nf_conn *ct,
enum nf_nat_manip_type manip)
{
if (manip == NF_NAT_MANIP_SRC)
diff --git a/include/net/netfilter/nf_nat_helper.h b/include/net/netfilter/nf_nat_helper.h
index efae84646353..44c421b9be85 100644
--- a/include/net/netfilter/nf_nat_helper.h
+++ b/include/net/netfilter/nf_nat_helper.h
@@ -38,4 +38,5 @@ bool nf_nat_mangle_udp_packet(struct sk_buff *skb, struct nf_conn *ct,
* to port ct->master->saved_proto. */
void nf_nat_follow_master(struct nf_conn *ct, struct nf_conntrack_expect *this);
+u16 nf_nat_exp_find_port(struct nf_conntrack_expect *exp, u16 port);
#endif
diff --git a/include/net/netfilter/nf_queue.h b/include/net/netfilter/nf_queue.h
index 47088083667b..c81021ab07aa 100644
--- a/include/net/netfilter/nf_queue.h
+++ b/include/net/netfilter/nf_queue.h
@@ -14,7 +14,10 @@ struct nf_queue_entry {
struct sk_buff *skb;
unsigned int id;
unsigned int hook_index; /* index in hook_entries->hook[] */
-
+#if IS_ENABLED(CONFIG_BRIDGE_NETFILTER)
+ struct net_device *physin;
+ struct net_device *physout;
+#endif
struct nf_hook_state state;
u16 size; /* sizeof(entry) + saved route keys */
@@ -30,17 +33,17 @@ struct nf_queue_handler {
void (*nf_hook_drop)(struct net *net);
};
-void nf_register_queue_handler(struct net *net, const struct nf_queue_handler *qh);
-void nf_unregister_queue_handler(struct net *net);
+void nf_register_queue_handler(const struct nf_queue_handler *qh);
+void nf_unregister_queue_handler(void);
void nf_reinject(struct nf_queue_entry *entry, unsigned int verdict);
-void nf_queue_entry_get_refs(struct nf_queue_entry *entry);
-void nf_queue_entry_release_refs(struct nf_queue_entry *entry);
+bool nf_queue_entry_get_refs(struct nf_queue_entry *entry);
+void nf_queue_entry_free(struct nf_queue_entry *entry);
static inline void init_hashrandom(u32 *jhash_initval)
{
while (*jhash_initval == 0)
- *jhash_initval = prandom_u32();
+ *jhash_initval = get_random_u32();
}
static inline u32 hash_v4(const struct iphdr *iph, u32 initval)
diff --git a/include/net/netfilter/nf_reject.h b/include/net/netfilter/nf_reject.h
index 9051c3a0c8e7..7c669792fb9c 100644
--- a/include/net/netfilter/nf_reject.h
+++ b/include/net/netfilter/nf_reject.h
@@ -5,12 +5,28 @@
#include <linux/types.h>
#include <uapi/linux/in.h>
-static inline bool nf_reject_verify_csum(__u8 proto)
+static inline bool nf_reject_verify_csum(struct sk_buff *skb, int dataoff,
+ __u8 proto)
{
/* Skip protocols that don't use 16-bit one's complement checksum
* of the entire payload.
*/
switch (proto) {
+ /* Protocols with optional checksums. */
+ case IPPROTO_UDP: {
+ const struct udphdr *udp_hdr;
+ struct udphdr _udp_hdr;
+
+ udp_hdr = skb_header_pointer(skb, dataoff,
+ sizeof(_udp_hdr),
+ &_udp_hdr);
+ if (!udp_hdr || udp_hdr->check)
+ return true;
+
+ return false;
+ }
+ case IPPROTO_GRE:
+
/* Protocols with other integrity checks. */
case IPPROTO_AH:
case IPPROTO_ESP:
@@ -19,9 +35,6 @@ static inline bool nf_reject_verify_csum(__u8 proto)
/* Protocols with partial checksums. */
case IPPROTO_UDPLITE:
case IPPROTO_DCCP:
-
- /* Protocols with optional checksums. */
- case IPPROTO_GRE:
return false;
}
return true;
diff --git a/include/net/netfilter/nf_tables.h b/include/net/netfilter/nf_tables.h
index 4170c033d461..cdb7db9b0e25 100644
--- a/include/net/netfilter/nf_tables.h
+++ b/include/net/netfilter/nf_tables.h
@@ -13,42 +13,62 @@
#include <net/netfilter/nf_flow_table.h>
#include <net/netlink.h>
#include <net/flow_offload.h>
+#include <net/netns/generic.h>
+
+#define NFT_MAX_HOOKS (NF_INET_INGRESS + 1)
struct module;
#define NFT_JUMP_STACK_SIZE 16
+enum {
+ NFT_PKTINFO_L4PROTO = (1 << 0),
+ NFT_PKTINFO_INNER = (1 << 1),
+};
+
struct nft_pktinfo {
struct sk_buff *skb;
- bool tprot_set;
+ const struct nf_hook_state *state;
+ u8 flags;
u8 tprot;
- /* for x_tables compatibility */
- struct xt_action_param xt;
+ u16 fragoff;
+ unsigned int thoff;
+ unsigned int inneroff;
};
+static inline struct sock *nft_sk(const struct nft_pktinfo *pkt)
+{
+ return pkt->state->sk;
+}
+
+static inline unsigned int nft_thoff(const struct nft_pktinfo *pkt)
+{
+ return pkt->thoff;
+}
+
static inline struct net *nft_net(const struct nft_pktinfo *pkt)
{
- return pkt->xt.state->net;
+ return pkt->state->net;
}
static inline unsigned int nft_hook(const struct nft_pktinfo *pkt)
{
- return pkt->xt.state->hook;
+ return pkt->state->hook;
}
static inline u8 nft_pf(const struct nft_pktinfo *pkt)
{
- return pkt->xt.state->pf;
+ return pkt->state->pf;
}
static inline const struct net_device *nft_in(const struct nft_pktinfo *pkt)
{
- return pkt->xt.state->in;
+ return pkt->state->in;
}
static inline const struct net_device *nft_out(const struct nft_pktinfo *pkt)
{
- return pkt->xt.state->out;
+ return pkt->state->out;
}
static inline void nft_set_pktinfo(struct nft_pktinfo *pkt,
@@ -56,16 +76,15 @@ static inline void nft_set_pktinfo(struct nft_pktinfo *pkt,
const struct nf_hook_state *state)
{
pkt->skb = skb;
- pkt->xt.state = state;
+ pkt->state = state;
}
-static inline void nft_set_pktinfo_unspec(struct nft_pktinfo *pkt,
- struct sk_buff *skb)
+static inline void nft_set_pktinfo_unspec(struct nft_pktinfo *pkt)
{
- pkt->tprot_set = false;
+ pkt->flags = 0;
pkt->tprot = 0;
- pkt->xt.thoff = 0;
- pkt->xt.fragoff = 0;
+ pkt->thoff = 0;
+ pkt->fragoff = 0;
}
/**
@@ -86,6 +105,8 @@ struct nft_data {
};
} __attribute__((aligned(__alignof__(u64))));
+#define NFT_REG32_NUM 20
+
/**
* struct nft_regs - nf_tables register set
*
@@ -96,11 +117,22 @@ struct nft_data {
*/
struct nft_regs {
union {
- u32 data[20];
+ u32 data[NFT_REG32_NUM];
struct nft_verdict verdict;
};
};
+struct nft_regs_track {
+ struct {
+ const struct nft_expr *selector;
+ const struct nft_expr *bitwise;
+ u8 num_reg;
+ } regs[NFT_REG32_NUM];
+
+ const struct nft_expr *cur;
+ const struct nft_expr *last;
+};
+
/* Store/load an u8, u16 or u64 integer to/from the u32 data register.
*
* Note, when using concatenations, register allocation happens at 32-bit
@@ -125,11 +157,26 @@ static inline void nft_reg_store16(u32 *dreg, u16 val)
*(u16 *)dreg = val;
}
+static inline void nft_reg_store_be16(u32 *dreg, __be16 val)
+{
+ nft_reg_store16(dreg, (__force __u16)val);
+}
+
static inline u16 nft_reg_load16(const u32 *sreg)
{
return *(u16 *)sreg;
}
+static inline __be16 nft_reg_load_be16(const u32 *sreg)
+{
+ return (__force __be16)nft_reg_load16(sreg);
+}
+
+static inline __be32 nft_reg_load_be32(const u32 *sreg)
+{
+ return *(__force __be32 *)sreg;
+}
+
static inline void nft_reg_store64(u32 *dreg, u64 val)
{
put_unaligned(val, (u64 *)dreg);
@@ -143,16 +190,11 @@ static inline u64 nft_reg_load64(const u32 *sreg)
static inline void nft_data_copy(u32 *dst, const struct nft_data *src,
unsigned int len)
{
+ if (len % NFT_REG32_SIZE)
+ dst[len / NFT_REG32_SIZE] = 0;
memcpy(dst, src, len);
}
-static inline void nft_data_debug(const struct nft_data *data)
-{
- pr_debug("data[0]=%x data[1]=%x data[2]=%x data[3]=%x\n",
- data->data[0], data->data[1],
- data->data[2], data->data[3]);
-}
-
/**
* struct nft_ctx - nf_tables rule/set context
*
@@ -179,13 +221,18 @@ struct nft_ctx {
bool report;
};
+enum nft_data_desc_flags {
+ NFT_DATA_DESC_SETELEM = (1 << 0),
+};
+
struct nft_data_desc {
enum nft_data_types type;
+ unsigned int size;
unsigned int len;
+ unsigned int flags;
};
-int nft_data_init(const struct nft_ctx *ctx,
- struct nft_data *data, unsigned int size,
+int nft_data_init(const struct nft_ctx *ctx, struct nft_data *data,
struct nft_data_desc *desc, const struct nlattr *nla);
void nft_data_hold(const struct nft_data *data, enum nft_data_types type);
void nft_data_release(const struct nft_data *data, enum nft_data_types type);
@@ -203,14 +250,13 @@ static inline enum nft_registers nft_type_to_reg(enum nft_data_types type)
}
int nft_parse_u32_check(const struct nlattr *attr, int max, u32 *dest);
-unsigned int nft_parse_register(const struct nlattr *attr);
int nft_dump_register(struct sk_buff *skb, unsigned int attr, unsigned int reg);
-int nft_validate_register_load(enum nft_registers reg, unsigned int len);
-int nft_validate_register_store(const struct nft_ctx *ctx,
- enum nft_registers reg,
- const struct nft_data *data,
- enum nft_data_types type, unsigned int len);
+int nft_parse_register_load(const struct nlattr *attr, u8 *sreg, u32 len);
+int nft_parse_register_store(const struct nft_ctx *ctx,
+ const struct nlattr *attr, u8 *dreg,
+ const struct nft_data *data,
+ enum nft_data_types type, unsigned int len);
/**
* struct nft_userdata - user defined data associated with an object
@@ -224,7 +270,7 @@ int nft_validate_register_store(const struct nft_ctx *ctx,
*/
struct nft_userdata {
u8 len;
- unsigned char data[0];
+ unsigned char data[];
};
/**
@@ -243,6 +289,10 @@ struct nft_set_elem {
u32 buf[NFT_DATA_VALUE_MAXLEN / sizeof(u32)];
struct nft_data val;
} key_end;
+ union {
+ u32 buf[NFT_DATA_VALUE_MAXLEN / sizeof(u32)];
+ struct nft_data val;
+ } data;
void *priv;
};
@@ -266,6 +316,7 @@ struct nft_set_iter {
* @size: number of set elements
* @field_len: length of each field in concatenation, bytes
* @field_count: number of concatenated fields in element
+ * @expr: set must support for expressions
*/
struct nft_set_desc {
unsigned int klen;
@@ -273,6 +324,7 @@ struct nft_set_desc {
unsigned int size;
u8 field_len[NFT_REG32_COUNT];
u8 field_count;
+ bool expr;
};
/**
@@ -302,8 +354,35 @@ struct nft_set_estimate {
enum nft_set_class space;
};
+#define NFT_EXPR_MAXATTR 16
+#define NFT_EXPR_SIZE(size) (sizeof(struct nft_expr) + \
+ ALIGN(size, __alignof__(struct nft_expr)))
+
+/**
+ * struct nft_expr - nf_tables expression
+ *
+ * @ops: expression ops
+ * @data: expression private data
+ */
+struct nft_expr {
+ const struct nft_expr_ops *ops;
+ unsigned char data[]
+ __attribute__((aligned(__alignof__(u64))));
+};
+
+static inline void *nft_expr_priv(const struct nft_expr *expr)
+{
+ return (void *)expr->data;
+}
+
+int nft_expr_clone(struct nft_expr *dst, struct nft_expr *src);
+void nft_expr_destroy(const struct nft_ctx *ctx, struct nft_expr *expr);
+int nft_expr_dump(struct sk_buff *skb, unsigned int attr,
+ const struct nft_expr *expr);
+bool nft_expr_reduce_bitwise(struct nft_regs_track *track,
+ const struct nft_expr *expr);
+
struct nft_set_ext;
-struct nft_expr;
/**
* struct nft_set_ops - nf_tables set operations
@@ -385,20 +464,29 @@ struct nft_set_ops {
* struct nft_set_type - nf_tables set type
*
* @ops: set ops for this type
- * @list: used internally
- * @owner: module reference
* @features: features supported by the implementation
*/
struct nft_set_type {
const struct nft_set_ops ops;
- struct list_head list;
- struct module *owner;
u32 features;
};
#define to_set_type(o) container_of(o, struct nft_set_type, ops)
-int nft_register_set(struct nft_set_type *type);
-void nft_unregister_set(struct nft_set_type *type);
+struct nft_set_elem_expr {
+ u8 size;
+ unsigned char data[]
+ __attribute__((aligned(__alignof__(struct nft_expr))));
+};
+
+#define nft_setelem_expr_at(__elem_expr, __offset) \
+ ((struct nft_expr *)&__elem_expr->data[__offset])
+
+#define nft_setelem_expr_foreach(__expr, __elem_expr, __size) \
+ for (__expr = nft_setelem_expr_at(__elem_expr, 0), __size = 0; \
+ __size < (__elem_expr)->size; \
+ __size += (__expr)->ops->size, __expr = ((void *)(__expr)) + (__expr)->ops->size)
+
+#define NFT_SET_EXPR_MAX 2
/**
* struct nft_set - nf_tables set instance
@@ -423,6 +511,7 @@ void nft_unregister_set(struct nft_set_type *type);
* @policy: set parameterization (see enum nft_set_policies)
* @udlen: user data length
* @udata: user data
+ * @expr: stateful expression
* @ops: set ops
* @flags: set flags
* @genmask: generation mask
@@ -457,6 +546,9 @@ struct nft_set {
genmask:2;
u8 klen;
u8 dlen;
+ u8 num_exprs;
+ struct nft_expr *exprs[NFT_SET_EXPR_MAX];
+ struct list_head catchall_list;
unsigned char data[]
__attribute__((aligned(__alignof__(u64))));
};
@@ -482,6 +574,10 @@ struct nft_set *nft_set_lookup_global(const struct net *net,
const struct nlattr *nla_set_id,
u8 genmask);
+struct nft_set_ext *nft_set_catchall_lookup(const struct net *net,
+ const struct nft_set *set);
+void *nft_set_catchall_gc(const struct nft_set *set);
+
static inline unsigned long nft_set_gc_interval(const struct nft_set *set)
{
return set->gc_int ? msecs_to_jiffies(set->gc_int) : HZ;
@@ -521,7 +617,7 @@ void nf_tables_destroy_set(const struct nft_ctx *ctx, struct nft_set *set);
* @NFT_SET_EXT_TIMEOUT: element timeout
* @NFT_SET_EXT_EXPIRATION: element expiration time
* @NFT_SET_EXT_USERDATA: user data associated with the element
- * @NFT_SET_EXT_EXPR: expression assiociated with the element
+ * @NFT_SET_EXT_EXPRESSIONS: expressions assiciated with the element
* @NFT_SET_EXT_OBJREF: stateful object reference associated with element
* @NFT_SET_EXT_NUM: number of extension types
*/
@@ -533,7 +629,7 @@ enum nft_set_extensions {
NFT_SET_EXT_TIMEOUT,
NFT_SET_EXT_EXPIRATION,
NFT_SET_EXT_USERDATA,
- NFT_SET_EXT_EXPR,
+ NFT_SET_EXT_EXPRESSIONS,
NFT_SET_EXT_OBJREF,
NFT_SET_EXT_NUM
};
@@ -560,6 +656,7 @@ extern const struct nft_set_ext_type nft_set_ext_types[];
struct nft_set_ext_tmpl {
u16 len;
u8 offset[NFT_SET_EXT_NUM];
+ u8 ext_len[NFT_SET_EXT_NUM];
};
/**
@@ -572,7 +669,7 @@ struct nft_set_ext_tmpl {
struct nft_set_ext {
u8 genmask;
u8 offset[NFT_SET_EXT_NUM];
- char data[0];
+ char data[];
};
static inline void nft_set_ext_prepare(struct nft_set_ext_tmpl *tmpl)
@@ -581,18 +678,23 @@ static inline void nft_set_ext_prepare(struct nft_set_ext_tmpl *tmpl)
tmpl->len = sizeof(struct nft_set_ext);
}
-static inline void nft_set_ext_add_length(struct nft_set_ext_tmpl *tmpl, u8 id,
- unsigned int len)
+static inline int nft_set_ext_add_length(struct nft_set_ext_tmpl *tmpl, u8 id,
+ unsigned int len)
{
tmpl->len = ALIGN(tmpl->len, nft_set_ext_types[id].align);
- BUG_ON(tmpl->len > U8_MAX);
+ if (tmpl->len > U8_MAX)
+ return -EINVAL;
+
tmpl->offset[id] = tmpl->len;
- tmpl->len += nft_set_ext_types[id].len + len;
+ tmpl->ext_len[id] = nft_set_ext_types[id].len + len;
+ tmpl->len += tmpl->ext_len[id];
+
+ return 0;
}
-static inline void nft_set_ext_add(struct nft_set_ext_tmpl *tmpl, u8 id)
+static inline int nft_set_ext_add(struct nft_set_ext_tmpl *tmpl, u8 id)
{
- nft_set_ext_add_length(tmpl, id, 0);
+ return nft_set_ext_add_length(tmpl, id, 0);
}
static inline void nft_set_ext_init(struct nft_set_ext *ext,
@@ -651,9 +753,9 @@ static inline struct nft_userdata *nft_set_ext_userdata(const struct nft_set_ext
return nft_set_ext(ext, NFT_SET_EXT_USERDATA);
}
-static inline struct nft_expr *nft_set_ext_expr(const struct nft_set_ext *ext)
+static inline struct nft_set_elem_expr *nft_set_ext_expr(const struct nft_set_ext *ext)
{
- return nft_set_ext(ext, NFT_SET_EXT_EXPR);
+ return nft_set_ext(ext, NFT_SET_EXT_EXPRESSIONS);
}
static inline bool nft_set_elem_expired(const struct nft_set_ext *ext)
@@ -673,10 +775,16 @@ static inline struct nft_object **nft_set_ext_obj(const struct nft_set_ext *ext)
return nft_set_ext(ext, NFT_SET_EXT_OBJREF);
}
+struct nft_expr *nft_set_elem_expr_alloc(const struct nft_ctx *ctx,
+ const struct nft_set *set,
+ const struct nlattr *attr);
+
void *nft_set_elem_init(const struct nft_set *set,
const struct nft_set_ext_tmpl *tmpl,
const u32 *key, const u32 *key_end, const u32 *data,
u64 timeout, u64 expiration, gfp_t gfp);
+int nft_set_elem_expr_clone(const struct nft_ctx *ctx, struct nft_set *set,
+ struct nft_expr *expr_array[]);
void nft_set_elem_destroy(const struct nft_set *set, void *elem,
bool destroy_expr);
@@ -792,7 +900,6 @@ struct nft_offload_ctx;
* @validate: validate expression, called during loop detection
* @data: extra data to attach to this expression operation
*/
-struct nft_expr;
struct nft_expr_ops {
void (*eval)(const struct nft_expr *expr,
struct nft_regs *regs,
@@ -818,43 +925,20 @@ struct nft_expr_ops {
int (*validate)(const struct nft_ctx *ctx,
const struct nft_expr *expr,
const struct nft_data **data);
+ bool (*reduce)(struct nft_regs_track *track,
+ const struct nft_expr *expr);
bool (*gc)(struct net *net,
const struct nft_expr *expr);
int (*offload)(struct nft_offload_ctx *ctx,
struct nft_flow_rule *flow,
const struct nft_expr *expr);
- u32 offload_flags;
+ bool (*offload_action)(const struct nft_expr *expr);
+ void (*offload_stats)(struct nft_expr *expr,
+ const struct flow_stats *stats);
const struct nft_expr_type *type;
void *data;
};
-#define NFT_EXPR_MAXATTR 16
-#define NFT_EXPR_SIZE(size) (sizeof(struct nft_expr) + \
- ALIGN(size, __alignof__(struct nft_expr)))
-
-/**
- * struct nft_expr - nf_tables expression
- *
- * @ops: expression ops
- * @data: expression private data
- */
-struct nft_expr {
- const struct nft_expr_ops *ops;
- unsigned char data[]
- __attribute__((aligned(__alignof__(u64))));
-};
-
-static inline void *nft_expr_priv(const struct nft_expr *expr)
-{
- return (void *)expr->data;
-}
-
-struct nft_expr *nft_expr_init(const struct nft_ctx *ctx,
- const struct nlattr *nla);
-void nft_expr_destroy(const struct nft_ctx *ctx, struct nft_expr *expr);
-int nft_expr_dump(struct sk_buff *skb, unsigned int attr,
- const struct nft_expr *expr);
-
/**
* struct nft_rule - nf_tables rule
*
@@ -890,11 +974,37 @@ static inline struct nft_expr *nft_expr_last(const struct nft_rule *rule)
return (struct nft_expr *)&rule->data[rule->dlen];
}
+static inline bool nft_expr_more(const struct nft_rule *rule,
+ const struct nft_expr *expr)
+{
+ return expr != nft_expr_last(rule) && expr->ops;
+}
+
static inline struct nft_userdata *nft_userdata(const struct nft_rule *rule)
{
return (void *)&rule->data[rule->dlen];
}
+void nf_tables_rule_release(const struct nft_ctx *ctx, struct nft_rule *rule);
+
+static inline void nft_set_elem_update_expr(const struct nft_set_ext *ext,
+ struct nft_regs *regs,
+ const struct nft_pktinfo *pkt)
+{
+ struct nft_set_elem_expr *elem_expr;
+ struct nft_expr *expr;
+ u32 size;
+
+ if (__nft_set_ext_exists(ext, NFT_SET_EXT_EXPRESSIONS)) {
+ elem_expr = nft_set_ext_expr(ext);
+ nft_setelem_expr_foreach(expr, elem_expr, size) {
+ expr->ops->eval(expr, regs, pkt);
+ if (regs->verdict.code == NFT_BREAK)
+ return;
+ }
+ }
+}
+
/*
* The last pointer isn't really necessary, but the compiler isn't able to
* determine that the result of nft_expr_last() is always the same since it
@@ -905,12 +1015,21 @@ static inline struct nft_userdata *nft_userdata(const struct nft_rule *rule)
(expr) != (last); \
(expr) = nft_expr_next(expr))
-enum nft_chain_flags {
- NFT_BASE_CHAIN = 0x1,
- NFT_CHAIN_HW_OFFLOAD = 0x2,
+#define NFT_CHAIN_POLICY_UNSET U8_MAX
+
+struct nft_rule_dp {
+ u64 is_last:1,
+ dlen:12,
+ handle:42; /* for tracing */
+ unsigned char data[]
+ __attribute__((aligned(__alignof__(struct nft_expr))));
};
-#define NFT_CHAIN_POLICY_UNSET U8_MAX
+struct nft_rule_blob {
+ unsigned long size;
+ unsigned char data[]
+ __attribute__((aligned(__alignof__(struct nft_rule_dp))));
+};
/**
* struct nft_chain - nf_tables chain
@@ -925,20 +1044,23 @@ enum nft_chain_flags {
* @name: name of the chain
*/
struct nft_chain {
- struct nft_rule *__rcu *rules_gen_0;
- struct nft_rule *__rcu *rules_gen_1;
+ struct nft_rule_blob __rcu *blob_gen_0;
+ struct nft_rule_blob __rcu *blob_gen_1;
struct list_head rules;
struct list_head list;
struct rhlist_head rhlhead;
struct nft_table *table;
u64 handle;
u32 use;
- u8 flags:6,
+ u8 flags:5,
+ bound:1,
genmask:2;
char *name;
+ u16 udlen;
+ u8 *udata;
/* Only used during control plane commit phase: */
- struct nft_rule **rules_next;
+ struct nft_rule_blob *blob_next;
};
int nft_chain_validate(const struct nft_ctx *ctx, const struct nft_chain *chain);
@@ -968,7 +1090,7 @@ struct nft_chain_type {
int family;
struct module *owner;
unsigned int hook_mask;
- nf_hookfn *hooks[NF_MAX_HOOKS];
+ nf_hookfn *hooks[NFT_MAX_HOOKS];
int (*ops_register)(struct net *net, const struct nf_hook_ops *ops);
void (*ops_unregister)(struct net *net, const struct nf_hook_ops *ops);
};
@@ -978,6 +1100,14 @@ int nft_chain_validate_dependency(const struct nft_chain *chain,
int nft_chain_validate_hooks(const struct nft_chain *chain,
unsigned int hook_flags);
+static inline bool nft_chain_is_bound(struct nft_chain *chain)
+{
+ return (chain->flags & NFT_CHAIN_BINDING) && chain->bound;
+}
+
+void nft_chain_del(struct nft_chain *chain);
+void nf_tables_chain_destroy(struct nft_ctx *ctx);
+
struct nft_stats {
u64 bytes;
u64 pkts;
@@ -1019,7 +1149,7 @@ static inline struct nft_base_chain *nft_base_chain(const struct nft_chain *chai
static inline bool nft_is_base_chain(const struct nft_chain *chain)
{
- return chain->flags & NFT_BASE_CHAIN;
+ return chain->flags & NFT_CHAIN_BASE;
}
int __nft_release_basechain(struct nft_ctx *ctx);
@@ -1056,9 +1186,23 @@ struct nft_table {
u16 family:6,
flags:8,
genmask:2;
+ u32 nlpid;
char *name;
+ u16 udlen;
+ u8 *udata;
};
+static inline bool nft_table_has_owner(const struct nft_table *table)
+{
+ return table->flags & NFT_TABLE_F_OWNER;
+}
+
+static inline bool nft_base_chain_netdev(int family, u32 hooknum)
+{
+ return family == NFPROTO_NETDEV ||
+ (family == NFPROTO_INET && hooknum == NF_INET_INGRESS);
+}
+
void nft_register_chain_type(const struct nft_chain_type *);
void nft_unregister_chain_type(const struct nft_chain_type *);
@@ -1098,6 +1242,8 @@ struct nft_object {
u32 genmask:2,
use:30;
u64 handle;
+ u16 udlen;
+ u8 *udata;
/* runtime data below here */
const struct nft_object_ops *ops ____cacheline_aligned;
unsigned char data[]
@@ -1118,7 +1264,7 @@ struct nft_object *nft_obj_lookup(const struct net *net,
void nft_obj_notify(struct net *net, const struct nft_table *table,
struct nft_object *obj, u32 portid, u32 seq,
- int event, int family, int report, gfp_t gfp);
+ int event, u16 flags, int family, int report, gfp_t gfp);
/**
* struct nft_object_type - stateful object type
@@ -1218,24 +1364,28 @@ void nft_unregister_flowtable_type(struct nf_flowtable_type *type);
/**
* struct nft_traceinfo - nft tracing information and state
*
+ * @trace: other struct members are initialised
+ * @nf_trace: copy of skb->nf_trace before rule evaluation
+ * @type: event type (enum nft_trace_types)
+ * @skbid: hash of skb to be used as trace id
+ * @packet_dumped: packet headers sent in a previous traceinfo message
* @pkt: pktinfo currently processed
* @basechain: base chain currently processed
* @chain: chain currently processed
* @rule: rule that was evaluated
* @verdict: verdict given by rule
- * @type: event type (enum nft_trace_types)
- * @packet_dumped: packet headers sent in a previous traceinfo message
- * @trace: other struct members are initialised
*/
struct nft_traceinfo {
+ bool trace;
+ bool nf_trace;
+ bool packet_dumped;
+ enum nft_trace_types type:8;
+ u32 skbid;
const struct nft_pktinfo *pkt;
const struct nft_base_chain *basechain;
const struct nft_chain *chain;
- const struct nft_rule *rule;
+ const struct nft_rule_dp *rule;
const struct nft_verdict *verdict;
- enum nft_trace_types type;
- bool packet_dumped;
- bool trace;
};
void nft_trace_init(struct nft_traceinfo *info, const struct nft_pktinfo *pkt,
@@ -1253,9 +1403,6 @@ void nft_trace_notify(struct nft_traceinfo *info);
#define MODULE_ALIAS_NFT_EXPR(name) \
MODULE_ALIAS("nft-expr-" name)
-#define MODULE_ALIAS_NFT_SET() \
- MODULE_ALIAS("nft-set")
-
#define MODULE_ALIAS_NFT_OBJ(type) \
MODULE_ALIAS("nft-obj-" __stringify(type))
@@ -1385,7 +1532,7 @@ struct nft_trans {
int msg_type;
bool put_net;
struct nft_ctx ctx;
- char data[0];
+ char data[];
};
struct nft_trans_rule {
@@ -1419,6 +1566,7 @@ struct nft_trans_chain {
char *name;
struct nft_stats __percpu *stats;
u8 policy;
+ u32 chain_id;
};
#define nft_trans_chain_update(trans) \
@@ -1429,16 +1577,15 @@ struct nft_trans_chain {
(((struct nft_trans_chain *)trans->data)->stats)
#define nft_trans_chain_policy(trans) \
(((struct nft_trans_chain *)trans->data)->policy)
+#define nft_trans_chain_id(trans) \
+ (((struct nft_trans_chain *)trans->data)->chain_id)
struct nft_trans_table {
bool update;
- bool enable;
};
#define nft_trans_table_update(trans) \
(((struct nft_trans_table *)trans->data)->update)
-#define nft_trans_table_enable(trans) \
- (((struct nft_trans_table *)trans->data)->enable)
struct nft_trans_elem {
struct nft_set *set;
@@ -1468,14 +1615,74 @@ struct nft_trans_obj {
struct nft_trans_flowtable {
struct nft_flowtable *flowtable;
+ bool update;
+ struct list_head hook_list;
+ u32 flags;
};
#define nft_trans_flowtable(trans) \
(((struct nft_trans_flowtable *)trans->data)->flowtable)
+#define nft_trans_flowtable_update(trans) \
+ (((struct nft_trans_flowtable *)trans->data)->update)
+#define nft_trans_flowtable_hooks(trans) \
+ (((struct nft_trans_flowtable *)trans->data)->hook_list)
+#define nft_trans_flowtable_flags(trans) \
+ (((struct nft_trans_flowtable *)trans->data)->flags)
int __init nft_chain_filter_init(void);
void nft_chain_filter_fini(void);
void __init nft_chain_route_init(void);
void nft_chain_route_fini(void);
+
+void nf_tables_trans_destroy_flush_work(void);
+
+int nf_msecs_to_jiffies64(const struct nlattr *nla, u64 *result);
+__be64 nf_jiffies64_to_msecs(u64 input);
+
+#ifdef CONFIG_MODULES
+__printf(2, 3) int nft_request_module(struct net *net, const char *fmt, ...);
+#else
+static inline int nft_request_module(struct net *net, const char *fmt, ...) { return -ENOENT; }
+#endif
+
+struct nftables_pernet {
+ struct list_head tables;
+ struct list_head commit_list;
+ struct list_head module_list;
+ struct list_head notify_list;
+ struct mutex commit_mutex;
+ u64 table_handle;
+ unsigned int base_seq;
+ u8 validate_state;
+};
+
+extern unsigned int nf_tables_net_id;
+
+static inline struct nftables_pernet *nft_pernet(const struct net *net)
+{
+ return net_generic(net, nf_tables_net_id);
+}
+
+#define __NFT_REDUCE_READONLY 1UL
+#define NFT_REDUCE_READONLY (void *)__NFT_REDUCE_READONLY
+
+static inline bool nft_reduce_is_readonly(const struct nft_expr *expr)
+{
+ return expr->ops->reduce == NFT_REDUCE_READONLY;
+}
+
+void nft_reg_track_update(struct nft_regs_track *track,
+ const struct nft_expr *expr, u8 dreg, u8 len);
+void nft_reg_track_cancel(struct nft_regs_track *track, u8 dreg, u8 len);
+void __nft_reg_track_cancel(struct nft_regs_track *track, u8 dreg);
+
+static inline bool nft_reg_track_cmp(struct nft_regs_track *track,
+ const struct nft_expr *expr, u8 dreg)
+{
+ return track->regs[dreg].selector &&
+ track->regs[dreg].selector->ops == expr->ops &&
+ track->regs[dreg].num_reg == 0;
+}
+
#endif /* _NET_NF_TABLES_H */
diff --git a/include/net/netfilter/nf_tables_core.h b/include/net/netfilter/nf_tables_core.h
index 29e7e1021267..1223af68cd9a 100644
--- a/include/net/netfilter/nf_tables_core.h
+++ b/include/net/netfilter/nf_tables_core.h
@@ -3,9 +3,11 @@
#define _NET_NF_TABLES_CORE_H
#include <net/netfilter/nf_tables.h>
+#include <linux/indirect_call_wrapper.h>
extern struct nft_expr_type nft_imm_type;
extern struct nft_expr_type nft_cmp_type;
+extern struct nft_expr_type nft_counter_type;
extern struct nft_expr_type nft_lookup_type;
extern struct nft_expr_type nft_bitwise_type;
extern struct nft_expr_type nft_byteorder_type;
@@ -15,50 +17,60 @@ extern struct nft_expr_type nft_range_type;
extern struct nft_expr_type nft_meta_type;
extern struct nft_expr_type nft_rt_type;
extern struct nft_expr_type nft_exthdr_type;
+extern struct nft_expr_type nft_last_type;
#ifdef CONFIG_NETWORK_SECMARK
extern struct nft_object_type nft_secmark_obj_type;
#endif
+extern struct nft_object_type nft_counter_obj_type;
int nf_tables_core_module_init(void);
void nf_tables_core_module_exit(void);
+struct nft_bitwise_fast_expr {
+ u32 mask;
+ u32 xor;
+ u8 sreg;
+ u8 dreg;
+};
+
struct nft_cmp_fast_expr {
u32 data;
- enum nft_registers sreg:8;
+ u32 mask;
+ u8 sreg;
+ u8 len;
+ bool inv;
+};
+
+struct nft_cmp16_fast_expr {
+ struct nft_data data;
+ struct nft_data mask;
+ u8 sreg;
u8 len;
+ bool inv;
};
struct nft_immediate_expr {
struct nft_data data;
- enum nft_registers dreg:8;
+ u8 dreg;
u8 dlen;
};
-/* Calculate the mask for the nft_cmp_fast expression. On big endian the
- * mask needs to include the *upper* bytes when interpreting that data as
- * something smaller than the full u32, therefore a cpu_to_le32 is done.
- */
-static inline u32 nft_cmp_fast_mask(unsigned int len)
-{
- return cpu_to_le32(~0U >> (sizeof_field(struct nft_cmp_fast_expr,
- data) * BITS_PER_BYTE - len));
-}
-
extern const struct nft_expr_ops nft_cmp_fast_ops;
+extern const struct nft_expr_ops nft_cmp16_fast_ops;
struct nft_payload {
enum nft_payload_bases base:8;
u8 offset;
u8 len;
- enum nft_registers dreg:8;
+ u8 dreg;
};
struct nft_payload_set {
enum nft_payload_bases base:8;
u8 offset;
u8 len;
- enum nft_registers sreg:8;
+ u8 sreg;
u8 csum_type;
u8 csum_offset;
u8 csum_flags;
@@ -66,15 +78,50 @@ struct nft_payload_set {
extern const struct nft_expr_ops nft_payload_fast_ops;
+extern const struct nft_expr_ops nft_bitwise_fast_ops;
+
extern struct static_key_false nft_counters_enabled;
extern struct static_key_false nft_trace_enabled;
-extern struct nft_set_type nft_set_rhash_type;
-extern struct nft_set_type nft_set_hash_type;
-extern struct nft_set_type nft_set_hash_fast_type;
-extern struct nft_set_type nft_set_rbtree_type;
-extern struct nft_set_type nft_set_bitmap_type;
-extern struct nft_set_type nft_set_pipapo_type;
+extern const struct nft_set_type nft_set_rhash_type;
+extern const struct nft_set_type nft_set_hash_type;
+extern const struct nft_set_type nft_set_hash_fast_type;
+extern const struct nft_set_type nft_set_rbtree_type;
+extern const struct nft_set_type nft_set_bitmap_type;
+extern const struct nft_set_type nft_set_pipapo_type;
+extern const struct nft_set_type nft_set_pipapo_avx2_type;
+
+#ifdef CONFIG_RETPOLINE
+bool nft_rhash_lookup(const struct net *net, const struct nft_set *set,
+ const u32 *key, const struct nft_set_ext **ext);
+bool nft_rbtree_lookup(const struct net *net, const struct nft_set *set,
+ const u32 *key, const struct nft_set_ext **ext);
+bool nft_bitmap_lookup(const struct net *net, const struct nft_set *set,
+ const u32 *key, const struct nft_set_ext **ext);
+bool nft_hash_lookup_fast(const struct net *net,
+ const struct nft_set *set,
+ const u32 *key, const struct nft_set_ext **ext);
+bool nft_hash_lookup(const struct net *net, const struct nft_set *set,
+ const u32 *key, const struct nft_set_ext **ext);
+bool nft_set_do_lookup(const struct net *net, const struct nft_set *set,
+ const u32 *key, const struct nft_set_ext **ext);
+#else
+static inline bool
+nft_set_do_lookup(const struct net *net, const struct nft_set *set,
+ const u32 *key, const struct nft_set_ext **ext)
+{
+ return set->ops->lookup(net, set, key, ext);
+}
+#endif
+
+/* called from nft_pipapo_avx2.c */
+bool nft_pipapo_lookup(const struct net *net, const struct nft_set *set,
+ const u32 *key, const struct nft_set_ext **ext);
+/* called from nft_set_pipapo.c */
+bool nft_pipapo_avx2_lookup(const struct net *net, const struct nft_set *set,
+ const u32 *key, const struct nft_set_ext **ext);
+
+void nft_counter_init_seqcount(void);
struct nft_expr;
struct nft_regs;
@@ -99,4 +146,6 @@ void nft_dynset_eval(const struct nft_expr *expr,
struct nft_regs *regs, const struct nft_pktinfo *pkt);
void nft_rt_get_eval(const struct nft_expr *expr,
struct nft_regs *regs, const struct nft_pktinfo *pkt);
+void nft_counter_eval(const struct nft_expr *expr, struct nft_regs *regs,
+ const struct nft_pktinfo *pkt);
#endif /* _NET_NF_TABLES_CORE_H */
diff --git a/include/net/netfilter/nf_tables_ipv4.h b/include/net/netfilter/nf_tables_ipv4.h
index ed7b511f0a59..c4a6147b0ef8 100644
--- a/include/net/netfilter/nf_tables_ipv4.h
+++ b/include/net/netfilter/nf_tables_ipv4.h
@@ -5,26 +5,24 @@
#include <net/netfilter/nf_tables.h>
#include <net/ip.h>
-static inline void nft_set_pktinfo_ipv4(struct nft_pktinfo *pkt,
- struct sk_buff *skb)
+static inline void nft_set_pktinfo_ipv4(struct nft_pktinfo *pkt)
{
struct iphdr *ip;
ip = ip_hdr(pkt->skb);
- pkt->tprot_set = true;
+ pkt->flags = NFT_PKTINFO_L4PROTO;
pkt->tprot = ip->protocol;
- pkt->xt.thoff = ip_hdrlen(pkt->skb);
- pkt->xt.fragoff = ntohs(ip->frag_off) & IP_OFFSET;
+ pkt->thoff = ip_hdrlen(pkt->skb);
+ pkt->fragoff = ntohs(ip->frag_off) & IP_OFFSET;
}
-static inline int __nft_set_pktinfo_ipv4_validate(struct nft_pktinfo *pkt,
- struct sk_buff *skb)
+static inline int __nft_set_pktinfo_ipv4_validate(struct nft_pktinfo *pkt)
{
struct iphdr *iph, _iph;
u32 len, thoff;
- iph = skb_header_pointer(skb, skb_network_offset(skb), sizeof(*iph),
- &_iph);
+ iph = skb_header_pointer(pkt->skb, skb_network_offset(pkt->skb),
+ sizeof(*iph), &_iph);
if (!iph)
return -1;
@@ -33,24 +31,56 @@ static inline int __nft_set_pktinfo_ipv4_validate(struct nft_pktinfo *pkt,
len = ntohs(iph->tot_len);
thoff = iph->ihl * 4;
- if (skb->len < len)
+ if (pkt->skb->len < len)
return -1;
else if (len < thoff)
return -1;
- pkt->tprot_set = true;
+ pkt->flags = NFT_PKTINFO_L4PROTO;
pkt->tprot = iph->protocol;
- pkt->xt.thoff = thoff;
- pkt->xt.fragoff = ntohs(iph->frag_off) & IP_OFFSET;
+ pkt->thoff = thoff;
+ pkt->fragoff = ntohs(iph->frag_off) & IP_OFFSET;
return 0;
}
-static inline void nft_set_pktinfo_ipv4_validate(struct nft_pktinfo *pkt,
- struct sk_buff *skb)
+static inline void nft_set_pktinfo_ipv4_validate(struct nft_pktinfo *pkt)
{
- if (__nft_set_pktinfo_ipv4_validate(pkt, skb) < 0)
- nft_set_pktinfo_unspec(pkt, skb);
+ if (__nft_set_pktinfo_ipv4_validate(pkt) < 0)
+ nft_set_pktinfo_unspec(pkt);
+}
+
+static inline int nft_set_pktinfo_ipv4_ingress(struct nft_pktinfo *pkt)
+{
+ struct iphdr *iph;
+ u32 len, thoff;
+
+ if (!pskb_may_pull(pkt->skb, sizeof(*iph)))
+ return -1;
+
+ iph = ip_hdr(pkt->skb);
+ if (iph->ihl < 5 || iph->version != 4)
+ goto inhdr_error;
+
+ len = ntohs(iph->tot_len);
+ thoff = iph->ihl * 4;
+ if (pkt->skb->len < len) {
+ __IP_INC_STATS(nft_net(pkt), IPSTATS_MIB_INTRUNCATEDPKTS);
+ return -1;
+ } else if (len < thoff) {
+ goto inhdr_error;
+ }
+
+ pkt->flags = NFT_PKTINFO_L4PROTO;
+ pkt->tprot = iph->protocol;
+ pkt->thoff = thoff;
+ pkt->fragoff = ntohs(iph->frag_off) & IP_OFFSET;
+
+ return 0;
+
+inhdr_error:
+ __IP_INC_STATS(nft_net(pkt), IPSTATS_MIB_INHDRERRORS);
+ return -1;
}
#endif
diff --git a/include/net/netfilter/nf_tables_ipv6.h b/include/net/netfilter/nf_tables_ipv6.h
index d0f1c537b017..ec7eaeaf4f04 100644
--- a/include/net/netfilter/nf_tables_ipv6.h
+++ b/include/net/netfilter/nf_tables_ipv6.h
@@ -6,8 +6,7 @@
#include <net/ipv6.h>
#include <net/netfilter/nf_tables.h>
-static inline void nft_set_pktinfo_ipv6(struct nft_pktinfo *pkt,
- struct sk_buff *skb)
+static inline void nft_set_pktinfo_ipv6(struct nft_pktinfo *pkt)
{
unsigned int flags = IP6_FH_F_AUTH;
int protohdr, thoff = 0;
@@ -15,18 +14,17 @@ static inline void nft_set_pktinfo_ipv6(struct nft_pktinfo *pkt,
protohdr = ipv6_find_hdr(pkt->skb, &thoff, -1, &frag_off, &flags);
if (protohdr < 0) {
- nft_set_pktinfo_unspec(pkt, skb);
+ nft_set_pktinfo_unspec(pkt);
return;
}
- pkt->tprot_set = true;
+ pkt->flags = NFT_PKTINFO_L4PROTO;
pkt->tprot = protohdr;
- pkt->xt.thoff = thoff;
- pkt->xt.fragoff = frag_off;
+ pkt->thoff = thoff;
+ pkt->fragoff = frag_off;
}
-static inline int __nft_set_pktinfo_ipv6_validate(struct nft_pktinfo *pkt,
- struct sk_buff *skb)
+static inline int __nft_set_pktinfo_ipv6_validate(struct nft_pktinfo *pkt)
{
#if IS_ENABLED(CONFIG_IPV6)
unsigned int flags = IP6_FH_F_AUTH;
@@ -36,8 +34,8 @@ static inline int __nft_set_pktinfo_ipv6_validate(struct nft_pktinfo *pkt,
int protohdr;
u32 pkt_len;
- ip6h = skb_header_pointer(skb, skb_network_offset(skb), sizeof(*ip6h),
- &_ip6h);
+ ip6h = skb_header_pointer(pkt->skb, skb_network_offset(pkt->skb),
+ sizeof(*ip6h), &_ip6h);
if (!ip6h)
return -1;
@@ -45,17 +43,17 @@ static inline int __nft_set_pktinfo_ipv6_validate(struct nft_pktinfo *pkt,
return -1;
pkt_len = ntohs(ip6h->payload_len);
- if (pkt_len + sizeof(*ip6h) > skb->len)
+ if (pkt_len + sizeof(*ip6h) > pkt->skb->len)
return -1;
protohdr = ipv6_find_hdr(pkt->skb, &thoff, -1, &frag_off, &flags);
if (protohdr < 0)
return -1;
- pkt->tprot_set = true;
+ pkt->flags = NFT_PKTINFO_L4PROTO;
pkt->tprot = protohdr;
- pkt->xt.thoff = thoff;
- pkt->xt.fragoff = frag_off;
+ pkt->thoff = thoff;
+ pkt->fragoff = frag_off;
return 0;
#else
@@ -63,11 +61,55 @@ static inline int __nft_set_pktinfo_ipv6_validate(struct nft_pktinfo *pkt,
#endif
}
-static inline void nft_set_pktinfo_ipv6_validate(struct nft_pktinfo *pkt,
- struct sk_buff *skb)
+static inline void nft_set_pktinfo_ipv6_validate(struct nft_pktinfo *pkt)
{
- if (__nft_set_pktinfo_ipv6_validate(pkt, skb) < 0)
- nft_set_pktinfo_unspec(pkt, skb);
+ if (__nft_set_pktinfo_ipv6_validate(pkt) < 0)
+ nft_set_pktinfo_unspec(pkt);
+}
+
+static inline int nft_set_pktinfo_ipv6_ingress(struct nft_pktinfo *pkt)
+{
+#if IS_ENABLED(CONFIG_IPV6)
+ unsigned int flags = IP6_FH_F_AUTH;
+ unsigned short frag_off;
+ unsigned int thoff = 0;
+ struct inet6_dev *idev;
+ struct ipv6hdr *ip6h;
+ int protohdr;
+ u32 pkt_len;
+
+ if (!pskb_may_pull(pkt->skb, sizeof(*ip6h)))
+ return -1;
+
+ ip6h = ipv6_hdr(pkt->skb);
+ if (ip6h->version != 6)
+ goto inhdr_error;
+
+ pkt_len = ntohs(ip6h->payload_len);
+ if (pkt_len + sizeof(*ip6h) > pkt->skb->len) {
+ idev = __in6_dev_get(nft_in(pkt));
+ __IP6_INC_STATS(nft_net(pkt), idev, IPSTATS_MIB_INTRUNCATEDPKTS);
+ return -1;
+ }
+
+ protohdr = ipv6_find_hdr(pkt->skb, &thoff, -1, &frag_off, &flags);
+ if (protohdr < 0)
+ goto inhdr_error;
+
+ pkt->flags = NFT_PKTINFO_L4PROTO;
+ pkt->tprot = protohdr;
+ pkt->thoff = thoff;
+ pkt->fragoff = frag_off;
+
+ return 0;
+
+inhdr_error:
+ idev = __in6_dev_get(nft_in(pkt));
+ __IP6_INC_STATS(nft_net(pkt), idev, IPSTATS_MIB_INHDRERRORS);
+ return -1;
+#else
+ return -1;
+#endif
}
#endif
diff --git a/include/net/netfilter/nf_tables_offload.h b/include/net/netfilter/nf_tables_offload.h
index ea7d1d78b92d..3568b6a2f5f0 100644
--- a/include/net/netfilter/nf_tables_offload.h
+++ b/include/net/netfilter/nf_tables_offload.h
@@ -4,11 +4,16 @@
#include <net/flow_offload.h>
#include <net/netfilter/nf_tables.h>
+enum nft_offload_reg_flags {
+ NFT_OFFLOAD_F_NETWORK2HOST = (1 << 0),
+};
+
struct nft_offload_reg {
u32 key;
u32 len;
u32 base_offset;
u32 offset;
+ u32 flags;
struct nft_data data;
struct nft_data mask;
};
@@ -37,6 +42,7 @@ void nft_offload_update_dependency(struct nft_offload_ctx *ctx,
struct nft_flow_key {
struct flow_dissector_key_basic basic;
+ struct flow_dissector_key_control control;
union {
struct flow_dissector_key_ipv4_addrs ipv4;
struct flow_dissector_key_ipv6_addrs ipv6;
@@ -44,6 +50,7 @@ struct nft_flow_key {
struct flow_dissector_key_ports tp;
struct flow_dissector_key_ip ip;
struct flow_dissector_key_vlan vlan;
+ struct flow_dissector_key_vlan cvlan;
struct flow_dissector_key_eth_addrs eth_addrs;
struct flow_dissector_key_meta meta;
} __aligned(BITS_PER_LONG / 8); /* Ensure that we can do comparisons as longs. */
@@ -60,23 +67,32 @@ struct nft_flow_rule {
struct flow_rule *rule;
};
-#define NFT_OFFLOAD_F_ACTION (1 << 0)
+void nft_flow_rule_set_addr_type(struct nft_flow_rule *flow,
+ enum flow_dissector_key_id addr_type);
struct nft_rule;
struct nft_flow_rule *nft_flow_rule_create(struct net *net, const struct nft_rule *rule);
+int nft_flow_rule_stats(const struct nft_chain *chain, const struct nft_rule *rule);
void nft_flow_rule_destroy(struct nft_flow_rule *flow);
int nft_flow_rule_offload_commit(struct net *net);
-#define NFT_OFFLOAD_MATCH(__key, __base, __field, __len, __reg) \
+#define NFT_OFFLOAD_MATCH_FLAGS(__key, __base, __field, __len, __reg, __flags) \
(__reg)->base_offset = \
offsetof(struct nft_flow_key, __base); \
(__reg)->offset = \
offsetof(struct nft_flow_key, __base.__field); \
(__reg)->len = __len; \
(__reg)->key = __key; \
+ (__reg)->flags = __flags;
+
+#define NFT_OFFLOAD_MATCH(__key, __base, __field, __len, __reg) \
+ NFT_OFFLOAD_MATCH_FLAGS(__key, __base, __field, __len, __reg, 0)
+
+#define NFT_OFFLOAD_MATCH_EXACT(__key, __base, __field, __len, __reg) \
+ NFT_OFFLOAD_MATCH(__key, __base, __field, __len, __reg) \
memset(&(__reg)->mask, 0xff, (__reg)->len);
-int nft_chain_offload_priority(struct nft_base_chain *basechain);
+bool nft_chain_offload_support(const struct nft_base_chain *basechain);
int nft_offload_init(void);
void nft_offload_exit(void);
diff --git a/include/net/netfilter/nft_fib.h b/include/net/netfilter/nft_fib.h
index 628b6fa579cd..eed099eae672 100644
--- a/include/net/netfilter/nft_fib.h
+++ b/include/net/netfilter/nft_fib.h
@@ -5,7 +5,7 @@
#include <net/netfilter/nf_tables.h>
struct nft_fib {
- enum nft_registers dreg:8;
+ u8 dreg;
u8 result;
u32 flags;
};
@@ -37,4 +37,7 @@ void nft_fib6_eval(const struct nft_expr *expr, struct nft_regs *regs,
void nft_fib_store_result(void *reg, const struct nft_fib *priv,
const struct net_device *dev);
+
+bool nft_fib_reduce(struct nft_regs_track *track,
+ const struct nft_expr *expr);
#endif
diff --git a/include/net/netfilter/nft_meta.h b/include/net/netfilter/nft_meta.h
index 07e2fd507963..9b51cc67de54 100644
--- a/include/net/netfilter/nft_meta.h
+++ b/include/net/netfilter/nft_meta.h
@@ -6,9 +6,10 @@
struct nft_meta {
enum nft_meta_keys key:8;
+ u8 len;
union {
- enum nft_registers dreg:8;
- enum nft_registers sreg:8;
+ u8 dreg;
+ u8 sreg;
};
};
@@ -43,4 +44,6 @@ int nft_meta_set_validate(const struct nft_ctx *ctx,
const struct nft_expr *expr,
const struct nft_data **data);
+bool nft_meta_get_reduce(struct nft_regs_track *track,
+ const struct nft_expr *expr);
#endif
diff --git a/include/net/netfilter/xt_rateest.h b/include/net/netfilter/xt_rateest.h
index 832ab69efda5..4c3809e141f4 100644
--- a/include/net/netfilter/xt_rateest.h
+++ b/include/net/netfilter/xt_rateest.h
@@ -6,7 +6,7 @@
struct xt_rateest {
/* keep lock and bstats on same cache line to speedup xt_rateest_tg() */
- struct gnet_stats_basic_packed bstats;
+ struct gnet_stats_basic_sync bstats;
spinlock_t lock;
diff --git a/include/net/netlink.h b/include/net/netlink.h
index 56c365dc6dc7..6bfa972f2fbf 100644
--- a/include/net/netlink.h
+++ b/include/net/netlink.h
@@ -142,7 +142,7 @@
* Attribute Misc:
* nla_memcpy(dest, nla, count) copy attribute into memory
* nla_memcmp(nla, data, size) compare attribute with memory area
- * nla_strlcpy(dst, nla, size) copy attribute to a sized string
+ * nla_strscpy(dst, nla, size) copy attribute to a sized string
* nla_strcmp(nla, str) compare attribute with string
*
* Attribute Parsing:
@@ -181,19 +181,29 @@ enum {
NLA_S64,
NLA_BITFIELD32,
NLA_REJECT,
- NLA_EXACT_LEN,
- NLA_EXACT_LEN_WARN,
- NLA_MIN_LEN,
+ NLA_BE16,
+ NLA_BE32,
__NLA_TYPE_MAX,
};
#define NLA_TYPE_MAX (__NLA_TYPE_MAX - 1)
+struct netlink_range_validation {
+ u64 min, max;
+};
+
+struct netlink_range_validation_signed {
+ s64 min, max;
+};
+
enum nla_policy_validation {
NLA_VALIDATE_NONE,
NLA_VALIDATE_RANGE,
+ NLA_VALIDATE_RANGE_WARN_TOO_LONG,
NLA_VALIDATE_MIN,
NLA_VALIDATE_MAX,
+ NLA_VALIDATE_MASK,
+ NLA_VALIDATE_RANGE_PTR,
NLA_VALIDATE_FUNCTION,
};
@@ -213,79 +223,98 @@ enum nla_policy_validation {
* NLA_NUL_STRING Maximum length of string (excluding NUL)
* NLA_FLAG Unused
* NLA_BINARY Maximum length of attribute payload
- * NLA_MIN_LEN Minimum length of attribute payload
+ * (but see also below with the validation type)
* NLA_NESTED,
* NLA_NESTED_ARRAY Length verification is done by checking len of
* nested header (or empty); len field is used if
- * validation_data is also used, for the max attr
+ * nested_policy is also used, for the max attr
* number in the nested policy.
* NLA_U8, NLA_U16,
* NLA_U32, NLA_U64,
* NLA_S8, NLA_S16,
* NLA_S32, NLA_S64,
+ * NLA_BE16, NLA_BE32,
* NLA_MSECS Leaving the length field zero will verify the
* given type fits, using it verifies minimum length
* just like "All other"
* NLA_BITFIELD32 Unused
* NLA_REJECT Unused
- * NLA_EXACT_LEN Attribute must have exactly this length, otherwise
- * it is rejected.
- * NLA_EXACT_LEN_WARN Attribute should have exactly this length, a warning
- * is logged if it is longer, shorter is rejected.
- * NLA_MIN_LEN Minimum length of attribute payload
* All other Minimum length of attribute payload
*
- * Meaning of `validation_data' field:
+ * Meaning of validation union:
* NLA_BITFIELD32 This is a 32-bit bitmap/bitselector attribute and
- * validation data must point to a u32 value of valid
- * flags
- * NLA_REJECT This attribute is always rejected and validation data
+ * `bitfield32_valid' is the u32 value of valid flags
+ * NLA_REJECT This attribute is always rejected and `reject_message'
* may point to a string to report as the error instead
* of the generic one in extended ACK.
- * NLA_NESTED Points to a nested policy to validate, must also set
- * `len' to the max attribute number.
+ * NLA_NESTED `nested_policy' to a nested policy to validate, must
+ * also set `len' to the max attribute number. Use the
+ * provided NLA_POLICY_NESTED() macro.
* Note that nla_parse() will validate, but of course not
* parse, the nested sub-policies.
- * NLA_NESTED_ARRAY Points to a nested policy to validate, must also set
- * `len' to the max attribute number. The difference to
- * NLA_NESTED is the structure - NLA_NESTED has the
- * nested attributes directly inside, while an array has
- * the nested attributes at another level down and the
- * attributes directly in the nesting don't matter.
- * All other Unused - but note that it's a union
- *
- * Meaning of `min' and `max' fields, use via NLA_POLICY_MIN, NLA_POLICY_MAX
- * and NLA_POLICY_RANGE:
+ * NLA_NESTED_ARRAY `nested_policy' points to a nested policy to validate,
+ * must also set `len' to the max attribute number. Use
+ * the provided NLA_POLICY_NESTED_ARRAY() macro.
+ * The difference to NLA_NESTED is the structure:
+ * NLA_NESTED has the nested attributes directly inside
+ * while an array has the nested attributes at another
+ * level down and the attribute types directly in the
+ * nesting don't matter.
* NLA_U8,
* NLA_U16,
* NLA_U32,
* NLA_U64,
+ * NLA_BE16,
+ * NLA_BE32,
* NLA_S8,
* NLA_S16,
* NLA_S32,
- * NLA_S64 These are used depending on the validation_type
- * field, if that is min/max/range then the minimum,
- * maximum and both are used (respectively) to check
+ * NLA_S64 The `min' and `max' fields are used depending on the
+ * validation_type field, if that is min/max/range then
+ * the min, max or both are used (respectively) to check
* the value of the integer attribute.
* Note that in the interest of code simplicity and
* struct size both limits are s16, so you cannot
* enforce a range that doesn't fall within the range
* of s16 - do that as usual in the code instead.
+ * Use the NLA_POLICY_MIN(), NLA_POLICY_MAX() and
+ * NLA_POLICY_RANGE() macros.
+ * NLA_U8,
+ * NLA_U16,
+ * NLA_U32,
+ * NLA_U64 If the validation_type field instead is set to
+ * NLA_VALIDATE_RANGE_PTR, `range' must be a pointer
+ * to a struct netlink_range_validation that indicates
+ * the min/max values.
+ * Use NLA_POLICY_FULL_RANGE().
+ * NLA_S8,
+ * NLA_S16,
+ * NLA_S32,
+ * NLA_S64 If the validation_type field instead is set to
+ * NLA_VALIDATE_RANGE_PTR, `range_signed' must be a
+ * pointer to a struct netlink_range_validation_signed
+ * that indicates the min/max values.
+ * Use NLA_POLICY_FULL_RANGE_SIGNED().
+ *
+ * NLA_BINARY If the validation type is like the ones for integers
+ * above, then the min/max length (not value like for
+ * integers) of the attribute is enforced.
+ *
* All other Unused - but note that it's a union
*
* Meaning of `validate' field, use via NLA_POLICY_VALIDATE_FN:
- * NLA_BINARY Validation function called for the attribute,
- * not compatible with use of the validation_data
- * as in NLA_BITFIELD32, NLA_REJECT, NLA_NESTED and
- * NLA_NESTED_ARRAY.
+ * NLA_BINARY Validation function called for the attribute.
* All other Unused - but note that it's a union
*
* Example:
+ *
+ * static const u32 myvalidflags = 0xff231023;
+ *
* static const struct nla_policy my_policy[ATTR_MAX+1] = {
* [ATTR_FOO] = { .type = NLA_U16 },
* [ATTR_BAR] = { .type = NLA_STRING, .len = BARSIZ },
- * [ATTR_BAZ] = { .type = NLA_EXACT_LEN, .len = sizeof(struct mystruct) },
- * [ATTR_GOO] = { .type = NLA_BITFIELD32, .validation_data = &myvalidflags },
+ * [ATTR_BAZ] = NLA_POLICY_EXACT_LEN(sizeof(struct mystruct)),
+ * [ATTR_GOO] = NLA_POLICY_BITFIELD32(myvalidflags),
* };
*/
struct nla_policy {
@@ -293,22 +322,20 @@ struct nla_policy {
u8 validation_type;
u16 len;
union {
- const void *validation_data;
- struct {
- s16 min, max;
- };
- int (*validate)(const struct nlattr *attr,
- struct netlink_ext_ack *extack);
- /* This entry is special, and used for the attribute at index 0
+ /**
+ * @strict_start_type: first attribute to validate strictly
+ *
+ * This entry is special, and used for the attribute at index 0
* only, and specifies special data about the policy, namely it
* specifies the "boundary type" where strict length validation
* starts for any attribute types >= this value, also, strict
* nesting validation starts here.
*
* Additionally, it means that NLA_UNSPEC is actually NLA_REJECT
- * for any types >= this, so need to use NLA_MIN_LEN to get the
- * previous pure { .len = xyz } behaviour. The advantage of this
- * is that types not specified in the policy will be rejected.
+ * for any types >= this, so need to use NLA_POLICY_MIN_LEN() to
+ * get the previous pure { .len = xyz } behaviour. The advantage
+ * of this is that types not specified in the policy will be
+ * rejected.
*
* For completely new families it should be set to 1 so that the
* validation is enforced for all attributes. For existing ones
@@ -317,57 +344,103 @@ struct nla_policy {
* was added to enforce strict validation from thereon.
*/
u16 strict_start_type;
+
+ /* private: use NLA_POLICY_*() to set */
+ const u32 bitfield32_valid;
+ const u32 mask;
+ const char *reject_message;
+ const struct nla_policy *nested_policy;
+ struct netlink_range_validation *range;
+ struct netlink_range_validation_signed *range_signed;
+ struct {
+ s16 min, max;
+ };
+ int (*validate)(const struct nlattr *attr,
+ struct netlink_ext_ack *extack);
};
};
-#define NLA_POLICY_EXACT_LEN(_len) { .type = NLA_EXACT_LEN, .len = _len }
-#define NLA_POLICY_EXACT_LEN_WARN(_len) { .type = NLA_EXACT_LEN_WARN, \
- .len = _len }
-#define NLA_POLICY_MIN_LEN(_len) { .type = NLA_MIN_LEN, .len = _len }
-
#define NLA_POLICY_ETH_ADDR NLA_POLICY_EXACT_LEN(ETH_ALEN)
#define NLA_POLICY_ETH_ADDR_COMPAT NLA_POLICY_EXACT_LEN_WARN(ETH_ALEN)
#define _NLA_POLICY_NESTED(maxattr, policy) \
- { .type = NLA_NESTED, .validation_data = policy, .len = maxattr }
+ { .type = NLA_NESTED, .nested_policy = policy, .len = maxattr }
#define _NLA_POLICY_NESTED_ARRAY(maxattr, policy) \
- { .type = NLA_NESTED_ARRAY, .validation_data = policy, .len = maxattr }
+ { .type = NLA_NESTED_ARRAY, .nested_policy = policy, .len = maxattr }
#define NLA_POLICY_NESTED(policy) \
_NLA_POLICY_NESTED(ARRAY_SIZE(policy) - 1, policy)
#define NLA_POLICY_NESTED_ARRAY(policy) \
_NLA_POLICY_NESTED_ARRAY(ARRAY_SIZE(policy) - 1, policy)
+#define NLA_POLICY_BITFIELD32(valid) \
+ { .type = NLA_BITFIELD32, .bitfield32_valid = valid }
+
+#define __NLA_IS_UINT_TYPE(tp) \
+ (tp == NLA_U8 || tp == NLA_U16 || tp == NLA_U32 || tp == NLA_U64)
+#define __NLA_IS_SINT_TYPE(tp) \
+ (tp == NLA_S8 || tp == NLA_S16 || tp == NLA_S32 || tp == NLA_S64)
+#define __NLA_IS_BEINT_TYPE(tp) \
+ (tp == NLA_BE16 || tp == NLA_BE32)
#define __NLA_ENSURE(condition) BUILD_BUG_ON_ZERO(!(condition))
-#define NLA_ENSURE_INT_TYPE(tp) \
- (__NLA_ENSURE(tp == NLA_S8 || tp == NLA_U8 || \
- tp == NLA_S16 || tp == NLA_U16 || \
- tp == NLA_S32 || tp == NLA_U32 || \
- tp == NLA_S64 || tp == NLA_U64) + tp)
+#define NLA_ENSURE_UINT_TYPE(tp) \
+ (__NLA_ENSURE(__NLA_IS_UINT_TYPE(tp)) + tp)
+#define NLA_ENSURE_UINT_OR_BINARY_TYPE(tp) \
+ (__NLA_ENSURE(__NLA_IS_UINT_TYPE(tp) || \
+ tp == NLA_MSECS || \
+ tp == NLA_BINARY) + tp)
+#define NLA_ENSURE_SINT_TYPE(tp) \
+ (__NLA_ENSURE(__NLA_IS_SINT_TYPE(tp)) + tp)
+#define NLA_ENSURE_INT_OR_BINARY_TYPE(tp) \
+ (__NLA_ENSURE(__NLA_IS_UINT_TYPE(tp) || \
+ __NLA_IS_SINT_TYPE(tp) || \
+ __NLA_IS_BEINT_TYPE(tp) || \
+ tp == NLA_MSECS || \
+ tp == NLA_BINARY) + tp)
#define NLA_ENSURE_NO_VALIDATION_PTR(tp) \
(__NLA_ENSURE(tp != NLA_BITFIELD32 && \
tp != NLA_REJECT && \
tp != NLA_NESTED && \
tp != NLA_NESTED_ARRAY) + tp)
+#define NLA_ENSURE_BEINT_TYPE(tp) \
+ (__NLA_ENSURE(__NLA_IS_BEINT_TYPE(tp)) + tp)
#define NLA_POLICY_RANGE(tp, _min, _max) { \
- .type = NLA_ENSURE_INT_TYPE(tp), \
+ .type = NLA_ENSURE_INT_OR_BINARY_TYPE(tp), \
.validation_type = NLA_VALIDATE_RANGE, \
.min = _min, \
.max = _max \
}
+#define NLA_POLICY_FULL_RANGE(tp, _range) { \
+ .type = NLA_ENSURE_UINT_OR_BINARY_TYPE(tp), \
+ .validation_type = NLA_VALIDATE_RANGE_PTR, \
+ .range = _range, \
+}
+
+#define NLA_POLICY_FULL_RANGE_SIGNED(tp, _range) { \
+ .type = NLA_ENSURE_SINT_TYPE(tp), \
+ .validation_type = NLA_VALIDATE_RANGE_PTR, \
+ .range_signed = _range, \
+}
+
#define NLA_POLICY_MIN(tp, _min) { \
- .type = NLA_ENSURE_INT_TYPE(tp), \
+ .type = NLA_ENSURE_INT_OR_BINARY_TYPE(tp), \
.validation_type = NLA_VALIDATE_MIN, \
.min = _min, \
}
#define NLA_POLICY_MAX(tp, _max) { \
- .type = NLA_ENSURE_INT_TYPE(tp), \
+ .type = NLA_ENSURE_INT_OR_BINARY_TYPE(tp), \
.validation_type = NLA_VALIDATE_MAX, \
.max = _max, \
}
+#define NLA_POLICY_MASK(tp, _mask) { \
+ .type = NLA_ENSURE_UINT_TYPE(tp), \
+ .validation_type = NLA_VALIDATE_MASK, \
+ .mask = _mask, \
+}
+
#define NLA_POLICY_VALIDATE_FN(tp, fn, ...) { \
.type = NLA_ENSURE_NO_VALIDATION_PTR(tp), \
.validation_type = NLA_VALIDATE_FUNCTION, \
@@ -375,6 +448,15 @@ struct nla_policy {
.len = __VA_ARGS__ + 0, \
}
+#define NLA_POLICY_EXACT_LEN(_len) NLA_POLICY_RANGE(NLA_BINARY, _len, _len)
+#define NLA_POLICY_EXACT_LEN_WARN(_len) { \
+ .type = NLA_BINARY, \
+ .validation_type = NLA_VALIDATE_RANGE_WARN_TOO_LONG, \
+ .min = _len, \
+ .max = _len \
+}
+#define NLA_POLICY_MIN_LEN(_len) NLA_POLICY_MIN(NLA_BINARY, _len)
+
/**
* struct nl_info - netlink source information
* @nlh: Netlink message header of original request
@@ -439,7 +521,7 @@ int __nla_parse(struct nlattr **tb, int maxtype, const struct nlattr *head,
struct netlink_ext_ack *extack);
int nla_policy_len(const struct nla_policy *, int);
struct nlattr *nla_find(const struct nlattr *head, int len, int attrtype);
-size_t nla_strlcpy(char *dst, const struct nlattr *nla, size_t dstsize);
+ssize_t nla_strscpy(char *dst, const struct nlattr *nla, size_t dstsize);
char *nla_strdup(const struct nlattr *nla, gfp_t flags);
int nla_memcpy(void *dest, const struct nlattr *src, int count);
int nla_memcmp(const struct nlattr *nla, const void *data, size_t size);
@@ -674,7 +756,7 @@ static inline int __nlmsg_parse(const struct nlmsghdr *nlh, int hdrlen,
* @hdrlen: length of family specific header
* @tb: destination array with maxtype+1 elements
* @maxtype: maximum attribute type to be expected
- * @validate: validation strictness
+ * @policy: validation policy
* @extack: extended ACK report struct
*
* See nla_parse()
@@ -694,6 +776,7 @@ static inline int nlmsg_parse(const struct nlmsghdr *nlh, int hdrlen,
* @hdrlen: length of family specific header
* @tb: destination array with maxtype+1 elements
* @maxtype: maximum attribute type to be expected
+ * @policy: validation policy
* @extack: extended ACK report struct
*
* See nla_parse_deprecated()
@@ -713,6 +796,7 @@ static inline int nlmsg_parse_deprecated(const struct nlmsghdr *nlh, int hdrlen,
* @hdrlen: length of family specific header
* @tb: destination array with maxtype+1 elements
* @maxtype: maximum attribute type to be expected
+ * @policy: validation policy
* @extack: extended ACK report struct
*
* See nla_parse_deprecated_strict()
@@ -748,7 +832,6 @@ static inline struct nlattr *nlmsg_find_attr(const struct nlmsghdr *nlh,
* @len: length of attribute stream
* @maxtype: maximum attribute type to be expected
* @policy: validation policy
- * @validate: validation strictness
* @extack: extended ACK report struct
*
* Validates all attributes in the specified attribute stream against the
@@ -772,7 +855,6 @@ static inline int nla_validate_deprecated(const struct nlattr *head, int len,
* @len: length of attribute stream
* @maxtype: maximum attribute type to be expected
* @policy: validation policy
- * @validate: validation strictness
* @extack: extended ACK report struct
*
* Validates all attributes in the specified attribute stream against the
@@ -820,7 +902,7 @@ static inline int nlmsg_validate_deprecated(const struct nlmsghdr *nlh,
*/
static inline int nlmsg_report(const struct nlmsghdr *nlh)
{
- return !!(nlh->nlmsg_flags & NLM_F_ECHO);
+ return nlh ? !!(nlh->nlmsg_flags & NLM_F_ECHO) : 0;
}
/**
@@ -1466,6 +1548,21 @@ static inline int nla_put_in6_addr(struct sk_buff *skb, int attrtype,
}
/**
+ * nla_put_bitfield32 - Add a bitfield32 netlink attribute to a socket buffer
+ * @skb: socket buffer to add attribute to
+ * @attrtype: attribute type
+ * @value: value carrying bits
+ * @selector: selector of valid bits
+ */
+static inline int nla_put_bitfield32(struct sk_buff *skb, int attrtype,
+ __u32 value, __u32 selector)
+{
+ struct nla_bitfield32 tmp = { value, selector, };
+
+ return nla_put(skb, attrtype, sizeof(tmp), &tmp);
+}
+
+/**
* nla_get_u32 - return payload of u32 attribute
* @nla: u32 netlink attribute
*/
@@ -1861,4 +1958,26 @@ static inline bool nla_is_last(const struct nlattr *nla, int rem)
return nla->nla_len == rem;
}
+void nla_get_range_unsigned(const struct nla_policy *pt,
+ struct netlink_range_validation *range);
+void nla_get_range_signed(const struct nla_policy *pt,
+ struct netlink_range_validation_signed *range);
+
+struct netlink_policy_dump_state;
+
+int netlink_policy_dump_add_policy(struct netlink_policy_dump_state **pstate,
+ const struct nla_policy *policy,
+ unsigned int maxtype);
+int netlink_policy_dump_get_policy_idx(struct netlink_policy_dump_state *state,
+ const struct nla_policy *policy,
+ unsigned int maxtype);
+bool netlink_policy_dump_loop(struct netlink_policy_dump_state *state);
+int netlink_policy_dump_write(struct sk_buff *skb,
+ struct netlink_policy_dump_state *state);
+int netlink_policy_dump_attr_size_estimate(const struct nla_policy *pt);
+int netlink_policy_dump_write_attr(struct sk_buff *skb,
+ const struct nla_policy *pt,
+ int nestattr);
+void netlink_policy_dump_free(struct netlink_policy_dump_state *state);
+
#endif
diff --git a/include/net/netns/bpf.h b/include/net/netns/bpf.h
new file mode 100644
index 000000000000..2c01a278d1eb
--- /dev/null
+++ b/include/net/netns/bpf.h
@@ -0,0 +1,28 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * BPF programs attached to network namespace
+ */
+
+#ifndef __NETNS_BPF_H__
+#define __NETNS_BPF_H__
+
+#include <linux/list.h>
+
+struct bpf_prog;
+struct bpf_prog_array;
+
+enum netns_bpf_attach_type {
+ NETNS_BPF_INVALID = -1,
+ NETNS_BPF_FLOW_DISSECTOR = 0,
+ NETNS_BPF_SK_LOOKUP,
+ MAX_NETNS_BPF_ATTACH_TYPE
+};
+
+struct netns_bpf {
+ /* Array of programs to run compiled from progs or links */
+ struct bpf_prog_array __rcu *run_array[MAX_NETNS_BPF_ATTACH_TYPE];
+ struct bpf_prog *progs[MAX_NETNS_BPF_ATTACH_TYPE];
+ struct list_head links[MAX_NETNS_BPF_ATTACH_TYPE];
+};
+
+#endif /* __NETNS_BPF_H__ */
diff --git a/include/net/netns/can.h b/include/net/netns/can.h
index b6ab7d1530d7..48b79f7e6236 100644
--- a/include/net/netns/can.h
+++ b/include/net/netns/can.h
@@ -7,6 +7,7 @@
#define __NETNS_CAN_H__
#include <linux/spinlock.h>
+#include <linux/timer.h>
struct can_dev_rcv_lists;
struct can_pkg_stats;
@@ -15,7 +16,6 @@ struct can_rcv_lists_stats;
struct netns_can {
#if IS_ENABLED(CONFIG_PROC_FS)
struct proc_dir_entry *proc_dir;
- struct proc_dir_entry *pde_version;
struct proc_dir_entry *pde_stats;
struct proc_dir_entry *pde_reset_stats;
struct proc_dir_entry *pde_rcvlist_all;
diff --git a/include/net/netns/conntrack.h b/include/net/netns/conntrack.h
index 806454e767bf..e1290c159184 100644
--- a/include/net/netns/conntrack.h
+++ b/include/net/netns/conntrack.h
@@ -24,9 +24,13 @@ struct nf_generic_net {
struct nf_tcp_net {
unsigned int timeouts[TCP_CONNTRACK_TIMEOUT_MAX];
- int tcp_loose;
- int tcp_be_liberal;
- int tcp_max_retrans;
+ u8 tcp_loose;
+ u8 tcp_be_liberal;
+ u8 tcp_max_retrans;
+ u8 tcp_ignore_invalid_rst;
+#if IS_ENABLED(CONFIG_NF_FLOW_TABLE)
+ unsigned int offload_timeout;
+#endif
};
enum udp_conntrack {
@@ -37,6 +41,9 @@ enum udp_conntrack {
struct nf_udp_net {
unsigned int timeouts[UDP_CT_MAX];
+#if IS_ENABLED(CONFIG_NF_FLOW_TABLE)
+ unsigned int offload_timeout;
+#endif
};
struct nf_icmp_net {
@@ -45,7 +52,7 @@ struct nf_icmp_net {
#ifdef CONFIG_NF_CT_PROTO_DCCP
struct nf_dccp_net {
- int dccp_loose;
+ u8 dccp_loose;
unsigned int dccp_timeout[CT_DCCP_MAX + 1];
};
#endif
@@ -86,34 +93,19 @@ struct nf_ip_net {
#endif
};
-struct ct_pcpu {
- spinlock_t lock;
- struct hlist_nulls_head unconfirmed;
- struct hlist_nulls_head dying;
-};
-
struct netns_ct {
- atomic_t count;
- unsigned int expect_count;
#ifdef CONFIG_NF_CONNTRACK_EVENTS
- struct delayed_work ecache_dwork;
+ u8 ctnetlink_has_listener;
bool ecache_dwork_pending;
#endif
- bool auto_assign_helper_warned;
-#ifdef CONFIG_SYSCTL
- struct ctl_table_header *sysctl_header;
-#endif
- unsigned int sysctl_log_invalid; /* Log invalid packets */
- int sysctl_events;
- int sysctl_acct;
- int sysctl_auto_assign_helper;
- int sysctl_tstamp;
- int sysctl_checksum;
+ u8 sysctl_log_invalid; /* Log invalid packets */
+ u8 sysctl_events;
+ u8 sysctl_acct;
+ u8 sysctl_tstamp;
+ u8 sysctl_checksum;
- struct ct_pcpu __percpu *pcpu_lists;
struct ip_conntrack_stat __percpu *stat;
struct nf_ct_event_notifier __rcu *nf_conntrack_event_cb;
- struct nf_exp_event_notifier __rcu *nf_expect_event_cb;
struct nf_ip_net nf_ct_proto;
#if defined(CONFIG_NF_CONNTRACK_LABELS)
unsigned int labels_used;
diff --git a/include/net/netns/core.h b/include/net/netns/core.h
index 36c2d998a43c..8249060cf5d0 100644
--- a/include/net/netns/core.h
+++ b/include/net/netns/core.h
@@ -2,6 +2,8 @@
#ifndef __NETNS_CORE_H__
#define __NETNS_CORE_H__
+#include <linux/types.h>
+
struct ctl_table_header;
struct prot_inuse;
@@ -10,9 +12,9 @@ struct netns_core {
struct ctl_table_header *sysctl_hdr;
int sysctl_somaxconn;
+ u8 sysctl_txrehash;
#ifdef CONFIG_PROC_FS
- int __percpu *sock_inuse;
struct prot_inuse __percpu *prot_inuse;
#endif
};
diff --git a/include/net/netns/dccp.h b/include/net/netns/dccp.h
deleted file mode 100644
index cdbc4f5b8390..000000000000
--- a/include/net/netns/dccp.h
+++ /dev/null
@@ -1,12 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-#ifndef __NETNS_DCCP_H__
-#define __NETNS_DCCP_H__
-
-struct sock;
-
-struct netns_dccp {
- struct sock *v4_ctl_sk;
- struct sock *v6_ctl_sk;
-};
-
-#endif
diff --git a/include/net/netns/flow_table.h b/include/net/netns/flow_table.h
new file mode 100644
index 000000000000..1c5fc657e267
--- /dev/null
+++ b/include/net/netns/flow_table.h
@@ -0,0 +1,14 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef __NETNS_FLOW_TABLE_H
+#define __NETNS_FLOW_TABLE_H
+
+struct nf_flow_table_stat {
+ unsigned int count_wq_add;
+ unsigned int count_wq_del;
+ unsigned int count_wq_stats;
+};
+
+struct netns_ft {
+ struct nf_flow_table_stat __percpu *stat;
+};
+#endif
diff --git a/include/net/netns/generic.h b/include/net/netns/generic.h
index 8a1ab47c3fb3..00c399edeed1 100644
--- a/include/net/netns/generic.h
+++ b/include/net/netns/generic.h
@@ -8,6 +8,7 @@
#include <linux/bug.h>
#include <linux/rcupdate.h>
+#include <net/net_namespace.h>
/*
* Generic net pointers are to be used by modules to put some private
@@ -32,7 +33,7 @@ struct net_generic {
struct rcu_head rcu;
} s;
- void *ptr[0];
+ DECLARE_FLEX_ARRAY(void *, ptr);
};
};
diff --git a/include/net/netns/ipv4.h b/include/net/netns/ipv4.h
index 08b98414d94e..1b8004679445 100644
--- a/include/net/netns/ipv4.h
+++ b/include/net/netns/ipv4.h
@@ -9,9 +9,9 @@
#include <linux/uidgid.h>
#include <net/inet_frag.h>
#include <linux/rcupdate.h>
+#include <linux/seqlock.h>
#include <linux/siphash.h>
-struct tcpm_hash_bucket;
struct ctl_table_header;
struct ipv4_devconf;
struct fib_rules_ops;
@@ -32,8 +32,9 @@ struct ping_group_range {
struct inet_hashinfo;
struct inet_timewait_death_row {
- atomic_t tw_count;
+ refcount_t tw_refcount;
+ /* Padding to avoid false sharing, tw_refcount can be often written */
struct inet_hashinfo *hashinfo ____cacheline_aligned_in_smp;
int sysctl_max_tw_buckets;
};
@@ -41,6 +42,8 @@ struct inet_timewait_death_row {
struct tcp_fastopen_context;
struct netns_ipv4 {
+ struct inet_timewait_death_row tcp_death_row;
+
#ifdef CONFIG_SYSCTL
struct ctl_table_header *forw_hdr;
struct ctl_table_header *frags_hdr;
@@ -54,68 +57,64 @@ struct netns_ipv4 {
struct mutex ra_mutex;
#ifdef CONFIG_IP_MULTIPLE_TABLES
struct fib_rules_ops *rules_ops;
- bool fib_has_custom_rules;
- unsigned int fib_rules_require_fldissect;
struct fib_table __rcu *fib_main;
struct fib_table __rcu *fib_default;
+ unsigned int fib_rules_require_fldissect;
+ bool fib_has_custom_rules;
#endif
bool fib_has_custom_local_routes;
+ bool fib_offload_disabled;
#ifdef CONFIG_IP_ROUTE_CLASSID
- int fib_num_tclassid_users;
+ atomic_t fib_num_tclassid_users;
#endif
struct hlist_head *fib_table_hash;
- bool fib_offload_disabled;
struct sock *fibnl;
- struct sock * __percpu *icmp_sk;
struct sock *mc_autojoin_sk;
struct inet_peer_base *peers;
- struct sock * __percpu *tcp_sk;
struct fqdir *fqdir;
-#ifdef CONFIG_NETFILTER
- struct xt_table *iptable_filter;
- struct xt_table *iptable_mangle;
- struct xt_table *iptable_raw;
- struct xt_table *arptable_filter;
-#ifdef CONFIG_SECURITY
- struct xt_table *iptable_security;
-#endif
- struct xt_table *nat_table;
-#endif
- int sysctl_icmp_echo_ignore_all;
- int sysctl_icmp_echo_ignore_broadcasts;
- int sysctl_icmp_ignore_bogus_error_responses;
+ u8 sysctl_icmp_echo_ignore_all;
+ u8 sysctl_icmp_echo_enable_probe;
+ u8 sysctl_icmp_echo_ignore_broadcasts;
+ u8 sysctl_icmp_ignore_bogus_error_responses;
+ u8 sysctl_icmp_errors_use_inbound_ifaddr;
int sysctl_icmp_ratelimit;
int sysctl_icmp_ratemask;
- int sysctl_icmp_errors_use_inbound_ifaddr;
+
+ u32 ip_rt_min_pmtu;
+ int ip_rt_mtu_expires;
+ int ip_rt_min_advmss;
struct local_ports ip_local_ports;
- int sysctl_tcp_ecn;
- int sysctl_tcp_ecn_fallback;
+ u8 sysctl_tcp_ecn;
+ u8 sysctl_tcp_ecn_fallback;
- int sysctl_ip_default_ttl;
- int sysctl_ip_no_pmtu_disc;
- int sysctl_ip_fwd_use_pmtu;
- int sysctl_ip_fwd_update_priority;
- int sysctl_ip_nonlocal_bind;
+ u8 sysctl_ip_default_ttl;
+ u8 sysctl_ip_no_pmtu_disc;
+ u8 sysctl_ip_fwd_use_pmtu;
+ u8 sysctl_ip_fwd_update_priority;
+ u8 sysctl_ip_nonlocal_bind;
+ u8 sysctl_ip_autobind_reuse;
/* Shall we try to damage output packets if routing dev changes? */
- int sysctl_ip_dynaddr;
- int sysctl_ip_early_demux;
+ u8 sysctl_ip_dynaddr;
+ u8 sysctl_ip_early_demux;
#ifdef CONFIG_NET_L3_MASTER_DEV
- int sysctl_raw_l3mdev_accept;
+ u8 sysctl_raw_l3mdev_accept;
#endif
- int sysctl_tcp_early_demux;
- int sysctl_udp_early_demux;
+ u8 sysctl_tcp_early_demux;
+ u8 sysctl_udp_early_demux;
- int sysctl_fwmark_reflect;
- int sysctl_tcp_fwmark_accept;
+ u8 sysctl_nexthop_compat_mode;
+
+ u8 sysctl_fwmark_reflect;
+ u8 sysctl_tcp_fwmark_accept;
#ifdef CONFIG_NET_L3_MASTER_DEV
- int sysctl_tcp_l3mdev_accept;
+ u8 sysctl_tcp_l3mdev_accept;
#endif
- int sysctl_tcp_mtu_probing;
+ u8 sysctl_tcp_mtu_probing;
int sysctl_tcp_mtu_probe_floor;
int sysctl_tcp_base_mss;
int sysctl_tcp_min_snd_mss;
@@ -123,73 +122,80 @@ struct netns_ipv4 {
u32 sysctl_tcp_probe_interval;
int sysctl_tcp_keepalive_time;
- int sysctl_tcp_keepalive_probes;
int sysctl_tcp_keepalive_intvl;
+ u8 sysctl_tcp_keepalive_probes;
- int sysctl_tcp_syn_retries;
- int sysctl_tcp_synack_retries;
- int sysctl_tcp_syncookies;
+ u8 sysctl_tcp_syn_retries;
+ u8 sysctl_tcp_synack_retries;
+ u8 sysctl_tcp_syncookies;
+ u8 sysctl_tcp_migrate_req;
+ u8 sysctl_tcp_comp_sack_nr;
int sysctl_tcp_reordering;
- int sysctl_tcp_retries1;
- int sysctl_tcp_retries2;
- int sysctl_tcp_orphan_retries;
+ u8 sysctl_tcp_retries1;
+ u8 sysctl_tcp_retries2;
+ u8 sysctl_tcp_orphan_retries;
+ u8 sysctl_tcp_tw_reuse;
int sysctl_tcp_fin_timeout;
unsigned int sysctl_tcp_notsent_lowat;
- int sysctl_tcp_tw_reuse;
- int sysctl_tcp_sack;
- int sysctl_tcp_window_scaling;
- int sysctl_tcp_timestamps;
- int sysctl_tcp_early_retrans;
- int sysctl_tcp_recovery;
- int sysctl_tcp_thin_linear_timeouts;
- int sysctl_tcp_slow_start_after_idle;
- int sysctl_tcp_retrans_collapse;
- int sysctl_tcp_stdurg;
- int sysctl_tcp_rfc1337;
- int sysctl_tcp_abort_on_overflow;
- int sysctl_tcp_fack;
+ u8 sysctl_tcp_sack;
+ u8 sysctl_tcp_window_scaling;
+ u8 sysctl_tcp_timestamps;
+ u8 sysctl_tcp_early_retrans;
+ u8 sysctl_tcp_recovery;
+ u8 sysctl_tcp_thin_linear_timeouts;
+ u8 sysctl_tcp_slow_start_after_idle;
+ u8 sysctl_tcp_retrans_collapse;
+ u8 sysctl_tcp_stdurg;
+ u8 sysctl_tcp_rfc1337;
+ u8 sysctl_tcp_abort_on_overflow;
+ u8 sysctl_tcp_fack; /* obsolete */
int sysctl_tcp_max_reordering;
- int sysctl_tcp_dsack;
- int sysctl_tcp_app_win;
int sysctl_tcp_adv_win_scale;
- int sysctl_tcp_frto;
- int sysctl_tcp_nometrics_save;
- int sysctl_tcp_no_ssthresh_metrics_save;
- int sysctl_tcp_moderate_rcvbuf;
- int sysctl_tcp_tso_win_divisor;
- int sysctl_tcp_workaround_signed_windows;
+ u8 sysctl_tcp_dsack;
+ u8 sysctl_tcp_app_win;
+ u8 sysctl_tcp_frto;
+ u8 sysctl_tcp_nometrics_save;
+ u8 sysctl_tcp_no_ssthresh_metrics_save;
+ u8 sysctl_tcp_moderate_rcvbuf;
+ u8 sysctl_tcp_tso_win_divisor;
+ u8 sysctl_tcp_workaround_signed_windows;
int sysctl_tcp_limit_output_bytes;
int sysctl_tcp_challenge_ack_limit;
- int sysctl_tcp_min_tso_segs;
int sysctl_tcp_min_rtt_wlen;
- int sysctl_tcp_autocorking;
+ u8 sysctl_tcp_min_tso_segs;
+ u8 sysctl_tcp_tso_rtt_log;
+ u8 sysctl_tcp_autocorking;
+ u8 sysctl_tcp_reflect_tos;
int sysctl_tcp_invalid_ratelimit;
int sysctl_tcp_pacing_ss_ratio;
int sysctl_tcp_pacing_ca_ratio;
int sysctl_tcp_wmem[3];
int sysctl_tcp_rmem[3];
- int sysctl_tcp_comp_sack_nr;
+ unsigned int sysctl_tcp_child_ehash_entries;
unsigned long sysctl_tcp_comp_sack_delay_ns;
- struct inet_timewait_death_row tcp_death_row;
+ unsigned long sysctl_tcp_comp_sack_slack_ns;
int sysctl_max_syn_backlog;
int sysctl_tcp_fastopen;
const struct tcp_congestion_ops __rcu *tcp_congestion_control;
struct tcp_fastopen_context __rcu *tcp_fastopen_ctx;
- spinlock_t tcp_fastopen_ctx_lock;
unsigned int sysctl_tcp_fastopen_blackhole_timeout;
atomic_t tfo_active_disable_times;
unsigned long tfo_active_disable_stamp;
+ u32 tcp_challenge_timestamp;
+ u32 tcp_challenge_count;
int sysctl_udp_wmem_min;
int sysctl_udp_rmem_min;
+ u8 sysctl_fib_notify_on_flag_change;
+
#ifdef CONFIG_NET_L3_MASTER_DEV
- int sysctl_udp_l3mdev_accept;
+ u8 sysctl_udp_l3mdev_accept;
#endif
+ u8 sysctl_igmp_llm_reports;
int sysctl_igmp_max_memberships;
int sysctl_igmp_max_msf;
- int sysctl_igmp_llm_reports;
int sysctl_igmp_qrv;
struct ping_group_range ping_group_range;
@@ -210,8 +216,9 @@ struct netns_ipv4 {
#endif
#endif
#ifdef CONFIG_IP_ROUTE_MULTIPATH
- int sysctl_fib_multipath_use_neigh;
- int sysctl_fib_multipath_hash_policy;
+ u32 sysctl_fib_multipath_hash_fields;
+ u8 sysctl_fib_multipath_use_neigh;
+ u8 sysctl_fib_multipath_hash_policy;
#endif
struct fib_notifier_ops *notifier_ops;
diff --git a/include/net/netns/ipv6.h b/include/net/netns/ipv6.h
index 5ec054473d81..b4af4837d80b 100644
--- a/include/net/netns/ipv6.h
+++ b/include/net/netns/ipv6.h
@@ -20,7 +20,6 @@ struct netns_sysctl_ipv6 {
struct ctl_table_header *frags_hdr;
struct ctl_table_header *xfrm6_hdr;
#endif
- int bindv6only;
int flush_delay;
int ip6_rt_max_size;
int ip6_rt_gc_min_interval;
@@ -29,45 +28,44 @@ struct netns_sysctl_ipv6 {
int ip6_rt_gc_elasticity;
int ip6_rt_mtu_expires;
int ip6_rt_min_advmss;
- int multipath_hash_policy;
- int flowlabel_consistency;
- int auto_flowlabels;
+ u32 multipath_hash_fields;
+ u8 multipath_hash_policy;
+ u8 bindv6only;
+ u8 flowlabel_consistency;
+ u8 auto_flowlabels;
int icmpv6_time;
- int icmpv6_echo_ignore_all;
- int icmpv6_echo_ignore_multicast;
- int icmpv6_echo_ignore_anycast;
+ u8 icmpv6_echo_ignore_all;
+ u8 icmpv6_echo_ignore_multicast;
+ u8 icmpv6_echo_ignore_anycast;
DECLARE_BITMAP(icmpv6_ratemask, ICMPV6_MSG_MAX + 1);
unsigned long *icmpv6_ratemask_ptr;
- int anycast_src_echo_reply;
- int ip_nonlocal_bind;
- int fwmark_reflect;
+ u8 anycast_src_echo_reply;
+ u8 ip_nonlocal_bind;
+ u8 fwmark_reflect;
+ u8 flowlabel_state_ranges;
int idgen_retries;
int idgen_delay;
- int flowlabel_state_ranges;
int flowlabel_reflect;
int max_dst_opts_cnt;
int max_hbh_opts_cnt;
int max_dst_opts_len;
int max_hbh_opts_len;
int seg6_flowlabel;
+ u32 ioam6_id;
+ u64 ioam6_id_wide;
bool skip_notify_on_dev_down;
+ u8 fib_notify_on_flag_change;
};
struct netns_ipv6 {
+ /* Keep ip6_dst_ops at the beginning of netns_sysctl_ipv6 */
+ struct dst_ops ip6_dst_ops;
+
struct netns_sysctl_ipv6 sysctl;
struct ipv6_devconf *devconf_all;
struct ipv6_devconf *devconf_dflt;
struct inet_peer_base *peers;
struct fqdir *fqdir;
-#ifdef CONFIG_NETFILTER
- struct xt_table *ip6table_filter;
- struct xt_table *ip6table_mangle;
- struct xt_table *ip6table_raw;
-#ifdef CONFIG_SECURITY
- struct xt_table *ip6table_security;
-#endif
- struct xt_table *ip6table_nat;
-#endif
struct fib6_info *fib6_null_entry;
struct rt6_info *ip6_null_entry;
struct rt6_statistics *rt6_stats;
@@ -75,14 +73,14 @@ struct netns_ipv6 {
struct hlist_head *fib_table_hash;
struct fib6_table *fib6_main_tbl;
struct list_head fib6_walkers;
- struct dst_ops ip6_dst_ops;
rwlock_t fib6_walker_lock;
spinlock_t fib6_gc_lock;
- unsigned int ip6_rt_gc_expire;
- unsigned long ip6_rt_last_gc;
+ atomic_t ip6_rt_gc_expire;
+ unsigned long ip6_rt_last_gc;
+ unsigned char flowlabel_has_excl;
#ifdef CONFIG_IPV6_MULTIPLE_TABLES
- unsigned int fib6_rules_require_fldissect;
bool fib6_has_custom_rules;
+ unsigned int fib6_rules_require_fldissect;
#ifdef CONFIG_IPV6_SUBTREES
unsigned int fib6_routes_require_src;
#endif
@@ -91,11 +89,15 @@ struct netns_ipv6 {
struct fib6_table *fib6_local_tbl;
struct fib_rules_ops *fib6_rules_ops;
#endif
- struct sock * __percpu *icmp_sk;
struct sock *ndisc_sk;
struct sock *tcp_sk;
struct sock *igmp_sk;
struct sock *mc_autojoin_sk;
+
+ struct hlist_head *inet6_addr_lst;
+ spinlock_t addrconf_hash_lock;
+ struct delayed_work addr_chk_work;
+
#ifdef CONFIG_IPV6_MROUTE
#ifndef CONFIG_IPV6_MROUTE_MULTIPLE_TABLES
struct mr_table *mrt6;
@@ -115,6 +117,7 @@ struct netns_ipv6 {
spinlock_t lock;
u32 seq;
} ip6addrlbl_table;
+ struct ioam6_pernet_data *ioam6_data;
};
#if IS_ENABLED(CONFIG_NF_DEFRAG_IPV6)
diff --git a/include/net/netns/mctp.h b/include/net/netns/mctp.h
new file mode 100644
index 000000000000..1db8f9aaddb4
--- /dev/null
+++ b/include/net/netns/mctp.h
@@ -0,0 +1,37 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * MCTP per-net structures
+ */
+
+#ifndef __NETNS_MCTP_H__
+#define __NETNS_MCTP_H__
+
+#include <linux/mutex.h>
+#include <linux/types.h>
+
+struct netns_mctp {
+ /* Only updated under RTNL, entries freed via RCU */
+ struct list_head routes;
+
+ /* Bound sockets: list of sockets bound by type.
+ * This list is updated from non-atomic contexts (under bind_lock),
+ * and read (under rcu) in packet rx
+ */
+ struct mutex bind_lock;
+ struct hlist_head binds;
+
+ /* tag allocations. This list is read and updated from atomic contexts,
+ * but elements are free()ed after a RCU grace-period
+ */
+ spinlock_t keys_lock;
+ struct hlist_head keys;
+
+ /* MCTP network */
+ unsigned int default_net;
+
+ /* neighbour table */
+ struct mutex neigh_lock;
+ struct list_head neighbours;
+};
+
+#endif /* __NETNS_MCTP_H__ */
diff --git a/include/net/netns/mib.h b/include/net/netns/mib.h
index b5fdb108d602..7e373664b1e7 100644
--- a/include/net/netns/mib.h
+++ b/include/net/netns/mib.h
@@ -5,28 +5,41 @@
#include <net/snmp.h>
struct netns_mib {
- DEFINE_SNMP_STAT(struct tcp_mib, tcp_statistics);
DEFINE_SNMP_STAT(struct ipstats_mib, ip_statistics);
+#if IS_ENABLED(CONFIG_IPV6)
+ DEFINE_SNMP_STAT(struct ipstats_mib, ipv6_statistics);
+#endif
+
+ DEFINE_SNMP_STAT(struct tcp_mib, tcp_statistics);
DEFINE_SNMP_STAT(struct linux_mib, net_statistics);
- DEFINE_SNMP_STAT(struct udp_mib, udp_statistics);
- DEFINE_SNMP_STAT(struct udp_mib, udplite_statistics);
- DEFINE_SNMP_STAT(struct icmp_mib, icmp_statistics);
- DEFINE_SNMP_STAT_ATOMIC(struct icmpmsg_mib, icmpmsg_statistics);
+ DEFINE_SNMP_STAT(struct udp_mib, udp_statistics);
#if IS_ENABLED(CONFIG_IPV6)
- struct proc_dir_entry *proc_net_devsnmp6;
DEFINE_SNMP_STAT(struct udp_mib, udp_stats_in6);
- DEFINE_SNMP_STAT(struct udp_mib, udplite_stats_in6);
- DEFINE_SNMP_STAT(struct ipstats_mib, ipv6_statistics);
- DEFINE_SNMP_STAT(struct icmpv6_mib, icmpv6_statistics);
- DEFINE_SNMP_STAT_ATOMIC(struct icmpv6msg_mib, icmpv6msg_statistics);
#endif
+
#ifdef CONFIG_XFRM_STATISTICS
DEFINE_SNMP_STAT(struct linux_xfrm_mib, xfrm_statistics);
#endif
#if IS_ENABLED(CONFIG_TLS)
DEFINE_SNMP_STAT(struct linux_tls_mib, tls_statistics);
#endif
+#ifdef CONFIG_MPTCP
+ DEFINE_SNMP_STAT(struct mptcp_mib, mptcp_statistics);
+#endif
+
+ DEFINE_SNMP_STAT(struct udp_mib, udplite_statistics);
+#if IS_ENABLED(CONFIG_IPV6)
+ DEFINE_SNMP_STAT(struct udp_mib, udplite_stats_in6);
+#endif
+
+ DEFINE_SNMP_STAT(struct icmp_mib, icmp_statistics);
+ DEFINE_SNMP_STAT_ATOMIC(struct icmpmsg_mib, icmpmsg_statistics);
+#if IS_ENABLED(CONFIG_IPV6)
+ DEFINE_SNMP_STAT(struct icmpv6_mib, icmpv6_statistics);
+ DEFINE_SNMP_STAT_ATOMIC(struct icmpv6msg_mib, icmpv6msg_statistics);
+ struct proc_dir_entry *proc_net_devsnmp6;
+#endif
};
#endif
diff --git a/include/net/netns/mpls.h b/include/net/netns/mpls.h
index a7bdcfbb0b28..19ad2574b267 100644
--- a/include/net/netns/mpls.h
+++ b/include/net/netns/mpls.h
@@ -6,6 +6,8 @@
#ifndef __NETNS_MPLS_H__
#define __NETNS_MPLS_H__
+#include <linux/types.h>
+
struct mpls_route;
struct ctl_table_header;
diff --git a/include/net/netns/netfilter.h b/include/net/netns/netfilter.h
index ca043342c0eb..02bbdc577f8e 100644
--- a/include/net/netns/netfilter.h
+++ b/include/net/netns/netfilter.h
@@ -12,7 +12,6 @@ struct netns_nf {
#if defined CONFIG_PROC_FS
struct proc_dir_entry *proc_netfilter;
#endif
- const struct nf_queue_handler __rcu *queue_handler;
const struct nf_logger __rcu *nf_loggers[NFPROTO_NUMPROTO];
#ifdef CONFIG_SYSCTL
struct ctl_table_header *nf_log_dir_header;
@@ -25,14 +24,11 @@ struct netns_nf {
#ifdef CONFIG_NETFILTER_FAMILY_BRIDGE
struct nf_hook_entries __rcu *hooks_bridge[NF_INET_NUMHOOKS];
#endif
-#if IS_ENABLED(CONFIG_DECNET)
- struct nf_hook_entries __rcu *hooks_decnet[NF_DN_NUMHOOKS];
-#endif
#if IS_ENABLED(CONFIG_NF_DEFRAG_IPV4)
- bool defrag_ipv4;
+ unsigned int defrag_ipv4_users;
#endif
#if IS_ENABLED(CONFIG_NF_DEFRAG_IPV6)
- bool defrag_ipv6;
+ unsigned int defrag_ipv6_users;
#endif
};
#endif
diff --git a/include/net/netns/nexthop.h b/include/net/netns/nexthop.h
index c712ee5eebd9..434239b37014 100644
--- a/include/net/netns/nexthop.h
+++ b/include/net/netns/nexthop.h
@@ -6,6 +6,7 @@
#ifndef __NETNS_NEXTHOP_H__
#define __NETNS_NEXTHOP_H__
+#include <linux/notifier.h>
#include <linux/rbtree.h>
struct netns_nexthop {
@@ -14,5 +15,6 @@ struct netns_nexthop {
unsigned int seq; /* protected by rtnl_mutex */
u32 last_id_allocated;
+ struct blocking_notifier_head notifier_chain;
};
#endif
diff --git a/include/net/netns/nftables.h b/include/net/netns/nftables.h
index a1a8d45adb42..8c77832d0240 100644
--- a/include/net/netns/nftables.h
+++ b/include/net/netns/nftables.h
@@ -5,13 +5,7 @@
#include <linux/list.h>
struct netns_nftables {
- struct list_head tables;
- struct list_head commit_list;
- struct list_head module_list;
- struct mutex commit_mutex;
- unsigned int base_seq;
u8 gencursor;
- u8 validate_state;
};
#endif
diff --git a/include/net/netns/sctp.h b/include/net/netns/sctp.h
index d8d02e4188d1..a681147aecd8 100644
--- a/include/net/netns/sctp.h
+++ b/include/net/netns/sctp.h
@@ -2,6 +2,9 @@
#ifndef __NETNS_SCTP_H__
#define __NETNS_SCTP_H__
+#include <linux/timer.h>
+#include <net/snmp.h>
+
struct sock;
struct proc_dir_entry;
struct sctp_mib;
@@ -22,6 +25,14 @@ struct netns_sctp {
*/
struct sock *ctl_sock;
+ /* UDP tunneling listening sock. */
+ struct sock *udp4_sock;
+ struct sock *udp6_sock;
+ /* UDP tunneling listening port. */
+ int udp_port;
+ /* UDP tunneling remote encap port. */
+ int encap_port;
+
/* This is the global local address list.
* We actively maintain this complete list of addresses on
* the system by catching address add/delete events.
@@ -76,6 +87,9 @@ struct netns_sctp {
/* HB.interval - 30 seconds */
unsigned int hb_interval;
+ /* The interval for PLPMTUD probe timer */
+ unsigned int probe_interval;
+
/* Association.Max.Retrans - 10 attempts
* Path.Max.Retrans - 5 attempts (per destination address)
* Max.Init.Retransmits - 8 attempts
diff --git a/include/net/netns/smc.h b/include/net/netns/smc.h
new file mode 100644
index 000000000000..582212ada3ba
--- /dev/null
+++ b/include/net/netns/smc.h
@@ -0,0 +1,26 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef __NETNS_SMC_H__
+#define __NETNS_SMC_H__
+#include <linux/mutex.h>
+#include <linux/percpu.h>
+
+struct smc_stats_rsn;
+struct smc_stats;
+struct netns_smc {
+ /* per cpu counters for SMC */
+ struct smc_stats __percpu *smc_stats;
+ /* protect fback_rsn */
+ struct mutex mutex_fback_rsn;
+ struct smc_stats_rsn *fback_rsn;
+
+ bool limit_smc_hs; /* constraint on handshake */
+#ifdef CONFIG_SYSCTL
+ struct ctl_table_header *smc_hdr;
+#endif
+ unsigned int sysctl_autocorking_size;
+ unsigned int sysctl_smcr_buf_type;
+ int sysctl_smcr_testlink_time;
+ int sysctl_wmem;
+ int sysctl_rmem;
+};
+#endif
diff --git a/include/net/netns/unix.h b/include/net/netns/unix.h
index 91a3d7e39198..9859d134d5a8 100644
--- a/include/net/netns/unix.h
+++ b/include/net/netns/unix.h
@@ -5,8 +5,16 @@
#ifndef __NETNS_UNIX_H__
#define __NETNS_UNIX_H__
+#include <linux/spinlock.h>
+
+struct unix_table {
+ spinlock_t *locks;
+ struct hlist_head *buckets;
+};
+
struct ctl_table_header;
struct netns_unix {
+ struct unix_table table;
int sysctl_max_dgram_qlen;
struct ctl_table_header *ctl;
};
diff --git a/include/net/netns/x_tables.h b/include/net/netns/x_tables.h
deleted file mode 100644
index 9bc5a12fdbb0..000000000000
--- a/include/net/netns/x_tables.h
+++ /dev/null
@@ -1,21 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-#ifndef __NETNS_X_TABLES_H
-#define __NETNS_X_TABLES_H
-
-#include <linux/list.h>
-#include <linux/netfilter_defs.h>
-
-struct ebt_table;
-
-struct netns_xt {
- struct list_head tables[NFPROTO_NUMPROTO];
- bool notrack_deprecated_warning;
- bool clusterip_deprecated_warning;
-#if defined(CONFIG_BRIDGE_NF_EBTABLES) || \
- defined(CONFIG_BRIDGE_NF_EBTABLES_MODULE)
- struct ebt_table *broute_table;
- struct ebt_table *frame_filter;
- struct ebt_table *frame_nat;
-#endif
-};
-#endif
diff --git a/include/net/netns/xfrm.h b/include/net/netns/xfrm.h
index 59f45b1e9dac..bd7c3be4af5d 100644
--- a/include/net/netns/xfrm.h
+++ b/include/net/netns/xfrm.h
@@ -42,6 +42,7 @@ struct netns_xfrm {
struct hlist_head __rcu *state_bydst;
struct hlist_head __rcu *state_bysrc;
struct hlist_head __rcu *state_byspi;
+ struct hlist_head __rcu *state_byseq;
unsigned int state_hmask;
unsigned int state_num;
struct work_struct state_hash_work;
@@ -64,6 +65,9 @@ struct netns_xfrm {
u32 sysctl_aevent_rseqth;
int sysctl_larval_drop;
u32 sysctl_acq_expires;
+
+ u8 policy_default[XFRM_POLICY_MAX];
+
#ifdef CONFIG_SYSCTL
struct ctl_table_header *sysctl_hdr;
#endif
@@ -72,7 +76,10 @@ struct netns_xfrm {
#if IS_ENABLED(CONFIG_IPV6)
struct dst_ops xfrm6_dst_ops;
#endif
- spinlock_t xfrm_state_lock;
+ spinlock_t xfrm_state_lock;
+ seqcount_spinlock_t xfrm_state_hash_generation;
+ seqcount_spinlock_t xfrm_policy_hash_generation;
+
spinlock_t xfrm_policy_lock;
struct mutex xfrm_cfg_mutex;
};
diff --git a/include/net/netrom.h b/include/net/netrom.h
index 80f15b1c1a48..f0565a5987d1 100644
--- a/include/net/netrom.h
+++ b/include/net/netrom.h
@@ -14,6 +14,7 @@
#include <net/sock.h>
#include <linux/refcount.h>
#include <linux/seq_file.h>
+#include <net/ax25.h>
#define NR_NETWORK_LEN 15
#define NR_TRANSPORT_LEN 5
diff --git a/include/net/nexthop.h b/include/net/nexthop.h
index 331ebbc94fe7..28085b995ddc 100644
--- a/include/net/nexthop.h
+++ b/include/net/nexthop.h
@@ -10,6 +10,7 @@
#define __LINUX_NEXTHOP_H
#include <linux/netdevice.h>
+#include <linux/notifier.h>
#include <linux/route.h>
#include <linux/types.h>
#include <net/ip_fib.h>
@@ -26,6 +27,7 @@ struct nh_config {
u8 nh_family;
u8 nh_protocol;
u8 nh_blackhole;
+ u8 nh_fdb;
u32 nh_flags;
int nh_ifindex;
@@ -38,6 +40,12 @@ struct nh_config {
struct nlattr *nh_grp;
u16 nh_grp_type;
+ u16 nh_grp_res_num_buckets;
+ unsigned long nh_grp_res_idle_timer;
+ unsigned long nh_grp_res_unbalanced_timer;
+ bool nh_grp_res_has_num_buckets;
+ bool nh_grp_res_has_idle_timer;
+ bool nh_grp_res_has_unbalanced_timer;
struct nlattr *nh_encap;
u16 nh_encap_type;
@@ -52,6 +60,7 @@ struct nh_info {
u8 family;
bool reject_nh;
+ bool fdb_nh;
union {
struct fib_nh_common fib_nhc;
@@ -60,26 +69,71 @@ struct nh_info {
};
};
+struct nh_res_bucket {
+ struct nh_grp_entry __rcu *nh_entry;
+ atomic_long_t used_time;
+ unsigned long migrated_time;
+ bool occupied;
+ u8 nh_flags;
+};
+
+struct nh_res_table {
+ struct net *net;
+ u32 nhg_id;
+ struct delayed_work upkeep_dw;
+
+ /* List of NHGEs that have too few buckets ("uw" for underweight).
+ * Reclaimed buckets will be given to entries in this list.
+ */
+ struct list_head uw_nh_entries;
+ unsigned long unbalanced_since;
+
+ u32 idle_timer;
+ u32 unbalanced_timer;
+
+ u16 num_nh_buckets;
+ struct nh_res_bucket nh_buckets[];
+};
+
struct nh_grp_entry {
struct nexthop *nh;
u8 weight;
- atomic_t upper_bound;
+
+ union {
+ struct {
+ atomic_t upper_bound;
+ } hthr;
+ struct {
+ /* Member on uw_nh_entries. */
+ struct list_head uw_nh_entry;
+
+ u16 count_buckets;
+ u16 wants_buckets;
+ } res;
+ };
struct list_head nh_list;
struct nexthop *nh_parent; /* nexthop of group with this entry */
};
struct nh_group {
+ struct nh_group *spare; /* spare group for removals */
u16 num_nh;
- bool mpath;
+ bool is_multipath;
+ bool hash_threshold;
+ bool resilient;
+ bool fdb_nh;
bool has_v4;
- struct nh_grp_entry nh_entries[0];
+
+ struct nh_res_table __rcu *res_table;
+ struct nh_grp_entry nh_entries[];
};
struct nexthop {
struct rb_node rb_node; /* entry on netns rbtree */
struct list_head fi_list; /* v4 entries using nh */
struct list_head f6i_list; /* v6 entries using nh */
+ struct list_head fdb_list; /* fdb entries using this nh */
struct list_head grp_list; /* nh group entries using this nh */
struct net *net;
@@ -98,6 +152,79 @@ struct nexthop {
};
};
+enum nexthop_event_type {
+ NEXTHOP_EVENT_DEL,
+ NEXTHOP_EVENT_REPLACE,
+ NEXTHOP_EVENT_RES_TABLE_PRE_REPLACE,
+ NEXTHOP_EVENT_BUCKET_REPLACE,
+};
+
+enum nh_notifier_info_type {
+ NH_NOTIFIER_INFO_TYPE_SINGLE,
+ NH_NOTIFIER_INFO_TYPE_GRP,
+ NH_NOTIFIER_INFO_TYPE_RES_TABLE,
+ NH_NOTIFIER_INFO_TYPE_RES_BUCKET,
+};
+
+struct nh_notifier_single_info {
+ struct net_device *dev;
+ u8 gw_family;
+ union {
+ __be32 ipv4;
+ struct in6_addr ipv6;
+ };
+ u8 is_reject:1,
+ is_fdb:1,
+ has_encap:1;
+};
+
+struct nh_notifier_grp_entry_info {
+ u8 weight;
+ u32 id;
+ struct nh_notifier_single_info nh;
+};
+
+struct nh_notifier_grp_info {
+ u16 num_nh;
+ bool is_fdb;
+ struct nh_notifier_grp_entry_info nh_entries[];
+};
+
+struct nh_notifier_res_bucket_info {
+ u16 bucket_index;
+ unsigned int idle_timer_ms;
+ bool force;
+ struct nh_notifier_single_info old_nh;
+ struct nh_notifier_single_info new_nh;
+};
+
+struct nh_notifier_res_table_info {
+ u16 num_nh_buckets;
+ struct nh_notifier_single_info nhs[];
+};
+
+struct nh_notifier_info {
+ struct net *net;
+ struct netlink_ext_ack *extack;
+ u32 id;
+ enum nh_notifier_info_type type;
+ union {
+ struct nh_notifier_single_info *nh;
+ struct nh_notifier_grp_info *nh_grp;
+ struct nh_notifier_res_table_info *nh_res_table;
+ struct nh_notifier_res_bucket_info *nh_res_bucket;
+ };
+};
+
+int register_nexthop_notifier(struct net *net, struct notifier_block *nb,
+ struct netlink_ext_ack *extack);
+int unregister_nexthop_notifier(struct net *net, struct notifier_block *nb);
+void nexthop_set_hw_flags(struct net *net, u32 id, bool offload, bool trap);
+void nexthop_bucket_set_hw_flags(struct net *net, u32 id, u16 bucket_index,
+ bool offload, bool trap);
+void nexthop_res_grp_activity_update(struct net *net, u32 id, u16 num_buckets,
+ unsigned long *activity);
+
/* caller is holding rcu or rtnl; no reference taken to nexthop */
struct nexthop *nexthop_find_by_id(struct net *net, u32 id);
void nexthop_free_rcu(struct rcu_head *head);
@@ -119,13 +246,39 @@ static inline bool nexthop_cmp(const struct nexthop *nh1,
return nh1 == nh2;
}
+static inline bool nexthop_is_fdb(const struct nexthop *nh)
+{
+ if (nh->is_group) {
+ const struct nh_group *nh_grp;
+
+ nh_grp = rcu_dereference_rtnl(nh->nh_grp);
+ return nh_grp->fdb_nh;
+ } else {
+ const struct nh_info *nhi;
+
+ nhi = rcu_dereference_rtnl(nh->nh_info);
+ return nhi->fdb_nh;
+ }
+}
+
+static inline bool nexthop_has_v4(const struct nexthop *nh)
+{
+ if (nh->is_group) {
+ struct nh_group *nh_grp;
+
+ nh_grp = rcu_dereference_rtnl(nh->nh_grp);
+ return nh_grp->has_v4;
+ }
+ return false;
+}
+
static inline bool nexthop_is_multipath(const struct nexthop *nh)
{
if (nh->is_group) {
struct nh_group *nh_grp;
nh_grp = rcu_dereference_rtnl(nh->nh_grp);
- return nh_grp->mpath;
+ return nh_grp->is_multipath;
}
return false;
}
@@ -136,21 +289,20 @@ static inline unsigned int nexthop_num_path(const struct nexthop *nh)
{
unsigned int rc = 1;
- if (nexthop_is_multipath(nh)) {
+ if (nh->is_group) {
struct nh_group *nh_grp;
nh_grp = rcu_dereference_rtnl(nh->nh_grp);
- rc = nh_grp->num_nh;
+ if (nh_grp->is_multipath)
+ rc = nh_grp->num_nh;
}
return rc;
}
static inline
-struct nexthop *nexthop_mpath_select(const struct nexthop *nh, int nhsel)
+struct nexthop *nexthop_mpath_select(const struct nh_group *nhg, int nhsel)
{
- const struct nh_group *nhg = rcu_dereference_rtnl(nh->nh_grp);
-
/* for_nexthops macros in fib_semantics.c grabs a pointer to
* the nexthop before checking nhsel
*/
@@ -173,7 +325,7 @@ int nexthop_mpath_fill_node(struct sk_buff *skb, struct nexthop *nh,
struct fib_nh_common *nhc = &nhi->fib_nhc;
int weight = nhg->nh_entries[i].weight;
- if (fib_add_nexthop(skb, nhc, weight, rt_family) < 0)
+ if (fib_add_nexthop(skb, nhc, weight, rt_family, 0) < 0)
return -EMSGSIZE;
}
@@ -185,12 +337,14 @@ static inline bool nexthop_is_blackhole(const struct nexthop *nh)
{
const struct nh_info *nhi;
- if (nexthop_is_multipath(nh)) {
- if (nexthop_num_path(nh) > 1)
- return false;
- nh = nexthop_mpath_select(nh, 0);
- if (!nh)
+ if (nh->is_group) {
+ struct nh_group *nh_grp;
+
+ nh_grp = rcu_dereference_rtnl(nh->nh_grp);
+ if (nh_grp->num_nh > 1)
return false;
+
+ nh = nh_grp->nh_entries[0].nh;
}
nhi = rcu_dereference_rtnl(nh->nh_info);
@@ -216,16 +370,79 @@ struct fib_nh_common *nexthop_fib_nhc(struct nexthop *nh, int nhsel)
BUILD_BUG_ON(offsetof(struct fib_nh, nh_common) != 0);
BUILD_BUG_ON(offsetof(struct fib6_nh, nh_common) != 0);
- if (nexthop_is_multipath(nh)) {
- nh = nexthop_mpath_select(nh, nhsel);
- if (!nh)
- return NULL;
+ if (nh->is_group) {
+ struct nh_group *nh_grp;
+
+ nh_grp = rcu_dereference_rtnl(nh->nh_grp);
+ if (nh_grp->is_multipath) {
+ nh = nexthop_mpath_select(nh_grp, nhsel);
+ if (!nh)
+ return NULL;
+ }
}
nhi = rcu_dereference_rtnl(nh->nh_info);
return &nhi->fib_nhc;
}
+/* called from fib_table_lookup with rcu_lock */
+static inline
+struct fib_nh_common *nexthop_get_nhc_lookup(const struct nexthop *nh,
+ int fib_flags,
+ const struct flowi4 *flp,
+ int *nhsel)
+{
+ struct nh_info *nhi;
+
+ if (nh->is_group) {
+ struct nh_group *nhg = rcu_dereference(nh->nh_grp);
+ int i;
+
+ for (i = 0; i < nhg->num_nh; i++) {
+ struct nexthop *nhe = nhg->nh_entries[i].nh;
+
+ nhi = rcu_dereference(nhe->nh_info);
+ if (fib_lookup_good_nhc(&nhi->fib_nhc, fib_flags, flp)) {
+ *nhsel = i;
+ return &nhi->fib_nhc;
+ }
+ }
+ } else {
+ nhi = rcu_dereference(nh->nh_info);
+ if (fib_lookup_good_nhc(&nhi->fib_nhc, fib_flags, flp)) {
+ *nhsel = 0;
+ return &nhi->fib_nhc;
+ }
+ }
+
+ return NULL;
+}
+
+static inline bool nexthop_uses_dev(const struct nexthop *nh,
+ const struct net_device *dev)
+{
+ struct nh_info *nhi;
+
+ if (nh->is_group) {
+ struct nh_group *nhg = rcu_dereference(nh->nh_grp);
+ int i;
+
+ for (i = 0; i < nhg->num_nh; i++) {
+ struct nexthop *nhe = nhg->nh_entries[i].nh;
+
+ nhi = rcu_dereference(nhe->nh_info);
+ if (nhc_l3mdev_matches_dev(&nhi->fib_nhc, dev))
+ return true;
+ }
+ } else {
+ nhi = rcu_dereference(nh->nh_info);
+ if (nhc_l3mdev_matches_dev(&nhi->fib_nhc, dev))
+ return true;
+ }
+
+ return false;
+}
+
static inline unsigned int fib_info_num_path(const struct fib_info *fi)
{
if (unlikely(fi->nh))
@@ -259,12 +476,16 @@ static inline struct fib_nh *fib_info_nh(struct fib_info *fi, int nhsel)
int fib6_check_nexthop(struct nexthop *nh, struct fib6_config *cfg,
struct netlink_ext_ack *extack);
+/* Caller should either hold rcu_read_lock(), or RTNL. */
static inline struct fib6_nh *nexthop_fib6_nh(struct nexthop *nh)
{
struct nh_info *nhi;
- if (nexthop_is_multipath(nh)) {
- nh = nexthop_mpath_select(nh, 0);
+ if (nh->is_group) {
+ struct nh_group *nh_grp;
+
+ nh_grp = rcu_dereference_rtnl(nh->nh_grp);
+ nh = nexthop_mpath_select(nh_grp, 0);
if (!nh)
return NULL;
}
@@ -276,6 +497,29 @@ static inline struct fib6_nh *nexthop_fib6_nh(struct nexthop *nh)
return NULL;
}
+/* Variant of nexthop_fib6_nh().
+ * Caller should either hold rcu_read_lock_bh(), or RTNL.
+ */
+static inline struct fib6_nh *nexthop_fib6_nh_bh(struct nexthop *nh)
+{
+ struct nh_info *nhi;
+
+ if (nh->is_group) {
+ struct nh_group *nh_grp;
+
+ nh_grp = rcu_dereference_bh_rtnl(nh->nh_grp);
+ nh = nexthop_mpath_select(nh_grp, 0);
+ if (!nh)
+ return NULL;
+ }
+
+ nhi = rcu_dereference_bh_rtnl(nh->nh_info);
+ if (nhi->family == AF_INET6)
+ return &nhi->fib6_nh;
+
+ return NULL;
+}
+
static inline struct net_device *fib6_info_nh_dev(struct fib6_info *f6i)
{
struct fib6_nh *fib6_nh;
@@ -304,4 +548,32 @@ static inline void nexthop_path_fib6_result(struct fib6_result *res, int hash)
int nexthop_for_each_fib6_nh(struct nexthop *nh,
int (*cb)(struct fib6_nh *nh, void *arg),
void *arg);
+
+static inline int nexthop_get_family(struct nexthop *nh)
+{
+ struct nh_info *nhi = rcu_dereference_rtnl(nh->nh_info);
+
+ return nhi->family;
+}
+
+static inline
+struct fib_nh_common *nexthop_fdb_nhc(struct nexthop *nh)
+{
+ struct nh_info *nhi = rcu_dereference_rtnl(nh->nh_info);
+
+ return &nhi->fib_nhc;
+}
+
+static inline struct fib_nh_common *nexthop_path_fdb_result(struct nexthop *nh,
+ int hash)
+{
+ struct nh_info *nhi;
+ struct nexthop *nhp;
+
+ nhp = nexthop_select_path(nh, hash);
+ if (unlikely(!nhp))
+ return NULL;
+ nhi = rcu_dereference(nhp->nh_info);
+ return &nhi->fib_nhc;
+}
#endif
diff --git a/include/net/nfc/digital.h b/include/net/nfc/digital.h
index 963db96bcbbb..bb3e8fdc0692 100644
--- a/include/net/nfc/digital.h
+++ b/include/net/nfc/digital.h
@@ -191,7 +191,7 @@ struct digital_poll_tech {
struct nfc_digital_dev {
struct nfc_dev *nfc_dev;
- struct nfc_digital_ops *ops;
+ const struct nfc_digital_ops *ops;
u32 protocols;
@@ -236,7 +236,7 @@ struct nfc_digital_dev {
void (*skb_add_crc)(struct sk_buff *skb);
};
-struct nfc_digital_dev *nfc_digital_allocate_device(struct nfc_digital_ops *ops,
+struct nfc_digital_dev *nfc_digital_allocate_device(const struct nfc_digital_ops *ops,
__u32 supported_protocols,
__u32 driver_capabilities,
int tx_headroom,
diff --git a/include/net/nfc/hci.h b/include/net/nfc/hci.h
index b35f37a57686..756c11084f65 100644
--- a/include/net/nfc/hci.h
+++ b/include/net/nfc/hci.h
@@ -118,7 +118,7 @@ struct nfc_hci_dev {
struct sk_buff_head msg_rx_queue;
- struct nfc_hci_ops *ops;
+ const struct nfc_hci_ops *ops;
struct nfc_llc *llc;
@@ -151,7 +151,7 @@ struct nfc_hci_dev {
};
/* hci device allocation */
-struct nfc_hci_dev *nfc_hci_allocate_device(struct nfc_hci_ops *ops,
+struct nfc_hci_dev *nfc_hci_allocate_device(const struct nfc_hci_ops *ops,
struct nfc_hci_init_data *init_data,
unsigned long quirks,
u32 protocols,
@@ -168,7 +168,7 @@ void nfc_hci_set_clientdata(struct nfc_hci_dev *hdev, void *clientdata);
void *nfc_hci_get_clientdata(struct nfc_hci_dev *hdev);
static inline int nfc_hci_set_vendor_cmds(struct nfc_hci_dev *hdev,
- struct nfc_vendor_cmd *cmds,
+ const struct nfc_vendor_cmd *cmds,
int n_cmds)
{
return nfc_set_vendor_cmds(hdev->ndev, cmds, n_cmds);
diff --git a/include/net/nfc/nci.h b/include/net/nfc/nci.h
index 6ab5a83f597c..e82f55f543bb 100644
--- a/include/net/nfc/nci.h
+++ b/include/net/nfc/nci.h
@@ -25,6 +25,8 @@
#define NCI_MAX_PARAM_LEN 251
#define NCI_MAX_PAYLOAD_SIZE 255
#define NCI_MAX_PACKET_SIZE 258
+#define NCI_MAX_LARGE_PARAMS_NCI_v2 15
+#define NCI_VER_2_MASK 0x20
/* NCI Status Codes */
#define NCI_STATUS_OK 0x00
@@ -131,6 +133,9 @@
#define NCI_LF_CON_BITR_F_212 0x02
#define NCI_LF_CON_BITR_F_424 0x04
+/* NCI 2.x Feature Enable Bit */
+#define NCI_FEATURE_DISABLE 0x00
+
/* NCI Reset types */
#define NCI_RESET_TYPE_KEEP_CONFIG 0x00
#define NCI_RESET_TYPE_RESET_CONFIG 0x01
@@ -220,6 +225,11 @@ struct nci_core_reset_cmd {
} __packed;
#define NCI_OP_CORE_INIT_CMD nci_opcode_pack(NCI_GID_CORE, 0x01)
+/* To support NCI 2.x */
+struct nci_core_init_v2_cmd {
+ u8 feature1;
+ u8 feature2;
+};
#define NCI_OP_CORE_SET_CONFIG_CMD nci_opcode_pack(NCI_GID_CORE, 0x02)
struct set_config_param {
@@ -244,13 +254,13 @@ struct dest_spec_params {
struct core_conn_create_dest_spec_params {
__u8 type;
__u8 length;
- __u8 value[0];
+ __u8 value[];
} __packed;
struct nci_core_conn_create_cmd {
__u8 destination_type;
__u8 number_destination_params;
- struct core_conn_create_dest_spec_params params[0];
+ struct core_conn_create_dest_spec_params params[];
} __packed;
#define NCI_OP_CORE_CONN_CLOSE_CMD nci_opcode_pack(NCI_GID_CORE, 0x05)
@@ -321,7 +331,7 @@ struct nci_core_init_rsp_1 {
__u8 status;
__le32 nfcc_features;
__u8 num_supported_rf_interfaces;
- __u8 supported_rf_interfaces[0]; /* variable size array */
+ __u8 supported_rf_interfaces[]; /* variable size array */
/* continuted in nci_core_init_rsp_2 */
} __packed;
@@ -334,11 +344,25 @@ struct nci_core_init_rsp_2 {
__le32 manufact_specific_info;
} __packed;
+/* To support NCI ver 2.x */
+struct nci_core_init_rsp_nci_ver2 {
+ u8 status;
+ __le32 nfcc_features;
+ u8 max_logical_connections;
+ __le16 max_routing_table_size;
+ u8 max_ctrl_pkt_payload_len;
+ u8 max_data_pkt_hci_payload_len;
+ u8 number_of_hci_credit;
+ __le16 max_nfc_v_frame_size;
+ u8 num_supported_rf_interfaces;
+ u8 supported_rf_interfaces[];
+} __packed;
+
#define NCI_OP_CORE_SET_CONFIG_RSP nci_opcode_pack(NCI_GID_CORE, 0x02)
struct nci_core_set_config_rsp {
__u8 status;
__u8 num_params;
- __u8 params_id[0]; /* variable size array */
+ __u8 params_id[]; /* variable size array */
} __packed;
#define NCI_OP_CORE_CONN_CREATE_RSP nci_opcode_pack(NCI_GID_CORE, 0x04)
@@ -372,6 +396,16 @@ struct nci_nfcee_discover_rsp {
/* --------------------------- */
/* ---- NCI Notifications ---- */
/* --------------------------- */
+#define NCI_OP_CORE_RESET_NTF nci_opcode_pack(NCI_GID_CORE, 0x00)
+struct nci_core_reset_ntf {
+ u8 reset_trigger;
+ u8 config_status;
+ u8 nci_ver;
+ u8 manufact_id;
+ u8 manufacturer_specific_len;
+ __le32 manufact_specific_info;
+} __packed;
+
#define NCI_OP_CORE_CONN_CREDITS_NTF nci_opcode_pack(NCI_GID_CORE, 0x06)
struct conn_credit_entry {
__u8 conn_id;
@@ -501,18 +535,18 @@ struct nci_rf_nfcee_action_ntf {
__u8 nfcee_id;
__u8 trigger;
__u8 supported_data_length;
- __u8 supported_data[0];
+ __u8 supported_data[];
} __packed;
#define NCI_OP_NFCEE_DISCOVER_NTF nci_opcode_pack(NCI_GID_NFCEE_MGMT, 0x00)
struct nci_nfcee_supported_protocol {
__u8 num_protocol;
- __u8 supported_protocol[0];
+ __u8 supported_protocol[];
} __packed;
struct nci_nfcee_information_tlv {
__u8 num_tlv;
- __u8 information_tlv[0];
+ __u8 information_tlv[];
} __packed;
struct nci_nfcee_discover_ntf {
diff --git a/include/net/nfc/nci_core.h b/include/net/nfc/nci_core.h
index 43c9c5d2bedb..ea8595651c38 100644
--- a/include/net/nfc/nci_core.h
+++ b/include/net/nfc/nci_core.h
@@ -30,6 +30,7 @@ enum nci_flag {
NCI_UP,
NCI_DATA_EXCHANGE,
NCI_DATA_EXCHANGE_TO,
+ NCI_UNREG,
};
/* NCI device states */
@@ -82,10 +83,10 @@ struct nci_ops {
void (*hci_cmd_received)(struct nci_dev *ndev, u8 pipe, u8 cmd,
struct sk_buff *skb);
- struct nci_driver_ops *prop_ops;
+ const struct nci_driver_ops *prop_ops;
size_t n_prop_ops;
- struct nci_driver_ops *core_ops;
+ const struct nci_driver_ops *core_ops;
size_t n_core_ops;
};
@@ -194,7 +195,7 @@ struct nci_hci_dev {
/* NCI Core structures */
struct nci_dev {
struct nfc_dev *nfc_dev;
- struct nci_ops *ops;
+ const struct nci_ops *ops;
struct nci_hci_dev *hci_dev;
int tx_headroom;
@@ -267,7 +268,7 @@ struct nci_dev {
};
/* ----- NCI Devices ----- */
-struct nci_dev *nci_allocate_device(struct nci_ops *ops,
+struct nci_dev *nci_allocate_device(const struct nci_ops *ops,
__u32 supported_protocols,
int tx_headroom,
int tx_tailroom);
@@ -276,28 +277,31 @@ int nci_register_device(struct nci_dev *ndev);
void nci_unregister_device(struct nci_dev *ndev);
int nci_request(struct nci_dev *ndev,
void (*req)(struct nci_dev *ndev,
- unsigned long opt),
- unsigned long opt, __u32 timeout);
-int nci_prop_cmd(struct nci_dev *ndev, __u8 oid, size_t len, __u8 *payload);
-int nci_core_cmd(struct nci_dev *ndev, __u16 opcode, size_t len, __u8 *payload);
+ const void *opt),
+ const void *opt, __u32 timeout);
+int nci_prop_cmd(struct nci_dev *ndev, __u8 oid, size_t len,
+ const __u8 *payload);
+int nci_core_cmd(struct nci_dev *ndev, __u16 opcode, size_t len,
+ const __u8 *payload);
int nci_core_reset(struct nci_dev *ndev);
int nci_core_init(struct nci_dev *ndev);
int nci_recv_frame(struct nci_dev *ndev, struct sk_buff *skb);
int nci_send_frame(struct nci_dev *ndev, struct sk_buff *skb);
-int nci_set_config(struct nci_dev *ndev, __u8 id, size_t len, __u8 *val);
+int nci_set_config(struct nci_dev *ndev, __u8 id, size_t len, const __u8 *val);
int nci_nfcee_discover(struct nci_dev *ndev, u8 action);
int nci_nfcee_mode_set(struct nci_dev *ndev, u8 nfcee_id, u8 nfcee_mode);
int nci_core_conn_create(struct nci_dev *ndev, u8 destination_type,
u8 number_destination_params,
size_t params_len,
- struct core_conn_create_dest_spec_params *params);
+ const struct core_conn_create_dest_spec_params *params);
int nci_core_conn_close(struct nci_dev *ndev, u8 conn_id);
-int nci_nfcc_loopback(struct nci_dev *ndev, void *data, size_t data_len,
+int nci_nfcc_loopback(struct nci_dev *ndev, const void *data, size_t data_len,
struct sk_buff **resp);
struct nci_hci_dev *nci_hci_allocate(struct nci_dev *ndev);
+void nci_hci_deallocate(struct nci_dev *ndev);
int nci_hci_send_event(struct nci_dev *ndev, u8 gate, u8 event,
const u8 *param, size_t param_len);
int nci_hci_send_cmd(struct nci_dev *ndev, u8 gate,
@@ -342,7 +346,7 @@ static inline void *nci_get_drvdata(struct nci_dev *ndev)
}
static inline int nci_set_vendor_cmds(struct nci_dev *ndev,
- struct nfc_vendor_cmd *cmds,
+ const struct nfc_vendor_cmd *cmds,
int n_cmds)
{
return nfc_set_vendor_cmds(ndev->nfc_dev, cmds, n_cmds);
@@ -359,7 +363,7 @@ int nci_core_rsp_packet(struct nci_dev *ndev, __u16 opcode,
int nci_core_ntf_packet(struct nci_dev *ndev, __u16 opcode,
struct sk_buff *skb);
void nci_rx_data_packet(struct nci_dev *ndev, struct sk_buff *skb);
-int nci_send_cmd(struct nci_dev *ndev, __u16 opcode, __u8 plen, void *payload);
+int nci_send_cmd(struct nci_dev *ndev, __u16 opcode, __u8 plen, const void *payload);
int nci_send_data(struct nci_dev *ndev, __u8 conn_id, struct sk_buff *skb);
int nci_conn_max_data_pkt_payload_size(struct nci_dev *ndev, __u8 conn_id);
void nci_data_exchange_complete(struct nci_dev *ndev, struct sk_buff *skb,
@@ -377,7 +381,7 @@ void nci_req_complete(struct nci_dev *ndev, int result);
struct nci_conn_info *nci_get_conn_info_by_conn_id(struct nci_dev *ndev,
int conn_id);
int nci_get_conn_info_by_dest_type_params(struct nci_dev *ndev, u8 dest_type,
- struct dest_spec_params *params);
+ const struct dest_spec_params *params);
/* ----- NCI status code ----- */
int nci_to_errno(__u8 code);
@@ -430,8 +434,6 @@ struct nci_uart_ops {
int (*open)(struct nci_uart *nci_uart);
void (*close)(struct nci_uart *nci_uart);
int (*recv)(struct nci_uart *nci_uart, struct sk_buff *skb);
- int (*recv_buf)(struct nci_uart *nci_uart, const u8 *data, char *flags,
- int count);
int (*send)(struct nci_uart *nci_uart, struct sk_buff *skb);
void (*tx_start)(struct nci_uart *nci_uart);
void (*tx_done)(struct nci_uart *nci_uart);
diff --git a/include/net/nfc/nfc.h b/include/net/nfc/nfc.h
index 5d277d68fd8d..5dee575fbe86 100644
--- a/include/net/nfc/nfc.h
+++ b/include/net/nfc/nfc.h
@@ -146,7 +146,7 @@ struct nfc_evt_transaction {
u32 aid_len;
u8 aid[NFC_MAX_AID_LENGTH];
u8 params_len;
- u8 params[0];
+ u8 params[];
} __packed;
struct nfc_genl_data {
@@ -188,17 +188,17 @@ struct nfc_dev {
struct rfkill *rfkill;
- struct nfc_vendor_cmd *vendor_cmds;
+ const struct nfc_vendor_cmd *vendor_cmds;
int n_vendor_cmds;
- struct nfc_ops *ops;
+ const struct nfc_ops *ops;
struct genl_info *cur_cmd_info;
};
#define to_nfc_dev(_dev) container_of(_dev, struct nfc_dev, dev)
extern struct class nfc_class;
-struct nfc_dev *nfc_allocate_device(struct nfc_ops *ops,
+struct nfc_dev *nfc_allocate_device(const struct nfc_ops *ops,
u32 supported_protocols,
int tx_headroom,
int tx_tailroom);
@@ -245,7 +245,7 @@ static inline void nfc_set_drvdata(struct nfc_dev *dev, void *data)
*
* @dev: The nfc device
*/
-static inline void *nfc_get_drvdata(struct nfc_dev *dev)
+static inline void *nfc_get_drvdata(const struct nfc_dev *dev)
{
return dev_get_drvdata(&dev->dev);
}
@@ -255,7 +255,7 @@ static inline void *nfc_get_drvdata(struct nfc_dev *dev)
*
* @dev: The nfc device whose name to return
*/
-static inline const char *nfc_device_name(struct nfc_dev *dev)
+static inline const char *nfc_device_name(const struct nfc_dev *dev)
{
return dev_name(&dev->dev);
}
@@ -266,7 +266,7 @@ struct sk_buff *nfc_alloc_send_skb(struct nfc_dev *dev, struct sock *sk,
struct sk_buff *nfc_alloc_recv_skb(unsigned int size, gfp_t gfp);
int nfc_set_remote_general_bytes(struct nfc_dev *dev,
- u8 *gt, u8 gt_len);
+ const u8 *gt, u8 gt_len);
u8 *nfc_get_local_general_bytes(struct nfc_dev *dev, size_t *gb_len);
int nfc_fw_download_done(struct nfc_dev *dev, const char *firmware_name,
@@ -280,7 +280,7 @@ int nfc_dep_link_is_up(struct nfc_dev *dev, u32 target_idx,
u8 comm_mode, u8 rf_mode);
int nfc_tm_activated(struct nfc_dev *dev, u32 protocol, u8 comm_mode,
- u8 *gb, size_t gb_len);
+ const u8 *gb, size_t gb_len);
int nfc_tm_deactivated(struct nfc_dev *dev);
int nfc_tm_data_received(struct nfc_dev *dev, struct sk_buff *skb);
@@ -297,7 +297,7 @@ void nfc_send_to_raw_sock(struct nfc_dev *dev, struct sk_buff *skb,
u8 payload_type, u8 direction);
static inline int nfc_set_vendor_cmds(struct nfc_dev *dev,
- struct nfc_vendor_cmd *cmds,
+ const struct nfc_vendor_cmd *cmds,
int n_cmds)
{
if (dev->vendor_cmds || dev->n_vendor_cmds)
diff --git a/include/net/nl802154.h b/include/net/nl802154.h
index ddcee128f5d9..f5850b569c52 100644
--- a/include/net/nl802154.h
+++ b/include/net/nl802154.h
@@ -19,6 +19,8 @@
*
*/
+#include <linux/types.h>
+
#define NL802154_GENL_NAME "nl802154"
enum nl802154_commands {
@@ -56,9 +58,6 @@ enum nl802154_commands {
NL802154_CMD_SET_WPAN_PHY_NETNS,
- /* add new commands above here */
-
-#ifdef CONFIG_IEEE802154_NL802154_EXPERIMENTAL
NL802154_CMD_SET_SEC_PARAMS,
NL802154_CMD_GET_SEC_KEY, /* can dump */
NL802154_CMD_NEW_SEC_KEY,
@@ -72,7 +71,8 @@ enum nl802154_commands {
NL802154_CMD_GET_SEC_LEVEL, /* can dump */
NL802154_CMD_NEW_SEC_LEVEL,
NL802154_CMD_DEL_SEC_LEVEL,
-#endif /* CONFIG_IEEE802154_NL802154_EXPERIMENTAL */
+
+ /* add new commands above here */
/* used to define NL802154_CMD_MAX below */
__NL802154_CMD_AFTER_LAST,
@@ -150,10 +150,9 @@ enum nl802154_attrs {
};
enum nl802154_iftype {
- /* for backwards compatibility TODO */
- NL802154_IFTYPE_UNSPEC = -1,
+ NL802154_IFTYPE_UNSPEC = (~(__u32)0),
- NL802154_IFTYPE_NODE,
+ NL802154_IFTYPE_NODE = 0,
NL802154_IFTYPE_MONITOR,
NL802154_IFTYPE_COORD,
diff --git a/include/net/p8022.h b/include/net/p8022.h
index c2bacc66bfbc..b690ffcad66b 100644
--- a/include/net/p8022.h
+++ b/include/net/p8022.h
@@ -1,6 +1,11 @@
/* SPDX-License-Identifier: GPL-2.0 */
#ifndef _NET_P8022_H
#define _NET_P8022_H
+
+struct net_device;
+struct packet_type;
+struct sk_buff;
+
struct datalink_proto *
register_8022_client(unsigned char type,
int (*func)(struct sk_buff *skb,
diff --git a/include/net/page_pool.h b/include/net/page_pool.h
index cfbed00ba7ee..813c93499f20 100644
--- a/include/net/page_pool.h
+++ b/include/net/page_pool.h
@@ -45,7 +45,10 @@
* Please note DMA-sync-for-CPU is still
* device driver responsibility
*/
-#define PP_FLAG_ALL (PP_FLAG_DMA_MAP | PP_FLAG_DMA_SYNC_DEV)
+#define PP_FLAG_PAGE_FRAG BIT(2) /* for page frag feature */
+#define PP_FLAG_ALL (PP_FLAG_DMA_MAP |\
+ PP_FLAG_DMA_SYNC_DEV |\
+ PP_FLAG_PAGE_FRAG)
/*
* Fast allocation side cache array/stack
@@ -65,7 +68,7 @@
#define PP_ALLOC_CACHE_REFILL 64
struct pp_alloc_cache {
u32 count;
- void *cache[PP_ALLOC_CACHE_SIZE];
+ struct page *cache[PP_ALLOC_CACHE_SIZE];
};
struct page_pool_params {
@@ -77,8 +80,73 @@ struct page_pool_params {
enum dma_data_direction dma_dir; /* DMA mapping direction */
unsigned int max_len; /* max DMA sync memory size */
unsigned int offset; /* DMA addr offset */
+ void (*init_callback)(struct page *page, void *arg);
+ void *init_arg;
};
+#ifdef CONFIG_PAGE_POOL_STATS
+struct page_pool_alloc_stats {
+ u64 fast; /* fast path allocations */
+ u64 slow; /* slow-path order 0 allocations */
+ u64 slow_high_order; /* slow-path high order allocations */
+ u64 empty; /* failed refills due to empty ptr ring, forcing
+ * slow path allocation
+ */
+ u64 refill; /* allocations via successful refill */
+ u64 waive; /* failed refills due to numa zone mismatch */
+};
+
+struct page_pool_recycle_stats {
+ u64 cached; /* recycling placed page in the cache. */
+ u64 cache_full; /* cache was full */
+ u64 ring; /* recycling placed page back into ptr ring */
+ u64 ring_full; /* page was released from page-pool because
+ * PTR ring was full.
+ */
+ u64 released_refcnt; /* page released because of elevated
+ * refcnt
+ */
+};
+
+/* This struct wraps the above stats structs so users of the
+ * page_pool_get_stats API can pass a single argument when requesting the
+ * stats for the page pool.
+ */
+struct page_pool_stats {
+ struct page_pool_alloc_stats alloc_stats;
+ struct page_pool_recycle_stats recycle_stats;
+};
+
+int page_pool_ethtool_stats_get_count(void);
+u8 *page_pool_ethtool_stats_get_strings(u8 *data);
+u64 *page_pool_ethtool_stats_get(u64 *data, void *stats);
+
+/*
+ * Drivers that wish to harvest page pool stats and report them to users
+ * (perhaps via ethtool, debugfs, or another mechanism) can allocate a
+ * struct page_pool_stats call page_pool_get_stats to get stats for the specified pool.
+ */
+bool page_pool_get_stats(struct page_pool *pool,
+ struct page_pool_stats *stats);
+#else
+
+static inline int page_pool_ethtool_stats_get_count(void)
+{
+ return 0;
+}
+
+static inline u8 *page_pool_ethtool_stats_get_strings(u8 *data)
+{
+ return data;
+}
+
+static inline u64 *page_pool_ethtool_stats_get(u64 *data, void *stats)
+{
+ return data;
+}
+
+#endif
+
struct page_pool {
struct page_pool_params p;
@@ -88,6 +156,15 @@ struct page_pool {
unsigned long defer_warn;
u32 pages_state_hold_cnt;
+ unsigned int frag_offset;
+ struct page *frag_page;
+ long frag_users;
+
+#ifdef CONFIG_PAGE_POOL_STATS
+ /* these stats are incremented while in softirq context */
+ struct page_pool_alloc_stats alloc_stats;
+#endif
+ u32 xdp_mem_id;
/*
* Data structure for allocation side
@@ -117,6 +194,10 @@ struct page_pool {
*/
struct ptr_ring ring;
+#ifdef CONFIG_PAGE_POOL_STATS
+ /* recycle stats are per-cpu to avoid locking */
+ struct page_pool_recycle_stats __percpu *recycle_stats;
+#endif
atomic_t pages_state_release_cnt;
/* A page_pool is strictly tied to a single RX-queue being
@@ -137,6 +218,18 @@ static inline struct page *page_pool_dev_alloc_pages(struct page_pool *pool)
return page_pool_alloc_pages(pool, gfp);
}
+struct page *page_pool_alloc_frag(struct page_pool *pool, unsigned int *offset,
+ unsigned int size, gfp_t gfp);
+
+static inline struct page *page_pool_dev_alloc_frag(struct page_pool *pool,
+ unsigned int *offset,
+ unsigned int size)
+{
+ gfp_t gfp = (GFP_ATOMIC | __GFP_NOWARN);
+
+ return page_pool_alloc_frag(pool, offset, size, gfp);
+}
+
/* get the stored dma direction. A driver might decide to treat this locally and
* avoid the extra cache line from page_pool to determine the direction
*/
@@ -146,60 +239,126 @@ inline enum dma_data_direction page_pool_get_dma_dir(struct page_pool *pool)
return pool->p.dma_dir;
}
+bool page_pool_return_skb_page(struct page *page);
+
struct page_pool *page_pool_create(const struct page_pool_params *params);
+struct xdp_mem_info;
+
#ifdef CONFIG_PAGE_POOL
void page_pool_destroy(struct page_pool *pool);
-void page_pool_use_xdp_mem(struct page_pool *pool, void (*disconnect)(void *));
+void page_pool_use_xdp_mem(struct page_pool *pool, void (*disconnect)(void *),
+ struct xdp_mem_info *mem);
+void page_pool_release_page(struct page_pool *pool, struct page *page);
+void page_pool_put_page_bulk(struct page_pool *pool, void **data,
+ int count);
#else
static inline void page_pool_destroy(struct page_pool *pool)
{
}
static inline void page_pool_use_xdp_mem(struct page_pool *pool,
- void (*disconnect)(void *))
+ void (*disconnect)(void *),
+ struct xdp_mem_info *mem)
+{
+}
+static inline void page_pool_release_page(struct page_pool *pool,
+ struct page *page)
+{
+}
+
+static inline void page_pool_put_page_bulk(struct page_pool *pool, void **data,
+ int count)
{
}
#endif
-/* Never call this directly, use helpers below */
-void __page_pool_put_page(struct page_pool *pool, struct page *page,
- unsigned int dma_sync_size, bool allow_direct);
+void page_pool_put_defragged_page(struct page_pool *pool, struct page *page,
+ unsigned int dma_sync_size,
+ bool allow_direct);
+
+static inline void page_pool_fragment_page(struct page *page, long nr)
+{
+ atomic_long_set(&page->pp_frag_count, nr);
+}
+
+static inline long page_pool_defrag_page(struct page *page, long nr)
+{
+ long ret;
+
+ /* If nr == pp_frag_count then we have cleared all remaining
+ * references to the page. No need to actually overwrite it, instead
+ * we can leave this to be overwritten by the calling function.
+ *
+ * The main advantage to doing this is that an atomic_read is
+ * generally a much cheaper operation than an atomic update,
+ * especially when dealing with a page that may be partitioned
+ * into only 2 or 3 pieces.
+ */
+ if (atomic_long_read(&page->pp_frag_count) == nr)
+ return 0;
+
+ ret = atomic_long_sub_return(nr, &page->pp_frag_count);
+ WARN_ON(ret < 0);
+ return ret;
+}
+
+static inline bool page_pool_is_last_frag(struct page_pool *pool,
+ struct page *page)
+{
+ /* If fragments aren't enabled or count is 0 we were the last user */
+ return !(pool->p.flags & PP_FLAG_PAGE_FRAG) ||
+ (page_pool_defrag_page(page, 1) == 0);
+}
static inline void page_pool_put_page(struct page_pool *pool,
- struct page *page, bool allow_direct)
+ struct page *page,
+ unsigned int dma_sync_size,
+ bool allow_direct)
{
/* When page_pool isn't compiled-in, net/core/xdp.c doesn't
* allow registering MEM_TYPE_PAGE_POOL, but shield linker.
*/
#ifdef CONFIG_PAGE_POOL
- __page_pool_put_page(pool, page, -1, allow_direct);
+ if (!page_pool_is_last_frag(pool, page))
+ return;
+
+ page_pool_put_defragged_page(pool, page, dma_sync_size, allow_direct);
#endif
}
-/* Very limited use-cases allow recycle direct */
+
+/* Same as above but will try to sync the entire area pool->max_len */
+static inline void page_pool_put_full_page(struct page_pool *pool,
+ struct page *page, bool allow_direct)
+{
+ page_pool_put_page(pool, page, -1, allow_direct);
+}
+
+/* Same as above but the caller must guarantee safe context. e.g NAPI */
static inline void page_pool_recycle_direct(struct page_pool *pool,
struct page *page)
{
- __page_pool_put_page(pool, page, -1, true);
+ page_pool_put_full_page(pool, page, true);
}
-/* Disconnects a page (from a page_pool). API users can have a need
- * to disconnect a page (from a page_pool), to allow it to be used as
- * a regular page (that will eventually be returned to the normal
- * page-allocator via put_page).
- */
-void page_pool_unmap_page(struct page_pool *pool, struct page *page);
-static inline void page_pool_release_page(struct page_pool *pool,
- struct page *page)
+#define PAGE_POOL_DMA_USE_PP_FRAG_COUNT \
+ (sizeof(dma_addr_t) > sizeof(unsigned long))
+
+static inline dma_addr_t page_pool_get_dma_addr(struct page *page)
{
-#ifdef CONFIG_PAGE_POOL
- page_pool_unmap_page(pool, page);
-#endif
+ dma_addr_t ret = page->dma_addr;
+
+ if (PAGE_POOL_DMA_USE_PP_FRAG_COUNT)
+ ret |= (dma_addr_t)page->dma_addr_upper << 16 << 16;
+
+ return ret;
}
-static inline dma_addr_t page_pool_get_dma_addr(struct page *page)
+static inline void page_pool_set_dma_addr(struct page *page, dma_addr_t addr)
{
- return page->dma_addr;
+ page->dma_addr = addr;
+ if (PAGE_POOL_DMA_USE_PP_FRAG_COUNT)
+ page->dma_addr_upper = upper_32_bits(addr);
}
static inline bool is_page_pool_compiled_in(void)
@@ -223,4 +382,23 @@ static inline void page_pool_nid_changed(struct page_pool *pool, int new_nid)
if (unlikely(pool->p.nid != new_nid))
page_pool_update_nid(pool, new_nid);
}
+
+static inline void page_pool_ring_lock(struct page_pool *pool)
+ __acquires(&pool->ring.producer_lock)
+{
+ if (in_serving_softirq())
+ spin_lock(&pool->ring.producer_lock);
+ else
+ spin_lock_bh(&pool->ring.producer_lock);
+}
+
+static inline void page_pool_ring_unlock(struct page_pool *pool)
+ __releases(&pool->ring.producer_lock)
+{
+ if (in_serving_softirq())
+ spin_unlock(&pool->ring.producer_lock);
+ else
+ spin_unlock_bh(&pool->ring.producer_lock);
+}
+
#endif /* _NET_PAGE_POOL_H */
diff --git a/include/net/phonet/pep.h b/include/net/phonet/pep.h
index 27b1ab5e4e6d..645dddf5ce77 100644
--- a/include/net/phonet/pep.h
+++ b/include/net/phonet/pep.h
@@ -10,6 +10,9 @@
#ifndef NET_PHONET_PEP_H
#define NET_PHONET_PEP_H
+#include <linux/skbuff.h>
+#include <net/phonet/phonet.h>
+
struct pep_sock {
struct pn_sock pn_sk;
diff --git a/include/net/phonet/phonet.h b/include/net/phonet/phonet.h
index a27bdc6cfeab..862f1719b523 100644
--- a/include/net/phonet/phonet.h
+++ b/include/net/phonet/phonet.h
@@ -10,6 +10,10 @@
#ifndef AF_PHONET_H
#define AF_PHONET_H
+#include <linux/phonet.h>
+#include <linux/skbuff.h>
+#include <net/sock.h>
+
/*
* The lower layers may not require more space, ever. Make sure it's
* enough.
diff --git a/include/net/phonet/pn_dev.h b/include/net/phonet/pn_dev.h
index 05b49d4d2b11..e9dc8dca5817 100644
--- a/include/net/phonet/pn_dev.h
+++ b/include/net/phonet/pn_dev.h
@@ -10,6 +10,11 @@
#ifndef PN_DEV_H
#define PN_DEV_H
+#include <linux/list.h>
+#include <linux/mutex.h>
+
+struct net;
+
struct phonet_device_list {
struct list_head list;
struct mutex lock;
diff --git a/include/net/pie.h b/include/net/pie.h
index fd5a37cb7993..3fe2361e03b4 100644
--- a/include/net/pie.h
+++ b/include/net/pie.h
@@ -8,7 +8,7 @@
#include <net/inet_ecn.h>
#include <net/pkt_sched.h>
-#define MAX_PROB U64_MAX
+#define MAX_PROB (U64_MAX >> BITS_PER_BYTE)
#define DTIME_INVALID U64_MAX
#define QUEUE_THRESHOLD 16384
#define DQCOUNT_INVALID -1
@@ -38,16 +38,15 @@ struct pie_params {
/**
* struct pie_vars - contains pie variables
- * @qdelay: current queue delay
- * @qdelay_old: queue delay in previous qdelay calculation
- * @burst_time: burst time allowance
- * @dq_tstamp: timestamp at which dq rate was last calculated
- * @prob: drop probability
- * @accu_prob: accumulated drop probability
- * @dq_count: number of bytes dequeued in a measurement cycle
- * @avg_dq_rate: calculated average dq rate
- * @qlen_old: queue length during previous qdelay calculation
- * @accu_prob_overflows: number of times accu_prob overflows
+ * @qdelay: current queue delay
+ * @qdelay_old: queue delay in previous qdelay calculation
+ * @burst_time: burst time allowance
+ * @dq_tstamp: timestamp at which dq rate was last calculated
+ * @prob: drop probability
+ * @accu_prob: accumulated drop probability
+ * @dq_count: number of bytes dequeued in a measurement cycle
+ * @avg_dq_rate: calculated average dq rate
+ * @backlog_old: queue backlog during previous qdelay calculation
*/
struct pie_vars {
psched_time_t qdelay;
@@ -58,8 +57,7 @@ struct pie_vars {
u64 accu_prob;
u64 dq_count;
u32 avg_dq_rate;
- u32 qlen_old;
- u8 accu_prob_overflows;
+ u32 backlog_old;
};
/**
@@ -107,7 +105,6 @@ static inline void pie_vars_init(struct pie_vars *vars)
vars->accu_prob = 0;
vars->dq_count = DQCOUNT_INVALID;
vars->avg_dq_rate = 0;
- vars->accu_prob_overflows = 0;
}
static inline struct pie_skb_cb *get_pie_cb(const struct sk_buff *skb)
@@ -127,12 +124,12 @@ static inline void pie_set_enqueue_time(struct sk_buff *skb)
}
bool pie_drop_early(struct Qdisc *sch, struct pie_params *params,
- struct pie_vars *vars, u32 qlen, u32 packet_size);
+ struct pie_vars *vars, u32 backlog, u32 packet_size);
void pie_process_dequeue(struct sk_buff *skb, struct pie_params *params,
- struct pie_vars *vars, u32 qlen);
+ struct pie_vars *vars, u32 backlog);
void pie_calculate_probability(struct pie_params *params, struct pie_vars *vars,
- u32 qlen);
+ u32 backlog);
#endif
diff --git a/include/net/ping.h b/include/net/ping.h
index 2fe78874318c..e4ff3911cbf5 100644
--- a/include/net/ping.h
+++ b/include/net/ping.h
@@ -71,12 +71,12 @@ void ping_err(struct sk_buff *skb, int offset, u32 info);
int ping_getfrag(void *from, char *to, int offset, int fraglen, int odd,
struct sk_buff *);
-int ping_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, int noblock,
+int ping_recvmsg(struct sock *sk, struct msghdr *msg, size_t len,
int flags, int *addr_len);
int ping_common_sendmsg(int family, struct msghdr *msg, size_t len,
void *user_icmph, size_t icmph_len);
int ping_queue_rcv_skb(struct sock *sk, struct sk_buff *skb);
-bool ping_rcv(struct sk_buff *skb);
+enum skb_drop_reason ping_rcv(struct sk_buff *skb);
#ifdef CONFIG_PROC_FS
void *ping_seq_start(struct seq_file *seq, loff_t *pos, sa_family_t family);
diff --git a/include/net/pkt_cls.h b/include/net/pkt_cls.h
index a972244ab193..4cabb32a2ad9 100644
--- a/include/net/pkt_cls.h
+++ b/include/net/pkt_cls.h
@@ -23,7 +23,7 @@ struct tcf_walker {
};
int register_tcf_proto_ops(struct tcf_proto_ops *ops);
-int unregister_tcf_proto_ops(struct tcf_proto_ops *ops);
+void unregister_tcf_proto_ops(struct tcf_proto_ops *ops);
struct tcf_block_ext_info {
enum flow_block_binder_type binder_type;
@@ -32,6 +32,12 @@ struct tcf_block_ext_info {
u32 block_index;
};
+struct tcf_qevent {
+ struct tcf_block *block;
+ struct tcf_block_ext_info info;
+ struct tcf_proto __rcu *filter_chain;
+};
+
struct tcf_block_cb;
bool tcf_queue_work(struct rcu_work *rwork, work_func_t func);
@@ -42,7 +48,7 @@ void tcf_chain_put_by_act(struct tcf_chain *chain);
struct tcf_chain *tcf_get_next_chain(struct tcf_block *block,
struct tcf_chain *chain);
struct tcf_proto *tcf_get_next_proto(struct tcf_chain *chain,
- struct tcf_proto *tp, bool rtnl_held);
+ struct tcf_proto *tp);
void tcf_block_netif_keep_dst(struct tcf_block *block);
int tcf_block_get(struct tcf_block **p_block,
struct tcf_proto __rcu **p_filter_chain, struct Qdisc *q,
@@ -70,8 +76,23 @@ static inline struct Qdisc *tcf_block_q(struct tcf_block *block)
return block->q;
}
-int tcf_classify(struct sk_buff *skb, const struct tcf_proto *tp,
- struct tcf_result *res, bool compat_mode);
+int tcf_classify(struct sk_buff *skb,
+ const struct tcf_block *block,
+ const struct tcf_proto *tp, struct tcf_result *res,
+ bool compat_mode);
+
+static inline bool tc_cls_stats_dump(struct tcf_proto *tp,
+ struct tcf_walker *arg,
+ void *filter)
+{
+ if (arg->count >= arg->skip && arg->fn(tp, filter, arg) < 0) {
+ arg->stop = 1;
+ return false;
+ }
+
+ arg->count++;
+ return true;
+}
#else
static inline bool tcf_block_shared(struct tcf_block *block)
@@ -128,11 +149,14 @@ void tc_setup_cb_block_unregister(struct tcf_block *block, flow_setup_cb_t *cb,
{
}
-static inline int tcf_classify(struct sk_buff *skb, const struct tcf_proto *tp,
+static inline int tcf_classify(struct sk_buff *skb,
+ const struct tcf_block *block,
+ const struct tcf_proto *tp,
struct tcf_result *res, bool compat_mode)
{
return TC_ACT_UNSPEC;
}
+
#endif
static inline unsigned long
@@ -186,12 +210,25 @@ tcf_unbind_filter(struct tcf_proto *tp, struct tcf_result *r)
__tcf_unbind_filter(q, r);
}
+static inline void tc_cls_bind_class(u32 classid, unsigned long cl,
+ void *q, struct tcf_result *res,
+ unsigned long base)
+{
+ if (res->classid == classid) {
+ if (cl)
+ __tcf_bind_filter(q, res, base);
+ else
+ __tcf_unbind_filter(q, res);
+ }
+}
+
struct tcf_exts {
#ifdef CONFIG_NET_CLS_ACT
__u32 type; /* for backward compat(TCA_OLD_COMPAT) */
int nr_actions;
struct tc_action **actions;
- struct net *net;
+ struct net *net;
+ netns_tracker ns_tracker;
#endif
/* Map to export classifier specific extension TLV types to the
* generic extensions API. Unsupported extensions must be set to 0.
@@ -206,6 +243,9 @@ static inline int tcf_exts_init(struct tcf_exts *exts, struct net *net,
#ifdef CONFIG_NET_CLS_ACT
exts->type = 0;
exts->nr_actions = 0;
+ /* Note: we do not own yet a reference on net.
+ * This reference might be taken later from tcf_exts_get_net().
+ */
exts->net = net;
exts->actions = kcalloc(TCA_ACT_MAX_PRIO, sizeof(struct tc_action *),
GFP_KERNEL);
@@ -225,6 +265,8 @@ static inline bool tcf_exts_get_net(struct tcf_exts *exts)
{
#ifdef CONFIG_NET_CLS_ACT
exts->net = maybe_get_net(exts->net);
+ if (exts->net)
+ netns_tracker_alloc(exts->net, &exts->ns_tracker, GFP_KERNEL);
return exts->net != NULL;
#else
return true;
@@ -235,7 +277,7 @@ static inline void tcf_exts_put_net(struct tcf_exts *exts)
{
#ifdef CONFIG_NET_CLS_ACT
if (exts->net)
- put_net(exts->net);
+ put_net_track(exts->net, &exts->ns_tracker);
#endif
}
@@ -247,22 +289,31 @@ static inline void tcf_exts_put_net(struct tcf_exts *exts)
for (; 0; (void)(i), (void)(a), (void)(exts))
#endif
+#define tcf_act_for_each_action(i, a, actions) \
+ for (i = 0; i < TCA_ACT_MAX_PRIO && ((a) = actions[i]); i++)
+
static inline void
-tcf_exts_stats_update(const struct tcf_exts *exts,
- u64 bytes, u64 packets, u64 lastuse)
+tcf_exts_hw_stats_update(const struct tcf_exts *exts,
+ u64 bytes, u64 packets, u64 drops, u64 lastuse,
+ u8 used_hw_stats, bool used_hw_stats_valid)
{
#ifdef CONFIG_NET_CLS_ACT
int i;
- preempt_disable();
-
for (i = 0; i < exts->nr_actions; i++) {
struct tc_action *a = exts->actions[i];
- tcf_action_stats_update(a, bytes, packets, lastuse, true);
- }
+ /* if stats from hw, just skip */
+ if (tcf_action_update_hw_stats(a)) {
+ preempt_disable();
+ tcf_action_stats_update(a, bytes, packets, drops,
+ lastuse, true);
+ preempt_enable();
- preempt_enable();
+ a->used_hw_stats = used_hw_stats;
+ a->used_hw_stats_valid = used_hw_stats_valid;
+ }
+ }
#endif
}
@@ -304,15 +355,22 @@ tcf_exts_exec(struct sk_buff *skb, struct tcf_exts *exts,
int tcf_exts_validate(struct net *net, struct tcf_proto *tp,
struct nlattr **tb, struct nlattr *rate_tlv,
- struct tcf_exts *exts, bool ovr, bool rtnl_held,
+ struct tcf_exts *exts, u32 flags,
struct netlink_ext_ack *extack);
+int tcf_exts_validate_ex(struct net *net, struct tcf_proto *tp, struct nlattr **tb,
+ struct nlattr *rate_tlv, struct tcf_exts *exts,
+ u32 flags, u32 fl_flags, struct netlink_ext_ack *extack);
void tcf_exts_destroy(struct tcf_exts *exts);
void tcf_exts_change(struct tcf_exts *dst, struct tcf_exts *src);
int tcf_exts_dump(struct sk_buff *skb, struct tcf_exts *exts);
+int tcf_exts_terse_dump(struct sk_buff *skb, struct tcf_exts *exts);
int tcf_exts_dump_stats(struct sk_buff *skb, struct tcf_exts *exts);
/**
* struct tcf_pkt_info - packet information
+ *
+ * @ptr: start of the pkt data
+ * @nexthdr: offset of the next header
*/
struct tcf_pkt_info {
unsigned char * ptr;
@@ -331,6 +389,7 @@ struct tcf_ematch_ops;
* @ops: the operations lookup table of the corresponding ematch module
* @datalen: length of the ematch specific configuration data
* @data: ematch specific data
+ * @net: the network namespace
*/
struct tcf_ematch {
struct tcf_ematch_ops * ops;
@@ -488,13 +547,17 @@ tcf_change_indev(struct net *net, struct nlattr *indev_tlv,
char indev[IFNAMSIZ];
struct net_device *dev;
- if (nla_strlcpy(indev, indev_tlv, IFNAMSIZ) >= IFNAMSIZ) {
- NL_SET_ERR_MSG(extack, "Interface name too long");
+ if (nla_strscpy(indev, indev_tlv, IFNAMSIZ) < 0) {
+ NL_SET_ERR_MSG_ATTR(extack, indev_tlv,
+ "Interface name too long");
return -EINVAL;
}
dev = __dev_get_by_name(net, indev);
- if (!dev)
+ if (!dev) {
+ NL_SET_ERR_MSG_ATTR(extack, indev_tlv,
+ "Network device not found");
return -ENODEV;
+ }
return dev->ifindex;
}
@@ -508,9 +571,13 @@ tcf_match_indev(struct sk_buff *skb, int ifindex)
return ifindex == skb->skb_iif;
}
-int tc_setup_flow_action(struct flow_action *flow_action,
- const struct tcf_exts *exts, bool rtnl_held);
-void tc_cleanup_flow_action(struct flow_action *flow_action);
+int tc_setup_offload_action(struct flow_action *flow_action,
+ const struct tcf_exts *exts,
+ struct netlink_ext_ack *extack);
+void tc_cleanup_offload_action(struct flow_action *flow_action);
+int tc_setup_action(struct flow_action *flow_action,
+ struct tc_action *actions[],
+ struct netlink_ext_ack *extack);
int tc_setup_cb_call(struct tcf_block *block, enum tc_setup_type type,
void *type_data, bool err_stop, bool rtnl_held);
@@ -531,6 +598,49 @@ int tc_setup_cb_reoffload(struct tcf_block *block, struct tcf_proto *tp,
void *cb_priv, u32 *flags, unsigned int *in_hw_count);
unsigned int tcf_exts_num_actions(struct tcf_exts *exts);
+#ifdef CONFIG_NET_CLS_ACT
+int tcf_qevent_init(struct tcf_qevent *qe, struct Qdisc *sch,
+ enum flow_block_binder_type binder_type,
+ struct nlattr *block_index_attr,
+ struct netlink_ext_ack *extack);
+void tcf_qevent_destroy(struct tcf_qevent *qe, struct Qdisc *sch);
+int tcf_qevent_validate_change(struct tcf_qevent *qe, struct nlattr *block_index_attr,
+ struct netlink_ext_ack *extack);
+struct sk_buff *tcf_qevent_handle(struct tcf_qevent *qe, struct Qdisc *sch, struct sk_buff *skb,
+ struct sk_buff **to_free, int *ret);
+int tcf_qevent_dump(struct sk_buff *skb, int attr_name, struct tcf_qevent *qe);
+#else
+static inline int tcf_qevent_init(struct tcf_qevent *qe, struct Qdisc *sch,
+ enum flow_block_binder_type binder_type,
+ struct nlattr *block_index_attr,
+ struct netlink_ext_ack *extack)
+{
+ return 0;
+}
+
+static inline void tcf_qevent_destroy(struct tcf_qevent *qe, struct Qdisc *sch)
+{
+}
+
+static inline int tcf_qevent_validate_change(struct tcf_qevent *qe, struct nlattr *block_index_attr,
+ struct netlink_ext_ack *extack)
+{
+ return 0;
+}
+
+static inline struct sk_buff *
+tcf_qevent_handle(struct tcf_qevent *qe, struct Qdisc *sch, struct sk_buff *skb,
+ struct sk_buff **to_free, int *ret)
+{
+ return skb;
+}
+
+static inline int tcf_qevent_dump(struct sk_buff *skb, int attr_name, struct tcf_qevent *qe)
+{
+ return 0;
+}
+#endif
+
struct tc_cls_u32_knode {
struct tcf_exts *exts;
struct tcf_result *res;
@@ -638,6 +748,17 @@ tc_cls_common_offload_init(struct flow_cls_common_offload *cls_common,
cls_common->extack = extack;
}
+#if IS_ENABLED(CONFIG_NET_TC_SKB_EXT)
+static inline struct tc_skb_ext *tc_skb_ext_alloc(struct sk_buff *skb)
+{
+ struct tc_skb_ext *tc_skb_ext = skb_ext_add(skb, TC_SKB_EXT);
+
+ if (tc_skb_ext)
+ memset(tc_skb_ext, 0, sizeof(*tc_skb_ext));
+ return tc_skb_ext;
+}
+#endif
+
enum tc_matchall_command {
TC_CLSMATCHALL_REPLACE,
TC_CLSMATCHALL_DESTROY,
@@ -687,7 +808,7 @@ struct tc_cookie {
};
struct tc_qopt_offload_stats {
- struct gnet_stats_basic_packed *bstats;
+ struct gnet_stats_basic_sync *bstats;
struct gnet_stats_queue *qstats;
};
@@ -712,6 +833,41 @@ struct tc_mq_qopt_offload {
};
};
+enum tc_htb_command {
+ /* Root */
+ TC_HTB_CREATE, /* Initialize HTB offload. */
+ TC_HTB_DESTROY, /* Destroy HTB offload. */
+
+ /* Classes */
+ /* Allocate qid and create leaf. */
+ TC_HTB_LEAF_ALLOC_QUEUE,
+ /* Convert leaf to inner, preserve and return qid, create new leaf. */
+ TC_HTB_LEAF_TO_INNER,
+ /* Delete leaf, while siblings remain. */
+ TC_HTB_LEAF_DEL,
+ /* Delete leaf, convert parent to leaf, preserving qid. */
+ TC_HTB_LEAF_DEL_LAST,
+ /* TC_HTB_LEAF_DEL_LAST, but delete driver data on hardware errors. */
+ TC_HTB_LEAF_DEL_LAST_FORCE,
+ /* Modify parameters of a node. */
+ TC_HTB_NODE_MODIFY,
+
+ /* Class qdisc */
+ TC_HTB_LEAF_QUERY_QUEUE, /* Query qid by classid. */
+};
+
+struct tc_htb_qopt_offload {
+ struct netlink_ext_ack *extack;
+ enum tc_htb_command command;
+ u32 parent_classid;
+ u16 classid;
+ u16 qid;
+ u64 rate;
+ u64 ceil;
+};
+
+#define TC_HTB_CLASSID_ROOT U32_MAX
+
enum tc_red_command {
TC_RED_REPLACE,
TC_RED_DESTROY,
@@ -727,6 +883,7 @@ struct tc_red_qopt_offload_params {
u32 limit;
bool is_ecn;
bool is_harddrop;
+ bool is_nodrop;
struct gnet_stats_queue *qstats;
};
@@ -771,7 +928,7 @@ struct tc_gred_qopt_offload_params {
};
struct tc_gred_qopt_offload_stats {
- struct gnet_stats_basic_packed bstats[MAX_DPs];
+ struct gnet_stats_basic_sync bstats[MAX_DPs];
struct gnet_stats_queue qstats[MAX_DPs];
struct red_stats *xstats[MAX_DPs];
};
@@ -863,6 +1020,7 @@ enum tc_tbf_command {
TC_TBF_REPLACE,
TC_TBF_DESTROY,
TC_TBF_STATS,
+ TC_TBF_GRAFT,
};
struct tc_tbf_qopt_offload_replace_params {
@@ -878,7 +1036,34 @@ struct tc_tbf_qopt_offload {
union {
struct tc_tbf_qopt_offload_replace_params replace_params;
struct tc_qopt_offload_stats stats;
+ u32 child_handle;
+ };
+};
+
+enum tc_fifo_command {
+ TC_FIFO_REPLACE,
+ TC_FIFO_DESTROY,
+ TC_FIFO_STATS,
+};
+
+struct tc_fifo_qopt_offload {
+ enum tc_fifo_command command;
+ u32 handle;
+ u32 parent;
+ union {
+ struct tc_qopt_offload_stats stats;
};
};
+#ifdef CONFIG_NET_CLS_ACT
+DECLARE_STATIC_KEY_FALSE(tc_skb_ext_tc);
+void tc_skb_ext_tc_enable(void);
+void tc_skb_ext_tc_disable(void);
+#define tc_skb_ext_tc_enabled() static_branch_unlikely(&tc_skb_ext_tc)
+#else /* CONFIG_NET_CLS_ACT */
+static inline void tc_skb_ext_tc_enable(void) { }
+static inline void tc_skb_ext_tc_disable(void) { }
+#define tc_skb_ext_tc_enabled() false
+#endif
+
#endif
diff --git a/include/net/pkt_sched.h b/include/net/pkt_sched.h
index 6a70845bd9ab..38207873eda6 100644
--- a/include/net/pkt_sched.h
+++ b/include/net/pkt_sched.h
@@ -11,6 +11,7 @@
#include <uapi/linux/pkt_sched.h>
#define DEFAULT_TX_QUEUE_LEN 1000
+#define STAB_SIZE_LOG_MAX 30
struct qdisc_walker {
int stop;
@@ -19,12 +20,14 @@ struct qdisc_walker {
int (*fn)(struct Qdisc *, unsigned long cl, struct qdisc_walker *);
};
-#define QDISC_ALIGNTO 64
-#define QDISC_ALIGN(len) (((len) + QDISC_ALIGNTO-1) & ~(QDISC_ALIGNTO-1))
-
static inline void *qdisc_priv(struct Qdisc *q)
{
- return (char *) q + QDISC_ALIGN(sizeof(struct Qdisc));
+ return &q->privdata;
+}
+
+static inline struct Qdisc *qdisc_from_priv(void *priv)
+{
+ return container_of(priv, struct Qdisc, privdata);
}
/*
@@ -60,12 +63,6 @@ static inline psched_time_t psched_get_time(void)
return PSCHED_NS2TICKS(ktime_get_ns());
}
-static inline psched_tdiff_t
-psched_tdiff_bounded(psched_time_t tv1, psched_time_t tv2, psched_time_t bound)
-{
- return min(tv1 - tv2, bound);
-}
-
struct qdisc_watchdog {
u64 last_expires;
struct hrtimer timer;
@@ -75,7 +72,15 @@ struct qdisc_watchdog {
void qdisc_watchdog_init_clockid(struct qdisc_watchdog *wd, struct Qdisc *qdisc,
clockid_t clockid);
void qdisc_watchdog_init(struct qdisc_watchdog *wd, struct Qdisc *qdisc);
-void qdisc_watchdog_schedule_ns(struct qdisc_watchdog *wd, u64 expires);
+
+void qdisc_watchdog_schedule_range_ns(struct qdisc_watchdog *wd, u64 expires,
+ u64 delta_ns);
+
+static inline void qdisc_watchdog_schedule_ns(struct qdisc_watchdog *wd,
+ u64 expires)
+{
+ return qdisc_watchdog_schedule_range_ns(wd, expires, 0ULL);
+}
static inline void qdisc_watchdog_schedule(struct qdisc_watchdog *wd,
psched_time_t expires)
@@ -95,7 +100,7 @@ struct Qdisc *fifo_create_dflt(struct Qdisc *sch, struct Qdisc_ops *ops,
struct netlink_ext_ack *extack);
int register_qdisc(struct Qdisc_ops *qops);
-int unregister_qdisc(struct Qdisc_ops *qops);
+void unregister_qdisc(struct Qdisc_ops *qops);
void qdisc_get_default(char *id, size_t len);
int qdisc_set_default(const char *id);
@@ -118,27 +123,11 @@ void __qdisc_run(struct Qdisc *q);
static inline void qdisc_run(struct Qdisc *q)
{
if (qdisc_run_begin(q)) {
- /* NOLOCK qdisc must check 'state' under the qdisc seqlock
- * to avoid racing with dev_qdisc_reset()
- */
- if (!(q->flags & TCQ_F_NOLOCK) ||
- likely(!test_bit(__QDISC_STATE_DEACTIVATED, &q->state)))
- __qdisc_run(q);
+ __qdisc_run(q);
qdisc_run_end(q);
}
}
-static inline __be16 tc_skb_protocol(const struct sk_buff *skb)
-{
- /* We need to take extra care in case the skb came via
- * vlan accelerated path. In that case, use skb->vlan_proto
- * as the original vlan header was already stripped.
- */
- if (skb_vlan_tag_present(skb))
- return skb->vlan_proto;
- return skb->protocol;
-}
-
/* Calculate maximal size of packet seen by hard_start_xmit
routine of this device.
*/
@@ -152,6 +141,11 @@ static inline struct net *qdisc_net(struct Qdisc *q)
return dev_net(q->dev_queue->dev);
}
+struct tc_query_caps_base {
+ enum tc_setup_type type;
+ void *caps;
+};
+
struct tc_cbs_qopt_offload {
u8 enable;
s32 queue;
@@ -166,6 +160,10 @@ struct tc_etf_qopt_offload {
s32 queue;
};
+struct tc_taprio_caps {
+ bool supports_queue_max_sdu:1;
+};
+
struct tc_taprio_sched_entry {
u8 command; /* TC_TAPRIO_CMD_* */
@@ -179,14 +177,72 @@ struct tc_taprio_qopt_offload {
ktime_t base_time;
u64 cycle_time;
u64 cycle_time_extension;
+ u32 max_sdu[TC_MAX_QUEUE];
size_t num_entries;
- struct tc_taprio_sched_entry entries[0];
+ struct tc_taprio_sched_entry entries[];
};
+#if IS_ENABLED(CONFIG_NET_SCH_TAPRIO)
+
/* Reference counting */
struct tc_taprio_qopt_offload *taprio_offload_get(struct tc_taprio_qopt_offload
*offload);
void taprio_offload_free(struct tc_taprio_qopt_offload *offload);
+#else
+
+/* Reference counting */
+static inline struct tc_taprio_qopt_offload *
+taprio_offload_get(struct tc_taprio_qopt_offload *offload)
+{
+ return NULL;
+}
+
+static inline void taprio_offload_free(struct tc_taprio_qopt_offload *offload)
+{
+}
+
+#endif
+
+/* Ensure skb_mstamp_ns, which might have been populated with the txtime, is
+ * not mistaken for a software timestamp, because this will otherwise prevent
+ * the dispatch of hardware timestamps to the socket.
+ */
+static inline void skb_txtime_consumed(struct sk_buff *skb)
+{
+ skb->tstamp = ktime_set(0, 0);
+}
+
+struct tc_skb_cb {
+ struct qdisc_skb_cb qdisc_cb;
+
+ u16 mru;
+ u8 post_ct:1;
+ u8 post_ct_snat:1;
+ u8 post_ct_dnat:1;
+ u16 zone; /* Only valid if post_ct = true */
+};
+
+static inline struct tc_skb_cb *tc_skb_cb(const struct sk_buff *skb)
+{
+ struct tc_skb_cb *cb = (struct tc_skb_cb *)skb->cb;
+
+ BUILD_BUG_ON(sizeof(*cb) > sizeof_field(struct sk_buff, cb));
+ return cb;
+}
+
+static inline bool tc_qdisc_stats_dump(struct Qdisc *sch,
+ unsigned long cl,
+ struct qdisc_walker *arg)
+{
+ if (arg->count >= arg->skip && arg->fn(sch, cl, arg) < 0) {
+ arg->stop = 1;
+ return false;
+ }
+
+ arg->count++;
+ return true;
+}
+
#endif
diff --git a/include/net/pptp.h b/include/net/pptp.h
index 383e25ca53a7..e63176bdd4c8 100644
--- a/include/net/pptp.h
+++ b/include/net/pptp.h
@@ -2,6 +2,9 @@
#ifndef _NET_PPTP_H
#define _NET_PPTP_H
+#include <linux/types.h>
+#include <net/gre.h>
+
#define PPP_LCP_ECHOREQ 0x09
#define PPP_LCP_ECHOREP 0x0A
#define SC_RCV_BITS (SC_RCV_B7_1|SC_RCV_B7_0|SC_RCV_ODDP|SC_RCV_EVNP)
diff --git a/include/net/protocol.h b/include/net/protocol.h
index 2b778e1d2d8f..6aef8cb11cc8 100644
--- a/include/net/protocol.h
+++ b/include/net/protocol.h
@@ -35,15 +35,12 @@
/* This is used to register protocols. */
struct net_protocol {
- int (*early_demux)(struct sk_buff *skb);
- int (*early_demux_handler)(struct sk_buff *skb);
int (*handler)(struct sk_buff *skb);
/* This returns an error if we weren't able to handle the error. */
int (*err_handler)(struct sk_buff *skb, u32 info);
unsigned int no_policy:1,
- netns_ok:1,
/* does the protocol do more stringent
* icmp tag validation than simple
* socket lookup?
@@ -53,8 +50,6 @@ struct net_protocol {
#if IS_ENABLED(CONFIG_IPV6)
struct inet6_protocol {
- void (*early_demux)(struct sk_buff *skb);
- void (*early_demux_handler)(struct sk_buff *skb);
int (*handler)(struct sk_buff *skb);
/* This returns an error if we weren't able to handle the error. */
diff --git a/include/net/psample.h b/include/net/psample.h
index 68ae16bb0a4a..0509d2d6be67 100644
--- a/include/net/psample.h
+++ b/include/net/psample.h
@@ -14,22 +14,35 @@ struct psample_group {
struct rcu_head rcu;
};
+struct psample_metadata {
+ u32 trunc_size;
+ int in_ifindex;
+ int out_ifindex;
+ u16 out_tc;
+ u64 out_tc_occ; /* bytes */
+ u64 latency; /* nanoseconds */
+ u8 out_tc_valid:1,
+ out_tc_occ_valid:1,
+ latency_valid:1,
+ unused:5;
+};
+
struct psample_group *psample_group_get(struct net *net, u32 group_num);
void psample_group_take(struct psample_group *group);
void psample_group_put(struct psample_group *group);
+struct sk_buff;
+
#if IS_ENABLED(CONFIG_PSAMPLE)
void psample_sample_packet(struct psample_group *group, struct sk_buff *skb,
- u32 trunc_size, int in_ifindex, int out_ifindex,
- u32 sample_rate);
+ u32 sample_rate, const struct psample_metadata *md);
#else
static inline void psample_sample_packet(struct psample_group *group,
- struct sk_buff *skb, u32 trunc_size,
- int in_ifindex, int out_ifindex,
- u32 sample_rate)
+ struct sk_buff *skb, u32 sample_rate,
+ const struct psample_metadata *md)
{
}
diff --git a/include/net/psnap.h b/include/net/psnap.h
index 7cb0c8ab4171..88802b0754ad 100644
--- a/include/net/psnap.h
+++ b/include/net/psnap.h
@@ -2,6 +2,11 @@
#ifndef _NET_PSNAP_H
#define _NET_PSNAP_H
+struct datalink_proto;
+struct sk_buff;
+struct packet_type;
+struct net_device;
+
struct datalink_proto *
register_snap_client(const unsigned char *desc,
int (*rcvfunc)(struct sk_buff *, struct net_device *,
diff --git a/include/net/raw.h b/include/net/raw.h
index 8ad8df594853..5e665934ebc7 100644
--- a/include/net/raw.h
+++ b/include/net/raw.h
@@ -20,9 +20,8 @@
extern struct proto raw_prot;
extern struct raw_hashinfo raw_v4_hashinfo;
-struct sock *__raw_v4_lookup(struct net *net, struct sock *sk,
- unsigned short num, __be32 raddr,
- __be32 laddr, int dif, int sdif);
+bool raw_v4_match(struct net *net, struct sock *sk, unsigned short num,
+ __be32 raddr, __be32 laddr, int dif, int sdif);
int raw_abort(struct sock *sk, int err);
void raw_icmp_error(struct sk_buff *, int, u32);
@@ -33,10 +32,19 @@ int raw_rcv(struct sock *, struct sk_buff *);
#define RAW_HTABLE_SIZE MAX_INET_PROTOS
struct raw_hashinfo {
- rwlock_t lock;
- struct hlist_head ht[RAW_HTABLE_SIZE];
+ spinlock_t lock;
+ struct hlist_nulls_head ht[RAW_HTABLE_SIZE];
};
+static inline void raw_hashinfo_init(struct raw_hashinfo *hashinfo)
+{
+ int i;
+
+ spin_lock_init(&hashinfo->lock);
+ for (i = 0; i < RAW_HTABLE_SIZE; i++)
+ INIT_HLIST_NULLS_HEAD(&hashinfo->ht[i], i);
+}
+
#ifdef CONFIG_PROC_FS
int raw_proc_init(void);
void raw_proc_exit(void);
@@ -75,7 +83,7 @@ static inline bool raw_sk_bound_dev_eq(struct net *net, int bound_dev_if,
int dif, int sdif)
{
#if IS_ENABLED(CONFIG_NET_L3_MASTER_DEV)
- return inet_bound_dev_eq(!!net->ipv4.sysctl_raw_l3mdev_accept,
+ return inet_bound_dev_eq(READ_ONCE(net->ipv4.sysctl_raw_l3mdev_accept),
bound_dev_if, dif, sdif);
#else
return inet_bound_dev_eq(true, bound_dev_if, dif, sdif);
diff --git a/include/net/rawv6.h b/include/net/rawv6.h
index 53d86b6055e8..bc70909625f6 100644
--- a/include/net/rawv6.h
+++ b/include/net/rawv6.h
@@ -3,11 +3,12 @@
#define _NET_RAWV6_H
#include <net/protocol.h>
+#include <net/raw.h>
extern struct raw_hashinfo raw_v6_hashinfo;
-struct sock *__raw_v6_lookup(struct net *net, struct sock *sk,
- unsigned short num, const struct in6_addr *loc_addr,
- const struct in6_addr *rmt_addr, int dif, int sdif);
+bool raw_v6_match(struct net *net, struct sock *sk, unsigned short num,
+ const struct in6_addr *loc_addr,
+ const struct in6_addr *rmt_addr, int dif, int sdif);
int raw_abort(struct sock *sk, int err);
diff --git a/include/net/red.h b/include/net/red.h
index 9665582c4687..425364de0df7 100644
--- a/include/net/red.h
+++ b/include/net/red.h
@@ -122,7 +122,6 @@ struct red_stats {
u32 forced_drop; /* Forced drops, qavg > max_thresh */
u32 forced_mark; /* Forced marks, qavg > max_thresh */
u32 pdrop; /* Drops due to queue limits */
- u32 other; /* Drops due to drop() calls */
};
struct red_parms {
@@ -168,17 +167,65 @@ static inline void red_set_vars(struct red_vars *v)
v->qcount = -1;
}
-static inline bool red_check_params(u32 qth_min, u32 qth_max, u8 Wlog)
+static inline bool red_check_params(u32 qth_min, u32 qth_max, u8 Wlog,
+ u8 Scell_log, u8 *stab)
{
- if (fls(qth_min) + Wlog > 32)
+ if (fls(qth_min) + Wlog >= 32)
return false;
- if (fls(qth_max) + Wlog > 32)
+ if (fls(qth_max) + Wlog >= 32)
+ return false;
+ if (Scell_log >= 32)
return false;
if (qth_max < qth_min)
return false;
+ if (stab) {
+ int i;
+
+ for (i = 0; i < RED_STAB_SIZE; i++)
+ if (stab[i] >= 32)
+ return false;
+ }
return true;
}
+static inline int red_get_flags(unsigned char qopt_flags,
+ unsigned char historic_mask,
+ struct nlattr *flags_attr,
+ unsigned char supported_mask,
+ struct nla_bitfield32 *p_flags,
+ unsigned char *p_userbits,
+ struct netlink_ext_ack *extack)
+{
+ struct nla_bitfield32 flags;
+
+ if (qopt_flags && flags_attr) {
+ NL_SET_ERR_MSG_MOD(extack, "flags should be passed either through qopt, or through a dedicated attribute");
+ return -EINVAL;
+ }
+
+ if (flags_attr) {
+ flags = nla_get_bitfield32(flags_attr);
+ } else {
+ flags.selector = historic_mask;
+ flags.value = qopt_flags & historic_mask;
+ }
+
+ *p_flags = flags;
+ *p_userbits = qopt_flags & ~historic_mask;
+ return 0;
+}
+
+static inline int red_validate_flags(unsigned char flags,
+ struct netlink_ext_ack *extack)
+{
+ if ((flags & TC_RED_NODROP) && !(flags & TC_RED_ECN)) {
+ NL_SET_ERR_MSG_MOD(extack, "nodrop mode is only meaningful with ECN");
+ return -EINVAL;
+ }
+
+ return 0;
+}
+
static inline void red_set_parms(struct red_parms *p,
u32 qth_min, u32 qth_max, u8 Wlog, u8 Plog,
u8 Scell_log, u8 *stab, u32 max_P)
@@ -247,7 +294,7 @@ static inline unsigned long red_calc_qavg_from_idle_time(const struct red_parms
int shift;
/*
- * The problem: ideally, average length queue recalcultion should
+ * The problem: ideally, average length queue recalculation should
* be done over constant clock intervals. This is too expensive, so
* that the calculation is driven by outgoing packets.
* When the queue is idle we have to model this clock by hand.
@@ -316,7 +363,7 @@ static inline unsigned long red_calc_qavg(const struct red_parms *p,
static inline u32 red_random(const struct red_parms *p)
{
- return reciprocal_divide(prandom_u32(), p->max_P_reciprocal);
+ return reciprocal_divide(get_random_u32(), p->max_P_reciprocal);
}
static inline int red_mark_probability(const struct red_parms *p,
diff --git a/include/net/regulatory.h b/include/net/regulatory.h
index 3469750df0f4..896191f420d5 100644
--- a/include/net/regulatory.h
+++ b/include/net/regulatory.h
@@ -1,3 +1,4 @@
+
#ifndef __NET_REGULATORY_H
#define __NET_REGULATORY_H
/*
@@ -19,6 +20,8 @@
* OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
*/
+#include <linux/ieee80211.h>
+#include <linux/nl80211.h>
#include <linux/rcupdate.h>
/**
@@ -44,7 +47,7 @@ enum environment_cap {
* and potentially inform users of which devices specifically
* cased the conflicts.
* @initiator: indicates who sent this request, could be any of
- * of those set in nl80211_reg_initiator (%NL80211_REGDOM_SET_BY_*)
+ * those set in nl80211_reg_initiator (%NL80211_REGDOM_SET_BY_*)
* @alpha2: the ISO / IEC 3166 alpha2 country code of the requested
* regulatory domain. We have a few special codes:
* 00 - World regulatory domain
@@ -231,13 +234,6 @@ struct ieee80211_regdomain {
struct ieee80211_reg_rule reg_rules[];
};
-#define MHZ_TO_KHZ(freq) ((freq) * 1000)
-#define KHZ_TO_MHZ(freq) ((freq) / 1000)
-#define DBI_TO_MBI(gain) ((gain) * 100)
-#define MBI_TO_DBI(gain) ((gain) / 100)
-#define DBM_TO_MBM(gain) ((gain) * 100)
-#define MBM_TO_DBM(gain) ((gain) / 100)
-
#define REG_RULE_EXT(start, end, bw, gain, eirp, dfs_cac, reg_flags) \
{ \
.freq_range.start_freq_khz = MHZ_TO_KHZ(start), \
diff --git a/include/net/request_sock.h b/include/net/request_sock.h
index cf8b33213bbc..144c39db9898 100644
--- a/include/net/request_sock.h
+++ b/include/net/request_sock.h
@@ -41,6 +41,13 @@ struct request_sock_ops {
int inet_rtx_syn_ack(const struct sock *parent, struct request_sock *req);
+struct saved_syn {
+ u32 mac_hdrlen;
+ u32 network_hdrlen;
+ u32 tcp_hdrlen;
+ u8 data[];
+};
+
/* struct request_sock - mini sock to represent a connection request
*/
struct request_sock {
@@ -54,15 +61,16 @@ struct request_sock {
struct request_sock *dl_next;
u16 mss;
u8 num_retrans; /* number of retransmits */
- u8 cookie_ts:1; /* syncookie: encode tcpopts in timestamp */
+ u8 syncookie:1; /* syncookie: encode tcpopts in timestamp */
u8 num_timeout:7; /* number of timeouts */
u32 ts_recent;
struct timer_list rsk_timer;
const struct request_sock_ops *rsk_ops;
struct sock *sk;
- u32 *saved_syn;
+ struct saved_syn *saved_syn;
u32 secid;
u32 peer_secid;
+ u32 timeout;
};
static inline struct request_sock *inet_reqsk(const struct sock *sk)
@@ -97,6 +105,7 @@ reqsk_alloc(const struct request_sock_ops *ops, struct sock *sk_listener,
sk_node_init(&req_to_sk(req)->sk_node);
sk_tx_queue_clear(req_to_sk(req));
req->saved_syn = NULL;
+ req->timeout = 0;
req->num_timeout = 0;
req->num_retrans = 0;
req->sk = NULL;
diff --git a/include/net/rose.h b/include/net/rose.h
index cf517d306a28..23267b4efcfa 100644
--- a/include/net/rose.h
+++ b/include/net/rose.h
@@ -9,6 +9,7 @@
#define _ROSE_H
#include <linux/rose.h>
+#include <net/ax25.h>
#include <net/sock.h>
#define ROSE_ADDR_LEN 5
@@ -131,7 +132,8 @@ struct rose_sock {
ax25_address source_digis[ROSE_MAX_DIGIS];
ax25_address dest_digis[ROSE_MAX_DIGIS];
struct rose_neigh *neighbour;
- struct net_device *device;
+ struct net_device *device;
+ netdevice_tracker dev_tracker;
unsigned int lci, rand;
unsigned char state, condition, qbitincl, defer;
unsigned char cause, diagnostic;
@@ -162,8 +164,8 @@ extern int sysctl_rose_link_fail_timeout;
extern int sysctl_rose_maximum_vcs;
extern int sysctl_rose_window_size;
-int rosecmp(rose_address *, rose_address *);
-int rosecmpm(rose_address *, rose_address *, unsigned short);
+int rosecmp(const rose_address *, const rose_address *);
+int rosecmpm(const rose_address *, const rose_address *, unsigned short);
char *rose2asc(char *buf, const rose_address *);
struct sock *rose_find_socket(unsigned int, struct rose_neigh *);
void rose_kill_by_neigh(struct rose_neigh *);
@@ -205,8 +207,8 @@ extern const struct seq_operations rose_node_seqops;
extern struct seq_operations rose_route_seqops;
void rose_add_loopback_neigh(void);
-int __must_check rose_add_loopback_node(rose_address *);
-void rose_del_loopback_node(rose_address *);
+int __must_check rose_add_loopback_node(const rose_address *);
+void rose_del_loopback_node(const rose_address *);
void rose_rt_device_down(struct net_device *);
void rose_link_device_down(struct net_device *);
struct net_device *rose_dev_first(void);
diff --git a/include/net/route.h b/include/net/route.h
index a9c60fc68e36..6e92dd5bcd61 100644
--- a/include/net/route.h
+++ b/include/net/route.h
@@ -43,6 +43,20 @@
#define RT_CONN_FLAGS(sk) (RT_TOS(inet_sk(sk)->tos) | sock_flag(sk, SOCK_LOCALROUTE))
#define RT_CONN_FLAGS_TOS(sk,tos) (RT_TOS(tos) | sock_flag(sk, SOCK_LOCALROUTE))
+static inline __u8 ip_sock_rt_scope(const struct sock *sk)
+{
+ if (sock_flag(sk, SOCK_LOCALROUTE))
+ return RT_SCOPE_LINK;
+
+ return RT_SCOPE_UNIVERSE;
+}
+
+static inline __u8 ip_sock_rt_tos(const struct sock *sk)
+{
+ return RT_TOS(inet_sk(sk)->tos);
+}
+
+struct ip_tunnel_info;
struct fib_nh;
struct fib_info;
struct uncached_list;
@@ -128,6 +142,12 @@ static inline struct rtable *__ip_route_output_key(struct net *net,
struct rtable *ip_route_output_flow(struct net *, struct flowi4 *flp,
const struct sock *sk);
+struct rtable *ip_route_output_tunnel(struct sk_buff *skb,
+ struct net_device *dev,
+ struct net *net, __be32 *saddr,
+ const struct ip_tunnel_info *info,
+ u8 protocol, bool use_cache);
+
struct dst_entry *ipv4_blackhole_route(struct net *net,
struct dst_entry *dst_orig);
@@ -159,7 +179,7 @@ static inline struct rtable *ip_route_output_ports(struct net *net, struct flowi
sk ? inet_sk_flowi_flags(sk) : 0,
daddr, saddr, dport, sport, sock_net_uid(net, sk));
if (sk)
- security_sk_classify_flow(sk, flowi4_to_flowi(fl4));
+ security_sk_classify_flow(sk, flowi4_to_flowi_common(fl4));
return ip_route_output_flow(net, fl4, sk);
}
@@ -181,10 +201,6 @@ int ip_mc_validate_source(struct sk_buff *skb, __be32 daddr, __be32 saddr,
struct in_device *in_dev, u32 *itag);
int ip_route_input_noref(struct sk_buff *skb, __be32 dst, __be32 src,
u8 tos, struct net_device *devin);
-int ip_route_input_rcu(struct sk_buff *skb, __be32 dst, __be32 src,
- u8 tos, struct net_device *devin,
- struct fib_result *res);
-
int ip_route_use_hint(struct sk_buff *skb, __be32 dst, __be32 src,
u8 tos, struct net_device *devin,
const struct sk_buff *hint);
@@ -224,8 +240,7 @@ void ip_rt_multicast_event(struct in_device *);
int ip_rt_ioctl(struct net *, unsigned int cmd, struct rtentry *rt);
void ip_rt_get_source(u8 *src, struct sk_buff *skb, struct rtable *rt);
struct rtable *rt_dst_alloc(struct net_device *dev,
- unsigned int flags, u16 type,
- bool nopolicy, bool noxfrm, bool will_cache);
+ unsigned int flags, u16 type, bool noxfrm);
struct rtable *rt_dst_clone(struct net_device *dev, struct rtable *rt);
struct in_ifaddr;
@@ -282,41 +297,40 @@ static inline char rt_tos2priority(u8 tos)
* ip_route_newports() calls.
*/
-static inline void ip_route_connect_init(struct flowi4 *fl4, __be32 dst, __be32 src,
- u32 tos, int oif, u8 protocol,
+static inline void ip_route_connect_init(struct flowi4 *fl4, __be32 dst,
+ __be32 src, int oif, u8 protocol,
__be16 sport, __be16 dport,
- struct sock *sk)
+ const struct sock *sk)
{
__u8 flow_flags = 0;
if (inet_sk(sk)->transparent)
flow_flags |= FLOWI_FLAG_ANYSRC;
- flowi4_init_output(fl4, oif, sk->sk_mark, tos, RT_SCOPE_UNIVERSE,
- protocol, flow_flags, dst, src, dport, sport,
- sk->sk_uid);
+ flowi4_init_output(fl4, oif, sk->sk_mark, ip_sock_rt_tos(sk),
+ ip_sock_rt_scope(sk), protocol, flow_flags, dst,
+ src, dport, sport, sk->sk_uid);
}
-static inline struct rtable *ip_route_connect(struct flowi4 *fl4,
- __be32 dst, __be32 src, u32 tos,
- int oif, u8 protocol,
+static inline struct rtable *ip_route_connect(struct flowi4 *fl4, __be32 dst,
+ __be32 src, int oif, u8 protocol,
__be16 sport, __be16 dport,
struct sock *sk)
{
struct net *net = sock_net(sk);
struct rtable *rt;
- ip_route_connect_init(fl4, dst, src, tos, oif, protocol,
- sport, dport, sk);
+ ip_route_connect_init(fl4, dst, src, oif, protocol, sport, dport, sk);
if (!dst || !src) {
rt = __ip_route_output_key(net, fl4);
if (IS_ERR(rt))
return rt;
ip_rt_put(rt);
- flowi4_update_output(fl4, oif, tos, fl4->daddr, fl4->saddr);
+ flowi4_update_output(fl4, oif, fl4->flowi4_tos, fl4->daddr,
+ fl4->saddr);
}
- security_sk_classify_flow(sk, flowi4_to_flowi(fl4));
+ security_sk_classify_flow(sk, flowi4_to_flowi_common(fl4));
return ip_route_output_flow(net, fl4, sk);
}
@@ -332,7 +346,7 @@ static inline struct rtable *ip_route_newports(struct flowi4 *fl4, struct rtable
flowi4_update_output(fl4, sk->sk_bound_dev_if,
RT_CONN_FLAGS(sk), fl4->daddr,
fl4->saddr);
- security_sk_classify_flow(sk, flowi4_to_flowi(fl4));
+ security_sk_classify_flow(sk, flowi4_to_flowi_common(fl4));
return ip_route_output_flow(sock_net(sk), fl4, sk);
}
return rt;
@@ -354,7 +368,7 @@ static inline int ip4_dst_hoplimit(const struct dst_entry *dst)
struct net *net = dev_net(dst->dev);
if (hoplimit == 0)
- hoplimit = net->ipv4.sysctl_ip_default_ttl;
+ hoplimit = READ_ONCE(net->ipv4.sysctl_ip_default_ttl);
return hoplimit;
}
@@ -363,7 +377,7 @@ static inline struct neighbour *ip_neigh_gw4(struct net_device *dev,
{
struct neighbour *neigh;
- neigh = __ipv4_neigh_lookup_noref(dev, daddr);
+ neigh = __ipv4_neigh_lookup_noref(dev, (__force u32)daddr);
if (unlikely(!neigh))
neigh = __neigh_create(&arp_tbl, &daddr, dev, false);
diff --git a/include/net/rpl.h b/include/net/rpl.h
new file mode 100644
index 000000000000..308ef0a05cae
--- /dev/null
+++ b/include/net/rpl.h
@@ -0,0 +1,40 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+/*
+ * RPL implementation
+ *
+ * Author:
+ * (C) 2020 Alexander Aring <alex.aring@gmail.com>
+ */
+
+#ifndef _NET_RPL_H
+#define _NET_RPL_H
+
+#include <linux/rpl.h>
+
+#if IS_ENABLED(CONFIG_IPV6_RPL_LWTUNNEL)
+extern int rpl_init(void);
+extern void rpl_exit(void);
+#else
+static inline int rpl_init(void)
+{
+ return 0;
+}
+
+static inline void rpl_exit(void) {}
+#endif
+
+/* Worst decompression memory usage ipv6 address (16) + pad 7 */
+#define IPV6_RPL_SRH_WORST_SWAP_SIZE (sizeof(struct in6_addr) + 7)
+
+size_t ipv6_rpl_srh_size(unsigned char n, unsigned char cmpri,
+ unsigned char cmpre);
+
+void ipv6_rpl_srh_decompress(struct ipv6_rpl_sr_hdr *outhdr,
+ const struct ipv6_rpl_sr_hdr *inhdr,
+ const struct in6_addr *daddr, unsigned char n);
+
+void ipv6_rpl_srh_compress(struct ipv6_rpl_sr_hdr *outhdr,
+ const struct ipv6_rpl_sr_hdr *inhdr,
+ const struct in6_addr *daddr, unsigned char n);
+
+#endif /* _NET_RPL_H */
diff --git a/include/net/rtnetlink.h b/include/net/rtnetlink.h
index e2091bb2b3a8..bf8bb3357825 100644
--- a/include/net/rtnetlink.h
+++ b/include/net/rtnetlink.h
@@ -10,9 +10,23 @@ typedef int (*rtnl_doit_func)(struct sk_buff *, struct nlmsghdr *,
typedef int (*rtnl_dumpit_func)(struct sk_buff *, struct netlink_callback *);
enum rtnl_link_flags {
- RTNL_FLAG_DOIT_UNLOCKED = 1,
+ RTNL_FLAG_DOIT_UNLOCKED = BIT(0),
+ RTNL_FLAG_BULK_DEL_SUPPORTED = BIT(1),
};
+enum rtnl_kinds {
+ RTNL_KIND_NEW,
+ RTNL_KIND_DEL,
+ RTNL_KIND_GET,
+ RTNL_KIND_SET
+};
+#define RTNL_KIND_MASK 0x3
+
+static inline enum rtnl_kinds rtnl_msgtype_kind(int msgtype)
+{
+ return msgtype & RTNL_KIND_MASK;
+}
+
void rtnl_register(int protocol, int msgtype,
rtnl_doit_func, rtnl_dumpit_func, unsigned int flags);
int rtnl_register_module(struct module *owner, int protocol, int msgtype,
@@ -33,9 +47,13 @@ static inline int rtnl_msg_family(const struct nlmsghdr *nlh)
*
* @list: Used internally
* @kind: Identifier
+ * @netns_refund: Physical device, move to init_net on netns exit
* @maxtype: Highest device specific netlink attribute number
* @policy: Netlink policy for device specific attribute validation
* @validate: Optional validation function for netlink/changelink parameters
+ * @alloc: netdev allocation function, can be %NULL and is then used
+ * in place of alloc_netdev_mqs(), in this case @priv_size
+ * and @setup are unused. Returns a netdev or ERR_PTR().
* @priv_size: sizeof net_device private space
* @setup: net_device setup function
* @newlink: Function for configuring and registering a new device
@@ -62,8 +80,14 @@ struct rtnl_link_ops {
const char *kind;
size_t priv_size;
+ struct net_device *(*alloc)(struct nlattr *tb[],
+ const char *ifname,
+ unsigned char name_assign_type,
+ unsigned int num_tx_queues,
+ unsigned int num_rx_queues);
void (*setup)(struct net_device *dev);
+ bool netns_refund;
unsigned int maxtype;
const struct nla_policy *policy;
int (*validate)(struct nlattr *tb[],
@@ -143,10 +167,11 @@ struct rtnl_af_ops {
u32 ext_filter_mask);
int (*validate_link_af)(const struct net_device *dev,
- const struct nlattr *attr);
+ const struct nlattr *attr,
+ struct netlink_ext_ack *extack);
int (*set_link_af)(struct net_device *dev,
- const struct nlattr *attr);
-
+ const struct nlattr *attr,
+ struct netlink_ext_ack *extack);
int (*fill_stats_af)(struct sk_buff *skb,
const struct net_device *dev);
size_t (*get_stats_af_size)(const struct net_device *dev);
diff --git a/include/net/sch_generic.h b/include/net/sch_generic.h
index 151208704ed2..d5517719af4e 100644
--- a/include/net/sch_generic.h
+++ b/include/net/sch_generic.h
@@ -36,8 +36,23 @@ struct qdisc_rate_table {
enum qdisc_state_t {
__QDISC_STATE_SCHED,
__QDISC_STATE_DEACTIVATED,
+ __QDISC_STATE_MISSED,
+ __QDISC_STATE_DRAINING,
};
+enum qdisc_state2_t {
+ /* Only for !TCQ_F_NOLOCK qdisc. Never access it directly.
+ * Use qdisc_run_begin/end() or qdisc_is_running() instead.
+ */
+ __QDISC_STATE2_RUNNING,
+};
+
+#define QDISC_STATE_MISSED BIT(__QDISC_STATE_MISSED)
+#define QDISC_STATE_DRAINING BIT(__QDISC_STATE_DRAINING)
+
+#define QDISC_STATE_NON_EMPTY (QDISC_STATE_MISSED | \
+ QDISC_STATE_DRAINING)
+
struct qdisc_size_table {
struct rcu_head rcu;
struct list_head list;
@@ -89,9 +104,9 @@ struct Qdisc {
struct netdev_queue *dev_queue;
struct net_rate_estimator __rcu *rate_est;
- struct gnet_stats_basic_cpu __percpu *cpu_bstats;
+ struct gnet_stats_basic_sync __percpu *cpu_bstats;
struct gnet_stats_queue __percpu *cpu_qstats;
- int padded;
+ int pad;
refcount_t refcnt;
/*
@@ -99,19 +114,20 @@ struct Qdisc {
*/
struct sk_buff_head gso_skb ____cacheline_aligned_in_smp;
struct qdisc_skb_head q;
- struct gnet_stats_basic_packed bstats;
- seqcount_t running;
+ struct gnet_stats_basic_sync bstats;
struct gnet_stats_queue qstats;
unsigned long state;
+ unsigned long state2; /* must be written under qdisc spinlock */
struct Qdisc *next_sched;
struct sk_buff_head skb_bad_txq;
spinlock_t busylock ____cacheline_aligned_in_smp;
spinlock_t seqlock;
- /* for NOLOCK qdisc, true if there are no enqueued skbs */
- bool empty;
struct rcu_head rcu;
+ netdevice_tracker dev_tracker;
+ /* private data */
+ long privdata[] ____cacheline_aligned;
};
static inline void qdisc_refcount_inc(struct Qdisc *qdisc)
@@ -134,11 +150,20 @@ static inline struct Qdisc *qdisc_refcount_inc_nz(struct Qdisc *qdisc)
return NULL;
}
+/* For !TCQ_F_NOLOCK qdisc: callers must either call this within a qdisc
+ * root_lock section, or provide their own memory barriers -- ordering
+ * against qdisc_run_begin/end() atomic bit operations.
+ */
static inline bool qdisc_is_running(struct Qdisc *qdisc)
{
if (qdisc->flags & TCQ_F_NOLOCK)
return spin_is_locked(&qdisc->seqlock);
- return (raw_read_seqcount(&qdisc->running) & 1) ? true : false;
+ return test_bit(__QDISC_STATE2_RUNNING, &qdisc->state2);
+}
+
+static inline bool nolock_qdisc_is_empty(const struct Qdisc *qdisc)
+{
+ return !(READ_ONCE(qdisc->state) & QDISC_STATE_NON_EMPTY);
}
static inline bool qdisc_is_percpu_stats(const struct Qdisc *q)
@@ -149,32 +174,53 @@ static inline bool qdisc_is_percpu_stats(const struct Qdisc *q)
static inline bool qdisc_is_empty(const struct Qdisc *qdisc)
{
if (qdisc_is_percpu_stats(qdisc))
- return READ_ONCE(qdisc->empty);
+ return nolock_qdisc_is_empty(qdisc);
return !READ_ONCE(qdisc->q.qlen);
}
+/* For !TCQ_F_NOLOCK qdisc, qdisc_run_begin/end() must be invoked with
+ * the qdisc root lock acquired.
+ */
static inline bool qdisc_run_begin(struct Qdisc *qdisc)
{
if (qdisc->flags & TCQ_F_NOLOCK) {
- if (!spin_trylock(&qdisc->seqlock))
+ if (spin_trylock(&qdisc->seqlock))
+ return true;
+
+ /* No need to insist if the MISSED flag was already set.
+ * Note that test_and_set_bit() also gives us memory ordering
+ * guarantees wrt potential earlier enqueue() and below
+ * spin_trylock(), both of which are necessary to prevent races
+ */
+ if (test_and_set_bit(__QDISC_STATE_MISSED, &qdisc->state))
return false;
- WRITE_ONCE(qdisc->empty, false);
- } else if (qdisc_is_running(qdisc)) {
- return false;
+
+ /* Try to take the lock again to make sure that we will either
+ * grab it or the CPU that still has it will see MISSED set
+ * when testing it in qdisc_run_end()
+ */
+ return spin_trylock(&qdisc->seqlock);
}
- /* Variant of write_seqcount_begin() telling lockdep a trylock
- * was attempted.
- */
- raw_write_seqcount_begin(&qdisc->running);
- seqcount_acquire(&qdisc->running.dep_map, 0, 1, _RET_IP_);
- return true;
+ return !__test_and_set_bit(__QDISC_STATE2_RUNNING, &qdisc->state2);
}
static inline void qdisc_run_end(struct Qdisc *qdisc)
{
- write_seqcount_end(&qdisc->running);
- if (qdisc->flags & TCQ_F_NOLOCK)
+ if (qdisc->flags & TCQ_F_NOLOCK) {
spin_unlock(&qdisc->seqlock);
+
+ /* spin_unlock() only has store-release semantic. The unlock
+ * and test_bit() ordering is a store-load ordering, so a full
+ * memory barrier is needed here.
+ */
+ smp_mb();
+
+ if (unlikely(test_bit(__QDISC_STATE_MISSED,
+ &qdisc->state)))
+ __netif_schedule(qdisc);
+ } else {
+ __clear_bit(__QDISC_STATE2_RUNNING, &qdisc->state2);
+ }
}
static inline bool qdisc_may_bulk(const struct Qdisc *qdisc)
@@ -207,7 +253,8 @@ struct Qdisc_class_ops {
int (*change)(struct Qdisc *, u32, u32,
struct nlattr **, unsigned long *,
struct netlink_ext_ack *);
- int (*delete)(struct Qdisc *, unsigned long);
+ int (*delete)(struct Qdisc *, unsigned long,
+ struct netlink_ext_ack *);
void (*walk)(struct Qdisc *, struct qdisc_walker * arg);
/* Filter manipulation */
@@ -254,6 +301,8 @@ struct Qdisc_ops {
struct netlink_ext_ack *extack);
void (*attach)(struct Qdisc *sch);
int (*change_tx_queue_len)(struct Qdisc *, unsigned int);
+ void (*change_real_num_tx)(struct Qdisc *sch,
+ unsigned int new_real_tx);
int (*dump)(struct Qdisc *, struct sk_buff *);
int (*dump_stats)(struct Qdisc *, struct gnet_dump *);
@@ -277,11 +326,6 @@ struct tcf_result {
};
const struct tcf_proto *goto_tp;
- /* used in the skb_tc_reinsert function */
- struct {
- bool ingress;
- struct gnet_stats_queue *qstats;
- };
};
};
@@ -303,7 +347,7 @@ struct tcf_proto_ops {
int (*change)(struct net *net, struct sk_buff *,
struct tcf_proto*, unsigned long,
u32 handle, struct nlattr **,
- void **, bool, bool,
+ void **, u32,
struct netlink_ext_ack *);
int (*delete)(struct tcf_proto *tp, void *arg,
bool *last, bool rtnl_held,
@@ -330,6 +374,10 @@ struct tcf_proto_ops {
int (*dump)(struct net*, struct tcf_proto*, void *,
struct sk_buff *skb, struct tcmsg*,
bool);
+ int (*terse_dump)(struct net *net,
+ struct tcf_proto *tp, void *fh,
+ struct sk_buff *skb,
+ struct tcmsg *t, bool rtnl_held);
int (*tmplt_dump)(struct sk_buff *skb,
struct net *net,
void *tmplt_priv);
@@ -407,6 +455,7 @@ struct tcf_block {
struct mutex lock;
struct list_head chain_list;
u32 index; /* block index for shared blocks */
+ u32 classid; /* which class this block belongs to */
refcount_t refcnt;
struct net *net;
struct Qdisc *q;
@@ -426,7 +475,6 @@ struct tcf_block {
struct mutex proto_destroy_lock; /* Lock for proto_destroy hashtable. */
};
-#ifdef CONFIG_PROVE_LOCKING
static inline bool lockdep_tcf_chain_is_locked(struct tcf_chain *chain)
{
return lockdep_is_held(&chain->filter_chain_lock);
@@ -436,17 +484,6 @@ static inline bool lockdep_tcf_proto_is_locked(struct tcf_proto *tp)
{
return lockdep_is_held(&tp->lock);
}
-#else
-static inline bool lockdep_tcf_chain_is_locked(struct tcf_block *chain)
-{
- return true;
-}
-
-static inline bool lockdep_tcf_proto_is_locked(struct tcf_proto *tp)
-{
- return true;
-}
-#endif /* #ifdef CONFIG_PROVE_LOCKING */
#define tcf_chain_dereference(p, chain) \
rcu_dereference_protected(p, lockdep_tcf_chain_is_locked(chain))
@@ -458,15 +495,10 @@ static inline void qdisc_cb_private_validate(const struct sk_buff *skb, int sz)
{
struct qdisc_skb_cb *qcb;
- BUILD_BUG_ON(sizeof(skb->cb) < offsetof(struct qdisc_skb_cb, data) + sz);
+ BUILD_BUG_ON(sizeof(skb->cb) < sizeof(*qcb));
BUILD_BUG_ON(sizeof(qcb->data) < sz);
}
-static inline int qdisc_qlen_cpu(const struct Qdisc *q)
-{
- return this_cpu_ptr(q->cpu_qstats)->qlen;
-}
-
static inline int qdisc_qlen(const struct Qdisc *q)
{
return q->q.qlen;
@@ -514,25 +546,6 @@ static inline struct Qdisc *qdisc_root_sleeping(const struct Qdisc *qdisc)
return qdisc->dev_queue->qdisc_sleeping;
}
-/* The qdisc root lock is a mechanism by which to top level
- * of a qdisc tree can be locked from any qdisc node in the
- * forest. This allows changing the configuration of some
- * aspect of the qdisc tree while blocking out asynchronous
- * qdisc access in the packet processing paths.
- *
- * It is only legal to do this when the root will not change
- * on us. Otherwise we'll potentially lock the wrong qdisc
- * root. This is enforced by holding the RTNL semaphore, which
- * all users of this lock accessor must do.
- */
-static inline spinlock_t *qdisc_root_lock(const struct Qdisc *qdisc)
-{
- struct Qdisc *root = qdisc_root(qdisc);
-
- ASSERT_RTNL();
- return qdisc_lock(root);
-}
-
static inline spinlock_t *qdisc_root_sleeping_lock(const struct Qdisc *qdisc)
{
struct Qdisc *root = qdisc_root_sleeping(qdisc);
@@ -541,27 +554,25 @@ static inline spinlock_t *qdisc_root_sleeping_lock(const struct Qdisc *qdisc)
return qdisc_lock(root);
}
-static inline seqcount_t *qdisc_root_sleeping_running(const struct Qdisc *qdisc)
-{
- struct Qdisc *root = qdisc_root_sleeping(qdisc);
-
- ASSERT_RTNL();
- return &root->running;
-}
-
static inline struct net_device *qdisc_dev(const struct Qdisc *qdisc)
{
return qdisc->dev_queue->dev;
}
-static inline void sch_tree_lock(const struct Qdisc *q)
+static inline void sch_tree_lock(struct Qdisc *q)
{
- spin_lock_bh(qdisc_root_sleeping_lock(q));
+ if (q->flags & TCQ_F_MQROOT)
+ spin_lock_bh(qdisc_lock(q));
+ else
+ spin_lock_bh(qdisc_root_sleeping_lock(q));
}
-static inline void sch_tree_unlock(const struct Qdisc *q)
+static inline void sch_tree_unlock(struct Qdisc *q)
{
- spin_unlock_bh(qdisc_root_sleeping_lock(q));
+ if (q->flags & TCQ_F_MQROOT)
+ spin_unlock_bh(qdisc_lock(q));
+ else
+ spin_unlock_bh(qdisc_root_sleeping_lock(q));
}
extern struct Qdisc noop_qdisc;
@@ -629,6 +640,8 @@ void qdisc_class_hash_grow(struct Qdisc *, struct Qdisc_class_hash *);
void qdisc_class_hash_destroy(struct Qdisc_class_hash *);
int dev_qdisc_change_tx_queue_len(struct net_device *dev);
+void dev_qdisc_change_real_num_tx(struct net_device *dev,
+ unsigned int new_real_tx);
void dev_init_scheduler(struct net_device *dev);
void dev_shutdown(struct net_device *dev);
void dev_activate(struct net_device *dev);
@@ -664,6 +677,9 @@ qdisc_offload_graft_helper(struct net_device *dev, struct Qdisc *sch,
{
}
#endif
+void qdisc_offload_query_caps(struct net_device *dev,
+ enum tc_setup_type type,
+ void *caps, size_t caps_len);
struct Qdisc *qdisc_alloc(struct netdev_queue *dev_queue,
const struct Qdisc_ops *ops,
struct netlink_ext_ack *extack);
@@ -675,22 +691,6 @@ void __qdisc_calculate_pkt_len(struct sk_buff *skb,
const struct qdisc_size_table *stab);
int skb_do_redirect(struct sk_buff *);
-static inline void skb_reset_tc(struct sk_buff *skb)
-{
-#ifdef CONFIG_NET_CLS_ACT
- skb->tc_redirected = 0;
-#endif
-}
-
-static inline bool skb_is_tc_redirected(const struct sk_buff *skb)
-{
-#ifdef CONFIG_NET_CLS_ACT
- return skb->tc_redirected;
-#else
- return false;
-#endif
-}
-
static inline bool skb_at_tc_ingress(const struct sk_buff *skb)
{
#ifdef CONFIG_NET_CLS_ACT
@@ -726,11 +726,6 @@ static inline void qdisc_reset_all_tx_gt(struct net_device *dev, unsigned int i)
}
}
-static inline void qdisc_reset_all_tx(struct net_device *dev)
-{
- qdisc_reset_all_tx_gt(dev, 0);
-}
-
/* Are all TX queues of the device empty? */
static inline bool qdisc_all_tx_empty(const struct net_device *dev)
{
@@ -811,14 +806,16 @@ static inline int qdisc_enqueue(struct sk_buff *skb, struct Qdisc *sch,
return sch->enqueue(skb, sch, to_free);
}
-static inline void _bstats_update(struct gnet_stats_basic_packed *bstats,
+static inline void _bstats_update(struct gnet_stats_basic_sync *bstats,
__u64 bytes, __u32 packets)
{
- bstats->bytes += bytes;
- bstats->packets += packets;
+ u64_stats_update_begin(&bstats->syncp);
+ u64_stats_add(&bstats->bytes, bytes);
+ u64_stats_add(&bstats->packets, packets);
+ u64_stats_update_end(&bstats->syncp);
}
-static inline void bstats_update(struct gnet_stats_basic_packed *bstats,
+static inline void bstats_update(struct gnet_stats_basic_sync *bstats,
const struct sk_buff *skb)
{
_bstats_update(bstats,
@@ -826,26 +823,10 @@ static inline void bstats_update(struct gnet_stats_basic_packed *bstats,
skb_is_gso(skb) ? skb_shinfo(skb)->gso_segs : 1);
}
-static inline void _bstats_cpu_update(struct gnet_stats_basic_cpu *bstats,
- __u64 bytes, __u32 packets)
-{
- u64_stats_update_begin(&bstats->syncp);
- _bstats_update(&bstats->bstats, bytes, packets);
- u64_stats_update_end(&bstats->syncp);
-}
-
-static inline void bstats_cpu_update(struct gnet_stats_basic_cpu *bstats,
- const struct sk_buff *skb)
-{
- u64_stats_update_begin(&bstats->syncp);
- bstats_update(&bstats->bstats, skb);
- u64_stats_update_end(&bstats->syncp);
-}
-
static inline void qdisc_bstats_cpu_update(struct Qdisc *sch,
const struct sk_buff *skb)
{
- bstats_cpu_update(this_cpu_ptr(sch->cpu_bstats), skb);
+ bstats_update(this_cpu_ptr(sch->cpu_bstats), skb);
}
static inline void qdisc_bstats_update(struct Qdisc *sch,
@@ -934,10 +915,9 @@ static inline void qdisc_qstats_qlen_backlog(struct Qdisc *sch, __u32 *qlen,
__u32 *backlog)
{
struct gnet_stats_queue qstats = { 0 };
- __u32 len = qdisc_qlen_sum(sch);
- __gnet_stats_copy_queue(&qstats, sch->cpu_qstats, &sch->qstats, len);
- *qlen = qstats.qlen;
+ gnet_stats_add_queue(&qstats, sch->cpu_qstats, &sch->qstats);
+ *qlen = qstats.qlen + qdisc_qlen(sch);
*backlog = qstats.backlog;
}
@@ -958,13 +938,6 @@ static inline void qdisc_purge_queue(struct Qdisc *sch)
qdisc_tree_reduce_backlog(sch, qlen, backlog);
}
-static inline void qdisc_skb_head_init(struct qdisc_skb_head *qh)
-{
- qh->head = NULL;
- qh->tail = NULL;
- qh->qlen = 0;
-}
-
static inline void __qdisc_enqueue_tail(struct sk_buff *skb,
struct qdisc_skb_head *qh)
{
@@ -1062,12 +1035,6 @@ static inline unsigned int __qdisc_queue_drop_head(struct Qdisc *sch,
return 0;
}
-static inline unsigned int qdisc_queue_drop_head(struct Qdisc *sch,
- struct sk_buff **to_free)
-{
- return __qdisc_queue_drop_head(sch, &sch->q, to_free);
-}
-
static inline struct sk_buff *qdisc_peek_head(struct Qdisc *sch)
{
const struct qdisc_skb_head *qh = &sch->q;
@@ -1161,7 +1128,6 @@ static inline void __qdisc_reset_queue(struct qdisc_skb_head *qh)
static inline void qdisc_reset_queue(struct Qdisc *sch)
{
__qdisc_reset_queue(&sch->q);
- sch->qstats.backlog = 0;
}
static inline struct Qdisc *qdisc_replace(struct Qdisc *sch, struct Qdisc *new,
@@ -1173,7 +1139,7 @@ static inline struct Qdisc *qdisc_replace(struct Qdisc *sch, struct Qdisc *new,
old = *pold;
*pold = new;
if (old != NULL)
- qdisc_tree_flush_backlog(old);
+ qdisc_purge_queue(old);
sch_tree_unlock(sch);
return old;
@@ -1230,6 +1196,7 @@ struct psched_ratecfg {
u64 rate_bytes_ps; /* bytes per second */
u32 mult;
u16 overhead;
+ u16 mpu;
u8 linklayer;
u8 shift;
};
@@ -1239,6 +1206,9 @@ static inline u64 psched_l2t_ns(const struct psched_ratecfg *r,
{
len += r->overhead;
+ if (len < r->mpu)
+ len = r->mpu;
+
if (unlikely(r->linklayer == TC_LINKLAYER_ATM))
return ((u64)(DIV_ROUND_UP(len,48)*53) * r->mult) >> r->shift;
@@ -1261,23 +1231,39 @@ static inline void psched_ratecfg_getrate(struct tc_ratespec *res,
res->rate = min_t(u64, r->rate_bytes_ps, ~0U);
res->overhead = r->overhead;
+ res->mpu = r->mpu;
res->linklayer = (r->linklayer & TC_LINKLAYER_MASK);
}
+struct psched_pktrate {
+ u64 rate_pkts_ps; /* packets per second */
+ u32 mult;
+ u8 shift;
+};
+
+static inline u64 psched_pkt2t_ns(const struct psched_pktrate *r,
+ unsigned int pkt_num)
+{
+ return ((u64)pkt_num * r->mult) >> r->shift;
+}
+
+void psched_ppscfg_precompute(struct psched_pktrate *r, u64 pktrate64);
+
/* Mini Qdisc serves for specific needs of ingress/clsact Qdisc.
* The fast path only needs to access filter list and to update stats
*/
struct mini_Qdisc {
struct tcf_proto *filter_list;
- struct gnet_stats_basic_cpu __percpu *cpu_bstats;
+ struct tcf_block *block;
+ struct gnet_stats_basic_sync __percpu *cpu_bstats;
struct gnet_stats_queue __percpu *cpu_qstats;
- struct rcu_head rcu;
+ unsigned long rcu_state;
};
static inline void mini_qdisc_bstats_cpu_update(struct mini_Qdisc *miniq,
const struct sk_buff *skb)
{
- bstats_cpu_update(this_cpu_ptr(miniq->cpu_bstats), skb);
+ bstats_update(this_cpu_ptr(miniq->cpu_bstats), skb);
}
static inline void mini_qdisc_qstats_cpu_drop(struct mini_Qdisc *miniq)
@@ -1295,10 +1281,11 @@ void mini_qdisc_pair_swap(struct mini_Qdisc_pair *miniqp,
struct tcf_proto *tp_head);
void mini_qdisc_pair_init(struct mini_Qdisc_pair *miniqp, struct Qdisc *qdisc,
struct mini_Qdisc __rcu **p_miniq);
+void mini_qdisc_pair_block_init(struct mini_Qdisc_pair *miniqp,
+ struct tcf_block *block);
-static inline int skb_tc_reinsert(struct sk_buff *skb, struct tcf_result *res)
-{
- return res->ingress ? netif_receive_skb(skb) : dev_queue_xmit(skb);
-}
+void mq_change_real_num_tx(struct Qdisc *sch, unsigned int new_real_tx);
+
+int sch_frag_xmit_hook(struct sk_buff *skb, int (*xmit)(struct sk_buff *skb));
#endif
diff --git a/include/net/sctp/command.h b/include/net/sctp/command.h
index e8df72e1627a..2058fabffbf6 100644
--- a/include/net/sctp/command.h
+++ b/include/net/sctp/command.h
@@ -59,6 +59,7 @@ enum sctp_verb {
SCTP_CMD_HB_TIMERS_START, /* Start the heartbeat timers. */
SCTP_CMD_HB_TIMER_UPDATE, /* Update a heartbeat timers. */
SCTP_CMD_HB_TIMERS_STOP, /* Stop the heartbeat timers. */
+ SCTP_CMD_PROBE_TIMER_UPDATE, /* Update a probe timer. */
SCTP_CMD_TRANSPORT_HB_SENT, /* Reset the status of a transport. */
SCTP_CMD_TRANSPORT_IDLE, /* Do manipulations on idle transport */
SCTP_CMD_TRANSPORT_ON, /* Mark the transport as active. */
@@ -68,7 +69,6 @@ enum sctp_verb {
SCTP_CMD_ASSOC_FAILED, /* Handle association failure. */
SCTP_CMD_DISCARD_PACKET, /* Discard the whole packet. */
SCTP_CMD_GEN_SHUTDOWN, /* Generate a SHUTDOWN chunk. */
- SCTP_CMD_UPDATE_ASSOC, /* Update association information. */
SCTP_CMD_PURGE_OUTQUEUE, /* Purge all data waiting to be sent. */
SCTP_CMD_SETUP_T2, /* Hi-level, setup T2-shutdown parms. */
SCTP_CMD_RTO_PENDING, /* Set transport's rto_pending. */
diff --git a/include/net/sctp/constants.h b/include/net/sctp/constants.h
index 15b4d9aec7ff..5859e0a16a58 100644
--- a/include/net/sctp/constants.h
+++ b/include/net/sctp/constants.h
@@ -77,6 +77,7 @@ enum sctp_event_timeout {
SCTP_EVENT_TIMEOUT_T5_SHUTDOWN_GUARD,
SCTP_EVENT_TIMEOUT_HEARTBEAT,
SCTP_EVENT_TIMEOUT_RECONF,
+ SCTP_EVENT_TIMEOUT_PROBE,
SCTP_EVENT_TIMEOUT_SACK,
SCTP_EVENT_TIMEOUT_AUTOCLOSE,
};
@@ -200,6 +201,23 @@ enum sctp_sock_state {
SCTP_SS_CLOSING = TCP_CLOSE_WAIT,
};
+enum sctp_plpmtud_state {
+ SCTP_PL_DISABLED,
+ SCTP_PL_BASE,
+ SCTP_PL_SEARCH,
+ SCTP_PL_COMPLETE,
+ SCTP_PL_ERROR,
+};
+
+#define SCTP_BASE_PLPMTU 1200
+#define SCTP_MAX_PLPMTU 9000
+#define SCTP_MIN_PLPMTU 512
+
+#define SCTP_MAX_PROBES 3
+
+#define SCTP_PL_BIG_STEP 32
+#define SCTP_PL_MIN_STEP 4
+
/* These functions map various type to printable names. */
const char *sctp_cname(const union sctp_subtype id); /* chunk types */
const char *sctp_oname(const union sctp_subtype id); /* other events */
@@ -286,6 +304,8 @@ enum { SCTP_MAX_GABS = 16 };
* functions simpler to write.
*/
+#define SCTP_DEFAULT_UDP_PORT 9899 /* default UDP tunneling port */
+
/* These are the values for pf exposure, UNUSED is to keep compatible with old
* applications by default.
*/
@@ -340,8 +360,7 @@ enum {
#define SCTP_SCOPE_POLICY_MAX SCTP_SCOPE_POLICY_LINK
/* Based on IPv4 scoping <draft-stewart-tsvwg-sctp-ipv4-00.txt>,
- * SCTP IPv4 unusable addresses: 0.0.0.0/8, 224.0.0.0/4, 198.18.0.0/24,
- * 192.88.99.0/24.
+ * SCTP IPv4 unusable addresses: 0.0.0.0/8, 224.0.0.0/4, 192.88.99.0/24.
* Also, RFC 8.4, non-unicast addresses are not considered valid SCTP
* addresses.
*/
@@ -349,15 +368,16 @@ enum {
((htonl(INADDR_BROADCAST) == a) || \
ipv4_is_multicast(a) || \
ipv4_is_zeronet(a) || \
- ipv4_is_test_198(a) || \
ipv4_is_anycast_6to4(a))
/* Flags used for the bind address copy functions. */
-#define SCTP_ADDR6_ALLOWED 0x00000001 /* IPv6 address is allowed by
+#define SCTP_ADDR4_ALLOWED 0x00000001 /* IPv4 address is allowed by
local sock family */
-#define SCTP_ADDR4_PEERSUPP 0x00000002 /* IPv4 address is supported by
+#define SCTP_ADDR6_ALLOWED 0x00000002 /* IPv6 address is allowed by
+ local sock family */
+#define SCTP_ADDR4_PEERSUPP 0x00000004 /* IPv4 address is supported by
peer */
-#define SCTP_ADDR6_PEERSUPP 0x00000004 /* IPv6 address is supported by
+#define SCTP_ADDR6_PEERSUPP 0x00000008 /* IPv6 address is supported by
peer */
/* Reasons to retransmit. */
@@ -420,4 +440,6 @@ enum {
*/
#define SCTP_AUTH_RANDOM_LENGTH 32
+#define SCTP_PROBE_TIMER_MIN 5000
+
#endif /* __sctp_constants_h__ */
diff --git a/include/net/sctp/sctp.h b/include/net/sctp/sctp.h
index 3ab5c6bbb90b..a04999ee99b0 100644
--- a/include/net/sctp/sctp.h
+++ b/include/net/sctp/sctp.h
@@ -84,6 +84,8 @@ int sctp_copy_local_addr_list(struct net *net, struct sctp_bind_addr *addr,
struct sctp_pf *sctp_get_pf_specific(sa_family_t family);
int sctp_register_pf(struct sctp_pf *, sa_family_t);
void sctp_addr_wq_mgmt(struct net *, struct sctp_sockaddr_entry *, int);
+int sctp_udp_sock_start(struct net *net);
+void sctp_udp_sock_stop(struct net *net);
/*
* sctp/socket.c
@@ -101,21 +103,20 @@ void sctp_copy_sock(struct sock *newsk, struct sock *sk,
struct sctp_association *asoc);
extern struct percpu_counter sctp_sockets_allocated;
int sctp_asconf_mgmt(struct sctp_sock *, struct sctp_sockaddr_entry *);
-struct sk_buff *sctp_skb_recv_datagram(struct sock *, int, int, int *);
+struct sk_buff *sctp_skb_recv_datagram(struct sock *, int, int *);
+typedef int (*sctp_callback_t)(struct sctp_endpoint *, struct sctp_transport *, void *);
void sctp_transport_walk_start(struct rhashtable_iter *iter);
void sctp_transport_walk_stop(struct rhashtable_iter *iter);
struct sctp_transport *sctp_transport_get_next(struct net *net,
struct rhashtable_iter *iter);
struct sctp_transport *sctp_transport_get_idx(struct net *net,
struct rhashtable_iter *iter, int pos);
-int sctp_transport_lookup_process(int (*cb)(struct sctp_transport *, void *),
- struct net *net,
+int sctp_transport_lookup_process(sctp_callback_t cb, struct net *net,
const union sctp_addr *laddr,
const union sctp_addr *paddr, void *p);
-int sctp_for_each_transport(int (*cb)(struct sctp_transport *, void *),
- int (*cb_done)(struct sctp_transport *, void *),
- struct net *net, int *pos, void *p);
+int sctp_transport_traverse_process(sctp_callback_t cb, sctp_callback_t cb_done,
+ struct net *net, int *pos, void *p);
int sctp_for_each_endpoint(int (*cb)(struct sctp_endpoint *, void *), void *p);
int sctp_get_sctp_info(struct sock *sk, struct sctp_association *asoc,
struct sctp_info *info);
@@ -143,6 +144,8 @@ struct sock *sctp_err_lookup(struct net *net, int family, struct sk_buff *,
struct sctphdr *, struct sctp_association **,
struct sctp_transport **);
void sctp_err_finish(struct sock *, struct sctp_transport *);
+int sctp_udp_v4_err(struct sock *sk, struct sk_buff *skb);
+int sctp_udp_v6_err(struct sock *sk, struct sk_buff *skb);
void sctp_icmp_frag_needed(struct sock *, struct sctp_association *,
struct sctp_transport *t, __u32 pmtu);
void sctp_icmp_redirect(struct sock *, struct sctp_transport *,
@@ -291,7 +294,7 @@ atomic_dec(&sctp_dbg_objcnt_## name)
#define SCTP_DBG_OBJCNT(name) \
atomic_t sctp_dbg_objcnt_## name = ATOMIC_INIT(0)
-/* Macro to help create new entries in in the global array of
+/* Macro to help create new entries in the global array of
* objcnt counters.
*/
#define SCTP_DBG_OBJCNT_ENTRY(name) \
@@ -412,7 +415,7 @@ static inline void sctp_skb_set_owner_r(struct sk_buff *skb, struct sock *sk)
/* Tests if the list has one and only one entry. */
static inline int sctp_list_single_entry(struct list_head *head)
{
- return (head->next != head) && (head->next == head->prev);
+ return list_is_singular(head);
}
static inline bool sctp_chunk_pending(const struct sctp_chunk *chunk)
@@ -506,8 +509,8 @@ static inline int sctp_ep_hashfn(struct net *net, __u16 lport)
return (net_hash_mix(net) + lport) & (sctp_ep_hashsize - 1);
}
-#define sctp_for_each_hentry(epb, head) \
- hlist_for_each_entry(epb, head, node)
+#define sctp_for_each_hentry(ep, head) \
+ hlist_for_each_entry(ep, head, node)
/* Is a socket of this style? */
#define sctp_style(sk, style) __sctp_style((sk), (SCTP_SOCKET_##style))
@@ -571,15 +574,19 @@ static inline struct dst_entry *sctp_transport_dst_check(struct sctp_transport *
/* Calculate max payload size given a MTU, or the total overhead if
* given MTU is zero
*/
-static inline __u32 sctp_mtu_payload(const struct sctp_sock *sp,
- __u32 mtu, __u32 extra)
+static inline __u32 __sctp_mtu_payload(const struct sctp_sock *sp,
+ const struct sctp_transport *t,
+ __u32 mtu, __u32 extra)
{
__u32 overhead = sizeof(struct sctphdr) + extra;
- if (sp)
+ if (sp) {
overhead += sp->pf->af->net_header_len;
- else
+ if (sp->udp_port && (!t || t->encap_port))
+ overhead += sizeof(struct udphdr);
+ } else {
overhead += sizeof(struct ipv6hdr);
+ }
if (WARN_ON_ONCE(mtu && mtu <= overhead))
mtu = overhead;
@@ -587,6 +594,12 @@ static inline __u32 sctp_mtu_payload(const struct sctp_sock *sp,
return mtu ? mtu - overhead : overhead;
}
+static inline __u32 sctp_mtu_payload(const struct sctp_sock *sp,
+ __u32 mtu, __u32 extra)
+{
+ return __sctp_mtu_payload(sp, NULL, mtu, extra);
+}
+
static inline __u32 sctp_dst_mtu(const struct dst_entry *dst)
{
return SCTP_TRUNC4(max_t(__u32, dst_mtu(dst),
@@ -610,9 +623,57 @@ static inline __u32 sctp_min_frag_point(struct sctp_sock *sp, __u16 datasize)
return sctp_mtu_payload(sp, SCTP_DEFAULT_MINSEGMENT, datasize);
}
+static inline int sctp_transport_pl_hlen(struct sctp_transport *t)
+{
+ return __sctp_mtu_payload(sctp_sk(t->asoc->base.sk), t, 0, 0) -
+ sizeof(struct sctphdr);
+}
+
+static inline void sctp_transport_pl_reset(struct sctp_transport *t)
+{
+ if (t->probe_interval && (t->param_flags & SPP_PMTUD_ENABLE) &&
+ (t->state == SCTP_ACTIVE || t->state == SCTP_UNKNOWN)) {
+ if (t->pl.state == SCTP_PL_DISABLED) {
+ t->pl.state = SCTP_PL_BASE;
+ t->pl.pmtu = SCTP_BASE_PLPMTU;
+ t->pl.probe_size = SCTP_BASE_PLPMTU;
+ sctp_transport_reset_probe_timer(t);
+ }
+ } else {
+ if (t->pl.state != SCTP_PL_DISABLED) {
+ if (del_timer(&t->probe_timer))
+ sctp_transport_put(t);
+ t->pl.state = SCTP_PL_DISABLED;
+ }
+ }
+}
+
+static inline void sctp_transport_pl_update(struct sctp_transport *t)
+{
+ if (t->pl.state == SCTP_PL_DISABLED)
+ return;
+
+ t->pl.state = SCTP_PL_BASE;
+ t->pl.pmtu = SCTP_BASE_PLPMTU;
+ t->pl.probe_size = SCTP_BASE_PLPMTU;
+ sctp_transport_reset_probe_timer(t);
+}
+
+static inline bool sctp_transport_pl_enabled(struct sctp_transport *t)
+{
+ return t->pl.state != SCTP_PL_DISABLED;
+}
+
static inline bool sctp_newsk_ready(const struct sock *sk)
{
return sock_flag(sk, SOCK_DEAD) || sk->sk_socket;
}
+static inline void sctp_sock_set_nodelay(struct sock *sk)
+{
+ lock_sock(sk);
+ sctp_sk(sk)->nodelay = true;
+ release_sock(sk);
+}
+
#endif /* __net_sctp_h__ */
diff --git a/include/net/sctp/sm.h b/include/net/sctp/sm.h
index 5c491a3bc27e..f37c7a558d6d 100644
--- a/include/net/sctp/sm.h
+++ b/include/net/sctp/sm.h
@@ -151,6 +151,7 @@ sctp_state_fn_t sctp_sf_cookie_wait_icmp_abort;
/* Prototypes for timeout event state functions. */
sctp_state_fn_t sctp_sf_do_6_3_3_rtx;
sctp_state_fn_t sctp_sf_send_reconf;
+sctp_state_fn_t sctp_sf_send_probe;
sctp_state_fn_t sctp_sf_do_6_2_sack;
sctp_state_fn_t sctp_sf_autoclose_timer_expire;
@@ -221,12 +222,17 @@ struct sctp_chunk *sctp_make_violation_paramlen(
struct sctp_chunk *sctp_make_violation_max_retrans(
const struct sctp_association *asoc,
const struct sctp_chunk *chunk);
+struct sctp_chunk *sctp_make_new_encap_port(
+ const struct sctp_association *asoc,
+ const struct sctp_chunk *chunk);
struct sctp_chunk *sctp_make_heartbeat(const struct sctp_association *asoc,
- const struct sctp_transport *transport);
+ const struct sctp_transport *transport,
+ __u32 probe_size);
struct sctp_chunk *sctp_make_heartbeat_ack(const struct sctp_association *asoc,
const struct sctp_chunk *chunk,
const void *payload,
const size_t paylen);
+struct sctp_chunk *sctp_make_pad(const struct sctp_association *asoc, int len);
struct sctp_chunk *sctp_make_op_error(const struct sctp_association *asoc,
const struct sctp_chunk *chunk,
__be16 cause_code, const void *payload,
@@ -307,6 +313,7 @@ int sctp_do_sm(struct net *net, enum sctp_event_type event_type,
void sctp_generate_t3_rtx_event(struct timer_list *t);
void sctp_generate_heartbeat_event(struct timer_list *t);
void sctp_generate_reconf_event(struct timer_list *t);
+void sctp_generate_probe_event(struct timer_list *t);
void sctp_generate_proto_unreach_event(struct timer_list *t);
void sctp_ootb_pkt_free(struct sctp_packet *packet);
@@ -377,10 +384,11 @@ sctp_vtag_verify(const struct sctp_chunk *chunk,
* Verification Tag value does not match the receiver's own
* tag value, the receiver shall silently discard the packet...
*/
- if (ntohl(chunk->sctp_hdr->vtag) == asoc->c.my_vtag)
- return 1;
+ if (ntohl(chunk->sctp_hdr->vtag) != asoc->c.my_vtag)
+ return 0;
- return 0;
+ chunk->transport->encap_port = SCTP_INPUT_CB(chunk->skb)->encap_port;
+ return 1;
}
/* Check VTAG of the packet matches the sender's own tag and the T bit is
diff --git a/include/net/sctp/structs.h b/include/net/sctp/structs.h
index 314a2fa21d6b..350f250b0dc7 100644
--- a/include/net/sctp/structs.h
+++ b/include/net/sctp/structs.h
@@ -177,6 +177,10 @@ struct sctp_sock {
* will be inherited by all new associations.
*/
__u32 hbinterval;
+ __u32 probe_interval;
+
+ __be16 udp_port;
+ __be16 encap_port;
/* This is the max_retrans value for new associations. */
__u16 pathmaxrxt;
@@ -226,12 +230,14 @@ struct sctp_sock {
data_ready_signalled:1;
atomic_t pd_mode;
+
+ /* Fields after this point will be skipped on copies, like on accept
+ * and peeloff operations
+ */
+
/* Receive to here while partial delivery is in effect. */
struct sk_buff_head pd_lobby;
- /* These must be the last fields, as they will skipped on copies,
- * like on accept and peeloff operations
- */
struct list_head auto_asconf_list;
int do_auto_asconf;
};
@@ -326,7 +332,7 @@ struct sctp_cookie {
* the association TCB is re-constructed from the cookie.
*/
__u32 raw_addr_list_len;
- struct sctp_init_chunk peer_init[0];
+ struct sctp_init_chunk peer_init[];
};
@@ -380,6 +386,7 @@ struct sctp_sender_hb_info {
union sctp_addr daddr;
unsigned long sent_at;
__u64 hb_nonce;
+ __u32 probe_size;
};
int sctp_stream_init(struct sctp_stream *stream, __u16 outcnt, __u16 incnt,
@@ -431,23 +438,13 @@ struct sctp_af {
int (*setsockopt) (struct sock *sk,
int level,
int optname,
- char __user *optval,
+ sockptr_t optval,
unsigned int optlen);
int (*getsockopt) (struct sock *sk,
int level,
int optname,
char __user *optval,
int __user *optlen);
- int (*compat_setsockopt) (struct sock *sk,
- int level,
- int optname,
- char __user *optval,
- unsigned int optlen);
- int (*compat_getsockopt) (struct sock *sk,
- int level,
- int optname,
- char __user *optval,
- int __user *optlen);
void (*get_dst) (struct sctp_transport *t,
union sctp_addr *saddr,
struct flowi *fl,
@@ -466,7 +463,7 @@ struct sctp_af {
int saddr);
void (*from_sk) (union sctp_addr *,
struct sock *sk);
- void (*from_addr_param) (union sctp_addr *,
+ bool (*from_addr_param) (union sctp_addr *,
union sctp_addr_param *,
__be16 port, int iif);
int (*to_addr_param) (const union sctp_addr *,
@@ -661,6 +658,7 @@ struct sctp_chunk {
data_accepted:1, /* At least 1 chunk accepted */
auth:1, /* IN: was auth'ed | OUT: needs auth */
has_asconf:1, /* IN: have seen an asconf before */
+ pmtu_probe:1, /* Used by PLPMTUD, can be set in s HB chunk */
tsn_missing_report:2, /* Data chunk missing counter. */
fast_retransmit:2; /* Is this chunk fast retransmitted? */
};
@@ -863,6 +861,7 @@ struct sctp_transport {
* the destination address every heartbeat interval.
*/
unsigned long hbinterval;
+ unsigned long probe_interval;
/* SACK delay timeout */
unsigned long sackdelay;
@@ -885,6 +884,8 @@ struct sctp_transport {
*/
unsigned long last_time_ecne_reduced;
+ __be16 encap_port;
+
/* This is the max_retrans value for the transport and will
* be initialized from the assocs value. This can be changed
* using the SCTP_SET_PEER_ADDR_PARAMS socket option.
@@ -937,6 +938,9 @@ struct sctp_transport {
/* Timer to handler reconf chunk rtx */
struct timer_list reconf_timer;
+ /* Timer to send a probe HB packet for PLPMTUD */
+ struct timer_list probe_timer;
+
/* Since we're using per-destination retransmission timers
* (see above), we're also using per-destination "transmitted"
* queues. This probably ought to be a private struct
@@ -979,6 +983,14 @@ struct sctp_transport {
char cacc_saw_newack;
} cacc;
+ struct {
+ __u16 pmtu;
+ __u16 probe_size;
+ __u16 probe_high;
+ __u8 probe_count;
+ __u8 state;
+ } pl; /* plpmtud related */
+
/* 64-bit random number sent with heartbeat. */
__u64 hb_nonce;
@@ -996,6 +1008,8 @@ void sctp_transport_free(struct sctp_transport *);
void sctp_transport_reset_t3_rtx(struct sctp_transport *);
void sctp_transport_reset_hb_timer(struct sctp_transport *);
void sctp_transport_reset_reconf_timer(struct sctp_transport *transport);
+void sctp_transport_reset_probe_timer(struct sctp_transport *transport);
+void sctp_transport_reset_raise_timer(struct sctp_transport *transport);
int sctp_transport_hold(struct sctp_transport *);
void sctp_transport_put(struct sctp_transport *);
void sctp_transport_update_rto(struct sctp_transport *, __u32);
@@ -1010,6 +1024,8 @@ bool sctp_transport_update_pmtu(struct sctp_transport *t, u32 pmtu);
void sctp_transport_immediate_rtx(struct sctp_transport *);
void sctp_transport_dst_release(struct sctp_transport *t);
void sctp_transport_dst_confirm(struct sctp_transport *t);
+void sctp_transport_pl_send(struct sctp_transport *t);
+bool sctp_transport_pl_recv(struct sctp_transport *t);
/* This is the structure we use to queue packets as they come into
@@ -1125,13 +1141,14 @@ static inline void sctp_outq_cork(struct sctp_outq *q)
*/
struct sctp_input_cb {
union {
- struct inet_skb_parm h4;
+ struct inet_skb_parm h4;
#if IS_ENABLED(CONFIG_IPV6)
- struct inet6_skb_parm h6;
+ struct inet6_skb_parm h6;
#endif
} header;
struct sctp_chunk *chunk;
struct sctp_af *af;
+ __be16 encap_port;
};
#define SCTP_INPUT_CB(__skb) ((struct sctp_input_cb *)&((__skb)->cb[0]))
@@ -1226,10 +1243,6 @@ enum sctp_endpoint_type {
*/
struct sctp_ep_common {
- /* Fields to help us manage our entries in the hash tables. */
- struct hlist_node node;
- int hashent;
-
/* Runtime type information. What kind of endpoint is this? */
enum sctp_endpoint_type type;
@@ -1281,6 +1294,10 @@ struct sctp_endpoint {
/* Common substructure for endpoint and association. */
struct sctp_ep_common base;
+ /* Fields to help us manage our entries in the hash tables. */
+ struct hlist_node node;
+ int hashent;
+
/* Associations: A list of current associations and mappings
* to the data consumers for each association. This
* may be in the form of a hash table or other
@@ -1337,16 +1354,7 @@ struct sctp_endpoint {
reconf_enable:1;
__u8 strreset_enable;
-
- /* Security identifiers from incoming (INIT). These are set by
- * security_sctp_assoc_request(). These will only be used by
- * SCTP TCP type sockets and peeled off connections as they
- * cause a new socket to be generated. security_sctp_sk_clone()
- * will then plug these into the new socket.
- */
-
- u32 secid;
- u32 peer_secid;
+ struct rcu_head rcu;
};
/* Recover the outter endpoint structure. */
@@ -1362,7 +1370,7 @@ static inline struct sctp_endpoint *sctp_ep(struct sctp_ep_common *base)
struct sctp_endpoint *sctp_endpoint_new(struct sock *, gfp_t);
void sctp_endpoint_free(struct sctp_endpoint *);
void sctp_endpoint_put(struct sctp_endpoint *);
-void sctp_endpoint_hold(struct sctp_endpoint *);
+int sctp_endpoint_hold(struct sctp_endpoint *ep);
void sctp_endpoint_add_asoc(struct sctp_endpoint *, struct sctp_association *);
struct sctp_association *sctp_endpoint_lookup_assoc(
const struct sctp_endpoint *ep,
@@ -1398,7 +1406,7 @@ struct sctp_stream_priorities {
struct list_head prio_sched;
/* List of streams scheduled */
struct list_head active;
- /* The next stream stream in line */
+ /* The next stream in line */
struct sctp_stream_out_ext *next;
__u16 prio;
};
@@ -1460,7 +1468,7 @@ struct sctp_stream {
struct {
/* List of streams scheduled */
struct list_head rr_list;
- /* The next stream stream in line */
+ /* The next stream in line */
struct sctp_stream_out_ext *rr_next;
};
};
@@ -1770,7 +1778,7 @@ struct sctp_association {
int max_burst;
/* This is the max_retrans value for the association. This value will
- * be initialized initialized from system defaults, but can be
+ * be initialized from system defaults, but can be
* modified by the SCTP_ASSOCINFO socket option.
*/
int max_retrans;
@@ -1797,6 +1805,9 @@ struct sctp_association {
* will be inherited by all new transports.
*/
unsigned long hbinterval;
+ unsigned long probe_interval;
+
+ __be16 encap_port;
/* This is the max_retrans value for new transports in the
* association.
@@ -2083,6 +2094,16 @@ struct sctp_association {
__u64 abandoned_unsent[SCTP_PR_INDEX(MAX) + 1];
__u64 abandoned_sent[SCTP_PR_INDEX(MAX) + 1];
+ /* Security identifiers from incoming (INIT). These are set by
+ * security_sctp_assoc_request(). These will only be used by
+ * SCTP TCP type sockets and peeled off connections as they
+ * cause a new socket to be generated. security_sctp_sk_clone()
+ * will then plug these into the new socket.
+ */
+
+ u32 secid;
+ u32 peer_secid;
+
struct rcu_head rcu;
};
diff --git a/include/net/sctp/ulpevent.h b/include/net/sctp/ulpevent.h
index 0b032b92da0b..994e984eef32 100644
--- a/include/net/sctp/ulpevent.h
+++ b/include/net/sctp/ulpevent.h
@@ -80,7 +80,7 @@ struct sctp_ulpevent *sctp_ulpevent_make_assoc_change(
struct sctp_chunk *chunk,
gfp_t gfp);
-void sctp_ulpevent_nofity_peer_addr_change(struct sctp_transport *transport,
+void sctp_ulpevent_notify_peer_addr_change(struct sctp_transport *transport,
int state, int error);
struct sctp_ulpevent *sctp_ulpevent_make_remote_error(
diff --git a/include/net/secure_seq.h b/include/net/secure_seq.h
index d7d2495f83c2..21e7fa2a1813 100644
--- a/include/net/secure_seq.h
+++ b/include/net/secure_seq.h
@@ -4,8 +4,10 @@
#include <linux/types.h>
-u32 secure_ipv4_port_ephemeral(__be32 saddr, __be32 daddr, __be16 dport);
-u32 secure_ipv6_port_ephemeral(const __be32 *saddr, const __be32 *daddr,
+struct net;
+
+u64 secure_ipv4_port_ephemeral(__be32 saddr, __be32 daddr, __be16 dport);
+u64 secure_ipv6_port_ephemeral(const __be32 *saddr, const __be32 *daddr,
__be16 dport);
u32 secure_tcp_seq(__be32 saddr, __be32 daddr,
__be16 sport, __be16 dport);
diff --git a/include/net/seg6.h b/include/net/seg6.h
index 640724b35273..af668f17b398 100644
--- a/include/net/seg6.h
+++ b/include/net/seg6.h
@@ -57,10 +57,31 @@ extern void seg6_iptunnel_exit(void);
extern int seg6_local_init(void);
extern void seg6_local_exit(void);
-extern bool seg6_validate_srh(struct ipv6_sr_hdr *srh, int len);
+extern bool seg6_validate_srh(struct ipv6_sr_hdr *srh, int len, bool reduced);
+extern struct ipv6_sr_hdr *seg6_get_srh(struct sk_buff *skb, int flags);
+extern void seg6_icmp_srh(struct sk_buff *skb, struct inet6_skb_parm *opt);
extern int seg6_do_srh_encap(struct sk_buff *skb, struct ipv6_sr_hdr *osrh,
int proto);
extern int seg6_do_srh_inline(struct sk_buff *skb, struct ipv6_sr_hdr *osrh);
extern int seg6_lookup_nexthop(struct sk_buff *skb, struct in6_addr *nhaddr,
u32 tbl_id);
+
+/* If the packet which invoked an ICMP error contains an SRH return
+ * the true destination address from within the SRH, otherwise use the
+ * destination address in the IP header.
+ */
+static inline const struct in6_addr *seg6_get_daddr(struct sk_buff *skb,
+ struct inet6_skb_parm *opt)
+{
+ struct ipv6_sr_hdr *srh;
+
+ if (opt->flags & IP6SKB_SEG6) {
+ srh = (struct ipv6_sr_hdr *)(skb->data + opt->srhoff);
+ return &srh->segments[0];
+ }
+
+ return NULL;
+}
+
+
#endif
diff --git a/include/net/selftests.h b/include/net/selftests.h
new file mode 100644
index 000000000000..e65e8d230d33
--- /dev/null
+++ b/include/net/selftests.h
@@ -0,0 +1,31 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _NET_SELFTESTS
+#define _NET_SELFTESTS
+
+#include <linux/ethtool.h>
+
+#if IS_ENABLED(CONFIG_NET_SELFTESTS)
+
+void net_selftest(struct net_device *ndev, struct ethtool_test *etest,
+ u64 *buf);
+int net_selftest_get_count(void);
+void net_selftest_get_strings(u8 *data);
+
+#else
+
+static inline void net_selftest(struct net_device *ndev, struct ethtool_test *etest,
+ u64 *buf)
+{
+}
+
+static inline int net_selftest_get_count(void)
+{
+ return 0;
+}
+
+static inline void net_selftest_get_strings(u8 *data)
+{
+}
+
+#endif
+#endif /* _NET_SELFTESTS */
diff --git a/include/net/smc.h b/include/net/smc.h
index 646feb4bc75f..c926d3313e05 100644
--- a/include/net/smc.h
+++ b/include/net/smc.h
@@ -11,6 +11,13 @@
#ifndef _SMC_H
#define _SMC_H
+#include <linux/device.h>
+#include <linux/spinlock.h>
+#include <linux/types.h>
+#include <linux/wait.h>
+
+struct sock;
+
#define SMC_MAX_PNETID_LEN 16 /* Max. length of PNET id */
struct smc_hashinfo {
@@ -37,6 +44,8 @@ struct smcd_dmb {
#define ISM_EVENT_GID 1
#define ISM_EVENT_SWR 2
+#define ISM_RESERVED_VLANID 0x1FFF
+
#define ISM_ERROR 0xFFFF
struct smcd_event {
@@ -63,6 +72,8 @@ struct smcd_ops {
int (*move_data)(struct smcd_dev *dev, u64 dmb_tok, unsigned int idx,
bool sf, unsigned int offset, void *data,
unsigned int size);
+ u8* (*get_system_eid)(void);
+ u16 (*get_chid)(struct smcd_dev *dev);
};
struct smcd_dev {
@@ -90,5 +101,5 @@ int smcd_register_dev(struct smcd_dev *smcd);
void smcd_unregister_dev(struct smcd_dev *smcd);
void smcd_free_dev(struct smcd_dev *smcd);
void smcd_handle_event(struct smcd_dev *dev, struct smcd_event *event);
-void smcd_handle_irq(struct smcd_dev *dev, unsigned int bit);
+void smcd_handle_irq(struct smcd_dev *dev, unsigned int bit, u16 dmbemask);
#endif /* _SMC_H */
diff --git a/include/net/sock.h b/include/net/sock.h
index 328564525526..5db02546941c 100644
--- a/include/net/sock.h
+++ b/include/net/sock.h
@@ -56,17 +56,19 @@
#include <linux/wait.h>
#include <linux/cgroup-defs.h>
#include <linux/rbtree.h>
-#include <linux/filter.h>
#include <linux/rculist_nulls.h>
#include <linux/poll.h>
-
+#include <linux/sockptr.h>
+#include <linux/indirect_call_wrapper.h>
#include <linux/atomic.h>
#include <linux/refcount.h>
+#include <linux/llist.h>
#include <net/dst.h>
#include <net/checksum.h>
#include <net/tcp_states.h>
#include <linux/net_tstamp.h>
#include <net/l3mdev.h>
+#include <uapi/linux/socket.h>
/*
* This structure really needs to be cleaned up.
@@ -159,9 +161,6 @@ typedef __u64 __bitwise __addrpair;
* for struct sock and struct inet_timewait_sock.
*/
struct sock_common {
- /* skc_daddr and skc_rcv_saddr must be grouped on a 8 bytes aligned
- * address on 64bit arches : cf INET_MATCH()
- */
union {
__addrpair skc_addrpair;
struct {
@@ -225,7 +224,7 @@ struct sock_common {
struct hlist_nulls_node skc_nulls_node;
};
unsigned short skc_tx_queue_mapping;
-#ifdef CONFIG_XPS
+#ifdef CONFIG_SOCK_RX_QUEUE_MAPPING
unsigned short skc_rx_queue_mapping;
#endif
union {
@@ -245,7 +244,8 @@ struct sock_common {
/* public: */
};
-struct bpf_sk_storage;
+struct bpf_local_storage;
+struct sk_filter;
/**
* struct sock - network layer representation of sockets
@@ -257,10 +257,11 @@ struct bpf_sk_storage;
* @sk_rcvbuf: size of receive buffer in bytes
* @sk_wq: sock wait queue and async head
* @sk_rx_dst: receive input route used by early demux
+ * @sk_rx_dst_ifindex: ifindex for @sk_rx_dst
+ * @sk_rx_dst_cookie: cookie for @sk_rx_dst
* @sk_dst_cache: destination cache
* @sk_dst_pending_confirm: need to confirm neighbour
* @sk_policy: flow policy
- * @sk_rx_skb_cache: cache copy of recently accessed RX skb
* @sk_receive_queue: incoming packets
* @sk_wmem_alloc: transmit queue bytes committed
* @sk_tsq_flags: TCP Small Queues flags
@@ -268,6 +269,7 @@ struct bpf_sk_storage;
* @sk_omem_alloc: "o" is "option" or "other"
* @sk_wmem_queued: persistent queue size
* @sk_forward_alloc: space allocated forward
+ * @sk_reserved_mem: space reserved and non-reclaimable for the socket
* @sk_napi_id: id of the last napi context to receive data for sk
* @sk_ll_usec: usecs to busypoll when there is no data
* @sk_allocation: allocation mode
@@ -280,9 +282,7 @@ struct bpf_sk_storage;
* @sk_no_check_tx: %SO_NO_CHECK setting, set checksum in TX packets
* @sk_no_check_rx: allow zero checksum in RX packets
* @sk_route_caps: route capabilities (e.g. %NETIF_F_TSO)
- * @sk_route_nocaps: forbidden route capabilities (e.g NETIF_F_GSO_MASK)
- * @sk_route_forced_caps: static, forced route capabilities
- * (set in tcp_init_sock())
+ * @sk_gso_disabled: if set, NETIF_F_GSO_MASK is forbidden.
* @sk_gso_type: GSO type (e.g. %SKB_GSO_TCPV4)
* @sk_gso_max_size: Maximum GSO segment size to build
* @sk_gso_max_segs: Maximum number of GSO segments
@@ -300,20 +300,26 @@ struct bpf_sk_storage;
* @sk_ack_backlog: current listen backlog
* @sk_max_ack_backlog: listen backlog set in listen()
* @sk_uid: user id of owner
+ * @sk_prefer_busy_poll: prefer busypolling over softirq processing
+ * @sk_busy_poll_budget: napi processing budget when busypolling
* @sk_priority: %SO_PRIORITY setting
* @sk_type: socket type (%SOCK_STREAM, etc)
* @sk_protocol: which protocol this socket belongs in this network family
+ * @sk_peer_lock: lock protecting @sk_peer_pid and @sk_peer_cred
* @sk_peer_pid: &struct pid for this socket's peer
* @sk_peer_cred: %SO_PEERCRED setting
* @sk_rcvlowat: %SO_RCVLOWAT setting
* @sk_rcvtimeo: %SO_RCVTIMEO setting
* @sk_sndtimeo: %SO_SNDTIMEO setting
* @sk_txhash: computed flow hash for use on transmit
+ * @sk_txrehash: enable TX hash rethink
* @sk_filter: socket filtering instructions
* @sk_timer: sock cleanup timer
* @sk_stamp: time stamp of last packet received
* @sk_stamp_seq: lock for accessing sk_stamp on 32 bit architectures only
- * @sk_tsflags: SO_TIMESTAMPING socket options
+ * @sk_tsflags: SO_TIMESTAMPING flags
+ * @sk_bind_phc: SO_TIMESTAMPING bind PHC index of PTP virtual clock
+ * for timestamping
* @sk_tskey: counter to disambiguate concurrent tstamp requests
* @sk_zckey: counter to order MSG_ZEROCOPY notifications
* @sk_socket: Identd and reporting IO signals
@@ -322,7 +328,6 @@ struct bpf_sk_storage;
* @sk_peek_off: current peek_offset value
* @sk_send_head: front of stuff to transmit
* @tcp_rtx_queue: TCP re-transmit queue [union with @sk_send_head]
- * @sk_tx_skb_cache: cache copy of recently accessed TX skb
* @sk_security: used by security modules
* @sk_mark: generic packet mark
* @sk_cgrp_data: cgroup data for this cgroup
@@ -342,6 +347,8 @@ struct bpf_sk_storage;
* @sk_txtime_deadline_mode: set deadline mode for SO_TXTIME
* @sk_txtime_report_errors: set report errors mode for SO_TXTIME
* @sk_txtime_unused: unused txtime flags
+ * @ns_tracker: tracker for netns reference
+ * @sk_bind2_node: bind node in the bhash2 table
*/
struct sock {
/*
@@ -353,7 +360,7 @@ struct sock {
#define sk_nulls_node __sk_common.skc_nulls_node
#define sk_refcnt __sk_common.skc_refcnt
#define sk_tx_queue_mapping __sk_common.skc_tx_queue_mapping
-#ifdef CONFIG_XPS
+#ifdef CONFIG_SOCK_RX_QUEUE_MAPPING
#define sk_rx_queue_mapping __sk_common.skc_rx_queue_mapping
#endif
@@ -383,11 +390,15 @@ struct sock {
#define sk_flags __sk_common.skc_flags
#define sk_rxhash __sk_common.skc_rxhash
+ /* early demux fields */
+ struct dst_entry __rcu *sk_rx_dst;
+ int sk_rx_dst_ifindex;
+ u32 sk_rx_dst_cookie;
+
socket_lock_t sk_lock;
atomic_t sk_drops;
int sk_rcvlowat;
struct sk_buff_head sk_error_queue;
- struct sk_buff *sk_rx_skb_cache;
struct sk_buff_head sk_receive_queue;
/*
* The backlog queue is special, it is always used with
@@ -403,9 +414,11 @@ struct sock {
struct sk_buff *head;
struct sk_buff *tail;
} sk_backlog;
+
#define sk_rmem_alloc sk_backlog.rmem_alloc
int sk_forward_alloc;
+ u32 sk_reserved_mem;
#ifdef CONFIG_NET_RX_BUSY_POLL
unsigned int sk_ll_usec;
/* ===== mostly read cache line ===== */
@@ -423,7 +436,7 @@ struct sock {
#ifdef CONFIG_XFRM
struct xfrm_policy __rcu *sk_policy[2];
#endif
- struct dst_entry *sk_rx_dst;
+
struct dst_entry __rcu *sk_dst_cache;
atomic_t sk_omem_alloc;
int sk_sndbuf;
@@ -436,7 +449,6 @@ struct sock {
struct sk_buff *sk_send_head;
struct rb_root tcp_rtx_queue;
};
- struct sk_buff *sk_tx_skb_cache;
struct sk_buff_head sk_write_queue;
__s32 sk_peek_off;
int sk_write_pending;
@@ -450,8 +462,6 @@ struct sock {
unsigned long sk_max_pacing_rate;
struct page_frag sk_frag;
netdev_features_t sk_route_caps;
- netdev_features_t sk_route_nocaps;
- netdev_features_t sk_route_forced_caps;
int sk_gso_type;
unsigned int sk_gso_max_size;
gfp_t sk_allocation;
@@ -461,7 +471,7 @@ struct sock {
* Because of non atomicity rules, all
* changes are protected by socket lock.
*/
- u8 sk_padding : 1,
+ u8 sk_gso_disabled : 1,
sk_kern_sock : 1,
sk_no_check_tx : 1,
sk_no_check_rx : 1,
@@ -478,8 +488,16 @@ struct sock {
u32 sk_ack_backlog;
u32 sk_max_ack_backlog;
kuid_t sk_uid;
+ u8 sk_txrehash;
+#ifdef CONFIG_NET_RX_BUSY_POLL
+ u8 sk_prefer_busy_poll;
+ u16 sk_busy_poll_budget;
+#endif
+ spinlock_t sk_peer_lock;
+ int sk_bind_phc;
struct pid *sk_peer_pid;
const struct cred *sk_peer_cred;
+
long sk_rcvtimeo;
ktime_t sk_stamp;
#if BITS_PER_LONG==32
@@ -487,7 +505,7 @@ struct sock {
#endif
u16 sk_tsflags;
u8 sk_shutdown;
- u32 sk_tskey;
+ atomic_t sk_tskey;
atomic_t sk_zckey;
u8 sk_clockid;
@@ -516,9 +534,11 @@ struct sock {
void (*sk_destruct)(struct sock *sk);
struct sock_reuseport __rcu *sk_reuseport_cb;
#ifdef CONFIG_BPF_SYSCALL
- struct bpf_sk_storage __rcu *sk_bpf_storage;
+ struct bpf_local_storage __rcu *sk_bpf_storage;
#endif
struct rcu_head sk_rcu;
+ netns_tracker ns_tracker;
+ struct hlist_node sk_bind2_node;
};
enum sk_pacing {
@@ -527,10 +547,109 @@ enum sk_pacing {
SK_PACING_FQ = 2,
};
+/* flag bits in sk_user_data
+ *
+ * - SK_USER_DATA_NOCOPY: Pointer stored in sk_user_data might
+ * not be suitable for copying when cloning the socket. For instance,
+ * it can point to a reference counted object. sk_user_data bottom
+ * bit is set if pointer must not be copied.
+ *
+ * - SK_USER_DATA_BPF: Mark whether sk_user_data field is
+ * managed/owned by a BPF reuseport array. This bit should be set
+ * when sk_user_data's sk is added to the bpf's reuseport_array.
+ *
+ * - SK_USER_DATA_PSOCK: Mark whether pointer stored in
+ * sk_user_data points to psock type. This bit should be set
+ * when sk_user_data is assigned to a psock object.
+ */
+#define SK_USER_DATA_NOCOPY 1UL
+#define SK_USER_DATA_BPF 2UL
+#define SK_USER_DATA_PSOCK 4UL
+#define SK_USER_DATA_PTRMASK ~(SK_USER_DATA_NOCOPY | SK_USER_DATA_BPF |\
+ SK_USER_DATA_PSOCK)
+
+/**
+ * sk_user_data_is_nocopy - Test if sk_user_data pointer must not be copied
+ * @sk: socket
+ */
+static inline bool sk_user_data_is_nocopy(const struct sock *sk)
+{
+ return ((uintptr_t)sk->sk_user_data & SK_USER_DATA_NOCOPY);
+}
+
#define __sk_user_data(sk) ((*((void __rcu **)&(sk)->sk_user_data)))
-#define rcu_dereference_sk_user_data(sk) rcu_dereference(__sk_user_data((sk)))
-#define rcu_assign_sk_user_data(sk, ptr) rcu_assign_pointer(__sk_user_data((sk)), ptr)
+/**
+ * __locked_read_sk_user_data_with_flags - return the pointer
+ * only if argument flags all has been set in sk_user_data. Otherwise
+ * return NULL
+ *
+ * @sk: socket
+ * @flags: flag bits
+ *
+ * The caller must be holding sk->sk_callback_lock.
+ */
+static inline void *
+__locked_read_sk_user_data_with_flags(const struct sock *sk,
+ uintptr_t flags)
+{
+ uintptr_t sk_user_data =
+ (uintptr_t)rcu_dereference_check(__sk_user_data(sk),
+ lockdep_is_held(&sk->sk_callback_lock));
+
+ WARN_ON_ONCE(flags & SK_USER_DATA_PTRMASK);
+
+ if ((sk_user_data & flags) == flags)
+ return (void *)(sk_user_data & SK_USER_DATA_PTRMASK);
+ return NULL;
+}
+
+/**
+ * __rcu_dereference_sk_user_data_with_flags - return the pointer
+ * only if argument flags all has been set in sk_user_data. Otherwise
+ * return NULL
+ *
+ * @sk: socket
+ * @flags: flag bits
+ */
+static inline void *
+__rcu_dereference_sk_user_data_with_flags(const struct sock *sk,
+ uintptr_t flags)
+{
+ uintptr_t sk_user_data = (uintptr_t)rcu_dereference(__sk_user_data(sk));
+
+ WARN_ON_ONCE(flags & SK_USER_DATA_PTRMASK);
+
+ if ((sk_user_data & flags) == flags)
+ return (void *)(sk_user_data & SK_USER_DATA_PTRMASK);
+ return NULL;
+}
+
+#define rcu_dereference_sk_user_data(sk) \
+ __rcu_dereference_sk_user_data_with_flags(sk, 0)
+#define __rcu_assign_sk_user_data_with_flags(sk, ptr, flags) \
+({ \
+ uintptr_t __tmp1 = (uintptr_t)(ptr), \
+ __tmp2 = (uintptr_t)(flags); \
+ WARN_ON_ONCE(__tmp1 & ~SK_USER_DATA_PTRMASK); \
+ WARN_ON_ONCE(__tmp2 & SK_USER_DATA_PTRMASK); \
+ rcu_assign_pointer(__sk_user_data((sk)), \
+ __tmp1 | __tmp2); \
+})
+#define rcu_assign_sk_user_data(sk, ptr) \
+ __rcu_assign_sk_user_data_with_flags(sk, ptr, 0)
+
+static inline
+struct net *sock_net(const struct sock *sk)
+{
+ return read_pnet(&sk->sk_net);
+}
+
+static inline
+void sock_net_set(struct sock *sk, struct net *net)
+{
+ write_pnet(&sk->sk_net, net);
+}
/*
* SK_CAN_REUSE and SK_NO_REUSE on a socket mean that the socket is OK
@@ -545,7 +664,7 @@ enum sk_pacing {
int sk_set_peek_off(struct sock *sk, int val);
-static inline int sk_peek_offset(struct sock *sk, int flags)
+static inline int sk_peek_offset(const struct sock *sk, int flags)
{
if (unlikely(flags & MSG_PEEK)) {
return READ_ONCE(sk->sk_peek_off);
@@ -625,11 +744,6 @@ static inline void sk_node_init(struct hlist_node *node)
node->pprev = NULL;
}
-static inline void sk_nulls_node_init(struct hlist_nulls_node *node)
-{
- node->pprev = NULL;
-}
-
static inline void __sk_del_node(struct sock *sk)
{
__hlist_del(&sk->sk_node);
@@ -753,6 +867,16 @@ static inline void sk_add_bind_node(struct sock *sk,
hlist_add_head(&sk->sk_bind_node, list);
}
+static inline void __sk_del_bind2_node(struct sock *sk)
+{
+ __hlist_del(&sk->sk_bind2_node);
+}
+
+static inline void sk_add_bind2_node(struct sock *sk, struct hlist_head *list)
+{
+ hlist_add_head(&sk->sk_bind2_node, list);
+}
+
#define sk_for_each(__sk, list) \
hlist_for_each_entry(__sk, list, sk_node)
#define sk_for_each_rcu(__sk, list) \
@@ -770,6 +894,8 @@ static inline void sk_add_bind_node(struct sock *sk,
hlist_for_each_entry_safe(__sk, tmp, list, sk_node)
#define sk_for_each_bound(__sk, list) \
hlist_for_each_entry(__sk, list, sk_bind_node)
+#define sk_for_each_bound_bhash2(__sk, list) \
+ hlist_for_each_entry(__sk, list, sk_bind2_node)
/**
* sk_for_each_entry_offset_rcu - iterate over a list at a given struct offset
@@ -785,7 +911,7 @@ static inline void sk_add_bind_node(struct sock *sk,
({ tpos = (typeof(*tpos) *)((void *)pos - offset); 1;}); \
pos = rcu_dereference(hlist_next_rcu(pos)))
-static inline struct user_namespace *sk_user_ns(struct sock *sk)
+static inline struct user_namespace *sk_user_ns(const struct sock *sk)
{
/* Careful only use this in a context where these parameters
* can not change and must all be valid, such as recvmsg from
@@ -810,7 +936,6 @@ enum sock_flags {
SOCK_RCVTSTAMP, /* %SO_TIMESTAMP setting */
SOCK_RCVTSTAMPNS, /* %SO_TIMESTAMPNS setting */
SOCK_LOCALROUTE, /* route locally only, %SO_DONTROUTE setting */
- SOCK_QUEUE_SHRUNK, /* write queue has been shrunk recently */
SOCK_MEMALLOC, /* VM depends on this socket for swapping */
SOCK_TIMESTAMPING_RX_SOFTWARE, /* %SOF_TIMESTAMPING_RX_SOFTWARE */
SOCK_FASYNC, /* fasync() active */
@@ -827,11 +952,12 @@ enum sock_flags {
SOCK_TXTIME,
SOCK_XDP, /* XDP is attached */
SOCK_TSTAMP_NEW, /* Indicates 64 bit timestamps always */
+ SOCK_RCVMARK, /* Receive SO_MARK ancillary data with packet */
};
#define SK_FLAGS_TIMESTAMP ((1UL << SOCK_TIMESTAMP) | (1UL << SOCK_TIMESTAMPING_RX_SOFTWARE))
-static inline void sock_copy_flags(struct sock *nsk, struct sock *osk)
+static inline void sock_copy_flags(struct sock *nsk, const struct sock *osk)
{
nsk->sk_flags = osk->sk_flags;
}
@@ -846,6 +972,15 @@ static inline void sock_reset_flag(struct sock *sk, enum sock_flags flag)
__clear_bit(flag, &sk->sk_flags);
}
+static inline void sock_valbool_flag(struct sock *sk, enum sock_flags bit,
+ int valbool)
+{
+ if (valbool)
+ sock_set_flag(sk, bit);
+ else
+ sock_reset_flag(sk, bit);
+}
+
static inline bool sock_flag(const struct sock *sk, enum sock_flags flag)
{
return test_bit(flag, &sk->sk_flags);
@@ -857,6 +992,8 @@ static inline int sk_memalloc_socks(void)
{
return static_branch_unlikely(&memalloc_socks_key);
}
+
+void __receive_sock(struct file *file);
#else
static inline int sk_memalloc_socks(void)
@@ -864,6 +1001,8 @@ static inline int sk_memalloc_socks(void)
return 0;
}
+static inline void __receive_sock(struct file *file)
+{ }
#endif
static inline gfp_t sk_gfp_mask(const struct sock *sk, gfp_t gfp_mask)
@@ -881,6 +1020,10 @@ static inline void sk_acceptq_added(struct sock *sk)
WRITE_ONCE(sk->sk_ack_backlog, sk->sk_ack_backlog + 1);
}
+/* Note: If you think the test should be:
+ * return READ_ONCE(sk->sk_ack_backlog) >= READ_ONCE(sk->sk_max_ack_backlog);
+ * Then please take a look at commit 64a146513f8f ("[NET]: Revert incorrect accept queue backlog changes.")
+ */
static inline bool sk_acceptq_is_full(const struct sock *sk)
{
return READ_ONCE(sk->sk_ack_backlog) > READ_ONCE(sk->sk_max_ack_backlog);
@@ -955,12 +1098,18 @@ static inline __must_check int sk_add_backlog(struct sock *sk, struct sk_buff *s
int __sk_backlog_rcv(struct sock *sk, struct sk_buff *skb);
+INDIRECT_CALLABLE_DECLARE(int tcp_v4_do_rcv(struct sock *sk, struct sk_buff *skb));
+INDIRECT_CALLABLE_DECLARE(int tcp_v6_do_rcv(struct sock *sk, struct sk_buff *skb));
+
static inline int sk_backlog_rcv(struct sock *sk, struct sk_buff *skb)
{
if (sk_memalloc_socks() && skb_pfmemalloc(skb))
return __sk_backlog_rcv(sk, skb);
- return sk->sk_backlog_rcv(sk, skb);
+ return INDIRECT_CALL_INET(sk->sk_backlog_rcv,
+ tcp_v6_do_rcv,
+ tcp_v4_do_rcv,
+ sk, skb);
}
static inline void sk_incoming_cpu_update(struct sock *sk)
@@ -1061,6 +1210,7 @@ struct inet_hashinfo;
struct raw_hashinfo;
struct smc_hashinfo;
struct module;
+struct sk_psock;
/*
* caches using SLAB_TYPESAFE_BY_RCU should let .next pointer from nulls nodes
@@ -1097,36 +1247,31 @@ struct proto {
void (*destroy)(struct sock *sk);
void (*shutdown)(struct sock *sk, int how);
int (*setsockopt)(struct sock *sk, int level,
- int optname, char __user *optval,
+ int optname, sockptr_t optval,
unsigned int optlen);
int (*getsockopt)(struct sock *sk, int level,
int optname, char __user *optval,
int __user *option);
void (*keepalive)(struct sock *sk, int valbool);
#ifdef CONFIG_COMPAT
- int (*compat_setsockopt)(struct sock *sk,
- int level,
- int optname, char __user *optval,
- unsigned int optlen);
- int (*compat_getsockopt)(struct sock *sk,
- int level,
- int optname, char __user *optval,
- int __user *option);
int (*compat_ioctl)(struct sock *sk,
unsigned int cmd, unsigned long arg);
#endif
int (*sendmsg)(struct sock *sk, struct msghdr *msg,
size_t len);
int (*recvmsg)(struct sock *sk, struct msghdr *msg,
- size_t len, int noblock, int flags,
- int *addr_len);
+ size_t len, int flags, int *addr_len);
int (*sendpage)(struct sock *sk, struct page *page,
int offset, size_t size, int flags);
int (*bind)(struct sock *sk,
- struct sockaddr *uaddr, int addr_len);
+ struct sockaddr *addr, int addr_len);
+ int (*bind_add)(struct sock *sk,
+ struct sockaddr *addr, int addr_len);
int (*backlog_rcv) (struct sock *sk,
struct sk_buff *skb);
+ bool (*bpf_bypass_getsockopt)(int level,
+ int optname);
void (*release_cb)(struct sock *sk);
@@ -1135,19 +1280,31 @@ struct proto {
void (*unhash)(struct sock *sk);
void (*rehash)(struct sock *sk);
int (*get_port)(struct sock *sk, unsigned short snum);
+ void (*put_port)(struct sock *sk);
+#ifdef CONFIG_BPF_SYSCALL
+ int (*psock_update_sk_prot)(struct sock *sk,
+ struct sk_psock *psock,
+ bool restore);
+#endif
/* Keeping track of sockets in use */
#ifdef CONFIG_PROC_FS
unsigned int inuse_idx;
#endif
+#if IS_ENABLED(CONFIG_MPTCP)
+ int (*forward_alloc_get)(const struct sock *sk);
+#endif
+
bool (*stream_memory_free)(const struct sock *sk, int wake);
- bool (*stream_memory_read)(const struct sock *sk);
+ bool (*sock_is_readable)(struct sock *sk);
/* Memory pressure */
void (*enter_memory_pressure)(struct sock *sk);
void (*leave_memory_pressure)(struct sock *sk);
atomic_long_t *memory_allocated; /* Current allocated memory. */
+ int __percpu *per_cpu_fw_alloc;
struct percpu_counter *sockets_allocated; /* Current number of sockets. */
+
/*
* Pressure flag: try to collapse.
* Technical note: it is used by multiple contexts non atomically.
@@ -1171,7 +1328,7 @@ struct proto {
unsigned int useroffset; /* Usercopy region offset */
unsigned int usersize; /* Usercopy region size */
- struct percpu_counter *orphan_count;
+ unsigned int __percpu *orphan_count;
struct request_sock_ops *rsk_prot;
struct timewait_sock_ops *twsk_prot;
@@ -1223,13 +1380,25 @@ static inline void sk_refcnt_debug_release(const struct sock *sk)
#define sk_refcnt_debug_release(sk) do { } while (0)
#endif /* SOCK_REFCNT_DEBUG */
+INDIRECT_CALLABLE_DECLARE(bool tcp_stream_memory_free(const struct sock *sk, int wake));
+
+static inline int sk_forward_alloc_get(const struct sock *sk)
+{
+#if IS_ENABLED(CONFIG_MPTCP)
+ if (sk->sk_prot->forward_alloc_get)
+ return sk->sk_prot->forward_alloc_get(sk);
+#endif
+ return sk->sk_forward_alloc;
+}
+
static inline bool __sk_stream_memory_free(const struct sock *sk, int wake)
{
if (READ_ONCE(sk->sk_wmem_queued) >= READ_ONCE(sk->sk_sndbuf))
return false;
return sk->sk_prot->stream_memory_free ?
- sk->sk_prot->stream_memory_free(sk, wake) : true;
+ INDIRECT_CALL_INET_1(sk->sk_prot->stream_memory_free,
+ tcp_stream_memory_free, sk, wake) : true;
}
static inline bool sk_stream_memory_free(const struct sock *sk)
@@ -1277,31 +1446,60 @@ static inline bool sk_under_memory_pressure(const struct sock *sk)
}
static inline long
-sk_memory_allocated(const struct sock *sk)
+proto_memory_allocated(const struct proto *prot)
{
- return atomic_long_read(sk->sk_prot->memory_allocated);
+ return max(0L, atomic_long_read(prot->memory_allocated));
}
static inline long
+sk_memory_allocated(const struct sock *sk)
+{
+ return proto_memory_allocated(sk->sk_prot);
+}
+
+/* 1 MB per cpu, in page units */
+#define SK_MEMORY_PCPU_RESERVE (1 << (20 - PAGE_SHIFT))
+
+static inline void
sk_memory_allocated_add(struct sock *sk, int amt)
{
- return atomic_long_add_return(amt, sk->sk_prot->memory_allocated);
+ int local_reserve;
+
+ preempt_disable();
+ local_reserve = __this_cpu_add_return(*sk->sk_prot->per_cpu_fw_alloc, amt);
+ if (local_reserve >= SK_MEMORY_PCPU_RESERVE) {
+ __this_cpu_sub(*sk->sk_prot->per_cpu_fw_alloc, local_reserve);
+ atomic_long_add(local_reserve, sk->sk_prot->memory_allocated);
+ }
+ preempt_enable();
}
static inline void
sk_memory_allocated_sub(struct sock *sk, int amt)
{
- atomic_long_sub(amt, sk->sk_prot->memory_allocated);
+ int local_reserve;
+
+ preempt_disable();
+ local_reserve = __this_cpu_sub_return(*sk->sk_prot->per_cpu_fw_alloc, amt);
+ if (local_reserve <= -SK_MEMORY_PCPU_RESERVE) {
+ __this_cpu_sub(*sk->sk_prot->per_cpu_fw_alloc, local_reserve);
+ atomic_long_add(local_reserve, sk->sk_prot->memory_allocated);
+ }
+ preempt_enable();
}
+#define SK_ALLOC_PERCPU_COUNTER_BATCH 16
+
static inline void sk_sockets_allocated_dec(struct sock *sk)
{
- percpu_counter_dec(sk->sk_prot->sockets_allocated);
+ percpu_counter_add_batch(sk->sk_prot->sockets_allocated, -1,
+ SK_ALLOC_PERCPU_COUNTER_BATCH);
}
static inline void sk_sockets_allocated_inc(struct sock *sk)
{
- percpu_counter_inc(sk->sk_prot->sockets_allocated);
+ percpu_counter_add_batch(sk->sk_prot->sockets_allocated, 1,
+ SK_ALLOC_PERCPU_COUNTER_BATCH);
}
static inline u64
@@ -1316,12 +1514,6 @@ proto_sockets_allocated_sum_positive(struct proto *prot)
return percpu_counter_sum_positive(prot->sockets_allocated);
}
-static inline long
-proto_memory_allocated(struct proto *prot)
-{
- return atomic_long_read(prot->memory_allocated);
-}
-
static inline bool
proto_memory_pressure(struct proto *prot)
{
@@ -1332,13 +1524,32 @@ proto_memory_pressure(struct proto *prot)
#ifdef CONFIG_PROC_FS
-/* Called with local bh disabled */
-void sock_prot_inuse_add(struct net *net, struct proto *prot, int inc);
+#define PROTO_INUSE_NR 64 /* should be enough for the first time */
+struct prot_inuse {
+ int all;
+ int val[PROTO_INUSE_NR];
+};
+
+static inline void sock_prot_inuse_add(const struct net *net,
+ const struct proto *prot, int val)
+{
+ this_cpu_add(net->core.prot_inuse->val[prot->inuse_idx], val);
+}
+
+static inline void sock_inuse_add(const struct net *net, int val)
+{
+ this_cpu_add(net->core.prot_inuse->all, val);
+}
+
int sock_prot_inuse_get(struct net *net, struct proto *proto);
int sock_inuse_get(struct net *net);
#else
-static inline void sock_prot_inuse_add(struct net *net, struct proto *prot,
- int inc)
+static inline void sock_prot_inuse_add(const struct net *net,
+ const struct proto *prot, int val)
+{
+}
+
+static inline void sock_inuse_add(const struct net *net, int val)
{
}
#endif
@@ -1363,8 +1574,6 @@ static inline int __sk_prot_rehash(struct sock *sk)
#define RCV_SHUTDOWN 1
#define SEND_SHUTDOWN 2
-#define SOCK_SNDBUF_LOCK 1
-#define SOCK_RCVBUF_LOCK 2
#define SOCK_BINDADDR_LOCK 4
#define SOCK_BINDPORT_LOCK 8
@@ -1391,30 +1600,18 @@ int __sk_mem_schedule(struct sock *sk, int size, int kind);
void __sk_mem_reduce_allocated(struct sock *sk, int amount);
void __sk_mem_reclaim(struct sock *sk, int amount);
-/* We used to have PAGE_SIZE here, but systems with 64KB pages
- * do not necessarily have 16x time more memory than 4KB ones.
- */
-#define SK_MEM_QUANTUM 4096
-#define SK_MEM_QUANTUM_SHIFT ilog2(SK_MEM_QUANTUM)
#define SK_MEM_SEND 0
#define SK_MEM_RECV 1
-/* sysctl_mem values are in pages, we convert them in SK_MEM_QUANTUM units */
+/* sysctl_mem values are in pages */
static inline long sk_prot_mem_limits(const struct sock *sk, int index)
{
- long val = sk->sk_prot->sysctl_mem[index];
-
-#if PAGE_SIZE > SK_MEM_QUANTUM
- val <<= PAGE_SHIFT - SK_MEM_QUANTUM_SHIFT;
-#elif PAGE_SIZE < SK_MEM_QUANTUM
- val >>= SK_MEM_QUANTUM_SHIFT - PAGE_SHIFT;
-#endif
- return val;
+ return READ_ONCE(sk->sk_prot->sysctl_mem[index]);
}
static inline int sk_mem_pages(int amt)
{
- return (amt + SK_MEM_QUANTUM - 1) >> SK_MEM_QUANTUM_SHIFT;
+ return (amt + PAGE_SIZE - 1) >> PAGE_SHIFT;
}
static inline bool sk_has_account(struct sock *sk)
@@ -1425,36 +1622,56 @@ static inline bool sk_has_account(struct sock *sk)
static inline bool sk_wmem_schedule(struct sock *sk, int size)
{
+ int delta;
+
if (!sk_has_account(sk))
return true;
- return size <= sk->sk_forward_alloc ||
- __sk_mem_schedule(sk, size, SK_MEM_SEND);
+ delta = size - sk->sk_forward_alloc;
+ return delta <= 0 || __sk_mem_schedule(sk, delta, SK_MEM_SEND);
}
static inline bool
sk_rmem_schedule(struct sock *sk, struct sk_buff *skb, int size)
{
+ int delta;
+
if (!sk_has_account(sk))
return true;
- return size<= sk->sk_forward_alloc ||
- __sk_mem_schedule(sk, size, SK_MEM_RECV) ||
+ delta = size - sk->sk_forward_alloc;
+ return delta <= 0 || __sk_mem_schedule(sk, delta, SK_MEM_RECV) ||
skb_pfmemalloc(skb);
}
+static inline int sk_unused_reserved_mem(const struct sock *sk)
+{
+ int unused_mem;
+
+ if (likely(!sk->sk_reserved_mem))
+ return 0;
+
+ unused_mem = sk->sk_reserved_mem - sk->sk_wmem_queued -
+ atomic_read(&sk->sk_rmem_alloc);
+
+ return unused_mem > 0 ? unused_mem : 0;
+}
+
static inline void sk_mem_reclaim(struct sock *sk)
{
+ int reclaimable;
+
if (!sk_has_account(sk))
return;
- if (sk->sk_forward_alloc >= SK_MEM_QUANTUM)
- __sk_mem_reclaim(sk, sk->sk_forward_alloc);
+
+ reclaimable = sk->sk_forward_alloc - sk_unused_reserved_mem(sk);
+
+ if (reclaimable >= (int)PAGE_SIZE)
+ __sk_mem_reclaim(sk, reclaimable);
}
-static inline void sk_mem_reclaim_partial(struct sock *sk)
+static inline void sk_mem_reclaim_final(struct sock *sk)
{
- if (!sk_has_account(sk))
- return;
- if (sk->sk_forward_alloc > SK_MEM_QUANTUM)
- __sk_mem_reclaim(sk, sk->sk_forward_alloc - 1);
+ sk->sk_reserved_mem = 0;
+ sk_mem_reclaim(sk);
}
static inline void sk_mem_charge(struct sock *sk, int size)
@@ -1469,42 +1686,7 @@ static inline void sk_mem_uncharge(struct sock *sk, int size)
if (!sk_has_account(sk))
return;
sk->sk_forward_alloc += size;
-
- /* Avoid a possible overflow.
- * TCP send queues can make this happen, if sk_mem_reclaim()
- * is not called and more than 2 GBytes are released at once.
- *
- * If we reach 2 MBytes, reclaim 1 MBytes right now, there is
- * no need to hold that much forward allocation anyway.
- */
- if (unlikely(sk->sk_forward_alloc >= 1 << 21))
- __sk_mem_reclaim(sk, 1 << 20);
-}
-
-DECLARE_STATIC_KEY_FALSE(tcp_tx_skb_cache_key);
-static inline void sk_wmem_free_skb(struct sock *sk, struct sk_buff *skb)
-{
- sock_set_flag(sk, SOCK_QUEUE_SHRUNK);
- sk_wmem_queued_add(sk, -skb->truesize);
- sk_mem_uncharge(sk, skb->truesize);
- if (static_branch_unlikely(&tcp_tx_skb_cache_key) &&
- !sk->sk_tx_skb_cache && !skb_cloned(skb)) {
- skb_ext_reset(skb);
- skb_zcopy_clear(skb, true);
- sk->sk_tx_skb_cache = skb;
- return;
- }
- __kfree_skb(skb);
-}
-
-static inline void sock_release_ownership(struct sock *sk)
-{
- if (sk->sk_lock.owned) {
- sk->sk_lock.owned = 0;
-
- /* The sk_lock has mutex_unlock() semantics: */
- mutex_release(&sk->sk_lock.dep_map, _RET_IP_);
- }
+ sk_mem_reclaim(sk);
}
/*
@@ -1526,13 +1708,11 @@ do { \
lockdep_init_map(&(sk)->sk_lock.dep_map, (name), (key), 0); \
} while (0)
-#ifdef CONFIG_LOCKDEP
static inline bool lockdep_sock_is_held(const struct sock *sk)
{
return lockdep_is_held(&sk->sk_lock) ||
lockdep_is_held(&sk->sk_lock.slock);
}
-#endif
void lock_sock_nested(struct sock *sk, int subclass);
@@ -1541,6 +1721,7 @@ static inline void lock_sock(struct sock *sk)
lock_sock_nested(sk, 0);
}
+void __lock_sock(struct sock *sk);
void __release_sock(struct sock *sk);
void release_sock(struct sock *sk);
@@ -1551,7 +1732,37 @@ void release_sock(struct sock *sk);
SINGLE_DEPTH_NESTING)
#define bh_unlock_sock(__sk) spin_unlock(&((__sk)->sk_lock.slock))
-bool lock_sock_fast(struct sock *sk);
+bool __lock_sock_fast(struct sock *sk) __acquires(&sk->sk_lock.slock);
+
+/**
+ * lock_sock_fast - fast version of lock_sock
+ * @sk: socket
+ *
+ * This version should be used for very small section, where process wont block
+ * return false if fast path is taken:
+ *
+ * sk_lock.slock locked, owned = 0, BH disabled
+ *
+ * return true if slow path is taken:
+ *
+ * sk_lock.slock unlocked, owned = 1, BH enabled
+ */
+static inline bool lock_sock_fast(struct sock *sk)
+{
+ /* The sk_lock has mutex_lock() semantics here. */
+ mutex_acquire(&sk->sk_lock.dep_map, 0, 0, _RET_IP_);
+
+ return __lock_sock_fast(sk);
+}
+
+/* fast socket lock variant for caller already holding a [different] socket lock */
+static inline bool lock_sock_fast_nested(struct sock *sk)
+{
+ mutex_acquire(&sk->sk_lock.dep_map, SINGLE_DEPTH_NESTING, 0, _RET_IP_);
+
+ return __lock_sock_fast(sk);
+}
+
/**
* unlock_sock_fast - complement of lock_sock_fast
* @sk: socket
@@ -1561,13 +1772,22 @@ bool lock_sock_fast(struct sock *sk);
* If slow mode is on, we call regular release_sock()
*/
static inline void unlock_sock_fast(struct sock *sk, bool slow)
+ __releases(&sk->sk_lock.slock)
{
- if (slow)
+ if (slow) {
release_sock(sk);
- else
+ __release(&sk->sk_lock.slock);
+ } else {
+ mutex_release(&sk->sk_lock.dep_map, _RET_IP_);
spin_unlock_bh(&sk->sk_lock.slock);
+ }
}
+void sockopt_lock_sock(struct sock *sk);
+void sockopt_release_sock(struct sock *sk);
+bool sockopt_ns_capable(struct user_namespace *ns, int cap);
+bool sockopt_capable(int cap);
+
/* Used by processes to "lock" a socket state, so that
* interrupts and bottom half handlers won't change it
* from under us. It essentially blocks any incoming
@@ -1600,12 +1820,23 @@ static inline bool sock_owned_by_user_nocheck(const struct sock *sk)
return sk->sk_lock.owned;
}
+static inline void sock_release_ownership(struct sock *sk)
+{
+ if (sock_owned_by_user_nocheck(sk)) {
+ sk->sk_lock.owned = 0;
+
+ /* The sk_lock has mutex_unlock() semantics: */
+ mutex_release(&sk->sk_lock.dep_map, _RET_IP_);
+ }
+}
+
/* no reclassification while locks are held */
static inline bool sock_allow_reclassification(const struct sock *csk)
{
struct sock *sk = (struct sock *)csk;
- return !sk->sk_lock.owned && !spin_is_locked(&sk->sk_lock.slock);
+ return !sock_owned_by_user_nocheck(sk) &&
+ !spin_is_locked(&sk->sk_lock.slock);
}
struct sock *sk_alloc(struct net *net, int family, gfp_t priority,
@@ -1626,27 +1857,45 @@ void sock_rfree(struct sk_buff *skb);
void sock_efree(struct sk_buff *skb);
#ifdef CONFIG_INET
void sock_edemux(struct sk_buff *skb);
+void sock_pfree(struct sk_buff *skb);
#else
#define sock_edemux sock_efree
#endif
+int sk_setsockopt(struct sock *sk, int level, int optname,
+ sockptr_t optval, unsigned int optlen);
int sock_setsockopt(struct socket *sock, int level, int op,
- char __user *optval, unsigned int optlen);
+ sockptr_t optval, unsigned int optlen);
+int sk_getsockopt(struct sock *sk, int level, int optname,
+ sockptr_t optval, sockptr_t optlen);
int sock_getsockopt(struct socket *sock, int level, int op,
char __user *optval, int __user *optlen);
int sock_gettstamp(struct socket *sock, void __user *userstamp,
bool timeval, bool time32);
-struct sk_buff *sock_alloc_send_skb(struct sock *sk, unsigned long size,
- int noblock, int *errcode);
struct sk_buff *sock_alloc_send_pskb(struct sock *sk, unsigned long header_len,
unsigned long data_len, int noblock,
int *errcode, int max_page_order);
+
+static inline struct sk_buff *sock_alloc_send_skb(struct sock *sk,
+ unsigned long size,
+ int noblock, int *errcode)
+{
+ return sock_alloc_send_pskb(sk, size, 0, noblock, errcode, 0);
+}
+
void *sock_kmalloc(struct sock *sk, int size, gfp_t priority);
void sock_kfree_s(struct sock *sk, void *mem, int size);
void sock_kzfree_s(struct sock *sk, void *mem, int size);
void sk_send_sigurg(struct sock *sk);
+static inline void sock_replace_proto(struct sock *sk, struct proto *proto)
+{
+ if (sk->sk_socket)
+ clear_bit(SOCK_SUPPORT_ZC, &sk->sk_socket->flags);
+ WRITE_ONCE(sk->sk_prot, proto);
+}
+
struct sockcm_cookie {
u64 transmit_time;
u32 mark;
@@ -1676,8 +1925,6 @@ int sock_no_getname(struct socket *, struct sockaddr *, int);
int sock_no_ioctl(struct socket *, unsigned int, unsigned long);
int sock_no_listen(struct socket *, int);
int sock_no_shutdown(struct socket *, int);
-int sock_no_getsockopt(struct socket *, int , int, char __user *, int __user *);
-int sock_no_setsockopt(struct socket *, int, int, char __user *, unsigned int);
int sock_no_sendmsg(struct socket *, struct msghdr *, size_t);
int sock_no_sendmsg_locked(struct sock *sk, struct msghdr *msg, size_t len);
int sock_no_recvmsg(struct socket *, struct msghdr *, size_t, int);
@@ -1697,11 +1944,7 @@ int sock_common_getsockopt(struct socket *sock, int level, int optname,
int sock_common_recvmsg(struct socket *sock, struct msghdr *msg, size_t size,
int flags);
int sock_common_setsockopt(struct socket *sock, int level, int optname,
- char __user *optval, unsigned int optlen);
-int compat_sock_common_getsockopt(struct socket *sock, int level,
- int optname, char __user *optval, int __user *optlen);
-int compat_sock_common_setsockopt(struct socket *sock, int level,
- int optname, char __user *optval, unsigned int optlen);
+ sockptr_t optval, unsigned int optlen);
void sk_common_release(struct sock *sk);
@@ -1779,40 +2022,54 @@ static inline int sk_tx_queue_get(const struct sock *sk)
return -1;
}
-static inline void sk_rx_queue_set(struct sock *sk, const struct sk_buff *skb)
+static inline void __sk_rx_queue_set(struct sock *sk,
+ const struct sk_buff *skb,
+ bool force_set)
{
-#ifdef CONFIG_XPS
+#ifdef CONFIG_SOCK_RX_QUEUE_MAPPING
if (skb_rx_queue_recorded(skb)) {
u16 rx_queue = skb_get_rx_queue(skb);
- if (WARN_ON_ONCE(rx_queue == NO_QUEUE_MAPPING))
- return;
-
- sk->sk_rx_queue_mapping = rx_queue;
+ if (force_set ||
+ unlikely(READ_ONCE(sk->sk_rx_queue_mapping) != rx_queue))
+ WRITE_ONCE(sk->sk_rx_queue_mapping, rx_queue);
}
#endif
}
+static inline void sk_rx_queue_set(struct sock *sk, const struct sk_buff *skb)
+{
+ __sk_rx_queue_set(sk, skb, true);
+}
+
+static inline void sk_rx_queue_update(struct sock *sk, const struct sk_buff *skb)
+{
+ __sk_rx_queue_set(sk, skb, false);
+}
+
static inline void sk_rx_queue_clear(struct sock *sk)
{
-#ifdef CONFIG_XPS
- sk->sk_rx_queue_mapping = NO_QUEUE_MAPPING;
+#ifdef CONFIG_SOCK_RX_QUEUE_MAPPING
+ WRITE_ONCE(sk->sk_rx_queue_mapping, NO_QUEUE_MAPPING);
#endif
}
-#ifdef CONFIG_XPS
static inline int sk_rx_queue_get(const struct sock *sk)
{
- if (sk && sk->sk_rx_queue_mapping != NO_QUEUE_MAPPING)
- return sk->sk_rx_queue_mapping;
+#ifdef CONFIG_SOCK_RX_QUEUE_MAPPING
+ if (sk) {
+ int res = READ_ONCE(sk->sk_rx_queue_mapping);
+
+ if (res != NO_QUEUE_MAPPING)
+ return res;
+ }
+#endif
return -1;
}
-#endif
static inline void sk_set_socket(struct sock *sk, struct socket *sock)
{
- sk_tx_queue_clear(sk);
sk->sk_socket = sock;
}
@@ -1859,20 +2116,24 @@ static inline kuid_t sock_net_uid(const struct net *net, const struct sock *sk)
static inline u32 net_tx_rndhash(void)
{
- u32 v = prandom_u32();
+ u32 v = get_random_u32();
return v ?: 1;
}
static inline void sk_set_txhash(struct sock *sk)
{
- sk->sk_txhash = net_tx_rndhash();
+ /* This pairs with READ_ONCE() in skb_set_hash_from_sk() */
+ WRITE_ONCE(sk->sk_txhash, net_tx_rndhash());
}
-static inline void sk_rethink_txhash(struct sock *sk)
+static inline bool sk_rethink_txhash(struct sock *sk)
{
- if (sk->sk_txhash)
+ if (sk->sk_txhash && sk->sk_txrehash == SOCK_TXREHASH_ENABLED) {
sk_set_txhash(sk);
+ return true;
+ }
+ return false;
}
static inline struct dst_entry *
@@ -1895,12 +2156,10 @@ sk_dst_get(struct sock *sk)
return dst;
}
-static inline void dst_negative_advice(struct sock *sk)
+static inline void __dst_negative_advice(struct sock *sk)
{
struct dst_entry *ndst, *dst = __sk_dst_get(sk);
- sk_rethink_txhash(sk);
-
if (dst && dst->ops->negative_advice) {
ndst = dst->ops->negative_advice(dst);
@@ -1912,6 +2171,12 @@ static inline void dst_negative_advice(struct sock *sk)
}
}
+static inline void dst_negative_advice(struct sock *sk)
+{
+ sk_rethink_txhash(sk);
+ __dst_negative_advice(sk);
+}
+
static inline void
__sk_dst_set(struct sock *sk, struct dst_entry *dst)
{
@@ -1962,13 +2227,10 @@ static inline void sock_confirm_neigh(struct sk_buff *skb, struct neighbour *n)
{
if (skb_get_dst_pending_confirm(skb)) {
struct sock *sk = skb->sk;
- unsigned long now = jiffies;
- /* avoid dirtying neighbour */
- if (READ_ONCE(n->confirmed) != now)
- WRITE_ONCE(n->confirmed, now);
if (sk && READ_ONCE(sk->sk_dst_pending_confirm))
WRITE_ONCE(sk->sk_dst_pending_confirm, 0);
+ neigh_confirm(n);
}
}
@@ -1981,10 +2243,10 @@ static inline bool sk_can_gso(const struct sock *sk)
void sk_setup_caps(struct sock *sk, struct dst_entry *dst);
-static inline void sk_nocaps_add(struct sock *sk, netdev_features_t flags)
+static inline void sk_gso_disable(struct sock *sk)
{
- sk->sk_route_nocaps |= flags;
- sk->sk_route_caps &= ~flags;
+ sk->sk_gso_disabled = 1;
+ sk->sk_route_caps &= ~NETIF_F_GSO_MASK;
}
static inline int skb_do_copy_data_nocache(struct sock *sk, struct sk_buff *skb,
@@ -2030,9 +2292,7 @@ static inline int skb_copy_to_page_nocache(struct sock *sk, struct iov_iter *fro
if (err)
return err;
- skb->len += copy;
- skb->data_len += copy;
- skb->truesize += copy;
+ skb_len_add(skb, copy);
sk_wmem_queued_add(sk, copy);
sk_mem_charge(sk, copy);
return 0;
@@ -2131,9 +2391,12 @@ static inline void sock_poll_wait(struct file *filp, struct socket *sock,
static inline void skb_set_hash_from_sk(struct sk_buff *skb, struct sock *sk)
{
- if (sk->sk_txhash) {
+ /* This pairs with WRITE_ONCE() in sk_set_txhash() */
+ u32 txhash = READ_ONCE(sk->sk_txhash);
+
+ if (txhash) {
skb->l4_hash = 1;
- skb->hash = sk->sk_txhash;
+ skb->hash = txhash;
}
}
@@ -2156,17 +2419,46 @@ static inline void skb_set_owner_r(struct sk_buff *skb, struct sock *sk)
sk_mem_charge(sk, skb->truesize);
}
+static inline __must_check bool skb_set_owner_sk_safe(struct sk_buff *skb, struct sock *sk)
+{
+ if (sk && refcount_inc_not_zero(&sk->sk_refcnt)) {
+ skb_orphan(skb);
+ skb->destructor = sock_efree;
+ skb->sk = sk;
+ return true;
+ }
+ return false;
+}
+
+static inline void skb_prepare_for_gro(struct sk_buff *skb)
+{
+ if (skb->destructor != sock_wfree) {
+ skb_orphan(skb);
+ return;
+ }
+ skb->slow_gro = 1;
+}
+
void sk_reset_timer(struct sock *sk, struct timer_list *timer,
unsigned long expires);
void sk_stop_timer(struct sock *sk, struct timer_list *timer);
+void sk_stop_timer_sync(struct sock *sk, struct timer_list *timer);
+
int __sk_queue_drop_skb(struct sock *sk, struct sk_buff_head *sk_queue,
struct sk_buff *skb, unsigned int flags,
void (*destructor)(struct sock *sk,
struct sk_buff *skb));
int __sock_queue_rcv_skb(struct sock *sk, struct sk_buff *skb);
-int sock_queue_rcv_skb(struct sock *sk, struct sk_buff *skb);
+
+int sock_queue_rcv_skb_reason(struct sock *sk, struct sk_buff *skb,
+ enum skb_drop_reason *reason);
+
+static inline int sock_queue_rcv_skb(struct sock *sk, struct sk_buff *skb)
+{
+ return sock_queue_rcv_skb_reason(sk, skb, NULL);
+}
int sock_queue_err_skb(struct sock *sk, struct sk_buff *skb);
struct sk_buff *sock_dequeue_err_skb(struct sock *sk);
@@ -2178,12 +2470,19 @@ struct sk_buff *sock_dequeue_err_skb(struct sock *sk);
static inline int sock_error(struct sock *sk)
{
int err;
- if (likely(!sk->sk_err))
+
+ /* Avoid an atomic operation for the common case.
+ * This is racy since another cpu/thread can change sk_err under us.
+ */
+ if (likely(data_race(!sk->sk_err)))
return 0;
+
err = xchg(&sk->sk_err, 0);
return -err;
}
+void sk_error_report(struct sock *sk);
+
static inline unsigned long sock_wspace(struct sock *sk)
{
int amt = 0;
@@ -2245,31 +2544,32 @@ static inline void sk_stream_moderate_sndbuf(struct sock *sk)
return;
val = min(sk->sk_sndbuf, sk->sk_wmem_queued >> 1);
+ val = max_t(u32, val, sk_unused_reserved_mem(sk));
WRITE_ONCE(sk->sk_sndbuf, max_t(u32, val, SOCK_MIN_SNDBUF));
}
-struct sk_buff *sk_stream_alloc_skb(struct sock *sk, int size, gfp_t gfp,
- bool force_schedule);
-
/**
* sk_page_frag - return an appropriate page_frag
* @sk: socket
*
* Use the per task page_frag instead of the per socket one for
- * optimization when we know that we're in the normal context and owns
+ * optimization when we know that we're in process context and own
* everything that's associated with %current.
*
- * gfpflags_allow_blocking() isn't enough here as direct reclaim may nest
- * inside other socket operations and end up recursing into sk_page_frag()
- * while it's already in use.
+ * Both direct reclaim and page faults can nest inside other
+ * socket operations and end up recursing into sk_page_frag()
+ * while it's already in use: explicitly avoid task page_frag
+ * usage if the caller is potentially doing any of them.
+ * This assumes that page fault handlers use the GFP_NOFS flags.
*
* Return: a per task page_frag if context allows that,
* otherwise a per socket one.
*/
static inline struct page_frag *sk_page_frag(struct sock *sk)
{
- if (gfpflags_normal_context(sk->sk_allocation))
+ if ((sk->sk_allocation & (__GFP_DIRECT_RECLAIM | __GFP_MEMALLOC | __GFP_FS)) ==
+ (__GFP_DIRECT_RECLAIM | __GFP_FS))
return &current->task_frag;
return &sk->sk_frag;
@@ -2290,6 +2590,11 @@ static inline gfp_t gfp_any(void)
return in_softirq() ? GFP_ATOMIC : GFP_KERNEL;
}
+static inline gfp_t gfp_memcg_charge(void)
+{
+ return in_softirq() ? GFP_ATOMIC : GFP_KERNEL;
+}
+
static inline long sock_rcvtimeo(const struct sock *sk, bool noblock)
{
return noblock ? 0 : sk->sk_rcvtimeo;
@@ -2404,20 +2709,21 @@ sock_recv_timestamp(struct msghdr *msg, struct sock *sk, struct sk_buff *skb)
__sock_recv_wifi_status(msg, sk, skb);
}
-void __sock_recv_ts_and_drops(struct msghdr *msg, struct sock *sk,
- struct sk_buff *skb);
+void __sock_recv_cmsgs(struct msghdr *msg, struct sock *sk,
+ struct sk_buff *skb);
#define SK_DEFAULT_STAMP (-1L * NSEC_PER_SEC)
-static inline void sock_recv_ts_and_drops(struct msghdr *msg, struct sock *sk,
- struct sk_buff *skb)
+static inline void sock_recv_cmsgs(struct msghdr *msg, struct sock *sk,
+ struct sk_buff *skb)
{
-#define FLAGS_TS_OR_DROPS ((1UL << SOCK_RXQ_OVFL) | \
- (1UL << SOCK_RCVTSTAMP))
+#define FLAGS_RECV_CMSGS ((1UL << SOCK_RXQ_OVFL) | \
+ (1UL << SOCK_RCVTSTAMP) | \
+ (1UL << SOCK_RCVMARK))
#define TSFLAGS_ANY (SOF_TIMESTAMPING_SOFTWARE | \
SOF_TIMESTAMPING_RAW_HARDWARE)
- if (sk->sk_flags & FLAGS_TS_OR_DROPS || sk->sk_tsflags & TSFLAGS_ANY)
- __sock_recv_ts_and_drops(msg, sk, skb);
+ if (sk->sk_flags & FLAGS_RECV_CMSGS || sk->sk_tsflags & TSFLAGS_ANY)
+ __sock_recv_cmsgs(msg, sk, skb);
else if (unlikely(sock_flag(sk, SOCK_TIMESTAMP)))
sock_write_timestamp(sk, skb->tstamp);
else if (unlikely(sk->sk_stamp == SK_DEFAULT_STAMP))
@@ -2442,7 +2748,7 @@ static inline void _sock_tx_timestamp(struct sock *sk, __u16 tsflags,
__sock_tx_timestamp(tsflags, tx_flags);
if (tsflags & SOF_TIMESTAMPING_OPT_ID && tskey &&
tsflags & SOF_TIMESTAMPING_TX_RECORD_MASK)
- *tskey = sk->sk_tskey++;
+ *tskey = atomic_inc_return(&sk->sk_tskey) - 1;
}
if (unlikely(sock_flag(sk, SOCK_WIFI_STATUS)))
*tx_flags |= SKBTX_WIFI_STATUS;
@@ -2460,7 +2766,11 @@ static inline void skb_setup_tx_timestamp(struct sk_buff *skb, __u16 tsflags)
&skb_shinfo(skb)->tskey);
}
-DECLARE_STATIC_KEY_FALSE(tcp_rx_skb_cache_key);
+static inline bool sk_is_tcp(const struct sock *sk)
+{
+ return sk->sk_type == SOCK_STREAM && sk->sk_protocol == IPPROTO_TCP;
+}
+
/**
* sk_eat_skb - Release a skb if it is no longer needed
* @sk: socket to eat this skb from
@@ -2472,47 +2782,56 @@ DECLARE_STATIC_KEY_FALSE(tcp_rx_skb_cache_key);
static inline void sk_eat_skb(struct sock *sk, struct sk_buff *skb)
{
__skb_unlink(skb, &sk->sk_receive_queue);
- if (static_branch_unlikely(&tcp_rx_skb_cache_key) &&
- !sk->sk_rx_skb_cache) {
- sk->sk_rx_skb_cache = skb;
- skb_orphan(skb);
- return;
- }
__kfree_skb(skb);
}
-static inline
-struct net *sock_net(const struct sock *sk)
+static inline bool
+skb_sk_is_prefetched(struct sk_buff *skb)
{
- return read_pnet(&sk->sk_net);
+#ifdef CONFIG_INET
+ return skb->destructor == sock_pfree;
+#else
+ return false;
+#endif /* CONFIG_INET */
}
-static inline
-void sock_net_set(struct sock *sk, struct net *net)
+/* This helper checks if a socket is a full socket,
+ * ie _not_ a timewait or request socket.
+ */
+static inline bool sk_fullsock(const struct sock *sk)
{
- write_pnet(&sk->sk_net, net);
+ return (1 << sk->sk_state) & ~(TCPF_TIME_WAIT | TCPF_NEW_SYN_RECV);
+}
+
+static inline bool
+sk_is_refcounted(struct sock *sk)
+{
+ /* Only full sockets have sk->sk_flags. */
+ return !sk_fullsock(sk) || !sock_flag(sk, SOCK_RCU_FREE);
}
-static inline struct sock *skb_steal_sock(struct sk_buff *skb)
+/**
+ * skb_steal_sock - steal a socket from an sk_buff
+ * @skb: sk_buff to steal the socket from
+ * @refcounted: is set to true if the socket is reference-counted
+ */
+static inline struct sock *
+skb_steal_sock(struct sk_buff *skb, bool *refcounted)
{
if (skb->sk) {
struct sock *sk = skb->sk;
+ *refcounted = true;
+ if (skb_sk_is_prefetched(skb))
+ *refcounted = sk_is_refcounted(sk);
skb->destructor = NULL;
skb->sk = NULL;
return sk;
}
+ *refcounted = false;
return NULL;
}
-/* This helper checks if a socket is a full socket,
- * ie _not_ a timewait or request socket.
- */
-static inline bool sk_fullsock(const struct sock *sk)
-{
- return (1 << sk->sk_state) & ~(TCPF_TIME_WAIT | TCPF_NEW_SYN_RECV);
-}
-
/* Checks if this SKB belongs to an HW offloaded socket
* and whether any SW fallbacks are required based on dev.
* Check decrypted mark in case skb_orphan() cleared socket.
@@ -2575,24 +2894,25 @@ extern int sysctl_optmem_max;
extern __u32 sysctl_wmem_default;
extern __u32 sysctl_rmem_default;
+#define SKB_FRAG_PAGE_ORDER get_order(32768)
DECLARE_STATIC_KEY_FALSE(net_high_order_alloc_disable_key);
static inline int sk_get_wmem0(const struct sock *sk, const struct proto *proto)
{
/* Does this proto have per netns sysctl_wmem ? */
if (proto->sysctl_wmem_offset)
- return *(int *)((void *)sock_net(sk) + proto->sysctl_wmem_offset);
+ return READ_ONCE(*(int *)((void *)sock_net(sk) + proto->sysctl_wmem_offset));
- return *proto->sysctl_wmem;
+ return READ_ONCE(*proto->sysctl_wmem);
}
static inline int sk_get_rmem0(const struct sock *sk, const struct proto *proto)
{
/* Does this proto have per netns sysctl_rmem ? */
if (proto->sysctl_rmem_offset)
- return *(int *)((void *)sock_net(sk) + proto->sysctl_rmem_offset);
+ return READ_ONCE(*(int *)((void *)sock_net(sk) + proto->sysctl_rmem_offset));
- return *proto->sysctl_rmem;
+ return READ_ONCE(*proto->sysctl_rmem);
}
/* Default TCP Small queue budget is ~1 ms of data (1sec >> 10)
@@ -2613,13 +2933,14 @@ static inline void sk_pacing_shift_update(struct sock *sk, int val)
*/
static inline bool sk_dev_equal_l3scope(struct sock *sk, int dif)
{
+ int bound_dev_if = READ_ONCE(sk->sk_bound_dev_if);
int mdif;
- if (!sk->sk_bound_dev_if || sk->sk_bound_dev_if == dif)
+ if (!bound_dev_if || bound_dev_if == dif)
return true;
mdif = l3mdev_master_ifindex_by_index(sock_net(sk), dif);
- if (mdif && mdif == sk->sk_bound_dev_if)
+ if (mdif && mdif == bound_dev_if)
return true;
return false;
@@ -2627,4 +2948,31 @@ static inline bool sk_dev_equal_l3scope(struct sock *sk, int dif)
void sock_def_readable(struct sock *sk);
+int sock_bindtoindex(struct sock *sk, int ifindex, bool lock_sk);
+void sock_set_timestamp(struct sock *sk, int optname, bool valbool);
+int sock_set_timestamping(struct sock *sk, int optname,
+ struct so_timestamping timestamping);
+
+void sock_enable_timestamps(struct sock *sk);
+void sock_no_linger(struct sock *sk);
+void sock_set_keepalive(struct sock *sk);
+void sock_set_priority(struct sock *sk, u32 priority);
+void sock_set_rcvbuf(struct sock *sk, int val);
+void sock_set_mark(struct sock *sk, u32 val);
+void sock_set_reuseaddr(struct sock *sk);
+void sock_set_reuseport(struct sock *sk);
+void sock_set_sndtimeo(struct sock *sk, s64 secs);
+
+int sock_bind_add(struct sock *sk, struct sockaddr *addr, int addr_len);
+
+int sock_get_timeout(long timeo, void *optval, bool old_timeval);
+int sock_copy_user_timeval(struct __kernel_sock_timeval *tv,
+ sockptr_t optval, int optlen, bool old_timeval);
+
+static inline bool sk_is_readable(struct sock *sk)
+{
+ if (sk->sk_prot->sock_is_readable)
+ return sk->sk_prot->sock_is_readable(sk);
+ return false;
+}
#endif /* _SOCK_H */
diff --git a/include/net/sock_reuseport.h b/include/net/sock_reuseport.h
index 43f4a818d88f..efc9085c6892 100644
--- a/include/net/sock_reuseport.h
+++ b/include/net/sock_reuseport.h
@@ -13,8 +13,9 @@ extern spinlock_t reuseport_lock;
struct sock_reuseport {
struct rcu_head rcu;
- u16 max_socks; /* length of socks */
- u16 num_socks; /* elements in socks */
+ u16 max_socks; /* length of socks */
+ u16 num_socks; /* elements in socks */
+ u16 num_closed_socks; /* closed elements in socks */
/* The last synq overflow event timestamp of this
* reuse->socks[] group.
*/
@@ -24,37 +25,38 @@ struct sock_reuseport {
unsigned int bind_inany:1;
unsigned int has_conns:1;
struct bpf_prog __rcu *prog; /* optional BPF sock selector */
- struct sock *socks[0]; /* array of sock pointers */
+ struct sock *socks[]; /* array of sock pointers */
};
extern int reuseport_alloc(struct sock *sk, bool bind_inany);
extern int reuseport_add_sock(struct sock *sk, struct sock *sk2,
bool bind_inany);
extern void reuseport_detach_sock(struct sock *sk);
+void reuseport_stop_listen_sock(struct sock *sk);
extern struct sock *reuseport_select_sock(struct sock *sk,
u32 hash,
struct sk_buff *skb,
int hdr_len);
+struct sock *reuseport_migrate_sock(struct sock *sk,
+ struct sock *migrating_sk,
+ struct sk_buff *skb);
extern int reuseport_attach_prog(struct sock *sk, struct bpf_prog *prog);
extern int reuseport_detach_prog(struct sock *sk);
-static inline bool reuseport_has_conns(struct sock *sk, bool set)
+static inline bool reuseport_has_conns(struct sock *sk)
{
struct sock_reuseport *reuse;
bool ret = false;
rcu_read_lock();
reuse = rcu_dereference(sk->sk_reuseport_cb);
- if (reuse) {
- if (set)
- reuse->has_conns = 1;
- ret = reuse->has_conns;
- }
+ if (reuse && reuse->has_conns)
+ ret = true;
rcu_read_unlock();
return ret;
}
-int reuseport_get_id(struct sock_reuseport *reuse);
+void reuseport_has_conns_set(struct sock *sk);
#endif /* _SOCK_REUSEPORT_H */
diff --git a/include/net/stp.h b/include/net/stp.h
index 2914e6d53490..528103fce2c0 100644
--- a/include/net/stp.h
+++ b/include/net/stp.h
@@ -2,6 +2,8 @@
#ifndef _NET_STP_H
#define _NET_STP_H
+#include <linux/if_ether.h>
+
struct stp_proto {
unsigned char group_address[ETH_ALEN];
void (*rcv)(const struct stp_proto *, struct sk_buff *,
diff --git a/include/net/strparser.h b/include/net/strparser.h
index 1d20b98493a1..41e2ce9e9e10 100644
--- a/include/net/strparser.h
+++ b/include/net/strparser.h
@@ -54,10 +54,35 @@ struct strp_msg {
int offset;
};
+struct _strp_msg {
+ /* Internal cb structure. struct strp_msg must be first for passing
+ * to upper layer.
+ */
+ struct strp_msg strp;
+ int accum_len;
+};
+
+struct sk_skb_cb {
+#define SK_SKB_CB_PRIV_LEN 20
+ unsigned char data[SK_SKB_CB_PRIV_LEN];
+ /* align strp on cache line boundary within skb->cb[] */
+ unsigned char pad[4];
+ struct _strp_msg strp;
+
+ /* strp users' data follows */
+ struct tls_msg {
+ u8 control;
+ } tls;
+ /* temp_reg is a temporary register used for bpf_convert_data_end_access
+ * when dst_reg == src_reg.
+ */
+ u64 temp_reg;
+};
+
static inline struct strp_msg *strp_msg(struct sk_buff *skb)
{
return (struct strp_msg *)((void *)skb->cb +
- offsetof(struct qdisc_skb_cb, data));
+ offsetof(struct sk_skb_cb, strp));
}
/* Structure for an attached lower socket */
diff --git a/include/net/switchdev.h b/include/net/switchdev.h
index aee86a189432..7dcdc97c0bc3 100644
--- a/include/net/switchdev.h
+++ b/include/net/switchdev.h
@@ -16,30 +16,36 @@
#define SWITCHDEV_F_SKIP_EOPNOTSUPP BIT(1)
#define SWITCHDEV_F_DEFER BIT(2)
-struct switchdev_trans {
- bool ph_prepare;
-};
-
-static inline bool switchdev_trans_ph_prepare(struct switchdev_trans *trans)
-{
- return trans && trans->ph_prepare;
-}
-
-static inline bool switchdev_trans_ph_commit(struct switchdev_trans *trans)
-{
- return trans && !trans->ph_prepare;
-}
-
enum switchdev_attr_id {
SWITCHDEV_ATTR_ID_UNDEFINED,
SWITCHDEV_ATTR_ID_PORT_STP_STATE,
+ SWITCHDEV_ATTR_ID_PORT_MST_STATE,
SWITCHDEV_ATTR_ID_PORT_BRIDGE_FLAGS,
SWITCHDEV_ATTR_ID_PORT_PRE_BRIDGE_FLAGS,
SWITCHDEV_ATTR_ID_PORT_MROUTER,
SWITCHDEV_ATTR_ID_BRIDGE_AGEING_TIME,
SWITCHDEV_ATTR_ID_BRIDGE_VLAN_FILTERING,
+ SWITCHDEV_ATTR_ID_BRIDGE_VLAN_PROTOCOL,
SWITCHDEV_ATTR_ID_BRIDGE_MC_DISABLED,
SWITCHDEV_ATTR_ID_BRIDGE_MROUTER,
+ SWITCHDEV_ATTR_ID_BRIDGE_MST,
+ SWITCHDEV_ATTR_ID_MRP_PORT_ROLE,
+ SWITCHDEV_ATTR_ID_VLAN_MSTI,
+};
+
+struct switchdev_mst_state {
+ u16 msti;
+ u8 state;
+};
+
+struct switchdev_brport_flags {
+ unsigned long val;
+ unsigned long mask;
+};
+
+struct switchdev_vlan_msti {
+ u16 vid;
+ u16 msti;
};
struct switchdev_attr {
@@ -50,11 +56,16 @@ struct switchdev_attr {
void (*complete)(struct net_device *dev, int err, void *priv);
union {
u8 stp_state; /* PORT_STP_STATE */
- unsigned long brport_flags; /* PORT_{PRE}_BRIDGE_FLAGS */
+ struct switchdev_mst_state mst_state; /* PORT_MST_STATE */
+ struct switchdev_brport_flags brport_flags; /* PORT_BRIDGE_FLAGS */
bool mrouter; /* PORT_MROUTER */
clock_t ageing_time; /* BRIDGE_AGEING_TIME */
bool vlan_filtering; /* BRIDGE_VLAN_FILTERING */
+ u16 vlan_protocol; /* BRIDGE_VLAN_PROTOCOL */
+ bool mst; /* BRIDGE_MST */
bool mc_disabled; /* MC_DISABLED */
+ u8 mrp_port_role; /* MRP_PORT_ROLE */
+ struct switchdev_vlan_msti vlan_msti; /* VLAN_MSTI */
} u;
};
@@ -63,9 +74,17 @@ enum switchdev_obj_id {
SWITCHDEV_OBJ_ID_PORT_VLAN,
SWITCHDEV_OBJ_ID_PORT_MDB,
SWITCHDEV_OBJ_ID_HOST_MDB,
+ SWITCHDEV_OBJ_ID_MRP,
+ SWITCHDEV_OBJ_ID_RING_TEST_MRP,
+ SWITCHDEV_OBJ_ID_RING_ROLE_MRP,
+ SWITCHDEV_OBJ_ID_RING_STATE_MRP,
+ SWITCHDEV_OBJ_ID_IN_TEST_MRP,
+ SWITCHDEV_OBJ_ID_IN_ROLE_MRP,
+ SWITCHDEV_OBJ_ID_IN_STATE_MRP,
};
struct switchdev_obj {
+ struct list_head list;
struct net_device *orig_dev;
enum switchdev_obj_id id;
u32 flags;
@@ -77,8 +96,14 @@ struct switchdev_obj {
struct switchdev_obj_port_vlan {
struct switchdev_obj obj;
u16 flags;
- u16 vid_begin;
- u16 vid_end;
+ u16 vid;
+ /* If set, the notifier signifies a change of one of the following
+ * flags for a VLAN that already exists:
+ * - BRIDGE_VLAN_INFO_PVID
+ * - BRIDGE_VLAN_INFO_UNTAGGED
+ * Entries with BRIDGE_VLAN_INFO_BRENTRY unset are not notified at all.
+ */
+ bool changed;
};
#define SWITCHDEV_OBJ_PORT_VLAN(OBJ) \
@@ -94,14 +119,105 @@ struct switchdev_obj_port_mdb {
#define SWITCHDEV_OBJ_PORT_MDB(OBJ) \
container_of((OBJ), struct switchdev_obj_port_mdb, obj)
+
+/* SWITCHDEV_OBJ_ID_MRP */
+struct switchdev_obj_mrp {
+ struct switchdev_obj obj;
+ struct net_device *p_port;
+ struct net_device *s_port;
+ u32 ring_id;
+ u16 prio;
+};
+
+#define SWITCHDEV_OBJ_MRP(OBJ) \
+ container_of((OBJ), struct switchdev_obj_mrp, obj)
+
+/* SWITCHDEV_OBJ_ID_RING_TEST_MRP */
+struct switchdev_obj_ring_test_mrp {
+ struct switchdev_obj obj;
+ /* The value is in us and a value of 0 represents to stop */
+ u32 interval;
+ u8 max_miss;
+ u32 ring_id;
+ u32 period;
+ bool monitor;
+};
+
+#define SWITCHDEV_OBJ_RING_TEST_MRP(OBJ) \
+ container_of((OBJ), struct switchdev_obj_ring_test_mrp, obj)
+
+/* SWICHDEV_OBJ_ID_RING_ROLE_MRP */
+struct switchdev_obj_ring_role_mrp {
+ struct switchdev_obj obj;
+ u8 ring_role;
+ u32 ring_id;
+ u8 sw_backup;
+};
+
+#define SWITCHDEV_OBJ_RING_ROLE_MRP(OBJ) \
+ container_of((OBJ), struct switchdev_obj_ring_role_mrp, obj)
+
+struct switchdev_obj_ring_state_mrp {
+ struct switchdev_obj obj;
+ u8 ring_state;
+ u32 ring_id;
+};
+
+#define SWITCHDEV_OBJ_RING_STATE_MRP(OBJ) \
+ container_of((OBJ), struct switchdev_obj_ring_state_mrp, obj)
+
+/* SWITCHDEV_OBJ_ID_IN_TEST_MRP */
+struct switchdev_obj_in_test_mrp {
+ struct switchdev_obj obj;
+ /* The value is in us and a value of 0 represents to stop */
+ u32 interval;
+ u32 in_id;
+ u32 period;
+ u8 max_miss;
+};
+
+#define SWITCHDEV_OBJ_IN_TEST_MRP(OBJ) \
+ container_of((OBJ), struct switchdev_obj_in_test_mrp, obj)
+
+/* SWICHDEV_OBJ_ID_IN_ROLE_MRP */
+struct switchdev_obj_in_role_mrp {
+ struct switchdev_obj obj;
+ struct net_device *i_port;
+ u32 ring_id;
+ u16 in_id;
+ u8 in_role;
+ u8 sw_backup;
+};
+
+#define SWITCHDEV_OBJ_IN_ROLE_MRP(OBJ) \
+ container_of((OBJ), struct switchdev_obj_in_role_mrp, obj)
+
+struct switchdev_obj_in_state_mrp {
+ struct switchdev_obj obj;
+ u32 in_id;
+ u8 in_state;
+};
+
+#define SWITCHDEV_OBJ_IN_STATE_MRP(OBJ) \
+ container_of((OBJ), struct switchdev_obj_in_state_mrp, obj)
+
typedef int switchdev_obj_dump_cb_t(struct switchdev_obj *obj);
+struct switchdev_brport {
+ struct net_device *dev;
+ const void *ctx;
+ struct notifier_block *atomic_nb;
+ struct notifier_block *blocking_nb;
+ bool tx_fwd_offload;
+};
+
enum switchdev_notifier_type {
SWITCHDEV_FDB_ADD_TO_BRIDGE = 1,
SWITCHDEV_FDB_DEL_TO_BRIDGE,
SWITCHDEV_FDB_ADD_TO_DEVICE,
SWITCHDEV_FDB_DEL_TO_DEVICE,
SWITCHDEV_FDB_OFFLOADED,
+ SWITCHDEV_FDB_FLUSH_TO_BRIDGE,
SWITCHDEV_PORT_OBJ_ADD, /* Blocking. */
SWITCHDEV_PORT_OBJ_DEL, /* Blocking. */
@@ -112,35 +228,46 @@ enum switchdev_notifier_type {
SWITCHDEV_VXLAN_FDB_ADD_TO_DEVICE,
SWITCHDEV_VXLAN_FDB_DEL_TO_DEVICE,
SWITCHDEV_VXLAN_FDB_OFFLOADED,
+
+ SWITCHDEV_BRPORT_OFFLOADED,
+ SWITCHDEV_BRPORT_UNOFFLOADED,
};
struct switchdev_notifier_info {
struct net_device *dev;
struct netlink_ext_ack *extack;
+ const void *ctx;
};
+/* Remember to update br_switchdev_fdb_populate() when adding
+ * new members to this structure
+ */
struct switchdev_notifier_fdb_info {
struct switchdev_notifier_info info; /* must be first */
const unsigned char *addr;
u16 vid;
u8 added_by_user:1,
+ is_local:1,
offloaded:1;
};
struct switchdev_notifier_port_obj_info {
struct switchdev_notifier_info info; /* must be first */
const struct switchdev_obj *obj;
- struct switchdev_trans *trans;
bool handled;
};
struct switchdev_notifier_port_attr_info {
struct switchdev_notifier_info info; /* must be first */
const struct switchdev_attr *attr;
- struct switchdev_trans *trans;
bool handled;
};
+struct switchdev_notifier_brport_info {
+ struct switchdev_notifier_info info; /* must be first */
+ const struct switchdev_brport brport;
+};
+
static inline struct net_device *
switchdev_notifier_info_to_dev(const struct switchdev_notifier_info *info)
{
@@ -153,11 +280,29 @@ switchdev_notifier_info_to_extack(const struct switchdev_notifier_info *info)
return info->extack;
}
+static inline bool
+switchdev_fdb_is_dynamically_learned(const struct switchdev_notifier_fdb_info *fdb_info)
+{
+ return !fdb_info->added_by_user && !fdb_info->is_local;
+}
+
#ifdef CONFIG_NET_SWITCHDEV
+int switchdev_bridge_port_offload(struct net_device *brport_dev,
+ struct net_device *dev, const void *ctx,
+ struct notifier_block *atomic_nb,
+ struct notifier_block *blocking_nb,
+ bool tx_fwd_offload,
+ struct netlink_ext_ack *extack);
+void switchdev_bridge_port_unoffload(struct net_device *brport_dev,
+ const void *ctx,
+ struct notifier_block *atomic_nb,
+ struct notifier_block *blocking_nb);
+
void switchdev_deferred_process(void);
int switchdev_port_attr_set(struct net_device *dev,
- const struct switchdev_attr *attr);
+ const struct switchdev_attr *attr,
+ struct netlink_ext_ack *extack);
int switchdev_port_obj_add(struct net_device *dev,
const struct switchdev_obj *obj,
struct netlink_ext_ack *extack);
@@ -180,33 +325,76 @@ void switchdev_port_fwd_mark_set(struct net_device *dev,
struct net_device *group_dev,
bool joining);
+int switchdev_handle_fdb_event_to_device(struct net_device *dev, unsigned long event,
+ const struct switchdev_notifier_fdb_info *fdb_info,
+ bool (*check_cb)(const struct net_device *dev),
+ bool (*foreign_dev_check_cb)(const struct net_device *dev,
+ const struct net_device *foreign_dev),
+ int (*mod_cb)(struct net_device *dev, struct net_device *orig_dev,
+ unsigned long event, const void *ctx,
+ const struct switchdev_notifier_fdb_info *fdb_info));
+
int switchdev_handle_port_obj_add(struct net_device *dev,
struct switchdev_notifier_port_obj_info *port_obj_info,
bool (*check_cb)(const struct net_device *dev),
- int (*add_cb)(struct net_device *dev,
+ int (*add_cb)(struct net_device *dev, const void *ctx,
+ const struct switchdev_obj *obj,
+ struct netlink_ext_ack *extack));
+int switchdev_handle_port_obj_add_foreign(struct net_device *dev,
+ struct switchdev_notifier_port_obj_info *port_obj_info,
+ bool (*check_cb)(const struct net_device *dev),
+ bool (*foreign_dev_check_cb)(const struct net_device *dev,
+ const struct net_device *foreign_dev),
+ int (*add_cb)(struct net_device *dev, const void *ctx,
const struct switchdev_obj *obj,
- struct switchdev_trans *trans,
struct netlink_ext_ack *extack));
int switchdev_handle_port_obj_del(struct net_device *dev,
struct switchdev_notifier_port_obj_info *port_obj_info,
bool (*check_cb)(const struct net_device *dev),
- int (*del_cb)(struct net_device *dev,
+ int (*del_cb)(struct net_device *dev, const void *ctx,
+ const struct switchdev_obj *obj));
+int switchdev_handle_port_obj_del_foreign(struct net_device *dev,
+ struct switchdev_notifier_port_obj_info *port_obj_info,
+ bool (*check_cb)(const struct net_device *dev),
+ bool (*foreign_dev_check_cb)(const struct net_device *dev,
+ const struct net_device *foreign_dev),
+ int (*del_cb)(struct net_device *dev, const void *ctx,
const struct switchdev_obj *obj));
int switchdev_handle_port_attr_set(struct net_device *dev,
struct switchdev_notifier_port_attr_info *port_attr_info,
bool (*check_cb)(const struct net_device *dev),
- int (*set_cb)(struct net_device *dev,
+ int (*set_cb)(struct net_device *dev, const void *ctx,
const struct switchdev_attr *attr,
- struct switchdev_trans *trans));
+ struct netlink_ext_ack *extack));
#else
+static inline int
+switchdev_bridge_port_offload(struct net_device *brport_dev,
+ struct net_device *dev, const void *ctx,
+ struct notifier_block *atomic_nb,
+ struct notifier_block *blocking_nb,
+ bool tx_fwd_offload,
+ struct netlink_ext_ack *extack)
+{
+ return -EOPNOTSUPP;
+}
+
+static inline void
+switchdev_bridge_port_unoffload(struct net_device *brport_dev,
+ const void *ctx,
+ struct notifier_block *atomic_nb,
+ struct notifier_block *blocking_nb)
+{
+}
+
static inline void switchdev_deferred_process(void)
{
}
static inline int switchdev_port_attr_set(struct net_device *dev,
- const struct switchdev_attr *attr)
+ const struct switchdev_attr *attr,
+ struct netlink_ext_ack *extack)
{
return -EOPNOTSUPP;
}
@@ -264,12 +452,36 @@ call_switchdev_blocking_notifiers(unsigned long val,
}
static inline int
+switchdev_handle_fdb_event_to_device(struct net_device *dev, unsigned long event,
+ const struct switchdev_notifier_fdb_info *fdb_info,
+ bool (*check_cb)(const struct net_device *dev),
+ bool (*foreign_dev_check_cb)(const struct net_device *dev,
+ const struct net_device *foreign_dev),
+ int (*mod_cb)(struct net_device *dev, struct net_device *orig_dev,
+ unsigned long event, const void *ctx,
+ const struct switchdev_notifier_fdb_info *fdb_info))
+{
+ return 0;
+}
+
+static inline int
switchdev_handle_port_obj_add(struct net_device *dev,
struct switchdev_notifier_port_obj_info *port_obj_info,
bool (*check_cb)(const struct net_device *dev),
- int (*add_cb)(struct net_device *dev,
+ int (*add_cb)(struct net_device *dev, const void *ctx,
+ const struct switchdev_obj *obj,
+ struct netlink_ext_ack *extack))
+{
+ return 0;
+}
+
+static inline int switchdev_handle_port_obj_add_foreign(struct net_device *dev,
+ struct switchdev_notifier_port_obj_info *port_obj_info,
+ bool (*check_cb)(const struct net_device *dev),
+ bool (*foreign_dev_check_cb)(const struct net_device *dev,
+ const struct net_device *foreign_dev),
+ int (*add_cb)(struct net_device *dev, const void *ctx,
const struct switchdev_obj *obj,
- struct switchdev_trans *trans,
struct netlink_ext_ack *extack))
{
return 0;
@@ -279,7 +491,19 @@ static inline int
switchdev_handle_port_obj_del(struct net_device *dev,
struct switchdev_notifier_port_obj_info *port_obj_info,
bool (*check_cb)(const struct net_device *dev),
- int (*del_cb)(struct net_device *dev,
+ int (*del_cb)(struct net_device *dev, const void *ctx,
+ const struct switchdev_obj *obj))
+{
+ return 0;
+}
+
+static inline int
+switchdev_handle_port_obj_del_foreign(struct net_device *dev,
+ struct switchdev_notifier_port_obj_info *port_obj_info,
+ bool (*check_cb)(const struct net_device *dev),
+ bool (*foreign_dev_check_cb)(const struct net_device *dev,
+ const struct net_device *foreign_dev),
+ int (*del_cb)(struct net_device *dev, const void *ctx,
const struct switchdev_obj *obj))
{
return 0;
@@ -289,9 +513,9 @@ static inline int
switchdev_handle_port_attr_set(struct net_device *dev,
struct switchdev_notifier_port_attr_info *port_attr_info,
bool (*check_cb)(const struct net_device *dev),
- int (*set_cb)(struct net_device *dev,
+ int (*set_cb)(struct net_device *dev, const void *ctx,
const struct switchdev_attr *attr,
- struct switchdev_trans *trans))
+ struct netlink_ext_ack *extack))
{
return 0;
}
diff --git a/include/net/tc_act/tc_ct.h b/include/net/tc_act/tc_ct.h
index bdc20ab3b88d..8250d6f0a462 100644
--- a/include/net/tc_act/tc_ct.h
+++ b/include/net/tc_act/tc_ct.h
@@ -25,6 +25,9 @@ struct tcf_ct_params {
u16 ct_action;
struct rcu_head rcu;
+
+ struct tcf_ct_flow_table *ct_ft;
+ struct nf_flowtable *nf_ft;
};
struct tcf_ct {
@@ -33,8 +36,10 @@ struct tcf_ct {
};
#define to_ct(a) ((struct tcf_ct *)a)
-#define to_ct_params(a) ((struct tcf_ct_params *) \
- rtnl_dereference((to_ct(a)->params)))
+#define to_ct_params(a) \
+ ((struct tcf_ct_params *) \
+ rcu_dereference_protected(to_ct(a)->params, \
+ lockdep_is_held(&a->tcfa_lock)))
static inline uint16_t tcf_ct_zone(const struct tc_action *a)
{
@@ -46,11 +51,36 @@ static inline int tcf_ct_action(const struct tc_action *a)
return to_ct_params(a)->ct_action;
}
+static inline struct nf_flowtable *tcf_ct_ft(const struct tc_action *a)
+{
+ return to_ct_params(a)->nf_ft;
+}
+
#else
static inline uint16_t tcf_ct_zone(const struct tc_action *a) { return 0; }
static inline int tcf_ct_action(const struct tc_action *a) { return 0; }
+static inline struct nf_flowtable *tcf_ct_ft(const struct tc_action *a)
+{
+ return NULL;
+}
#endif /* CONFIG_NF_CONNTRACK */
+#if IS_ENABLED(CONFIG_NET_ACT_CT)
+static inline void
+tcf_ct_flow_table_restore_skb(struct sk_buff *skb, unsigned long cookie)
+{
+ enum ip_conntrack_info ctinfo = cookie & NFCT_INFOMASK;
+ struct nf_conn *ct;
+
+ ct = (struct nf_conn *)(cookie & NFCT_PTRMASK);
+ nf_conntrack_get(&ct->ct_general);
+ nf_ct_set(skb, ct, ctinfo);
+}
+#else
+static inline void
+tcf_ct_flow_table_restore_skb(struct sk_buff *skb, unsigned long cookie) { }
+#endif
+
static inline bool is_tcf_ct(const struct tc_action *a)
{
#if defined(CONFIG_NET_CLS_ACT) && IS_ENABLED(CONFIG_NF_CONNTRACK)
diff --git a/include/net/tc_act/tc_gact.h b/include/net/tc_act/tc_gact.h
index eb8f01c819e6..832efd40e023 100644
--- a/include/net/tc_act/tc_gact.h
+++ b/include/net/tc_act/tc_gact.h
@@ -59,4 +59,19 @@ static inline u32 tcf_gact_goto_chain_index(const struct tc_action *a)
return READ_ONCE(a->tcfa_action) & TC_ACT_EXT_VAL_MASK;
}
+static inline bool is_tcf_gact_continue(const struct tc_action *a)
+{
+ return __is_tcf_gact_act(a, TC_ACT_UNSPEC, false);
+}
+
+static inline bool is_tcf_gact_reclassify(const struct tc_action *a)
+{
+ return __is_tcf_gact_act(a, TC_ACT_RECLASSIFY, false);
+}
+
+static inline bool is_tcf_gact_pipe(const struct tc_action *a)
+{
+ return __is_tcf_gact_act(a, TC_ACT_PIPE, false);
+}
+
#endif /* __NET_TC_GACT_H */
diff --git a/include/net/tc_act/tc_gate.h b/include/net/tc_act/tc_gate.h
new file mode 100644
index 000000000000..c8fa11ebb397
--- /dev/null
+++ b/include/net/tc_act/tc_gate.h
@@ -0,0 +1,141 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+/* Copyright 2020 NXP */
+
+#ifndef __NET_TC_GATE_H
+#define __NET_TC_GATE_H
+
+#include <net/act_api.h>
+#include <linux/tc_act/tc_gate.h>
+
+struct action_gate_entry {
+ u8 gate_state;
+ u32 interval;
+ s32 ipv;
+ s32 maxoctets;
+};
+
+struct tcfg_gate_entry {
+ int index;
+ u8 gate_state;
+ u32 interval;
+ s32 ipv;
+ s32 maxoctets;
+ struct list_head list;
+};
+
+struct tcf_gate_params {
+ s32 tcfg_priority;
+ u64 tcfg_basetime;
+ u64 tcfg_cycletime;
+ u64 tcfg_cycletime_ext;
+ u32 tcfg_flags;
+ s32 tcfg_clockid;
+ size_t num_entries;
+ struct list_head entries;
+};
+
+#define GATE_ACT_GATE_OPEN BIT(0)
+#define GATE_ACT_PENDING BIT(1)
+
+struct tcf_gate {
+ struct tc_action common;
+ struct tcf_gate_params param;
+ u8 current_gate_status;
+ ktime_t current_close_time;
+ u32 current_entry_octets;
+ s32 current_max_octets;
+ struct tcfg_gate_entry *next_entry;
+ struct hrtimer hitimer;
+ enum tk_offsets tk_offset;
+};
+
+#define to_gate(a) ((struct tcf_gate *)a)
+
+static inline bool is_tcf_gate(const struct tc_action *a)
+{
+#ifdef CONFIG_NET_CLS_ACT
+ if (a->ops && a->ops->id == TCA_ID_GATE)
+ return true;
+#endif
+ return false;
+}
+
+static inline s32 tcf_gate_prio(const struct tc_action *a)
+{
+ s32 tcfg_prio;
+
+ tcfg_prio = to_gate(a)->param.tcfg_priority;
+
+ return tcfg_prio;
+}
+
+static inline u64 tcf_gate_basetime(const struct tc_action *a)
+{
+ u64 tcfg_basetime;
+
+ tcfg_basetime = to_gate(a)->param.tcfg_basetime;
+
+ return tcfg_basetime;
+}
+
+static inline u64 tcf_gate_cycletime(const struct tc_action *a)
+{
+ u64 tcfg_cycletime;
+
+ tcfg_cycletime = to_gate(a)->param.tcfg_cycletime;
+
+ return tcfg_cycletime;
+}
+
+static inline u64 tcf_gate_cycletimeext(const struct tc_action *a)
+{
+ u64 tcfg_cycletimeext;
+
+ tcfg_cycletimeext = to_gate(a)->param.tcfg_cycletime_ext;
+
+ return tcfg_cycletimeext;
+}
+
+static inline u32 tcf_gate_num_entries(const struct tc_action *a)
+{
+ u32 num_entries;
+
+ num_entries = to_gate(a)->param.num_entries;
+
+ return num_entries;
+}
+
+static inline struct action_gate_entry
+ *tcf_gate_get_list(const struct tc_action *a)
+{
+ struct action_gate_entry *oe;
+ struct tcf_gate_params *p;
+ struct tcfg_gate_entry *entry;
+ u32 num_entries;
+ int i = 0;
+
+ p = &to_gate(a)->param;
+ num_entries = p->num_entries;
+
+ list_for_each_entry(entry, &p->entries, list)
+ i++;
+
+ if (i != num_entries)
+ return NULL;
+
+ oe = kcalloc(num_entries, sizeof(*oe), GFP_ATOMIC);
+ if (!oe)
+ return NULL;
+
+ i = 0;
+ list_for_each_entry(entry, &p->entries, list) {
+ oe[i].gate_state = entry->gate_state;
+ oe[i].interval = entry->interval;
+ oe[i].ipv = entry->ipv;
+ oe[i].maxoctets = entry->maxoctets;
+ i++;
+ }
+
+ return oe;
+}
+#endif
diff --git a/include/net/tc_act/tc_mirred.h b/include/net/tc_act/tc_mirred.h
index 1cace4c69e44..32ce8ea36950 100644
--- a/include/net/tc_act/tc_mirred.h
+++ b/include/net/tc_act/tc_mirred.h
@@ -10,6 +10,7 @@ struct tcf_mirred {
int tcfm_eaction;
bool tcfm_mac_header_xmit;
struct net_device __rcu *tcfm_dev;
+ netdevice_tracker tcfm_dev_tracker;
struct list_head tcfm_list;
};
#define to_mirred(a) ((struct tcf_mirred *)a)
diff --git a/include/net/tc_act/tc_pedit.h b/include/net/tc_act/tc_pedit.h
index 748cf87a4d7e..3e02709a1df6 100644
--- a/include/net/tc_act/tc_pedit.h
+++ b/include/net/tc_act/tc_pedit.h
@@ -14,6 +14,7 @@ struct tcf_pedit {
struct tc_action common;
unsigned char tcfp_nkeys;
unsigned char tcfp_flags;
+ u32 tcfp_off_max_hint;
struct tc_pedit_key *tcfp_keys;
struct tcf_pedit_key_ex *tcfp_keys_ex;
};
diff --git a/include/net/tc_act/tc_police.h b/include/net/tc_act/tc_police.h
index cfdc7cb82cad..283bde711a42 100644
--- a/include/net/tc_act/tc_police.h
+++ b/include/net/tc_act/tc_police.h
@@ -10,10 +10,13 @@ struct tcf_police_params {
s64 tcfp_burst;
u32 tcfp_mtu;
s64 tcfp_mtu_ptoks;
+ s64 tcfp_pkt_burst;
struct psched_ratecfg rate;
bool rate_present;
struct psched_ratecfg peak;
bool peak_present;
+ struct psched_pktrate ppsrate;
+ bool pps_present;
struct rcu_head rcu;
};
@@ -24,6 +27,7 @@ struct tcf_police {
spinlock_t tcfp_lock ____cacheline_aligned_in_smp;
s64 tcfp_toks;
s64 tcfp_ptoks;
+ s64 tcfp_pkttoks;
s64 tcfp_t_c;
};
@@ -54,17 +58,135 @@ static inline u64 tcf_police_rate_bytes_ps(const struct tc_action *act)
struct tcf_police *police = to_police(act);
struct tcf_police_params *params;
- params = rcu_dereference_bh_rtnl(police->params);
+ params = rcu_dereference_protected(police->params,
+ lockdep_is_held(&police->tcf_lock));
return params->rate.rate_bytes_ps;
}
-static inline s64 tcf_police_tcfp_burst(const struct tc_action *act)
+static inline u32 tcf_police_burst(const struct tc_action *act)
{
struct tcf_police *police = to_police(act);
struct tcf_police_params *params;
+ u32 burst;
- params = rcu_dereference_bh_rtnl(police->params);
- return params->tcfp_burst;
+ params = rcu_dereference_protected(police->params,
+ lockdep_is_held(&police->tcf_lock));
+
+ /*
+ * "rate" bytes "burst" nanoseconds
+ * ------------ * -------------------
+ * 1 second 2^6 ticks
+ *
+ * ------------------------------------
+ * NSEC_PER_SEC nanoseconds
+ * ------------------------
+ * 2^6 ticks
+ *
+ * "rate" bytes "burst" nanoseconds 2^6 ticks
+ * = ------------ * ------------------- * ------------------------
+ * 1 second 2^6 ticks NSEC_PER_SEC nanoseconds
+ *
+ * "rate" * "burst"
+ * = ---------------- bytes/nanosecond
+ * NSEC_PER_SEC^2
+ *
+ *
+ * "rate" * "burst"
+ * = ---------------- bytes/second
+ * NSEC_PER_SEC
+ */
+ burst = div_u64(params->tcfp_burst * params->rate.rate_bytes_ps,
+ NSEC_PER_SEC);
+
+ return burst;
+}
+
+static inline u64 tcf_police_rate_pkt_ps(const struct tc_action *act)
+{
+ struct tcf_police *police = to_police(act);
+ struct tcf_police_params *params;
+
+ params = rcu_dereference_protected(police->params,
+ lockdep_is_held(&police->tcf_lock));
+ return params->ppsrate.rate_pkts_ps;
+}
+
+static inline u32 tcf_police_burst_pkt(const struct tc_action *act)
+{
+ struct tcf_police *police = to_police(act);
+ struct tcf_police_params *params;
+ u32 burst;
+
+ params = rcu_dereference_protected(police->params,
+ lockdep_is_held(&police->tcf_lock));
+
+ /*
+ * "rate" pkts "burst" nanoseconds
+ * ------------ * -------------------
+ * 1 second 2^6 ticks
+ *
+ * ------------------------------------
+ * NSEC_PER_SEC nanoseconds
+ * ------------------------
+ * 2^6 ticks
+ *
+ * "rate" pkts "burst" nanoseconds 2^6 ticks
+ * = ------------ * ------------------- * ------------------------
+ * 1 second 2^6 ticks NSEC_PER_SEC nanoseconds
+ *
+ * "rate" * "burst"
+ * = ---------------- pkts/nanosecond
+ * NSEC_PER_SEC^2
+ *
+ *
+ * "rate" * "burst"
+ * = ---------------- pkts/second
+ * NSEC_PER_SEC
+ */
+ burst = div_u64(params->tcfp_pkt_burst * params->ppsrate.rate_pkts_ps,
+ NSEC_PER_SEC);
+
+ return burst;
+}
+
+static inline u32 tcf_police_tcfp_mtu(const struct tc_action *act)
+{
+ struct tcf_police *police = to_police(act);
+ struct tcf_police_params *params;
+
+ params = rcu_dereference_protected(police->params,
+ lockdep_is_held(&police->tcf_lock));
+ return params->tcfp_mtu;
+}
+
+static inline u64 tcf_police_peakrate_bytes_ps(const struct tc_action *act)
+{
+ struct tcf_police *police = to_police(act);
+ struct tcf_police_params *params;
+
+ params = rcu_dereference_protected(police->params,
+ lockdep_is_held(&police->tcf_lock));
+ return params->peak.rate_bytes_ps;
+}
+
+static inline u32 tcf_police_tcfp_ewma_rate(const struct tc_action *act)
+{
+ struct tcf_police *police = to_police(act);
+ struct tcf_police_params *params;
+
+ params = rcu_dereference_protected(police->params,
+ lockdep_is_held(&police->tcf_lock));
+ return params->tcfp_ewma_rate;
+}
+
+static inline u16 tcf_police_rate_overhead(const struct tc_action *act)
+{
+ struct tcf_police *police = to_police(act);
+ struct tcf_police_params *params;
+
+ params = rcu_dereference_protected(police->params,
+ lockdep_is_held(&police->tcf_lock));
+ return params->rate.overhead;
}
#endif /* __NET_TC_POLICE_H */
diff --git a/include/net/tc_act/tc_skbedit.h b/include/net/tc_act/tc_skbedit.h
index b22a1f641f02..dc1079f28e13 100644
--- a/include/net/tc_act/tc_skbedit.h
+++ b/include/net/tc_act/tc_skbedit.h
@@ -17,6 +17,7 @@ struct tcf_skbedit_params {
u32 mark;
u32 mask;
u16 queue_mapping;
+ u16 mapping_mod;
u16 ptype;
struct rcu_head rcu;
};
@@ -27,8 +28,8 @@ struct tcf_skbedit {
};
#define to_skbedit(a) ((struct tcf_skbedit *)a)
-/* Return true iff action is mark */
-static inline bool is_tcf_skbedit_mark(const struct tc_action *a)
+/* Return true iff action is the one identified by FLAG. */
+static inline bool is_tcf_skbedit_with_flag(const struct tc_action *a, u32 flag)
{
#ifdef CONFIG_NET_CLS_ACT
u32 flags;
@@ -37,12 +38,18 @@ static inline bool is_tcf_skbedit_mark(const struct tc_action *a)
rcu_read_lock();
flags = rcu_dereference(to_skbedit(a)->params)->flags;
rcu_read_unlock();
- return flags == SKBEDIT_F_MARK;
+ return flags == flag;
}
#endif
return false;
}
+/* Return true iff action is mark */
+static inline bool is_tcf_skbedit_mark(const struct tc_action *a)
+{
+ return is_tcf_skbedit_with_flag(a, SKBEDIT_F_MARK);
+}
+
static inline u32 tcf_skbedit_mark(const struct tc_action *a)
{
u32 mark;
@@ -57,17 +64,7 @@ static inline u32 tcf_skbedit_mark(const struct tc_action *a)
/* Return true iff action is ptype */
static inline bool is_tcf_skbedit_ptype(const struct tc_action *a)
{
-#ifdef CONFIG_NET_CLS_ACT
- u32 flags;
-
- if (a->ops && a->ops->id == TCA_ID_SKBEDIT) {
- rcu_read_lock();
- flags = rcu_dereference(to_skbedit(a)->params)->flags;
- rcu_read_unlock();
- return flags == SKBEDIT_F_PTYPE;
- }
-#endif
- return false;
+ return is_tcf_skbedit_with_flag(a, SKBEDIT_F_PTYPE);
}
static inline u32 tcf_skbedit_ptype(const struct tc_action *a)
@@ -81,4 +78,33 @@ static inline u32 tcf_skbedit_ptype(const struct tc_action *a)
return ptype;
}
+/* Return true iff action is priority */
+static inline bool is_tcf_skbedit_priority(const struct tc_action *a)
+{
+ return is_tcf_skbedit_with_flag(a, SKBEDIT_F_PRIORITY);
+}
+
+static inline u32 tcf_skbedit_priority(const struct tc_action *a)
+{
+ u32 priority;
+
+ rcu_read_lock();
+ priority = rcu_dereference(to_skbedit(a)->params)->priority;
+ rcu_read_unlock();
+
+ return priority;
+}
+
+/* Return true iff action is queue_mapping */
+static inline bool is_tcf_skbedit_queue_mapping(const struct tc_action *a)
+{
+ return is_tcf_skbedit_with_flag(a, SKBEDIT_F_QUEUE_MAPPING);
+}
+
+/* Return true iff action is inheritdsfield */
+static inline bool is_tcf_skbedit_inheritdsfield(const struct tc_action *a)
+{
+ return is_tcf_skbedit_with_flag(a, SKBEDIT_F_INHERITDSFIELD);
+}
+
#endif /* __NET_TC_SKBEDIT_H */
diff --git a/include/net/tc_act/tc_tunnel_key.h b/include/net/tc_act/tc_tunnel_key.h
index 0689d9bcdf84..879fe8cff581 100644
--- a/include/net/tc_act/tc_tunnel_key.h
+++ b/include/net/tc_act/tc_tunnel_key.h
@@ -28,8 +28,10 @@ static inline bool is_tcf_tunnel_set(const struct tc_action *a)
{
#ifdef CONFIG_NET_CLS_ACT
struct tcf_tunnel_key *t = to_tunnel_key(a);
- struct tcf_tunnel_key_params *params = rtnl_dereference(t->params);
+ struct tcf_tunnel_key_params *params;
+ params = rcu_dereference_protected(t->params,
+ lockdep_is_held(&a->tcfa_lock));
if (a->ops && a->ops->id == TCA_ID_TUNNEL_KEY)
return params->tcft_action == TCA_TUNNEL_KEY_ACT_SET;
#endif
@@ -40,8 +42,10 @@ static inline bool is_tcf_tunnel_release(const struct tc_action *a)
{
#ifdef CONFIG_NET_CLS_ACT
struct tcf_tunnel_key *t = to_tunnel_key(a);
- struct tcf_tunnel_key_params *params = rtnl_dereference(t->params);
+ struct tcf_tunnel_key_params *params;
+ params = rcu_dereference_protected(t->params,
+ lockdep_is_held(&a->tcfa_lock));
if (a->ops && a->ops->id == TCA_ID_TUNNEL_KEY)
return params->tcft_action == TCA_TUNNEL_KEY_ACT_RELEASE;
#endif
@@ -52,7 +56,10 @@ static inline struct ip_tunnel_info *tcf_tunnel_info(const struct tc_action *a)
{
#ifdef CONFIG_NET_CLS_ACT
struct tcf_tunnel_key *t = to_tunnel_key(a);
- struct tcf_tunnel_key_params *params = rtnl_dereference(t->params);
+ struct tcf_tunnel_key_params *params;
+
+ params = rcu_dereference_protected(t->params,
+ lockdep_is_held(&a->tcfa_lock));
return &params->tcft_enc_metadata->u.tun_info;
#else
@@ -69,7 +76,7 @@ tcf_tunnel_info_copy(const struct tc_action *a)
if (tun) {
size_t tun_size = sizeof(*tun) + tun->options_len;
struct ip_tunnel_info *tun_copy = kmemdup(tun, tun_size,
- GFP_KERNEL);
+ GFP_ATOMIC);
return tun_copy;
}
diff --git a/include/net/tc_act/tc_vlan.h b/include/net/tc_act/tc_vlan.h
index 4e2502408c31..904eddfc1826 100644
--- a/include/net/tc_act/tc_vlan.h
+++ b/include/net/tc_act/tc_vlan.h
@@ -11,9 +11,12 @@
struct tcf_vlan_params {
int tcfv_action;
+ unsigned char tcfv_push_dst[ETH_ALEN];
+ unsigned char tcfv_push_src[ETH_ALEN];
u16 tcfv_push_vid;
__be16 tcfv_push_proto;
u8 tcfv_push_prio;
+ bool tcfv_push_prio_exists;
struct rcu_head rcu;
};
@@ -75,4 +78,14 @@ static inline u8 tcf_vlan_push_prio(const struct tc_action *a)
return tcfv_push_prio;
}
+
+static inline void tcf_vlan_push_eth(unsigned char *src, unsigned char *dest,
+ const struct tc_action *a)
+{
+ rcu_read_lock();
+ memcpy(dest, rcu_dereference(to_vlan(a)->vlan_p)->tcfv_push_dst, ETH_ALEN);
+ memcpy(src, rcu_dereference(to_vlan(a)->vlan_p)->tcfv_push_src, ETH_ALEN);
+ rcu_read_unlock();
+}
+
#endif /* __NET_TC_VLAN_H */
diff --git a/include/net/tcp.h b/include/net/tcp.h
index a5ea27df3c2b..14d45661a84d 100644
--- a/include/net/tcp.h
+++ b/include/net/tcp.h
@@ -23,9 +23,9 @@
#include <linux/cache.h>
#include <linux/percpu.h>
#include <linux/skbuff.h>
-#include <linux/cryptohash.h>
#include <linux/kref.h>
#include <linux/ktime.h>
+#include <linux/indirect_call_wrapper.h>
#include <net/inet_connection_sock.h>
#include <net/inet_timewait_sock.h>
@@ -48,10 +48,12 @@
extern struct inet_hashinfo tcp_hashinfo;
-extern struct percpu_counter tcp_orphan_count;
+DECLARE_PER_CPU(unsigned int, tcp_orphan_count);
+int tcp_orphan_count_sum(void);
+
void tcp_time_wait(struct sock *sk, int state, int timeo);
-#define MAX_TCP_HEADER (128 + MAX_HEADER)
+#define MAX_TCP_HEADER L1_CACHE_ALIGN(128 + MAX_HEADER)
#define MAX_TCP_OPTION_SPACE 40
#define TCP_MIN_SND_MSS 48
#define TCP_MIN_GSO_SIZE (TCP_MIN_SND_MSS - MAX_TCP_OPTION_SPACE)
@@ -126,6 +128,7 @@ void tcp_time_wait(struct sock *sk, int state, int timeo);
* to combine FIN-WAIT-2 timeout with
* TIME-WAIT timer.
*/
+#define TCP_FIN_TIMEOUT_MAX (120 * HZ) /* max TCP_LINGER2 value (two minutes) */
#define TCP_DELACK_MAX ((unsigned)(HZ/5)) /* maximal time to delay before sending an ACK */
#if HZ >= 100
@@ -250,6 +253,8 @@ extern long sysctl_tcp_mem[3];
#define TCP_RACK_NO_DUPTHRESH 0x4 /* Do not use DUPACK threshold in RACK */
extern atomic_long_t tcp_memory_allocated;
+DECLARE_PER_CPU(int, tcp_memory_per_cpu_fw_alloc);
+
extern struct percpu_counter tcp_sockets_allocated;
extern unsigned long tcp_memory_pressure;
@@ -287,21 +292,18 @@ static inline bool tcp_out_of_memory(struct sock *sk)
return false;
}
-void sk_forced_mem_schedule(struct sock *sk, int size);
-
-static inline bool tcp_too_many_orphans(struct sock *sk, int shift)
+static inline void tcp_wmem_free_skb(struct sock *sk, struct sk_buff *skb)
{
- struct percpu_counter *ocp = sk->sk_prot->orphan_count;
- int orphans = percpu_counter_read_positive(ocp);
-
- if (orphans << shift > sysctl_tcp_max_orphans) {
- orphans = percpu_counter_sum_positive(ocp);
- if (orphans << shift > sysctl_tcp_max_orphans)
- return true;
- }
- return false;
+ sk_wmem_queued_add(sk, -skb->truesize);
+ if (!skb_zcopy_pure(skb))
+ sk_mem_uncharge(sk, skb->truesize);
+ else
+ sk_mem_uncharge(sk, SKB_TRUESIZE(skb_end_offset(skb)));
+ __kfree_skb(skb);
}
+void sk_forced_mem_schedule(struct sock *sk, int size);
+
bool tcp_check_oom(struct sock *sk, int shift);
@@ -321,9 +323,12 @@ void tcp_shutdown(struct sock *sk, int how);
int tcp_v4_early_demux(struct sk_buff *skb);
int tcp_v4_rcv(struct sk_buff *skb);
+void tcp_remove_empty_skb(struct sock *sk);
int tcp_v4_tw_remember_stamp(struct inet_timewait_sock *tw);
int tcp_sendmsg(struct sock *sk, struct msghdr *msg, size_t size);
int tcp_sendmsg_locked(struct sock *sk, struct msghdr *msg, size_t size);
+int tcp_sendmsg_fastopen(struct sock *sk, struct msghdr *msg, int *copied,
+ size_t size, struct ubuf_info *uarg);
int tcp_sendpage(struct sock *sk, struct page *page, int offset, size_t size,
int flags);
int tcp_sendpage_locked(struct sock *sk, struct page *page, int offset,
@@ -343,9 +348,12 @@ void tcp_rcv_established(struct sock *sk, struct sk_buff *skb);
void tcp_rcv_space_adjust(struct sock *sk);
int tcp_twsk_unique(struct sock *sk, struct sock *sktw, void *twp);
void tcp_twsk_destructor(struct sock *sk);
+void tcp_twsk_purge(struct list_head *net_exit_list, int family);
ssize_t tcp_splice_read(struct socket *sk, loff_t *ppos,
struct pipe_inode_info *pipe, size_t len,
unsigned int flags);
+struct sk_buff *tcp_stream_alloc_skb(struct sock *sk, int size, gfp_t gfp,
+ bool force_schedule);
void tcp_enter_quickack_mode(struct sock *sk, unsigned int max_quickacks);
static inline void tcp_dec_quickack_mode(struct sock *sk,
@@ -385,30 +393,37 @@ struct sock *tcp_check_req(struct sock *sk, struct sk_buff *skb,
int tcp_child_process(struct sock *parent, struct sock *child,
struct sk_buff *skb);
void tcp_enter_loss(struct sock *sk);
-void tcp_cwnd_reduction(struct sock *sk, int newly_acked_sacked, int flag);
+void tcp_cwnd_reduction(struct sock *sk, int newly_acked_sacked, int newly_lost, int flag);
void tcp_clear_retrans(struct tcp_sock *tp);
void tcp_update_metrics(struct sock *sk);
void tcp_init_metrics(struct sock *sk);
void tcp_metrics_init(void);
bool tcp_peer_is_proven(struct request_sock *req, struct dst_entry *dst);
+void __tcp_close(struct sock *sk, long timeout);
void tcp_close(struct sock *sk, long timeout);
void tcp_init_sock(struct sock *sk);
-void tcp_init_transfer(struct sock *sk, int bpf_op);
+void tcp_init_transfer(struct sock *sk, int bpf_op, struct sk_buff *skb);
__poll_t tcp_poll(struct file *file, struct socket *sock,
struct poll_table_struct *wait);
+int do_tcp_getsockopt(struct sock *sk, int level,
+ int optname, sockptr_t optval, sockptr_t optlen);
int tcp_getsockopt(struct sock *sk, int level, int optname,
char __user *optval, int __user *optlen);
-int tcp_setsockopt(struct sock *sk, int level, int optname,
- char __user *optval, unsigned int optlen);
-int compat_tcp_getsockopt(struct sock *sk, int level, int optname,
- char __user *optval, int __user *optlen);
-int compat_tcp_setsockopt(struct sock *sk, int level, int optname,
- char __user *optval, unsigned int optlen);
+bool tcp_bpf_bypass_getsockopt(int level, int optname);
+int do_tcp_setsockopt(struct sock *sk, int level, int optname,
+ sockptr_t optval, unsigned int optlen);
+int tcp_setsockopt(struct sock *sk, int level, int optname, sockptr_t optval,
+ unsigned int optlen);
void tcp_set_keepalive(struct sock *sk, int val);
void tcp_syn_ack_timeout(const struct request_sock *req);
-int tcp_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, int nonblock,
+int tcp_recvmsg(struct sock *sk, struct msghdr *msg, size_t len,
int flags, int *addr_len);
int tcp_set_rcvlowat(struct sock *sk, int val);
+int tcp_set_window_clamp(struct sock *sk, int val);
+void tcp_update_recv_tstamps(struct sk_buff *skb,
+ struct scm_timestamping_internal *tss);
+void tcp_recv_timestamp(struct msghdr *msg, const struct sock *sk,
+ struct scm_timestamping_internal *tss);
void tcp_data_ready(struct sock *sk);
#ifdef CONFIG_MMU
int tcp_mmap(struct file *file, struct socket *sock,
@@ -426,6 +441,7 @@ u16 tcp_v4_get_syncookie(struct sock *sk, struct iphdr *iph,
struct tcphdr *th, u32 *cookie);
u16 tcp_v6_get_syncookie(struct sock *sk, struct ipv6hdr *iph,
struct tcphdr *th, u32 *cookie);
+u16 tcp_parse_mss_option(const struct tcphdr *th, u16 user_mss);
u16 tcp_get_syncookie_mss(struct request_sock_ops *rsk_ops,
const struct tcp_request_sock_ops *af_ops,
struct sock *sk, struct tcphdr *th);
@@ -436,6 +452,7 @@ u16 tcp_get_syncookie_mss(struct request_sock_ops *rsk_ops,
void tcp_v4_send_check(struct sock *sk, struct sk_buff *skb);
void tcp_v4_mtu_reduced(struct sock *sk);
void tcp_req_err(struct sock *sk, u32 seq, bool abort);
+void tcp_ld_RTO_revert(struct sock *sk, u32 seq);
int tcp_v4_conn_request(struct sock *sk, struct sk_buff *skb);
struct sock *tcp_create_openreq_child(const struct sock *sk,
struct request_sock *req,
@@ -457,7 +474,8 @@ enum tcp_synack_type {
struct sk_buff *tcp_make_synack(const struct sock *sk, struct dst_entry *dst,
struct request_sock *req,
struct tcp_fastopen_cookie *foc,
- enum tcp_synack_type synack_type);
+ enum tcp_synack_type synack_type,
+ struct sk_buff *syn_skb);
int tcp_disconnect(struct sock *sk, int flags);
void tcp_finish_connect(struct sock *sk, struct sk_buff *skb);
@@ -471,6 +489,9 @@ struct sock *tcp_get_cookie_sock(struct sock *sk, struct sk_buff *skb,
int __cookie_v4_check(const struct iphdr *iph, const struct tcphdr *th,
u32 cookie);
struct sock *cookie_v4_check(struct sock *sk, struct sk_buff *skb);
+struct request_sock *cookie_tcp_reqsk_alloc(const struct request_sock_ops *ops,
+ const struct tcp_request_sock_ops *af_ops,
+ struct sock *sk, struct sk_buff *skb);
#ifdef CONFIG_SYN_COOKIES
/* Syncookies use a monotonic timer which increments every 60 seconds.
@@ -570,6 +591,8 @@ __u32 cookie_v6_init_sequence(const struct sk_buff *skb, __u16 *mss);
#endif
/* tcp_output.c */
+void tcp_skb_entail(struct sock *sk, struct sk_buff *skb);
+void tcp_mark_push(struct tcp_sock *tp, struct sk_buff *skb);
void __tcp_push_pending_frames(struct sock *sk, unsigned int cur_mss,
int nonagle);
int __tcp_retransmit_skb(struct sock *sk, struct sk_buff *skb, int segs);
@@ -605,9 +628,10 @@ void tcp_skb_collapse_tstamp(struct sk_buff *skb,
/* tcp_input.c */
void tcp_rearm_rto(struct sock *sk);
void tcp_synack_rtt_meas(struct sock *sk, struct request_sock *req);
-void tcp_reset(struct sock *sk);
+void tcp_reset(struct sock *sk, struct sk_buff *skb);
void tcp_skb_mark_lost_uncond_verify(struct tcp_sock *tp, struct sk_buff *skb);
void tcp_fin(struct sock *sk);
+void tcp_check_space(struct sock *sk);
/* tcp_timer.c */
void tcp_init_xmit_timers(struct sock *);
@@ -624,6 +648,7 @@ static inline void tcp_clear_xmit_timers(struct sock *sk)
unsigned int tcp_sync_mss(struct sock *sk, u32 pmtu);
unsigned int tcp_current_mss(struct sock *sk);
+u32 tcp_clamp_probe0_to_user_timeout(const struct sock *sk, u32 when);
/* Bound MSS / TSO packet size with the half of the window */
static inline int tcp_bound_to_half_wnd(struct tcp_sock *tp, int pktsize)
@@ -654,13 +679,15 @@ void tcp_get_info(struct sock *, struct tcp_info *);
/* Read 'sendfile()'-style from a TCP socket */
int tcp_read_sock(struct sock *sk, read_descriptor_t *desc,
sk_read_actor_t recv_actor);
+int tcp_read_skb(struct sock *sk, skb_read_actor_t recv_actor);
+struct sk_buff *tcp_recv_skb(struct sock *sk, u32 seq, u32 *off);
+void tcp_read_done(struct sock *sk, size_t len);
void tcp_initialize_rcv_mss(struct sock *sk);
int tcp_mtu_to_mss(struct sock *sk, int pmtu);
int tcp_mss_to_mtu(struct sock *sk, int mss);
void tcp_mtup_init(struct sock *sk);
-void tcp_init_buffer_space(struct sock *sk);
static inline void tcp_bound_rto(const struct sock *sk)
{
@@ -675,6 +702,10 @@ static inline u32 __tcp_set_rto(const struct tcp_sock *tp)
static inline void __tcp_fast_path_on(struct tcp_sock *tp, u32 snd_wnd)
{
+ /* mptcp hooks are only on the slow path */
+ if (sk_is_mptcp((struct sock *)tp))
+ return;
+
tp->pred_flags = htonl((tp->tcp_header_len << 26) |
ntohl(TCP_FLAG_ACK) |
snd_wnd);
@@ -700,7 +731,7 @@ static inline void tcp_fast_path_check(struct sock *sk)
static inline u32 tcp_rto_min(struct sock *sk)
{
const struct dst_entry *dst = __sk_dst_get(sk);
- u32 rto_min = TCP_RTO_MIN;
+ u32 rto_min = inet_csk(sk)->icsk_rto_min;
if (dst && dst_metric_locked(dst, RTAX_RTO_MIN))
rto_min = dst_metric_rtt(dst, RTAX_RTO_MIN);
@@ -859,10 +890,11 @@ struct tcp_skb_cb {
__u32 ack_seq; /* Sequence number ACK'd */
union {
struct {
+#define TCPCB_DELIVERED_CE_MASK ((1U<<20) - 1)
/* There is space for up to 24 bytes */
- __u32 in_flight:30,/* Bytes in flight at transmit */
- is_app_limited:1, /* cwnd not fully used? */
- unused:1;
+ __u32 is_app_limited:1, /* cwnd not fully used? */
+ delivered_ce:20,
+ unused:11;
/* pkts S/ACKed so far upon tx of skb, incl retrans: */
__u32 delivered;
/* start of send pipeline phase */
@@ -876,35 +908,12 @@ struct tcp_skb_cb {
struct inet6_skb_parm h6;
#endif
} header; /* For incoming skbs */
- struct {
- __u32 flags;
- struct sock *sk_redir;
- void *data_end;
- } bpf;
};
};
#define TCP_SKB_CB(__skb) ((struct tcp_skb_cb *)&((__skb)->cb[0]))
-static inline void bpf_compute_data_end_sk_skb(struct sk_buff *skb)
-{
- TCP_SKB_CB(skb)->bpf.data_end = skb->data + skb_headlen(skb);
-}
-
-static inline bool tcp_skb_bpf_ingress(const struct sk_buff *skb)
-{
- return TCP_SKB_CB(skb)->bpf.flags & BPF_F_INGRESS;
-}
-
-static inline struct sock *tcp_skb_bpf_redirect_fetch(struct sk_buff *skb)
-{
- return TCP_SKB_CB(skb)->bpf.sk_redir;
-}
-
-static inline void tcp_skb_bpf_redirect_clear(struct sk_buff *skb)
-{
- TCP_SKB_CB(skb)->bpf.sk_redir = NULL;
-}
+extern const struct inet_connection_sock_af_ops ipv4_specific;
#if IS_ENABLED(CONFIG_IPV6)
/* This is the variant of inet6_iif() that must be used by TCP,
@@ -931,17 +940,14 @@ static inline int tcp_v6_sdif(const struct sk_buff *skb)
#endif
return 0;
}
-#endif
-static inline bool inet_exact_dif_match(struct net *net, struct sk_buff *skb)
-{
-#if IS_ENABLED(CONFIG_NET_L3_MASTER_DEV)
- if (!net->ipv4.sysctl_tcp_l3mdev_accept &&
- skb && ipv4_l3mdev_skb(IPCB(skb)->flags))
- return true;
+extern const struct inet_connection_sock_af_ops ipv6_specific;
+
+INDIRECT_CALLABLE_DECLARE(void tcp_v6_send_check(struct sock *sk, struct sk_buff *skb));
+INDIRECT_CALLABLE_DECLARE(int tcp_v6_rcv(struct sk_buff *skb));
+void tcp_v6_early_demux(struct sk_buff *skb);
+
#endif
- return false;
-}
/* TCP_SKB_CB reference means this can not be used from early demux */
static inline int tcp_v4_sdif(struct sk_buff *skb)
@@ -986,7 +992,8 @@ static inline bool tcp_skb_can_collapse(const struct sk_buff *to,
const struct sk_buff *from)
{
return likely(tcp_skb_can_collapse_to(to) &&
- mptcp_skb_can_collapse(to, from));
+ mptcp_skb_can_collapse(to, from) &&
+ skb_pure_zcopy_same(to, from));
}
/* Events passed to congestion control interface */
@@ -1040,7 +1047,9 @@ struct ack_sample {
struct rate_sample {
u64 prior_mstamp; /* starting timestamp for interval */
u32 prior_delivered; /* tp->delivered at "prior_mstamp" */
+ u32 prior_delivered_ce;/* tp->delivered_ce at "prior_mstamp" */
s32 delivered; /* number of packets delivered over interval */
+ s32 delivered_ce; /* number of packets delivered w/ CE marks*/
long interval_us; /* time for tp->delivered to incr "delivered" */
u32 snd_interval_us; /* snd interval for delivered packets */
u32 rcv_interval_us; /* rcv interval for delivered packets */
@@ -1048,50 +1057,63 @@ struct rate_sample {
int losses; /* number of packets marked lost upon ACK */
u32 acked_sacked; /* number of packets newly (S)ACKed upon ACK */
u32 prior_in_flight; /* in flight before this ACK */
+ u32 last_end_seq; /* end_seq of most recently ACKed packet */
bool is_app_limited; /* is sample from packet with bubble in pipe? */
bool is_retrans; /* is sample from retransmission? */
bool is_ack_delayed; /* is this (likely) a delayed ACK? */
};
struct tcp_congestion_ops {
- struct list_head list;
- u32 key;
- u32 flags;
-
- /* initialize private data (optional) */
- void (*init)(struct sock *sk);
- /* cleanup private data (optional) */
- void (*release)(struct sock *sk);
+/* fast path fields are put first to fill one cache line */
/* return slow start threshold (required) */
u32 (*ssthresh)(struct sock *sk);
+
/* do new cwnd calculation (required) */
void (*cong_avoid)(struct sock *sk, u32 ack, u32 acked);
+
/* call before changing ca_state (optional) */
void (*set_state)(struct sock *sk, u8 new_state);
+
/* call when cwnd event occurs (optional) */
void (*cwnd_event)(struct sock *sk, enum tcp_ca_event ev);
+
/* call when ack arrives (optional) */
void (*in_ack_event)(struct sock *sk, u32 flags);
- /* new value of cwnd after loss (required) */
- u32 (*undo_cwnd)(struct sock *sk);
+
/* hook for packet ack accounting (optional) */
void (*pkts_acked)(struct sock *sk, const struct ack_sample *sample);
+
/* override sysctl_tcp_min_tso_segs */
u32 (*min_tso_segs)(struct sock *sk);
- /* returns the multiplier used in tcp_sndbuf_expand (optional) */
- u32 (*sndbuf_expand)(struct sock *sk);
+
/* call when packets are delivered to update cwnd and pacing rate,
* after all the ca_state processing. (optional)
*/
void (*cong_control)(struct sock *sk, const struct rate_sample *rs);
+
+
+ /* new value of cwnd after loss (required) */
+ u32 (*undo_cwnd)(struct sock *sk);
+ /* returns the multiplier used in tcp_sndbuf_expand (optional) */
+ u32 (*sndbuf_expand)(struct sock *sk);
+
+/* control/slow paths put last */
/* get info for inet_diag (optional) */
size_t (*get_info)(struct sock *sk, u32 ext, int *attr,
union tcp_cc_info *info);
- char name[TCP_CA_NAME_MAX];
- struct module *owner;
-};
+ char name[TCP_CA_NAME_MAX];
+ struct module *owner;
+ struct list_head list;
+ u32 key;
+ u32 flags;
+
+ /* initialize private data (optional) */
+ void (*init)(struct sock *sk);
+ /* cleanup private data (optional) */
+ void (*release)(struct sock *sk);
+} ____cacheline_aligned_in_smp;
int tcp_register_congestion_control(struct tcp_congestion_ops *type);
void tcp_unregister_congestion_control(struct tcp_congestion_ops *type);
@@ -1105,7 +1127,7 @@ void tcp_get_available_congestion_control(char *buf, size_t len);
void tcp_get_allowed_congestion_control(char *buf, size_t len);
int tcp_set_allowed_congestion_control(char *allowed);
int tcp_set_congestion_control(struct sock *sk, const char *name, bool load,
- bool reinit, bool cap_net_admin);
+ bool cap_net_admin);
u32 tcp_slow_start(struct tcp_sock *tp, u32 acked);
void tcp_cong_avoid_ai(struct tcp_sock *tp, u32 w, u32 acked);
@@ -1133,15 +1155,6 @@ static inline bool tcp_ca_needs_ecn(const struct sock *sk)
return icsk->icsk_ca_ops->flags & TCP_CONG_NEEDS_ECN;
}
-static inline void tcp_set_ca_state(struct sock *sk, const u8 ca_state)
-{
- struct inet_connection_sock *icsk = inet_csk(sk);
-
- if (icsk->icsk_ca_ops->set_state)
- icsk->icsk_ca_ops->set_state(sk, ca_state);
- icsk->icsk_ca_state = ca_state;
-}
-
static inline void tcp_ca_event(struct sock *sk, const enum tcp_ca_event event)
{
const struct inet_connection_sock *icsk = inet_csk(sk);
@@ -1150,6 +1163,9 @@ static inline void tcp_ca_event(struct sock *sk, const enum tcp_ca_event event)
icsk->icsk_ca_ops->cwnd_event(sk, event);
}
+/* From tcp_cong.c */
+void tcp_set_ca_state(struct sock *sk, const u8 ca_state);
+
/* From tcp_rate.c */
void tcp_rate_skb_sent(struct sock *sk, struct sk_buff *skb);
void tcp_rate_skb_delivered(struct sock *sk, struct sk_buff *skb,
@@ -1158,6 +1174,11 @@ void tcp_rate_gen(struct sock *sk, u32 delivered, u32 lost,
bool is_sack_reneg, struct rate_sample *rs);
void tcp_rate_check_app_limited(struct sock *sk);
+static inline bool tcp_skb_sent_after(u64 t1, u64 t2, u32 seq1, u32 seq2)
+{
+ return t1 > t2 || (t1 == t2 && after(seq1, seq2));
+}
+
/* These functions determine how the current flow behaves in respect of SACK
* handling. SACK is negotiated with the peer, and therefore it can vary
* between different flows.
@@ -1201,9 +1222,20 @@ static inline unsigned int tcp_packets_in_flight(const struct tcp_sock *tp)
#define TCP_INFINITE_SSTHRESH 0x7fffffff
+static inline u32 tcp_snd_cwnd(const struct tcp_sock *tp)
+{
+ return tp->snd_cwnd;
+}
+
+static inline void tcp_snd_cwnd_set(struct tcp_sock *tp, u32 val)
+{
+ WARN_ON_ONCE((int)val <= 0);
+ tp->snd_cwnd = val;
+}
+
static inline bool tcp_in_slow_start(const struct tcp_sock *tp)
{
- return tp->snd_cwnd < tp->snd_ssthresh;
+ return tcp_snd_cwnd(tp) < tp->snd_ssthresh;
}
static inline bool tcp_in_initial_slowstart(const struct tcp_sock *tp)
@@ -1229,8 +1261,8 @@ static inline __u32 tcp_current_ssthresh(const struct sock *sk)
return tp->snd_ssthresh;
else
return max(tp->snd_ssthresh,
- ((tp->snd_cwnd >> 1) +
- (tp->snd_cwnd >> 2)));
+ ((tcp_snd_cwnd(tp) >> 1) +
+ (tcp_snd_cwnd(tp) >> 2)));
}
/* Use define here intentionally to get WARN_ON location shown at the caller */
@@ -1270,11 +1302,14 @@ static inline bool tcp_is_cwnd_limited(const struct sock *sk)
{
const struct tcp_sock *tp = tcp_sk(sk);
+ if (tp->is_cwnd_limited)
+ return true;
+
/* If in slow start, ensure cwnd grows to twice what was ACKed. */
if (tcp_in_slow_start(tp))
- return tp->snd_cwnd < 2 * tp->max_packets_out;
+ return tcp_snd_cwnd(tp) < 2 * tp->max_packets_out;
- return tp->is_cwnd_limited;
+ return false;
}
/* BBR congestion control needs pacing.
@@ -1288,26 +1323,22 @@ static inline bool tcp_needs_internal_pacing(const struct sock *sk)
return smp_load_acquire(&sk->sk_pacing_status) == SK_PACING_NEEDED;
}
-/* Return in jiffies the delay before one skb is sent.
- * If @skb is NULL, we look at EDT for next packet being sent on the socket.
+/* Estimates in how many jiffies next packet for this flow can be sent.
+ * Scheduling a retransmit timer too early would be silly.
*/
-static inline unsigned long tcp_pacing_delay(const struct sock *sk,
- const struct sk_buff *skb)
+static inline unsigned long tcp_pacing_delay(const struct sock *sk)
{
- s64 pacing_delay = skb ? skb->tstamp : tcp_sk(sk)->tcp_wstamp_ns;
-
- pacing_delay -= tcp_sk(sk)->tcp_clock_cache;
+ s64 delay = tcp_sk(sk)->tcp_wstamp_ns - tcp_sk(sk)->tcp_clock_cache;
- return pacing_delay > 0 ? nsecs_to_jiffies(pacing_delay) : 0;
+ return delay > 0 ? nsecs_to_jiffies(delay) : 0;
}
static inline void tcp_reset_xmit_timer(struct sock *sk,
const int what,
unsigned long when,
- const unsigned long max_when,
- const struct sk_buff *skb)
+ const unsigned long max_when)
{
- inet_csk_reset_xmit_timer(sk, what, when + tcp_pacing_delay(sk, skb),
+ inet_csk_reset_xmit_timer(sk, what, when + tcp_pacing_delay(sk),
max_when);
}
@@ -1326,7 +1357,9 @@ static inline unsigned long tcp_probe0_base(const struct sock *sk)
static inline unsigned long tcp_probe0_when(const struct sock *sk,
unsigned long max_when)
{
- u64 when = (u64)tcp_probe0_base(sk) << inet_csk(sk)->icsk_backoff;
+ u8 backoff = min_t(u8, ilog2(TCP_RTO_MAX / TCP_RTO_MIN) + 1,
+ inet_csk(sk)->icsk_backoff);
+ u64 when = (u64)tcp_probe0_base(sk) << backoff;
return (unsigned long)min_t(u64, when, max_when);
}
@@ -1335,8 +1368,7 @@ static inline void tcp_check_probe_timer(struct sock *sk)
{
if (!tcp_sk(sk)->packets_out && !inet_csk(sk)->icsk_pending)
tcp_reset_xmit_timer(sk, ICSK_TIME_PROBE0,
- tcp_probe0_base(sk), TCP_RTO_MAX,
- NULL);
+ tcp_probe0_base(sk), TCP_RTO_MAX);
}
static inline void tcp_init_wl(struct tcp_sock *tp, u32 seq)
@@ -1364,7 +1396,10 @@ static inline bool tcp_checksum_complete(struct sk_buff *skb)
__skb_checksum_complete(skb);
}
-bool tcp_add_backlog(struct sock *sk, struct sk_buff *skb);
+bool tcp_add_backlog(struct sock *sk, struct sk_buff *skb,
+ enum skb_drop_reason *reason);
+
+
int tcp_filter(struct sock *sk, struct sk_buff *skb);
void tcp_set_state(struct sock *sk, int state);
void tcp_done(struct sock *sk);
@@ -1376,7 +1411,6 @@ static inline void tcp_sack_reset(struct tcp_options_received *rx_opt)
rx_opt->num_sacks = 0;
}
-u32 tcp_default_init_rwnd(u32 mss);
void tcp_cwnd_restart(struct sock *sk, s32 delta);
static inline void tcp_slow_start_after_idle_check(struct sock *sk)
@@ -1385,8 +1419,8 @@ static inline void tcp_slow_start_after_idle_check(struct sock *sk)
struct tcp_sock *tp = tcp_sk(sk);
s32 delta;
- if (!sock_net(sk)->ipv4.sysctl_tcp_slow_start_after_idle || tp->packets_out ||
- ca_ops->cong_control)
+ if (!READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_slow_start_after_idle) ||
+ tp->packets_out || ca_ops->cong_control)
return;
delta = tcp_jiffies32 - tp->lsndtime;
if (delta > inet_csk(sk)->icsk_rto)
@@ -1401,7 +1435,7 @@ void tcp_select_initial_window(const struct sock *sk, int __space,
static inline int tcp_win_from_space(const struct sock *sk, int space)
{
- int tcp_adv_win_scale = sock_net(sk)->ipv4.sysctl_tcp_adv_win_scale;
+ int tcp_adv_win_scale = READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_adv_win_scale);
return tcp_adv_win_scale <= 0 ?
(space>>(-tcp_adv_win_scale)) :
@@ -1421,6 +1455,49 @@ static inline int tcp_full_space(const struct sock *sk)
return tcp_win_from_space(sk, READ_ONCE(sk->sk_rcvbuf));
}
+static inline void tcp_adjust_rcv_ssthresh(struct sock *sk)
+{
+ int unused_mem = sk_unused_reserved_mem(sk);
+ struct tcp_sock *tp = tcp_sk(sk);
+
+ tp->rcv_ssthresh = min(tp->rcv_ssthresh, 4U * tp->advmss);
+ if (unused_mem)
+ tp->rcv_ssthresh = max_t(u32, tp->rcv_ssthresh,
+ tcp_win_from_space(sk, unused_mem));
+}
+
+void tcp_cleanup_rbuf(struct sock *sk, int copied);
+
+/* We provision sk_rcvbuf around 200% of sk_rcvlowat.
+ * If 87.5 % (7/8) of the space has been consumed, we want to override
+ * SO_RCVLOWAT constraint, since we are receiving skbs with too small
+ * len/truesize ratio.
+ */
+static inline bool tcp_rmem_pressure(const struct sock *sk)
+{
+ int rcvbuf, threshold;
+
+ if (tcp_under_memory_pressure(sk))
+ return true;
+
+ rcvbuf = READ_ONCE(sk->sk_rcvbuf);
+ threshold = rcvbuf - (rcvbuf >> 3);
+
+ return atomic_read(&sk->sk_rmem_alloc) > threshold;
+}
+
+static inline bool tcp_epollin_ready(const struct sock *sk, int target)
+{
+ const struct tcp_sock *tp = tcp_sk(sk);
+ int avail = READ_ONCE(tp->rcv_nxt) - READ_ONCE(tp->copied_seq);
+
+ if (avail <= 0)
+ return false;
+
+ return (avail >= target) || tcp_rmem_pressure(sk) ||
+ (tcp_receive_window(tp) <= inet_csk(sk)->icsk_ack.rcv_mss);
+}
+
extern void tcp_openreq_init_rwin(struct request_sock *req,
const struct sock *sk_listener,
const struct dst_entry *dst);
@@ -1432,21 +1509,24 @@ static inline int keepalive_intvl_when(const struct tcp_sock *tp)
{
struct net *net = sock_net((struct sock *)tp);
- return tp->keepalive_intvl ? : net->ipv4.sysctl_tcp_keepalive_intvl;
+ return tp->keepalive_intvl ? :
+ READ_ONCE(net->ipv4.sysctl_tcp_keepalive_intvl);
}
static inline int keepalive_time_when(const struct tcp_sock *tp)
{
struct net *net = sock_net((struct sock *)tp);
- return tp->keepalive_time ? : net->ipv4.sysctl_tcp_keepalive_time;
+ return tp->keepalive_time ? :
+ READ_ONCE(net->ipv4.sysctl_tcp_keepalive_time);
}
static inline int keepalive_probes(const struct tcp_sock *tp)
{
struct net *net = sock_net((struct sock *)tp);
- return tp->keepalive_probes ? : net->ipv4.sysctl_tcp_keepalive_probes;
+ return tp->keepalive_probes ? :
+ READ_ONCE(net->ipv4.sysctl_tcp_keepalive_probes);
}
static inline u32 keepalive_time_elapsed(const struct tcp_sock *tp)
@@ -1459,7 +1539,8 @@ static inline u32 keepalive_time_elapsed(const struct tcp_sock *tp)
static inline int tcp_fin_time(const struct sock *sk)
{
- int fin_timeout = tcp_sk(sk)->linger2 ? : sock_net(sk)->ipv4.sysctl_tcp_fin_timeout;
+ int fin_timeout = tcp_sk(sk)->linger2 ? :
+ READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_fin_timeout);
const int rto = inet_csk(sk)->icsk_rto;
if (fin_timeout < (rto << 2) - (rto >> 1))
@@ -1547,6 +1628,7 @@ struct tcp_md5sig_key {
u8 keylen;
u8 family; /* AF_INET or AF_INET6 */
u8 prefixlen;
+ u8 flags;
union tcp_md5_addr addr;
int l3index; /* set if key added with L3 scope */
u8 key[TCP_MD5SIG_MAXKEYLEN];
@@ -1592,10 +1674,10 @@ struct tcp_md5sig_pool {
int tcp_v4_md5_hash_skb(char *md5_hash, const struct tcp_md5sig_key *key,
const struct sock *sk, const struct sk_buff *skb);
int tcp_md5_do_add(struct sock *sk, const union tcp_md5_addr *addr,
- int family, u8 prefixlen, int l3index,
+ int family, u8 prefixlen, int l3index, u8 flags,
const u8 *newkey, u8 newkeylen, gfp_t gfp);
int tcp_md5_do_del(struct sock *sk, const union tcp_md5_addr *addr,
- int family, u8 prefixlen, int l3index);
+ int family, u8 prefixlen, int l3index, u8 flags);
struct tcp_md5sig_key *tcp_v4_md5_lookup(const struct sock *sk,
const struct sock *addr_sk);
@@ -1614,6 +1696,12 @@ tcp_md5_do_lookup(const struct sock *sk, int l3index,
return __tcp_md5_do_lookup(sk, l3index, addr, family);
}
+enum skb_drop_reason
+tcp_inbound_md5_hash(const struct sock *sk, const struct sk_buff *skb,
+ const void *saddr, const void *daddr,
+ int family, int dif, int sdif);
+
+
#define tcp_twsk_md5_key(twsk) ((twsk)->tw_md5_key)
#else
static inline struct tcp_md5sig_key *
@@ -1622,6 +1710,14 @@ tcp_md5_do_lookup(const struct sock *sk, int l3index,
{
return NULL;
}
+
+static inline enum skb_drop_reason
+tcp_inbound_md5_hash(const struct sock *sk, const struct sk_buff *skb,
+ const void *saddr, const void *daddr,
+ int family, int dif, int sdif)
+{
+ return SKB_NOT_DROPPED_YET;
+}
#define tcp_twsk_md5_key(twsk) NULL
#endif
@@ -1657,6 +1753,8 @@ void tcp_fastopen_destroy_cipher(struct sock *sk);
void tcp_fastopen_ctx_destroy(struct net *net);
int tcp_fastopen_reset_cipher(struct net *net, struct sock *sk,
void *primary_key, void *backup_key);
+int tcp_fastopen_get_cipher(struct net *net, struct inet_connection_sock *icsk,
+ u64 *key);
void tcp_fastopen_add_skb(struct sock *sk, struct sk_buff *skb);
struct sock *tcp_try_fastopen(struct sock *sk, struct sk_buff *skb,
struct request_sock *req,
@@ -1678,7 +1776,6 @@ struct tcp_fastopen_context {
struct rcu_head rcu;
};
-extern unsigned int sysctl_tcp_fastopen_blackhole_timeout;
void tcp_fastopen_active_disable(struct sock *sk);
bool tcp_fastopen_active_should_disable(struct sock *sk);
void tcp_fastopen_active_disable_ofo_check(struct sock *sk);
@@ -1756,11 +1853,6 @@ static inline struct sk_buff *tcp_rtx_queue_tail(const struct sock *sk)
return skb_rb_last(&sk->tcp_rtx_queue);
}
-static inline struct sk_buff *tcp_write_queue_head(const struct sock *sk)
-{
- return skb_peek(&sk->sk_write_queue);
-}
-
static inline struct sk_buff *tcp_write_queue_tail(const struct sock *sk)
{
return skb_peek_tail(&sk->sk_write_queue);
@@ -1839,7 +1931,7 @@ static inline void tcp_rtx_queue_unlink_and_free(struct sk_buff *skb, struct soc
{
list_del(&skb->tcp_tsorted_anchor);
tcp_rtx_queue_unlink(skb, sk);
- sk_wmem_free_skb(sk, skb);
+ tcp_wmem_free_skb(sk, skb);
}
static inline void tcp_push_pending_frames(struct sock *sk)
@@ -1940,6 +2032,10 @@ void tcp_v4_destroy_sock(struct sock *sk);
struct sk_buff *tcp_gso_segment(struct sk_buff *skb,
netdev_features_t features);
struct sk_buff *tcp_gro_receive(struct list_head *head, struct sk_buff *skb);
+INDIRECT_CALLABLE_DECLARE(int tcp4_gro_complete(struct sk_buff *skb, int thoff));
+INDIRECT_CALLABLE_DECLARE(struct sk_buff *tcp4_gro_receive(struct list_head *head, struct sk_buff *skb));
+INDIRECT_CALLABLE_DECLARE(int tcp6_gro_complete(struct sk_buff *skb, int thoff));
+INDIRECT_CALLABLE_DECLARE(struct sk_buff *tcp6_gro_receive(struct list_head *head, struct sk_buff *skb));
int tcp_gro_complete(struct sk_buff *skb);
void __tcp_v4_send_check(struct sk_buff *skb, __be32 saddr, __be32 daddr);
@@ -1947,21 +2043,10 @@ void __tcp_v4_send_check(struct sk_buff *skb, __be32 saddr, __be32 daddr);
static inline u32 tcp_notsent_lowat(const struct tcp_sock *tp)
{
struct net *net = sock_net((struct sock *)tp);
- return tp->notsent_lowat ?: net->ipv4.sysctl_tcp_notsent_lowat;
+ return tp->notsent_lowat ?: READ_ONCE(net->ipv4.sysctl_tcp_notsent_lowat);
}
-/* @wake is one when sk_stream_write_space() calls us.
- * This sends EPOLLOUT only if notsent_bytes is half the limit.
- * This mimics the strategy used in sock_def_write_space().
- */
-static inline bool tcp_stream_memory_free(const struct sock *sk, int wake)
-{
- const struct tcp_sock *tp = tcp_sk(sk);
- u32 notsent_bytes = READ_ONCE(tp->write_seq) -
- READ_ONCE(tp->snd_nxt);
-
- return (notsent_bytes << wake) < tcp_notsent_lowat(tp);
-}
+bool tcp_stream_memory_free(const struct sock *sk, int wake);
#ifdef CONFIG_PROC_FS
int tcp4_proc_init(void);
@@ -1984,7 +2069,7 @@ struct tcp_sock_af_ops {
const struct sk_buff *skb);
int (*md5_parse)(struct sock *sk,
int optname,
- char __user *optval,
+ sockptr_t optval,
int optlen);
#endif
};
@@ -1999,21 +2084,21 @@ struct tcp_request_sock_ops {
const struct sock *sk,
const struct sk_buff *skb);
#endif
- void (*init_req)(struct request_sock *req,
- const struct sock *sk_listener,
- struct sk_buff *skb);
#ifdef CONFIG_SYN_COOKIES
__u32 (*cookie_init_seq)(const struct sk_buff *skb,
__u16 *mss);
#endif
- struct dst_entry *(*route_req)(const struct sock *sk, struct flowi *fl,
- const struct request_sock *req);
+ struct dst_entry *(*route_req)(const struct sock *sk,
+ struct sk_buff *skb,
+ struct flowi *fl,
+ struct request_sock *req);
u32 (*init_seq)(const struct sk_buff *skb);
u32 (*init_ts_off)(const struct net *net, const struct sk_buff *skb);
int (*send_synack)(const struct sock *sk, struct dst_entry *dst,
struct flowi *fl, struct request_sock *req,
struct tcp_fastopen_cookie *foc,
- enum tcp_synack_type synack_type);
+ enum tcp_synack_type synack_type,
+ struct sk_buff *syn_skb);
};
extern const struct tcp_request_sock_ops tcp_request_sock_ipv4_ops;
@@ -2049,7 +2134,7 @@ void tcp_mark_skb_lost(struct sock *sk, struct sk_buff *skb);
void tcp_newreno_mark_lost(struct sock *sk, bool snd_una_advanced);
extern s32 tcp_rack_skb_timeout(struct tcp_sock *tp, struct sk_buff *skb,
u32 reo_wnd);
-extern void tcp_rack_mark_lost(struct sock *sk);
+extern bool tcp_rack_mark_lost(struct sock *sk);
extern void tcp_rack_advance(struct tcp_sock *tp, u8 sacked, u32 end_seq,
u64 xmit_time);
extern void tcp_rack_reo_timeout(struct sock *sk);
@@ -2132,9 +2217,13 @@ static inline void tcp_segs_in(struct tcp_sock *tp, const struct sk_buff *skb)
u16 segs_in;
segs_in = max_t(u16, 1, skb_shinfo(skb)->gso_segs);
- tp->segs_in += segs_in;
+
+ /* We update these fields while other threads might
+ * read them from tcp_get_info()
+ */
+ WRITE_ONCE(tp->segs_in, tp->segs_in + segs_in);
if (skb->len > tcp_hdrlen(skb))
- tp->data_segs_in += segs_in;
+ WRITE_ONCE(tp->data_segs_in, tp->data_segs_in + segs_in);
}
/*
@@ -2192,17 +2281,41 @@ void tcp_update_ulp(struct sock *sk, struct proto *p,
__MODULE_INFO(alias, alias_userspace, name); \
__MODULE_INFO(alias, alias_tcp_ulp, "tcp-ulp-" name)
+#ifdef CONFIG_NET_SOCK_MSG
struct sk_msg;
struct sk_psock;
-int tcp_bpf_init(struct sock *sk);
-void tcp_bpf_reinit(struct sock *sk);
+#ifdef CONFIG_BPF_SYSCALL
+struct proto *tcp_bpf_get_proto(struct sock *sk, struct sk_psock *psock);
+int tcp_bpf_update_proto(struct sock *sk, struct sk_psock *psock, bool restore);
+void tcp_bpf_clone(const struct sock *sk, struct sock *newsk);
+#endif /* CONFIG_BPF_SYSCALL */
+
int tcp_bpf_sendmsg_redir(struct sock *sk, struct sk_msg *msg, u32 bytes,
int flags);
-int tcp_bpf_recvmsg(struct sock *sk, struct msghdr *msg, size_t len,
- int nonblock, int flags, int *addr_len);
-int __tcp_bpf_recvmsg(struct sock *sk, struct sk_psock *psock,
- struct msghdr *msg, int len, int flags);
+#endif /* CONFIG_NET_SOCK_MSG */
+
+#if !defined(CONFIG_BPF_SYSCALL) || !defined(CONFIG_NET_SOCK_MSG)
+static inline void tcp_bpf_clone(const struct sock *sk, struct sock *newsk)
+{
+}
+#endif
+
+#ifdef CONFIG_CGROUP_BPF
+static inline void bpf_skops_init_skb(struct bpf_sock_ops_kern *skops,
+ struct sk_buff *skb,
+ unsigned int end_offset)
+{
+ skops->skb = skb;
+ skops->skb_data_end = skb->data + end_offset;
+}
+#else
+static inline void bpf_skops_init_skb(struct bpf_sock_ops_kern *skops,
+ struct sk_buff *skb,
+ unsigned int end_offset)
+{
+}
+#endif
/* Call BPF_SOCK_OPS program that returns an int. If the return value
* is < 0, then the BPF op failed (for example if the loaded BPF
@@ -2276,7 +2389,7 @@ static inline u32 tcp_timeout_init(struct sock *sk)
if (timeout <= 0)
timeout = TCP_TIMEOUT_INIT;
- return timeout;
+ return min_t(int, timeout, TCP_RTO_MAX);
}
static inline u32 tcp_rwnd_init_bpf(struct sock *sk)
diff --git a/include/net/tls.h b/include/net/tls.h
index bf9eb4823933..154949c7b0c8 100644
--- a/include/net/tls.h
+++ b/include/net/tls.h
@@ -39,7 +39,6 @@
#include <linux/crypto.h>
#include <linux/socket.h>
#include <linux/tcp.h>
-#include <linux/skmsg.h>
#include <linux/mutex.h>
#include <linux/netdevice.h>
#include <linux/rcupdate.h>
@@ -50,6 +49,17 @@
#include <crypto/aead.h>
#include <uapi/linux/tls.h>
+struct tls_rec;
+
+struct tls_cipher_size_desc {
+ unsigned int iv;
+ unsigned int key;
+ unsigned int salt;
+ unsigned int tag;
+ unsigned int rec_seq;
+};
+
+extern const struct tls_cipher_size_desc tls_cipher_size_desc[];
/* Maximum data size carried in a TLS record */
#define TLS_MAX_PAYLOAD_SIZE ((size_t)1 << 14)
@@ -64,9 +74,11 @@
#define TLS_AAD_SPACE_SIZE 13
#define MAX_IV_SIZE 16
+#define TLS_TAG_SIZE 16
#define TLS_MAX_REC_SEQ_SIZE 8
+#define TLS_MAX_AAD_SIZE TLS_AAD_SPACE_SIZE
-/* For AES-CCM, the full 16-bytes of IV is made of '4' fields of given sizes.
+/* For CCM mode, the full 16-bytes of IV is made of '4' fields of given sizes.
*
* IV[16] = b0[1] || implicit nonce[4] || explicit nonce[8] || length[3]
*
@@ -74,15 +86,7 @@
* Hence b0 contains (3 - 1) = 2.
*/
#define TLS_AES_CCM_IV_B0_BYTE 2
-
-#define __TLS_INC_STATS(net, field) \
- __SNMP_INC_STATS((net)->mib.tls_statistics, field)
-#define TLS_INC_STATS(net, field) \
- SNMP_INC_STATS((net)->mib.tls_statistics, field)
-#define __TLS_DEC_STATS(net, field) \
- __SNMP_DEC_STATS((net)->mib.tls_statistics, field)
-#define TLS_DEC_STATS(net, field) \
- SNMP_DEC_STATS((net)->mib.tls_statistics, field)
+#define TLS_SM4_CCM_IV_B0_BYTE 2
enum {
TLS_BASE,
@@ -92,37 +96,6 @@ enum {
TLS_NUM_CONFIG,
};
-/* TLS records are maintained in 'struct tls_rec'. It stores the memory pages
- * allocated or mapped for each TLS record. After encryption, the records are
- * stores in a linked list.
- */
-struct tls_rec {
- struct list_head list;
- int tx_ready;
- int tx_flags;
-
- struct sk_msg msg_plaintext;
- struct sk_msg msg_encrypted;
-
- /* AAD | msg_plaintext.sg.data | sg_tag */
- struct scatterlist sg_aead_in[2];
- /* AAD | msg_encrypted.sg.data (data contains overhead for hdr & iv & tag) */
- struct scatterlist sg_aead_out[2];
-
- char content_type;
- struct scatterlist sg_content_type;
-
- char aad_space[TLS_AAD_SPACE_SIZE];
- u8 iv_data[MAX_IV_SIZE];
- struct aead_request aead_req;
- u8 aead_req_ctx[];
-};
-
-struct tls_msg {
- struct strp_msg rxm;
- u8 control;
-};
-
struct tx_work {
struct delayed_work work;
struct sock *sk;
@@ -135,6 +108,8 @@ struct tls_sw_context_tx {
struct tls_rec *open_rec;
struct list_head tx_list;
atomic_t encrypt_pending;
+ /* protect crypto_wait with encrypt_pending */
+ spinlock_t encrypt_compl_lock;
int async_notify;
u8 async_capable:1;
@@ -143,19 +118,38 @@ struct tls_sw_context_tx {
unsigned long tx_bitmask;
};
+struct tls_strparser {
+ struct sock *sk;
+
+ u32 mark : 8;
+ u32 stopped : 1;
+ u32 copy_mode : 1;
+ u32 msg_ready : 1;
+
+ struct strp_msg stm;
+
+ struct sk_buff *anchor;
+ struct work_struct work;
+};
+
struct tls_sw_context_rx {
struct crypto_aead *aead_recv;
struct crypto_wait async_wait;
- struct strparser strp;
struct sk_buff_head rx_list; /* list of decrypted 'data' records */
void (*saved_data_ready)(struct sock *sk);
- struct sk_buff *recv_pkt;
- u8 control;
+ u8 reader_present;
u8 async_capable:1;
- u8 decrypted:1;
+ u8 zc_capable:1;
+ u8 reader_contended:1;
+
+ struct tls_strparser strp;
+
atomic_t decrypt_pending;
- bool async_notify;
+ /* protect crypto_wait with decrypt_pending*/
+ spinlock_t decrypt_compl_lock;
+ struct sk_buff_head async_hold;
+ struct wait_queue_head wq;
};
struct tls_record_info {
@@ -177,6 +171,8 @@ struct tls_offload_context_tx {
struct scatterlist sg_tx_data[MAX_SKB_FRAGS];
void (*sk_destruct)(struct sock *sk);
+ struct work_struct destruct_work;
+ struct tls_context *ctx;
u8 driver_state[] __aligned(8);
/* The TLS layer reserves room for driver specific state
* Currently the belief is that there is not enough
@@ -189,12 +185,22 @@ struct tls_offload_context_tx {
(sizeof(struct tls_offload_context_tx) + TLS_DRIVER_STATE_SIZE_TX)
enum tls_context_flags {
- TLS_RX_SYNC_RUNNING = 0,
+ /* tls_device_down was called after the netdev went down, device state
+ * was released, and kTLS works in software, even though rx_conf is
+ * still TLS_HW (needed for transition).
+ */
+ TLS_RX_DEV_DEGRADED = 0,
/* Unlike RX where resync is driven entirely by the core in TX only
* the driver knows when things went out of sync, so we need the flag
* to be atomic.
*/
TLS_TX_SYNC_SCHED = 1,
+ /* tls_dev_del was called for the RX side, device state was released,
+ * but tls_ctx->netdev might still be kept, because TX-side driver
+ * resources might not be released yet. Used to prevent the second
+ * tls_dev_del call in tls_device_down if it happens simultaneously.
+ */
+ TLS_RX_DEV_CLOSED = 2,
};
struct cipher_context {
@@ -207,6 +213,9 @@ union tls_crypto_context {
union {
struct tls12_crypto_info_aes_gcm_128 aes_gcm_128;
struct tls12_crypto_info_aes_gcm_256 aes_gcm_256;
+ struct tls12_crypto_info_chacha20_poly1305 chacha20_poly1305;
+ struct tls12_crypto_info_sm4_gcm sm4_gcm;
+ struct tls12_crypto_info_sm4_ccm sm4_ccm;
};
};
@@ -229,6 +238,8 @@ struct tls_context {
u8 tx_conf:3;
u8 rx_conf:3;
+ u8 zerocopy_sendfile:1;
+ u8 rx_no_pad:1;
int (*push_pending_record)(struct sock *sk, int flags);
void (*sk_write_space)(struct sock *sk);
@@ -236,7 +247,7 @@ struct tls_context {
void *priv_ctx_tx;
void *priv_ctx_rx;
- struct net_device *netdev;
+ struct net_device __rcu *netdev;
/* rw cache line */
struct cipher_context tx;
@@ -255,6 +266,7 @@ struct tls_context {
/* cache cold stuff */
struct proto *sk_proto;
+ struct sock *sk;
void (*sk_destruct)(struct sock *sk);
@@ -287,11 +299,20 @@ struct tlsdev_ops {
enum tls_offload_sync_type {
TLS_OFFLOAD_SYNC_TYPE_DRIVER_REQ = 0,
TLS_OFFLOAD_SYNC_TYPE_CORE_NEXT_HINT = 1,
+ TLS_OFFLOAD_SYNC_TYPE_DRIVER_REQ_ASYNC = 2,
};
#define TLS_DEVICE_RESYNC_NH_START_IVAL 2
#define TLS_DEVICE_RESYNC_NH_MAX_IVAL 128
+#define TLS_DEVICE_RESYNC_ASYNC_LOGMAX 13
+struct tls_offload_resync_async {
+ atomic64_t req;
+ u16 loglen;
+ u16 rcd_delta;
+ u32 log[TLS_DEVICE_RESYNC_ASYNC_LOGMAX];
+};
+
struct tls_offload_context_rx {
/* sw must be the first member of tls_offload_context_rx */
struct tls_sw_context_rx sw;
@@ -310,6 +331,10 @@ struct tls_offload_context_rx {
u32 decrypted_failed;
u32 decrypted_tgt;
} resync_nh;
+ /* TLS_OFFLOAD_SYNC_TYPE_DRIVER_REQ_ASYNC */
+ struct {
+ struct tls_offload_resync_async *resync_async;
+ };
};
u8 driver_state[] __aligned(8);
/* The TLS layer reserves room for driver specific state
@@ -322,42 +347,6 @@ struct tls_offload_context_rx {
#define TLS_OFFLOAD_CONTEXT_SIZE_RX \
(sizeof(struct tls_offload_context_rx) + TLS_DRIVER_STATE_SIZE_RX)
-struct tls_context *tls_ctx_create(struct sock *sk);
-void tls_ctx_free(struct sock *sk, struct tls_context *ctx);
-void update_sk_prot(struct sock *sk, struct tls_context *ctx);
-
-int wait_on_pending_writer(struct sock *sk, long *timeo);
-int tls_sk_query(struct sock *sk, int optname, char __user *optval,
- int __user *optlen);
-int tls_sk_attach(struct sock *sk, int optname, char __user *optval,
- unsigned int optlen);
-
-int tls_set_sw_offload(struct sock *sk, struct tls_context *ctx, int tx);
-void tls_sw_strparser_arm(struct sock *sk, struct tls_context *ctx);
-void tls_sw_strparser_done(struct tls_context *tls_ctx);
-int tls_sw_sendmsg(struct sock *sk, struct msghdr *msg, size_t size);
-int tls_sw_sendpage_locked(struct sock *sk, struct page *page,
- int offset, size_t size, int flags);
-int tls_sw_sendpage(struct sock *sk, struct page *page,
- int offset, size_t size, int flags);
-void tls_sw_cancel_work_tx(struct tls_context *tls_ctx);
-void tls_sw_release_resources_tx(struct sock *sk);
-void tls_sw_free_ctx_tx(struct tls_context *tls_ctx);
-void tls_sw_free_resources_rx(struct sock *sk);
-void tls_sw_release_resources_rx(struct sock *sk);
-void tls_sw_free_ctx_rx(struct tls_context *tls_ctx);
-int tls_sw_recvmsg(struct sock *sk, struct msghdr *msg, size_t len,
- int nonblock, int flags, int *addr_len);
-bool tls_sw_stream_read(const struct sock *sk);
-ssize_t tls_sw_splice_read(struct socket *sock, loff_t *ppos,
- struct pipe_inode_info *pipe,
- size_t len, unsigned int flags);
-
-int tls_device_sendmsg(struct sock *sk, struct msghdr *msg, size_t size);
-int tls_device_sendpage(struct sock *sk, struct page *page,
- int offset, size_t size, int flags);
-int tls_tx_records(struct sock *sk, int flags);
-
struct tls_record_info *tls_get_record(struct tls_offload_context_tx *context,
u32 seq, u64 *p_record_sn);
@@ -371,59 +360,12 @@ static inline u32 tls_record_start_seq(struct tls_record_info *rec)
return rec->end_seq - rec->len;
}
-int tls_push_sg(struct sock *sk, struct tls_context *ctx,
- struct scatterlist *sg, u16 first_offset,
- int flags);
-int tls_push_partial_record(struct sock *sk, struct tls_context *ctx,
- int flags);
-void tls_free_partial_record(struct sock *sk, struct tls_context *ctx);
-
-static inline struct tls_msg *tls_msg(struct sk_buff *skb)
-{
- return (struct tls_msg *)strp_msg(skb);
-}
-
-static inline bool tls_is_partially_sent_record(struct tls_context *ctx)
-{
- return !!ctx->partially_sent_record;
-}
-
-static inline bool tls_is_pending_open_record(struct tls_context *tls_ctx)
-{
- return tls_ctx->pending_open_record_frags;
-}
-
-static inline bool is_tx_ready(struct tls_sw_context_tx *ctx)
-{
- struct tls_rec *rec;
-
- rec = list_first_entry(&ctx->tx_list, struct tls_rec, list);
- if (!rec)
- return false;
-
- return READ_ONCE(rec->tx_ready);
-}
-
-static inline u16 tls_user_config(struct tls_context *ctx, bool tx)
-{
- u16 config = tx ? ctx->tx_conf : ctx->rx_conf;
-
- switch (config) {
- case TLS_BASE:
- return TLS_CONF_BASE;
- case TLS_SW:
- return TLS_CONF_SW;
- case TLS_HW:
- return TLS_CONF_HW;
- case TLS_HW_RECORD:
- return TLS_CONF_HW_RECORD;
- }
- return 0;
-}
-
struct sk_buff *
tls_validate_xmit_skb(struct sock *sk, struct net_device *dev,
struct sk_buff *skb);
+struct sk_buff *
+tls_validate_xmit_skb_sw(struct sock *sk, struct net_device *dev,
+ struct sk_buff *skb);
static inline bool tls_is_sk_tx_device_offloaded(struct sock *sk)
{
@@ -436,25 +378,6 @@ static inline bool tls_is_sk_tx_device_offloaded(struct sock *sk)
#endif
}
-static inline void tls_err_abort(struct sock *sk, int err)
-{
- sk->sk_err = err;
- sk->sk_error_report(sk);
-}
-
-static inline bool tls_bigint_increment(unsigned char *seq, int len)
-{
- int i;
-
- for (i = len - 1; i >= 0; i--) {
- ++seq[i];
- if (seq[i] != 0)
- break;
- }
-
- return (i == -1);
-}
-
static inline struct tls_context *tls_get_ctx(const struct sock *sk)
{
struct inet_connection_sock *icsk = inet_csk(sk);
@@ -465,81 +388,6 @@ static inline struct tls_context *tls_get_ctx(const struct sock *sk)
return (__force void *)icsk->icsk_ulp_data;
}
-static inline void tls_advance_record_sn(struct sock *sk,
- struct tls_prot_info *prot,
- struct cipher_context *ctx)
-{
- if (tls_bigint_increment(ctx->rec_seq, prot->rec_seq_size))
- tls_err_abort(sk, EBADMSG);
-
- if (prot->version != TLS_1_3_VERSION)
- tls_bigint_increment(ctx->iv + TLS_CIPHER_AES_GCM_128_SALT_SIZE,
- prot->iv_size);
-}
-
-static inline void tls_fill_prepend(struct tls_context *ctx,
- char *buf,
- size_t plaintext_len,
- unsigned char record_type,
- int version)
-{
- struct tls_prot_info *prot = &ctx->prot_info;
- size_t pkt_len, iv_size = prot->iv_size;
-
- pkt_len = plaintext_len + prot->tag_size;
- if (version != TLS_1_3_VERSION) {
- pkt_len += iv_size;
-
- memcpy(buf + TLS_NONCE_OFFSET,
- ctx->tx.iv + TLS_CIPHER_AES_GCM_128_SALT_SIZE, iv_size);
- }
-
- /* we cover nonce explicit here as well, so buf should be of
- * size KTLS_DTLS_HEADER_SIZE + KTLS_DTLS_NONCE_EXPLICIT_SIZE
- */
- buf[0] = version == TLS_1_3_VERSION ?
- TLS_RECORD_TYPE_DATA : record_type;
- /* Note that VERSION must be TLS_1_2 for both TLS1.2 and TLS1.3 */
- buf[1] = TLS_1_2_VERSION_MINOR;
- buf[2] = TLS_1_2_VERSION_MAJOR;
- /* we can use IV for nonce explicit according to spec */
- buf[3] = pkt_len >> 8;
- buf[4] = pkt_len & 0xFF;
-}
-
-static inline void tls_make_aad(char *buf,
- size_t size,
- char *record_sequence,
- int record_sequence_size,
- unsigned char record_type,
- int version)
-{
- if (version != TLS_1_3_VERSION) {
- memcpy(buf, record_sequence, record_sequence_size);
- buf += 8;
- } else {
- size += TLS_CIPHER_AES_GCM_128_TAG_SIZE;
- }
-
- buf[0] = version == TLS_1_3_VERSION ?
- TLS_RECORD_TYPE_DATA : record_type;
- buf[1] = TLS_1_2_VERSION_MAJOR;
- buf[2] = TLS_1_2_VERSION_MINOR;
- buf[3] = size >> 8;
- buf[4] = size & 0xFF;
-}
-
-static inline void xor_iv_with_seq(int version, char *iv, char *seq)
-{
- int i;
-
- if (version == TLS_1_3_VERSION) {
- for (i = 0; i < 8; i++)
- iv[i + 4] ^= seq[i];
- }
-}
-
-
static inline struct tls_sw_context_rx *tls_sw_ctx_rx(
const struct tls_context *tls_ctx)
{
@@ -567,8 +415,14 @@ static inline bool tls_sw_has_ctx_tx(const struct sock *sk)
return !!tls_sw_ctx_tx(ctx);
}
-void tls_sw_write_space(struct sock *sk, struct tls_context *ctx);
-void tls_device_write_space(struct sock *sk, struct tls_context *ctx);
+static inline bool tls_sw_has_ctx_rx(const struct sock *sk)
+{
+ struct tls_context *ctx = tls_get_ctx(sk);
+
+ if (!ctx)
+ return false;
+ return !!tls_sw_ctx_rx(ctx);
+}
static inline struct tls_offload_context_rx *
tls_offload_ctx_rx(const struct tls_context *tls_ctx)
@@ -576,7 +430,6 @@ tls_offload_ctx_rx(const struct tls_context *tls_ctx)
return (struct tls_offload_context_rx *)tls_ctx->priv_ctx_rx;
}
-#if IS_ENABLED(CONFIG_TLS_DEVICE)
static inline void *__tls_driver_ctx(struct tls_context *tls_ctx,
enum tls_offload_ctx_dir direction)
{
@@ -591,15 +444,39 @@ tls_driver_ctx(const struct sock *sk, enum tls_offload_ctx_dir direction)
{
return __tls_driver_ctx(tls_get_ctx(sk), direction);
}
-#endif
+#define RESYNC_REQ BIT(0)
+#define RESYNC_REQ_ASYNC BIT(1)
/* The TLS context is valid until sk_destruct is called */
static inline void tls_offload_rx_resync_request(struct sock *sk, __be32 seq)
{
struct tls_context *tls_ctx = tls_get_ctx(sk);
struct tls_offload_context_rx *rx_ctx = tls_offload_ctx_rx(tls_ctx);
- atomic64_set(&rx_ctx->resync_req, ((u64)ntohl(seq) << 32) | 1);
+ atomic64_set(&rx_ctx->resync_req, ((u64)ntohl(seq) << 32) | RESYNC_REQ);
+}
+
+/* Log all TLS record header TCP sequences in [seq, seq+len] */
+static inline void
+tls_offload_rx_resync_async_request_start(struct sock *sk, __be32 seq, u16 len)
+{
+ struct tls_context *tls_ctx = tls_get_ctx(sk);
+ struct tls_offload_context_rx *rx_ctx = tls_offload_ctx_rx(tls_ctx);
+
+ atomic64_set(&rx_ctx->resync_async->req, ((u64)ntohl(seq) << 32) |
+ ((u64)len << 16) | RESYNC_REQ | RESYNC_REQ_ASYNC);
+ rx_ctx->resync_async->loglen = 0;
+ rx_ctx->resync_async->rcd_delta = 0;
+}
+
+static inline void
+tls_offload_rx_resync_async_request_end(struct sock *sk, __be32 seq)
+{
+ struct tls_context *tls_ctx = tls_get_ctx(sk);
+ struct tls_offload_context_rx *rx_ctx = tls_offload_ctx_rx(tls_ctx);
+
+ atomic64_set(&rx_ctx->resync_async->req,
+ ((u64)ntohl(seq) << 32) | RESYNC_REQ);
}
static inline void
@@ -621,35 +498,11 @@ static inline bool tls_offload_tx_resync_pending(struct sock *sk)
return ret;
}
-int __net_init tls_proc_init(struct net *net);
-void __net_exit tls_proc_fini(struct net *net);
-
-int tls_proccess_cmsg(struct sock *sk, struct msghdr *msg,
- unsigned char *record_type);
-int decrypt_skb(struct sock *sk, struct sk_buff *skb,
- struct scatterlist *sgout);
struct sk_buff *tls_encrypt_skb(struct sk_buff *skb);
-struct sk_buff *tls_validate_xmit_skb(struct sock *sk,
- struct net_device *dev,
- struct sk_buff *skb);
-
-int tls_sw_fallback_init(struct sock *sk,
- struct tls_offload_context_tx *offload_ctx,
- struct tls_crypto_info *crypto_info);
-
#ifdef CONFIG_TLS_DEVICE
-void tls_device_init(void);
-void tls_device_cleanup(void);
void tls_device_sk_destruct(struct sock *sk);
-int tls_set_device_offload(struct sock *sk, struct tls_context *ctx);
-void tls_device_free_resources_tx(struct sock *sk);
-int tls_set_device_offload_rx(struct sock *sk, struct tls_context *ctx);
-void tls_device_offload_cleanup_rx(struct sock *sk);
-void tls_device_rx_resync_new_rec(struct sock *sk, u32 rcd_len, u32 seq);
void tls_offload_tx_resync_request(struct sock *sk, u32 got_seq, u32 exp_seq);
-int tls_device_decrypted(struct sock *sk, struct tls_context *tls_ctx,
- struct sk_buff *skb, struct strp_msg *rxm);
static inline bool tls_is_sk_rx_device_offloaded(struct sock *sk)
{
@@ -658,33 +511,5 @@ static inline bool tls_is_sk_rx_device_offloaded(struct sock *sk)
return false;
return tls_get_ctx(sk)->rx_conf == TLS_HW;
}
-#else
-static inline void tls_device_init(void) {}
-static inline void tls_device_cleanup(void) {}
-
-static inline int
-tls_set_device_offload(struct sock *sk, struct tls_context *ctx)
-{
- return -EOPNOTSUPP;
-}
-
-static inline void tls_device_free_resources_tx(struct sock *sk) {}
-
-static inline int
-tls_set_device_offload_rx(struct sock *sk, struct tls_context *ctx)
-{
- return -EOPNOTSUPP;
-}
-
-static inline void tls_device_offload_cleanup_rx(struct sock *sk) {}
-static inline void
-tls_device_rx_resync_new_rec(struct sock *sk, u32 rcd_len, u32 seq) {}
-
-static inline int
-tls_device_decrypted(struct sock *sk, struct tls_context *tls_ctx,
- struct sk_buff *skb, struct strp_msg *rxm)
-{
- return 0;
-}
#endif
#endif /* _TLS_OFFLOAD_H */
diff --git a/include/net/transp_v6.h b/include/net/transp_v6.h
index a8f6020f1196..b830463e3dff 100644
--- a/include/net/transp_v6.h
+++ b/include/net/transp_v6.h
@@ -3,6 +3,7 @@
#define _TRANSP_V6_H
#include <net/checksum.h>
+#include <net/sock.h>
/* IPv6 transport protocols */
extern struct proto rawv6_prot;
@@ -12,6 +13,7 @@ extern struct proto tcpv6_prot;
extern struct proto pingv6_prot;
struct flowi6;
+struct ipcm6_cookie;
/* extension headers */
int ipv6_exthdrs_init(void);
@@ -56,9 +58,6 @@ ip6_dgram_sock_seq_show(struct seq_file *seq, struct sock *sp, __u16 srcp,
#define LOOPBACK4_IPV6 cpu_to_be32(0x7f000006)
-/* address family specific functions */
-extern const struct inet_connection_sock_af_ops ipv4_specific;
-
void inet6_destroy_sock(struct sock *sk);
#define IPV6_SEQ_DGRAM_HEADER \
diff --git a/include/net/tso.h b/include/net/tso.h
index 7e166a570349..62c98a9c60f1 100644
--- a/include/net/tso.h
+++ b/include/net/tso.h
@@ -4,21 +4,22 @@
#include <net/ip.h>
-#define TSO_HEADER_SIZE 128
+#define TSO_HEADER_SIZE 256
struct tso_t {
- int next_frag_idx;
- void *data;
- size_t size;
- u16 ip_id;
- bool ipv6;
- u32 tcp_seq;
+ int next_frag_idx;
+ int size;
+ void *data;
+ u16 ip_id;
+ u8 tlen; /* transport header len */
+ bool ipv6;
+ u32 tcp_seq;
};
-int tso_count_descs(struct sk_buff *skb);
-void tso_build_hdr(struct sk_buff *skb, char *hdr, struct tso_t *tso,
+int tso_count_descs(const struct sk_buff *skb);
+void tso_build_hdr(const struct sk_buff *skb, char *hdr, struct tso_t *tso,
int size, bool is_last);
-void tso_build_data(struct sk_buff *skb, struct tso_t *tso, int size);
-void tso_start(struct sk_buff *skb, struct tso_t *tso);
+void tso_build_data(const struct sk_buff *skb, struct tso_t *tso, int size);
+int tso_start(struct sk_buff *skb, struct tso_t *tso);
#endif /* _TSO_H */
diff --git a/include/net/tun_proto.h b/include/net/tun_proto.h
index 2ea3deba4c99..7b0de7852908 100644
--- a/include/net/tun_proto.h
+++ b/include/net/tun_proto.h
@@ -1,7 +1,8 @@
#ifndef __NET_TUN_PROTO_H
#define __NET_TUN_PROTO_H
-#include <linux/kernel.h>
+#include <linux/if_ether.h>
+#include <linux/types.h>
/* One byte protocol values as defined by VXLAN-GPE and NSH. These will
* hopefully get a shared IANA registry.
diff --git a/include/net/udp.h b/include/net/udp.h
index e55d5f765807..fee053bcd17c 100644
--- a/include/net/udp.h
+++ b/include/net/udp.h
@@ -27,6 +27,7 @@
#include <linux/ipv6.h>
#include <linux/seq_file.h>
#include <linux/poll.h>
+#include <linux/indirect_call_wrapper.h>
/**
* struct udp_skb_cb - UDP(-Lite) private variables
@@ -94,6 +95,7 @@ static inline struct udp_hslot *udp_hashslot2(struct udp_table *table,
extern struct proto udp_prot;
extern atomic_long_t udp_memory_allocated;
+DECLARE_PER_CPU(int, udp_memory_per_cpu_fw_alloc);
/* sysctl variables for udp */
extern long sysctl_udp_mem[3];
@@ -163,29 +165,14 @@ static inline void udp_csum_pull_header(struct sk_buff *skb)
UDP_SKB_CB(skb)->cscov -= sizeof(struct udphdr);
}
-typedef struct sock *(*udp_lookup_t)(struct sk_buff *skb, __be16 sport,
+typedef struct sock *(*udp_lookup_t)(const struct sk_buff *skb, __be16 sport,
__be16 dport);
-struct sk_buff *udp_gro_receive(struct list_head *head, struct sk_buff *skb,
- struct udphdr *uh, struct sock *sk);
-int udp_gro_complete(struct sk_buff *skb, int nhoff, udp_lookup_t lookup);
+void udp_v6_early_demux(struct sk_buff *skb);
+INDIRECT_CALLABLE_DECLARE(int udpv6_rcv(struct sk_buff *));
struct sk_buff *__udp_gso_segment(struct sk_buff *gso_skb,
- netdev_features_t features);
-
-static inline struct udphdr *udp_gro_udphdr(struct sk_buff *skb)
-{
- struct udphdr *uh;
- unsigned int hlen, off;
-
- off = skb_gro_offset(skb);
- hlen = off + sizeof(*uh);
- uh = skb_gro_header_fast(skb, off);
- if (skb_gro_header_hard(skb, hlen))
- uh = skb_gro_header_slow(skb, hlen, off);
-
- return uh;
-}
+ netdev_features_t features, bool is_ipv6);
/* hash routines shared between UDPv4/6 and UDP-Litev4/6 */
static inline int udp_lib_hash(struct sock *sk)
@@ -252,7 +239,7 @@ static inline bool udp_sk_bound_dev_eq(struct net *net, int bound_dev_if,
int dif, int sdif)
{
#if IS_ENABLED(CONFIG_NET_L3_MASTER_DEV)
- return inet_bound_dev_eq(!!net->ipv4.sysctl_udp_l3mdev_accept,
+ return inet_bound_dev_eq(!!READ_ONCE(net->ipv4.sysctl_udp_l3mdev_accept),
bound_dev_if, dif, sdif);
#else
return inet_bound_dev_eq(true, bound_dev_if, dif, sdif);
@@ -260,18 +247,18 @@ static inline bool udp_sk_bound_dev_eq(struct net *net, int bound_dev_if,
}
/* net/ipv4/udp.c */
-void udp_destruct_sock(struct sock *sk);
+void udp_destruct_common(struct sock *sk);
void skb_consume_udp(struct sock *sk, struct sk_buff *skb, int len);
int __udp_enqueue_schedule_skb(struct sock *sk, struct sk_buff *skb);
void udp_skb_destructor(struct sock *sk, struct sk_buff *skb);
-struct sk_buff *__skb_recv_udp(struct sock *sk, unsigned int flags,
- int noblock, int *off, int *err);
+struct sk_buff *__skb_recv_udp(struct sock *sk, unsigned int flags, int *off,
+ int *err);
static inline struct sk_buff *skb_recv_udp(struct sock *sk, unsigned int flags,
- int noblock, int *err)
+ int *err)
{
int off = 0;
- return __skb_recv_udp(sk, flags, noblock, &off, err);
+ return __skb_recv_udp(sk, flags, &off, err);
}
int udp_v4_early_demux(struct sk_buff *skb);
@@ -299,14 +286,14 @@ struct sk_buff *skb_udp_tunnel_segment(struct sk_buff *skb,
int udp_lib_getsockopt(struct sock *sk, int level, int optname,
char __user *optval, int __user *optlen);
int udp_lib_setsockopt(struct sock *sk, int level, int optname,
- char __user *optval, unsigned int optlen,
+ sockptr_t optval, unsigned int optlen,
int (*push_pending_frames)(struct sock *));
struct sock *udp4_lib_lookup(struct net *net, __be32 saddr, __be16 sport,
__be32 daddr, __be16 dport, int dif);
struct sock *__udp4_lib_lookup(struct net *net, __be32 saddr, __be16 sport,
__be32 daddr, __be16 dport, int dif, int sdif,
struct udp_table *tbl, struct sk_buff *skb);
-struct sock *udp4_lib_lookup_skb(struct sk_buff *skb,
+struct sock *udp4_lib_lookup_skb(const struct sk_buff *skb,
__be16 sport, __be16 dport);
struct sock *udp6_lib_lookup(struct net *net,
const struct in6_addr *saddr, __be16 sport,
@@ -317,8 +304,9 @@ struct sock *__udp6_lib_lookup(struct net *net,
const struct in6_addr *daddr, __be16 dport,
int dif, int sdif, struct udp_table *tbl,
struct sk_buff *skb);
-struct sock *udp6_lib_lookup_skb(struct sk_buff *skb,
+struct sock *udp6_lib_lookup_skb(const struct sk_buff *skb,
__be16 sport, __be16 dport);
+int udp_read_skb(struct sock *sk, skb_read_actor_t recv_actor);
/* UDP uses skb->dev_scratch to cache as much information as possible and avoid
* possibly multiple cache miss on dequeue()
@@ -440,6 +428,7 @@ struct udp_seq_afinfo {
struct udp_iter_state {
struct seq_net_private p;
int bucket;
+ struct udp_seq_afinfo *bpf_seq_afinfo;
};
void *udp_seq_start(struct seq_file *seq, loff_t *pos);
@@ -459,6 +448,7 @@ void udp_init(void);
DECLARE_STATIC_KEY_FALSE(udp_encap_needed_key);
void udp_encap_enable(void);
+void udp_encap_disable(void);
#if IS_ENABLED(CONFIG_IPV6)
DECLARE_STATIC_KEY_FALSE(udpv6_encap_needed_key);
void udpv6_encap_enable(void);
@@ -480,8 +470,9 @@ static inline struct sk_buff *udp_rcv_segment(struct sock *sk,
* CHECKSUM_NONE in __udp_gso_segment. UDP GRO indeed builds partial
* packets in udp_gro_complete_segment. As does UDP GSO, verified by
* udp_send_skb. But when those packets are looped in dev_loopback_xmit
- * their ip_summed is set to CHECKSUM_UNNECESSARY. Reset in this
- * specific case, where PARTIAL is both correct and required.
+ * their ip_summed CHECKSUM_NONE is changed to CHECKSUM_UNNECESSARY.
+ * Reset in this specific case, where PARTIAL is both correct and
+ * required.
*/
if (skb->pkt_type == PACKET_LOOPBACK)
skb->ip_summed = CHECKSUM_PARTIAL;
@@ -503,4 +494,33 @@ static inline struct sk_buff *udp_rcv_segment(struct sock *sk,
return segs;
}
+static inline void udp_post_segment_fix_csum(struct sk_buff *skb)
+{
+ /* UDP-lite can't land here - no GRO */
+ WARN_ON_ONCE(UDP_SKB_CB(skb)->partial_cov);
+
+ /* UDP packets generated with UDP_SEGMENT and traversing:
+ *
+ * UDP tunnel(xmit) -> veth (segmentation) -> veth (gro) -> UDP tunnel (rx)
+ *
+ * can reach an UDP socket with CHECKSUM_NONE, because
+ * __iptunnel_pull_header() converts CHECKSUM_PARTIAL into NONE.
+ * SKB_GSO_UDP_L4 or SKB_GSO_FRAGLIST packets with no UDP tunnel will
+ * have a valid checksum, as the GRO engine validates the UDP csum
+ * before the aggregation and nobody strips such info in between.
+ * Instead of adding another check in the tunnel fastpath, we can force
+ * a valid csum after the segmentation.
+ * Additionally fixup the UDP CB.
+ */
+ UDP_SKB_CB(skb)->cscov = skb->len;
+ if (skb->ip_summed == CHECKSUM_NONE && !skb->csum_valid)
+ skb->csum_valid = 1;
+}
+
+#ifdef CONFIG_BPF_SYSCALL
+struct sk_psock;
+struct proto *udp_bpf_get_proto(struct sock *sk, struct sk_psock *psock);
+int udp_bpf_update_proto(struct sock *sk, struct sk_psock *psock, bool restore);
+#endif
+
#endif /* _UDP_H */
diff --git a/include/net/udp_tunnel.h b/include/net/udp_tunnel.h
index 4b1f95e08307..72394f441dad 100644
--- a/include/net/udp_tunnel.h
+++ b/include/net/udp_tunnel.h
@@ -67,6 +67,9 @@ static inline int udp_sock_create(struct net *net,
typedef int (*udp_tunnel_encap_rcv_t)(struct sock *sk, struct sk_buff *skb);
typedef int (*udp_tunnel_encap_err_lookup_t)(struct sock *sk,
struct sk_buff *skb);
+typedef void (*udp_tunnel_encap_err_rcv_t)(struct sock *sk,
+ struct sk_buff *skb,
+ unsigned int udp_offset);
typedef void (*udp_tunnel_encap_destroy_t)(struct sock *sk);
typedef struct sk_buff *(*udp_tunnel_gro_receive_t)(struct sock *sk,
struct list_head *head,
@@ -80,6 +83,7 @@ struct udp_tunnel_sock_cfg {
__u8 encap_type;
udp_tunnel_encap_rcv_t encap_rcv;
udp_tunnel_encap_err_lookup_t encap_err_lookup;
+ udp_tunnel_encap_err_rcv_t encap_err_rcv;
udp_tunnel_encap_destroy_t encap_destroy;
udp_tunnel_gro_receive_t gro_receive;
udp_tunnel_gro_complete_t gro_complete;
@@ -106,15 +110,16 @@ void setup_udp_tunnel_sock(struct net *net, struct socket *sock,
* call this function to perform Tx offloads on outgoing traffic.
*/
enum udp_parsable_tunnel_type {
- UDP_TUNNEL_TYPE_VXLAN, /* RFC 7348 */
- UDP_TUNNEL_TYPE_GENEVE, /* draft-ietf-nvo3-geneve */
- UDP_TUNNEL_TYPE_VXLAN_GPE, /* draft-ietf-nvo3-vxlan-gpe */
+ UDP_TUNNEL_TYPE_VXLAN = BIT(0), /* RFC 7348 */
+ UDP_TUNNEL_TYPE_GENEVE = BIT(1), /* draft-ietf-nvo3-geneve */
+ UDP_TUNNEL_TYPE_VXLAN_GPE = BIT(2), /* draft-ietf-nvo3-vxlan-gpe */
};
struct udp_tunnel_info {
unsigned short type;
sa_family_t sa_family;
__be16 port;
+ u8 hw_priv;
};
/* Notify network devices of offloadable types */
@@ -128,12 +133,16 @@ void udp_tunnel_notify_del_rx_port(struct socket *sock, unsigned short type);
static inline void udp_tunnel_get_rx_info(struct net_device *dev)
{
ASSERT_RTNL();
+ if (!(dev->features & NETIF_F_RX_UDP_TUNNEL_PORT))
+ return;
call_netdevice_notifiers(NETDEV_UDP_TUNNEL_PUSH_INFO, dev);
}
static inline void udp_tunnel_drop_rx_info(struct net_device *dev)
{
ASSERT_RTNL();
+ if (!(dev->features & NETIF_F_RX_UDP_TUNNEL_PORT))
+ return;
call_netdevice_notifiers(NETDEV_UDP_TUNNEL_DROP_INFO, dev);
}
@@ -143,14 +152,12 @@ void udp_tunnel_xmit_skb(struct rtable *rt, struct sock *sk, struct sk_buff *skb
__be16 df, __be16 src_port, __be16 dst_port,
bool xnet, bool nocheck);
-#if IS_ENABLED(CONFIG_IPV6)
int udp_tunnel6_xmit_skb(struct dst_entry *dst, struct sock *sk,
struct sk_buff *skb,
struct net_device *dev, struct in6_addr *saddr,
struct in6_addr *daddr,
__u8 prio, __u8 ttl, __be32 label,
__be16 src_port, __be16 dst_port, bool nocheck);
-#endif
void udp_tunnel_sock_release(struct socket *sock);
@@ -178,9 +185,198 @@ static inline void udp_tunnel_encap_enable(struct socket *sock)
#if IS_ENABLED(CONFIG_IPV6)
if (sock->sk->sk_family == PF_INET6)
ipv6_stub->udpv6_encap_enable();
- else
#endif
- udp_encap_enable();
+ udp_encap_enable();
}
+#define UDP_TUNNEL_NIC_MAX_TABLES 4
+
+enum udp_tunnel_nic_info_flags {
+ /* Device callbacks may sleep */
+ UDP_TUNNEL_NIC_INFO_MAY_SLEEP = BIT(0),
+ /* Device only supports offloads when it's open, all ports
+ * will be removed before close and re-added after open.
+ */
+ UDP_TUNNEL_NIC_INFO_OPEN_ONLY = BIT(1),
+ /* Device supports only IPv4 tunnels */
+ UDP_TUNNEL_NIC_INFO_IPV4_ONLY = BIT(2),
+ /* Device has hard-coded the IANA VXLAN port (4789) as VXLAN.
+ * This port must not be counted towards n_entries of any table.
+ * Driver will not receive any callback associated with port 4789.
+ */
+ UDP_TUNNEL_NIC_INFO_STATIC_IANA_VXLAN = BIT(3),
+};
+
+struct udp_tunnel_nic;
+
+#define UDP_TUNNEL_NIC_MAX_SHARING_DEVICES (U16_MAX / 2)
+
+struct udp_tunnel_nic_shared {
+ struct udp_tunnel_nic *udp_tunnel_nic_info;
+
+ struct list_head devices;
+};
+
+struct udp_tunnel_nic_shared_node {
+ struct net_device *dev;
+ struct list_head list;
+};
+
+/**
+ * struct udp_tunnel_nic_info - driver UDP tunnel offload information
+ * @set_port: callback for adding a new port
+ * @unset_port: callback for removing a port
+ * @sync_table: callback for syncing the entire port table at once
+ * @shared: reference to device global state (optional)
+ * @flags: device flags from enum udp_tunnel_nic_info_flags
+ * @tables: UDP port tables this device has
+ * @tables.n_entries: number of entries in this table
+ * @tables.tunnel_types: types of tunnels this table accepts
+ *
+ * Drivers are expected to provide either @set_port and @unset_port callbacks
+ * or the @sync_table callback. Callbacks are invoked with rtnl lock held.
+ *
+ * Devices which (misguidedly) share the UDP tunnel port table across multiple
+ * netdevs should allocate an instance of struct udp_tunnel_nic_shared and
+ * point @shared at it.
+ * There must never be more than %UDP_TUNNEL_NIC_MAX_SHARING_DEVICES devices
+ * sharing a table.
+ *
+ * Known limitations:
+ * - UDP tunnel port notifications are fundamentally best-effort -
+ * it is likely the driver will both see skbs which use a UDP tunnel port,
+ * while not being a tunneled skb, and tunnel skbs from other ports -
+ * drivers should only use these ports for non-critical RX-side offloads,
+ * e.g. the checksum offload;
+ * - none of the devices care about the socket family at present, so we don't
+ * track it. Please extend this code if you care.
+ */
+struct udp_tunnel_nic_info {
+ /* one-by-one */
+ int (*set_port)(struct net_device *dev,
+ unsigned int table, unsigned int entry,
+ struct udp_tunnel_info *ti);
+ int (*unset_port)(struct net_device *dev,
+ unsigned int table, unsigned int entry,
+ struct udp_tunnel_info *ti);
+
+ /* all at once */
+ int (*sync_table)(struct net_device *dev, unsigned int table);
+
+ struct udp_tunnel_nic_shared *shared;
+
+ unsigned int flags;
+
+ struct udp_tunnel_nic_table_info {
+ unsigned int n_entries;
+ unsigned int tunnel_types;
+ } tables[UDP_TUNNEL_NIC_MAX_TABLES];
+};
+
+/* UDP tunnel module dependencies
+ *
+ * Tunnel drivers are expected to have a hard dependency on the udp_tunnel
+ * module. NIC drivers are not, they just attach their
+ * struct udp_tunnel_nic_info to the netdev and wait for callbacks to come.
+ * Loading a tunnel driver will cause the udp_tunnel module to be loaded
+ * and only then will all the required state structures be allocated.
+ * Since we want a weak dependency from the drivers and the core to udp_tunnel
+ * we call things through the following stubs.
+ */
+struct udp_tunnel_nic_ops {
+ void (*get_port)(struct net_device *dev, unsigned int table,
+ unsigned int idx, struct udp_tunnel_info *ti);
+ void (*set_port_priv)(struct net_device *dev, unsigned int table,
+ unsigned int idx, u8 priv);
+ void (*add_port)(struct net_device *dev, struct udp_tunnel_info *ti);
+ void (*del_port)(struct net_device *dev, struct udp_tunnel_info *ti);
+ void (*reset_ntf)(struct net_device *dev);
+
+ size_t (*dump_size)(struct net_device *dev, unsigned int table);
+ int (*dump_write)(struct net_device *dev, unsigned int table,
+ struct sk_buff *skb);
+};
+
+#ifdef CONFIG_INET
+extern const struct udp_tunnel_nic_ops *udp_tunnel_nic_ops;
+#else
+#define udp_tunnel_nic_ops ((struct udp_tunnel_nic_ops *)NULL)
+#endif
+
+static inline void
+udp_tunnel_nic_get_port(struct net_device *dev, unsigned int table,
+ unsigned int idx, struct udp_tunnel_info *ti)
+{
+ /* This helper is used from .sync_table, we indicate empty entries
+ * by zero'ed @ti. Drivers which need to know the details of a port
+ * when it gets deleted should use the .set_port / .unset_port
+ * callbacks.
+ * Zero out here, otherwise !CONFIG_INET causes uninitilized warnings.
+ */
+ memset(ti, 0, sizeof(*ti));
+
+ if (udp_tunnel_nic_ops)
+ udp_tunnel_nic_ops->get_port(dev, table, idx, ti);
+}
+
+static inline void
+udp_tunnel_nic_set_port_priv(struct net_device *dev, unsigned int table,
+ unsigned int idx, u8 priv)
+{
+ if (udp_tunnel_nic_ops)
+ udp_tunnel_nic_ops->set_port_priv(dev, table, idx, priv);
+}
+
+static inline void
+udp_tunnel_nic_add_port(struct net_device *dev, struct udp_tunnel_info *ti)
+{
+ if (!(dev->features & NETIF_F_RX_UDP_TUNNEL_PORT))
+ return;
+ if (udp_tunnel_nic_ops)
+ udp_tunnel_nic_ops->add_port(dev, ti);
+}
+
+static inline void
+udp_tunnel_nic_del_port(struct net_device *dev, struct udp_tunnel_info *ti)
+{
+ if (!(dev->features & NETIF_F_RX_UDP_TUNNEL_PORT))
+ return;
+ if (udp_tunnel_nic_ops)
+ udp_tunnel_nic_ops->del_port(dev, ti);
+}
+
+/**
+ * udp_tunnel_nic_reset_ntf() - device-originating reset notification
+ * @dev: network interface device structure
+ *
+ * Called by the driver to inform the core that the entire UDP tunnel port
+ * state has been lost, usually due to device reset. Core will assume device
+ * forgot all the ports and issue .set_port and .sync_table callbacks as
+ * necessary.
+ *
+ * This function must be called with rtnl lock held, and will issue all
+ * the callbacks before returning.
+ */
+static inline void udp_tunnel_nic_reset_ntf(struct net_device *dev)
+{
+ if (udp_tunnel_nic_ops)
+ udp_tunnel_nic_ops->reset_ntf(dev);
+}
+
+static inline size_t
+udp_tunnel_nic_dump_size(struct net_device *dev, unsigned int table)
+{
+ if (!udp_tunnel_nic_ops)
+ return 0;
+ return udp_tunnel_nic_ops->dump_size(dev, table);
+}
+
+static inline int
+udp_tunnel_nic_dump_write(struct net_device *dev, unsigned int table,
+ struct sk_buff *skb)
+{
+ if (!udp_tunnel_nic_ops)
+ return 0;
+ return udp_tunnel_nic_ops->dump_write(dev, table, skb);
+}
#endif
diff --git a/include/net/udplite.h b/include/net/udplite.h
index 9185e45b997f..299c14ce2bb9 100644
--- a/include/net/udplite.h
+++ b/include/net/udplite.h
@@ -6,6 +6,7 @@
#define _UDPLITE_H
#include <net/ip6_checksum.h>
+#include <net/udp.h>
/* UDP-Lite socket options */
#define UDPLITE_SEND_CSCOV 10 /* sender partial coverage (as sent) */
@@ -24,14 +25,6 @@ static __inline__ int udplite_getfrag(void *from, char *to, int offset,
return copy_from_iter_full(to, len, &msg->msg_iter) ? 0 : -EFAULT;
}
-/* Designate sk as UDP-Lite socket */
-static inline int udplite_sk_init(struct sock *sk)
-{
- udp_init_sock(sk);
- udp_sk(sk)->pcflag = UDPLITE_BIT;
- return 0;
-}
-
/*
* Checksumming routines
*/
@@ -70,49 +63,6 @@ static inline int udplite_checksum_init(struct sk_buff *skb, struct udphdr *uh)
return 0;
}
-/* Slow-path computation of checksum. Socket is locked. */
-static inline __wsum udplite_csum_outgoing(struct sock *sk, struct sk_buff *skb)
-{
- const struct udp_sock *up = udp_sk(skb->sk);
- int cscov = up->len;
- __wsum csum = 0;
-
- if (up->pcflag & UDPLITE_SEND_CC) {
- /*
- * Sender has set `partial coverage' option on UDP-Lite socket.
- * The special case "up->pcslen == 0" signifies full coverage.
- */
- if (up->pcslen < up->len) {
- if (0 < up->pcslen)
- cscov = up->pcslen;
- udp_hdr(skb)->len = htons(up->pcslen);
- }
- /*
- * NOTE: Causes for the error case `up->pcslen > up->len':
- * (i) Application error (will not be penalized).
- * (ii) Payload too big for send buffer: data is split
- * into several packets, each with its own header.
- * In this case (e.g. last segment), coverage may
- * exceed packet length.
- * Since packets with coverage length > packet length are
- * illegal, we fall back to the defaults here.
- */
- }
-
- skb->ip_summed = CHECKSUM_NONE; /* no HW support for checksumming */
-
- skb_queue_walk(&sk->sk_write_queue, skb) {
- const int off = skb_transport_offset(skb);
- const int len = skb->len - off;
-
- csum = skb_checksum(skb, off, (cscov > len)? len : cscov, csum);
-
- if ((cscov -= len) <= 0)
- break;
- }
- return csum;
-}
-
/* Fast-path computation of checksum. Socket may not be locked. */
static inline __wsum udplite_csum(struct sk_buff *skb)
{
diff --git a/include/net/vxlan.h b/include/net/vxlan.h
index 373aadcfea21..bca5b01af247 100644
--- a/include/net/vxlan.h
+++ b/include/net/vxlan.h
@@ -7,8 +7,10 @@
#include <net/dst_metadata.h>
#include <net/rtnetlink.h>
#include <net/switchdev.h>
+#include <net/nexthop.h>
#define IANA_VXLAN_UDP_PORT 4789
+#define IANA_VXLAN_GPE_UDP_PORT 4790
/* VXLAN protocol (RFC 7348) header:
* +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
@@ -120,6 +122,9 @@ struct vxlanhdr_gbp {
#define VXLAN_GBP_POLICY_APPLIED (BIT(3) << 16)
#define VXLAN_GBP_ID_MASK (0xFFFF)
+#define VXLAN_GBP_MASK (VXLAN_GBP_DONT_LEARN | VXLAN_GBP_POLICY_APPLIED | \
+ VXLAN_GBP_ID_MASK)
+
/*
* VXLAN Generic Protocol Extension (VXLAN_F_GPE):
* +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
@@ -222,11 +227,56 @@ struct vxlan_config {
enum ifla_vxlan_df df;
};
+enum {
+ VXLAN_VNI_STATS_RX,
+ VXLAN_VNI_STATS_RX_DROPS,
+ VXLAN_VNI_STATS_RX_ERRORS,
+ VXLAN_VNI_STATS_TX,
+ VXLAN_VNI_STATS_TX_DROPS,
+ VXLAN_VNI_STATS_TX_ERRORS,
+};
+
+struct vxlan_vni_stats {
+ u64 rx_packets;
+ u64 rx_bytes;
+ u64 rx_drops;
+ u64 rx_errors;
+ u64 tx_packets;
+ u64 tx_bytes;
+ u64 tx_drops;
+ u64 tx_errors;
+};
+
+struct vxlan_vni_stats_pcpu {
+ struct vxlan_vni_stats stats;
+ struct u64_stats_sync syncp;
+};
+
struct vxlan_dev_node {
struct hlist_node hlist;
struct vxlan_dev *vxlan;
};
+struct vxlan_vni_node {
+ struct rhash_head vnode;
+ struct vxlan_dev_node hlist4; /* vni hash table for IPv4 socket */
+#if IS_ENABLED(CONFIG_IPV6)
+ struct vxlan_dev_node hlist6; /* vni hash table for IPv6 socket */
+#endif
+ struct list_head vlist;
+ __be32 vni;
+ union vxlan_addr remote_ip; /* default remote ip for this vni */
+ struct vxlan_vni_stats_pcpu __percpu *stats;
+
+ struct rcu_head rcu;
+};
+
+struct vxlan_vni_group {
+ struct rhashtable vni_hash;
+ struct list_head vni_list;
+ u32 num_vnis;
+};
+
/* Pseudo network device */
struct vxlan_dev {
struct vxlan_dev_node hlist4; /* vni hash table for IPv4 socket */
@@ -249,6 +299,8 @@ struct vxlan_dev {
struct vxlan_config cfg;
+ struct vxlan_vni_group __rcu *vnigrp;
+
struct hlist_head fdb_head[FDB_HASH_SIZE];
};
@@ -269,6 +321,7 @@ struct vxlan_dev {
#define VXLAN_F_GPE 0x4000
#define VXLAN_F_IPV6_LINKLOCAL 0x8000
#define VXLAN_F_TTL_INHERIT 0x10000
+#define VXLAN_F_VNIFILTER 0x20000
/* Flags that are used in the receive path. These flags must match in
* order for a socket to be shareable
@@ -278,7 +331,8 @@ struct vxlan_dev {
VXLAN_F_UDP_ZERO_CSUM6_RX | \
VXLAN_F_REMCSUM_RX | \
VXLAN_F_REMCSUM_NOPARTIAL | \
- VXLAN_F_COLLECT_METADATA)
+ VXLAN_F_COLLECT_METADATA | \
+ VXLAN_F_VNIFILTER)
/* Flags that can be set together with VXLAN_F_GPE. */
#define VXLAN_F_ALLOWED_GPE (VXLAN_F_GPE | \
@@ -287,7 +341,8 @@ struct vxlan_dev {
VXLAN_F_UDP_ZERO_CSUM_TX | \
VXLAN_F_UDP_ZERO_CSUM6_TX | \
VXLAN_F_UDP_ZERO_CSUM6_RX | \
- VXLAN_F_COLLECT_METADATA)
+ VXLAN_F_COLLECT_METADATA | \
+ VXLAN_F_VNIFILTER)
struct net_device *vxlan_dev_create(struct net *net, const char *name,
u8 name_assign_type, struct vxlan_config *conf);
@@ -487,4 +542,28 @@ static inline void vxlan_flag_attr_error(int attrtype,
#undef VXLAN_FLAG
}
+static inline bool vxlan_fdb_nh_path_select(struct nexthop *nh,
+ int hash,
+ struct vxlan_rdst *rdst)
+{
+ struct fib_nh_common *nhc;
+
+ nhc = nexthop_path_fdb_result(nh, hash);
+ if (unlikely(!nhc))
+ return false;
+
+ switch (nhc->nhc_gw_family) {
+ case AF_INET:
+ rdst->remote_ip.sin.sin_addr.s_addr = nhc->nhc_gw.ipv4;
+ rdst->remote_ip.sa.sa_family = AF_INET;
+ break;
+ case AF_INET6:
+ rdst->remote_ip.sin6.sin6_addr = nhc->nhc_gw.ipv6;
+ rdst->remote_ip.sa.sa_family = AF_INET6;
+ break;
+ }
+
+ return true;
+}
+
#endif
diff --git a/include/net/wimax.h b/include/net/wimax.h
deleted file mode 100644
index 24ba7e89c26c..000000000000
--- a/include/net/wimax.h
+++ /dev/null
@@ -1,503 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0-only */
-/*
- * Linux WiMAX
- * Kernel space API for accessing WiMAX devices
- *
- * Copyright (C) 2007-2008 Intel Corporation <linux-wimax@intel.com>
- * Inaky Perez-Gonzalez <inaky.perez-gonzalez@intel.com>
- *
- * The WiMAX stack provides an API for controlling and managing the
- * system's WiMAX devices. This API affects the control plane; the
- * data plane is accessed via the network stack (netdev).
- *
- * Parts of the WiMAX stack API and notifications are exported to
- * user space via Generic Netlink. In user space, libwimax (part of
- * the wimax-tools package) provides a shim layer for accessing those
- * calls.
- *
- * The API is standarized for all WiMAX devices and different drivers
- * implement the backend support for it. However, device-specific
- * messaging pipes are provided that can be used to issue commands and
- * receive notifications in free form.
- *
- * Currently the messaging pipes are the only means of control as it
- * is not known (due to the lack of more devices in the market) what
- * will be a good abstraction layer. Expect this to change as more
- * devices show in the market. This API is designed to be growable in
- * order to address this problem.
- *
- * USAGE
- *
- * Embed a `struct wimax_dev` at the beginning of the the device's
- * private structure, initialize and register it. For details, see
- * `struct wimax_dev`s documentation.
- *
- * Once this is done, wimax-tools's libwimaxll can be used to
- * communicate with the driver from user space. You user space
- * application does not have to forcibily use libwimaxll and can talk
- * the generic netlink protocol directly if desired.
- *
- * Remember this is a very low level API that will to provide all of
- * WiMAX features. Other daemons and services running in user space
- * are the expected clients of it. They offer a higher level API that
- * applications should use (an example of this is the Intel's WiMAX
- * Network Service for the i2400m).
- *
- * DESIGN
- *
- * Although not set on final stone, this very basic interface is
- * mostly completed. Remember this is meant to grow as new common
- * operations are decided upon. New operations will be added to the
- * interface, intent being on keeping backwards compatibility as much
- * as possible.
- *
- * This layer implements a set of calls to control a WiMAX device,
- * exposing a frontend to the rest of the kernel and user space (via
- * generic netlink) and a backend implementation in the driver through
- * function pointers.
- *
- * WiMAX devices have a state, and a kernel-only API allows the
- * drivers to manipulate that state. State transitions are atomic, and
- * only some of them are allowed (see `enum wimax_st`).
- *
- * Most API calls will set the state automatically; in most cases
- * drivers have to only report state changes due to external
- * conditions.
- *
- * All API operations are 'atomic', serialized through a mutex in the
- * `struct wimax_dev`.
- *
- * EXPORTING TO USER SPACE THROUGH GENERIC NETLINK
- *
- * The API is exported to user space using generic netlink (other
- * methods can be added as needed).
- *
- * There is a Generic Netlink Family named "WiMAX", where interfaces
- * supporting the WiMAX interface receive commands and broadcast their
- * signals over a multicast group named "msg".
- *
- * Mapping to the source/destination interface is done by an interface
- * index attribute.
- *
- * For user-to-kernel traffic (commands) we use a function call
- * marshalling mechanism, where a message X with attributes A, B, C
- * sent from user space to kernel space means executing the WiMAX API
- * call wimax_X(A, B, C), sending the results back as a message.
- *
- * Kernel-to-user (notifications or signals) communication is sent
- * over multicast groups. This allows to have multiple applications
- * monitoring them.
- *
- * Each command/signal gets assigned it's own attribute policy. This
- * way the validator will verify that all the attributes in there are
- * only the ones that should be for each command/signal. Thing of an
- * attribute mapping to a type+argumentname for each command/signal.
- *
- * If we had a single policy for *all* commands/signals, after running
- * the validator we'd have to check "does this attribute belong in
- * here"? for each one. It can be done manually, but it's just easier
- * to have the validator do that job with multiple policies. As well,
- * it makes it easier to later expand each command/signal signature
- * without affecting others and keeping the namespace more or less
- * sane. Not that it is too complicated, but it makes it even easier.
- *
- * No state information is maintained in the kernel for each user
- * space connection (the connection is stateless).
- *
- * TESTING FOR THE INTERFACE AND VERSIONING
- *
- * If network interface X is a WiMAX device, there will be a Generic
- * Netlink family named "WiMAX X" and the device will present a
- * "wimax" directory in it's network sysfs directory
- * (/sys/class/net/DEVICE/wimax) [used by HAL].
- *
- * The inexistence of any of these means the device does not support
- * this WiMAX API.
- *
- * By querying the generic netlink controller, versioning information
- * and the multicast groups available can be found. Applications using
- * the interface can either rely on that or use the generic netlink
- * controller to figure out which generic netlink commands/signals are
- * supported.
- *
- * NOTE: this versioning is a last resort to avoid hard
- * incompatibilities. It is the intention of the design of this
- * stack not to introduce backward incompatible changes.
- *
- * The version code has to fit in one byte (restrictions imposed by
- * generic netlink); we use `version / 10` for the major version and
- * `version % 10` for the minor. This gives 9 minors for each major
- * and 25 majors.
- *
- * The version change protocol is as follow:
- *
- * - Major versions: needs to be increased if an existing message/API
- * call is changed or removed. Doesn't need to be changed if a new
- * message is added.
- *
- * - Minor version: needs to be increased if new messages/API calls are
- * being added or some other consideration that doesn't impact the
- * user-kernel interface too much (like some kind of bug fix) and
- * that is kind of left up in the air to common sense.
- *
- * User space code should not try to work if the major version it was
- * compiled for differs from what the kernel offers. As well, if the
- * minor version of the kernel interface is lower than the one user
- * space is expecting (the one it was compiled for), the kernel
- * might be missing API calls; user space shall be ready to handle
- * said condition. Use the generic netlink controller operations to
- * find which ones are supported and which not.
- *
- * libwimaxll:wimaxll_open() takes care of checking versions.
- *
- * THE OPERATIONS:
- *
- * Each operation is defined in its on file (drivers/net/wimax/op-*.c)
- * for clarity. The parts needed for an operation are:
- *
- * - a function pointer in `struct wimax_dev`: optional, as the
- * operation might be implemented by the stack and not by the
- * driver.
- *
- * All function pointers are named wimax_dev->op_*(), and drivers
- * must implement them except where noted otherwise.
- *
- * - When exported to user space, a `struct nla_policy` to define the
- * attributes of the generic netlink command and a `struct genl_ops`
- * to define the operation.
- *
- * All the declarations for the operation codes (WIMAX_GNL_OP_<NAME>)
- * and generic netlink attributes (WIMAX_GNL_<NAME>_*) are declared in
- * include/linux/wimax.h; this file is intended to be cloned by user
- * space to gain access to those declarations.
- *
- * A few caveats to remember:
- *
- * - Need to define attribute numbers starting in 1; otherwise it
- * fails.
- *
- * - the `struct genl_family` requires a maximum attribute id; when
- * defining the `struct nla_policy` for each message, it has to have
- * an array size of WIMAX_GNL_ATTR_MAX+1.
- *
- * The op_*() function pointers will not be called if the wimax_dev is
- * in a state <= %WIMAX_ST_UNINITIALIZED. The exception is:
- *
- * - op_reset: can be called at any time after wimax_dev_add() has
- * been called.
- *
- * THE PIPE INTERFACE:
- *
- * This interface is kept intentionally simple. The driver can send
- * and receive free-form messages to/from user space through a
- * pipe. See drivers/net/wimax/op-msg.c for details.
- *
- * The kernel-to-user messages are sent with
- * wimax_msg(). user-to-kernel messages are delivered via
- * wimax_dev->op_msg_from_user().
- *
- * RFKILL:
- *
- * RFKILL support is built into the wimax_dev layer; the driver just
- * needs to call wimax_report_rfkill_{hw,sw}() to inform of changes in
- * the hardware or software RF kill switches. When the stack wants to
- * turn the radio off, it will call wimax_dev->op_rfkill_sw_toggle(),
- * which the driver implements.
- *
- * User space can set the software RF Kill switch by calling
- * wimax_rfkill().
- *
- * The code for now only supports devices that don't require polling;
- * If the device needs to be polled, create a self-rearming delayed
- * work struct for polling or look into adding polled support to the
- * WiMAX stack.
- *
- * When initializing the hardware (_probe), after calling
- * wimax_dev_add(), query the device for it's RF Kill switches status
- * and feed it back to the WiMAX stack using
- * wimax_report_rfkill_{hw,sw}(). If any switch is missing, always
- * report it as ON.
- *
- * NOTE: the wimax stack uses an inverted terminology to that of the
- * RFKILL subsystem:
- *
- * - ON: radio is ON, RFKILL is DISABLED or OFF.
- * - OFF: radio is OFF, RFKILL is ENABLED or ON.
- *
- * MISCELLANEOUS OPS:
- *
- * wimax_reset() can be used to reset the device to power on state; by
- * default it issues a warm reset that maintains the same device
- * node. If that is not possible, it falls back to a cold reset
- * (device reconnect). The driver implements the backend to this
- * through wimax_dev->op_reset().
- */
-
-#ifndef __NET__WIMAX_H__
-#define __NET__WIMAX_H__
-
-#include <linux/wimax.h>
-#include <net/genetlink.h>
-#include <linux/netdevice.h>
-
-struct net_device;
-struct genl_info;
-struct wimax_dev;
-
-/**
- * struct wimax_dev - Generic WiMAX device
- *
- * @net_dev: [fill] Pointer to the &struct net_device this WiMAX
- * device implements.
- *
- * @op_msg_from_user: [fill] Driver-specific operation to
- * handle a raw message from user space to the driver. The
- * driver can send messages to user space using with
- * wimax_msg_to_user().
- *
- * @op_rfkill_sw_toggle: [fill] Driver-specific operation to act on
- * userspace (or any other agent) requesting the WiMAX device to
- * change the RF Kill software switch (WIMAX_RF_ON or
- * WIMAX_RF_OFF).
- * If such hardware support is not present, it is assumed the
- * radio cannot be switched off and it is always on (and the stack
- * will error out when trying to switch it off). In such case,
- * this function pointer can be left as NULL.
- *
- * @op_reset: [fill] Driver specific operation to reset the
- * device.
- * This operation should always attempt first a warm reset that
- * does not disconnect the device from the bus and return 0.
- * If that fails, it should resort to some sort of cold or bus
- * reset (even if it implies a bus disconnection and device
- * disappearance). In that case, -ENODEV should be returned to
- * indicate the device is gone.
- * This operation has to be synchronous, and return only when the
- * reset is complete. In case of having had to resort to bus/cold
- * reset implying a device disconnection, the call is allowed to
- * return immediately.
- * NOTE: wimax_dev->mutex is NOT locked when this op is being
- * called; however, wimax_dev->mutex_reset IS locked to ensure
- * serialization of calls to wimax_reset().
- * See wimax_reset()'s documentation.
- *
- * @name: [fill] A way to identify this device. We need to register a
- * name with many subsystems (rfkill, workqueue creation, etc).
- * We can't use the network device name as that
- * might change and in some instances we don't know it yet (until
- * we don't call register_netdev()). So we generate an unique one
- * using the driver name and device bus id, place it here and use
- * it across the board. Recommended naming:
- * DRIVERNAME-BUSNAME:BUSID (dev->bus->name, dev->bus_id).
- *
- * @id_table_node: [private] link to the list of wimax devices kept by
- * id-table.c. Protected by it's own spinlock.
- *
- * @mutex: [private] Serializes all concurrent access and execution of
- * operations.
- *
- * @mutex_reset: [private] Serializes reset operations. Needs to be a
- * different mutex because as part of the reset operation, the
- * driver has to call back into the stack to do things such as
- * state change, that require wimax_dev->mutex.
- *
- * @state: [private] Current state of the WiMAX device.
- *
- * @rfkill: [private] integration into the RF-Kill infrastructure.
- *
- * @rf_sw: [private] State of the software radio switch (OFF/ON)
- *
- * @rf_hw: [private] State of the hardware radio switch (OFF/ON)
- *
- * @debugfs_dentry: [private] Used to hook up a debugfs entry. This
- * shows up in the debugfs root as wimax\:DEVICENAME.
- *
- * Description:
- * This structure defines a common interface to access all WiMAX
- * devices from different vendors and provides a common API as well as
- * a free-form device-specific messaging channel.
- *
- * Usage:
- * 1. Embed a &struct wimax_dev at *the beginning* the network
- * device structure so that netdev_priv() points to it.
- *
- * 2. memset() it to zero
- *
- * 3. Initialize with wimax_dev_init(). This will leave the WiMAX
- * device in the %__WIMAX_ST_NULL state.
- *
- * 4. Fill all the fields marked with [fill]; once called
- * wimax_dev_add(), those fields CANNOT be modified.
- *
- * 5. Call wimax_dev_add() *after* registering the network
- * device. This will leave the WiMAX device in the %WIMAX_ST_DOWN
- * state.
- * Protect the driver's net_device->open() against succeeding if
- * the wimax device state is lower than %WIMAX_ST_DOWN.
- *
- * 6. Select when the device is going to be turned on/initialized;
- * for example, it could be initialized on 'ifconfig up' (when the
- * netdev op 'open()' is called on the driver).
- *
- * When the device is initialized (at `ifconfig up` time, or right
- * after calling wimax_dev_add() from _probe(), make sure the
- * following steps are taken
- *
- * a. Move the device to %WIMAX_ST_UNINITIALIZED. This is needed so
- * some API calls that shouldn't work until the device is ready
- * can be blocked.
- *
- * b. Initialize the device. Make sure to turn the SW radio switch
- * off and move the device to state %WIMAX_ST_RADIO_OFF when
- * done. When just initialized, a device should be left in RADIO
- * OFF state until user space devices to turn it on.
- *
- * c. Query the device for the state of the hardware rfkill switch
- * and call wimax_rfkill_report_hw() and wimax_rfkill_report_sw()
- * as needed. See below.
- *
- * wimax_dev_rm() undoes before unregistering the network device. Once
- * wimax_dev_add() is called, the driver can get called on the
- * wimax_dev->op_* function pointers
- *
- * CONCURRENCY:
- *
- * The stack provides a mutex for each device that will disallow API
- * calls happening concurrently; thus, op calls into the driver
- * through the wimax_dev->op*() function pointers will always be
- * serialized and *never* concurrent.
- *
- * For locking, take wimax_dev->mutex is taken; (most) operations in
- * the API have to check for wimax_dev_is_ready() to return 0 before
- * continuing (this is done internally).
- *
- * REFERENCE COUNTING:
- *
- * The WiMAX device is reference counted by the associated network
- * device. The only operation that can be used to reference the device
- * is wimax_dev_get_by_genl_info(), and the reference it acquires has
- * to be released with dev_put(wimax_dev->net_dev).
- *
- * RFKILL:
- *
- * At startup, both HW and SW radio switchess are assumed to be off.
- *
- * At initialization time [after calling wimax_dev_add()], have the
- * driver query the device for the status of the software and hardware
- * RF kill switches and call wimax_report_rfkill_hw() and
- * wimax_rfkill_report_sw() to indicate their state. If any is
- * missing, just call it to indicate it is ON (radio always on).
- *
- * Whenever the driver detects a change in the state of the RF kill
- * switches, it should call wimax_report_rfkill_hw() or
- * wimax_report_rfkill_sw() to report it to the stack.
- */
-struct wimax_dev {
- struct net_device *net_dev;
- struct list_head id_table_node;
- struct mutex mutex; /* Protects all members and API calls */
- struct mutex mutex_reset;
- enum wimax_st state;
-
- int (*op_msg_from_user)(struct wimax_dev *wimax_dev,
- const char *,
- const void *, size_t,
- const struct genl_info *info);
- int (*op_rfkill_sw_toggle)(struct wimax_dev *wimax_dev,
- enum wimax_rf_state);
- int (*op_reset)(struct wimax_dev *wimax_dev);
-
- struct rfkill *rfkill;
- unsigned int rf_hw;
- unsigned int rf_sw;
- char name[32];
-
- struct dentry *debugfs_dentry;
-};
-
-
-
-/*
- * WiMAX stack public API for device drivers
- * -----------------------------------------
- *
- * These functions are not exported to user space.
- */
-void wimax_dev_init(struct wimax_dev *);
-int wimax_dev_add(struct wimax_dev *, struct net_device *);
-void wimax_dev_rm(struct wimax_dev *);
-
-static inline
-struct wimax_dev *net_dev_to_wimax(struct net_device *net_dev)
-{
- return netdev_priv(net_dev);
-}
-
-static inline
-struct device *wimax_dev_to_dev(struct wimax_dev *wimax_dev)
-{
- return wimax_dev->net_dev->dev.parent;
-}
-
-void wimax_state_change(struct wimax_dev *, enum wimax_st);
-enum wimax_st wimax_state_get(struct wimax_dev *);
-
-/*
- * Radio Switch state reporting.
- *
- * enum wimax_rf_state is declared in linux/wimax.h so the exports
- * to user space can use it.
- */
-void wimax_report_rfkill_hw(struct wimax_dev *, enum wimax_rf_state);
-void wimax_report_rfkill_sw(struct wimax_dev *, enum wimax_rf_state);
-
-
-/*
- * Free-form messaging to/from user space
- *
- * Sending a message:
- *
- * wimax_msg(wimax_dev, pipe_name, buf, buf_size, GFP_KERNEL);
- *
- * Broken up:
- *
- * skb = wimax_msg_alloc(wimax_dev, pipe_name, buf_size, GFP_KERNEL);
- * ...fill up skb...
- * wimax_msg_send(wimax_dev, pipe_name, skb);
- *
- * Be sure not to modify skb->data in the middle (ie: don't use
- * skb_push()/skb_pull()/skb_reserve() on the skb).
- *
- * "pipe_name" is any string, that can be interpreted as the name of
- * the pipe or recipient; the interpretation of it is driver
- * specific, so the recipient can multiplex it as wished. It can be
- * NULL, it won't be used - an example is using a "diagnostics" tag to
- * send diagnostics information that a device-specific diagnostics
- * tool would be interested in.
- */
-struct sk_buff *wimax_msg_alloc(struct wimax_dev *, const char *, const void *,
- size_t, gfp_t);
-int wimax_msg_send(struct wimax_dev *, struct sk_buff *);
-int wimax_msg(struct wimax_dev *, const char *, const void *, size_t, gfp_t);
-
-const void *wimax_msg_data_len(struct sk_buff *, size_t *);
-const void *wimax_msg_data(struct sk_buff *);
-ssize_t wimax_msg_len(struct sk_buff *);
-
-
-/*
- * WiMAX stack user space API
- * --------------------------
- *
- * This API is what gets exported to user space for general
- * operations. As well, they can be called from within the kernel,
- * (with a properly referenced `struct wimax_dev`).
- *
- * Properly referenced means: the 'struct net_device' that embeds the
- * device's control structure and (as such) the 'struct wimax_dev' is
- * referenced by the caller.
- */
-int wimax_rfkill(struct wimax_dev *, enum wimax_rf_state);
-int wimax_reset(struct wimax_dev *);
-
-#endif /* #ifndef __NET__WIMAX_H__ */
diff --git a/include/net/xdp.h b/include/net/xdp.h
index 40c6d3398458..55dbc68bfffc 100644
--- a/include/net/xdp.h
+++ b/include/net/xdp.h
@@ -6,6 +6,8 @@
#ifndef __LINUX_NET_XDP_H__
#define __LINUX_NET_XDP_H__
+#include <linux/skbuff.h> /* skb_shared_info */
+
/**
* DOC: XDP RX-queue information
*
@@ -13,13 +15,13 @@
* level RX-ring queues. It is information that is specific to how
* the driver have configured a given RX-ring queue.
*
- * Each xdp_buff frame received in the driver carry a (pointer)
+ * Each xdp_buff frame received in the driver carries a (pointer)
* reference to this xdp_rxq_info structure. This provides the XDP
* data-path read-access to RX-info for both kernel and bpf-side
* (limited subset).
*
* For now, direct access is only safe while running in NAPI/softirq
- * context. Contents is read-mostly and must not be updated during
+ * context. Contents are read-mostly and must not be updated during
* driver NAPI/softirq poll.
*
* The driver usage API is a register and unregister API.
@@ -28,8 +30,8 @@
* can be attached as long as it doesn't change the underlying
* RX-ring. If the RX-ring does change significantly, the NIC driver
* naturally need to stop the RX-ring before purging and reallocating
- * memory. In that process the driver MUST call unregistor (which
- * also apply for driver shutdown and unload). The register API is
+ * memory. In that process the driver MUST call unregister (which
+ * also applies for driver shutdown and unload). The register API is
* also mandatory during RX-ring setup.
*/
@@ -37,7 +39,7 @@ enum xdp_mem_type {
MEM_TYPE_PAGE_SHARED = 0, /* Split-page refcnt based model */
MEM_TYPE_PAGE_ORDER0, /* Orig XDP full page model */
MEM_TYPE_PAGE_POOL,
- MEM_TYPE_ZERO_COPY,
+ MEM_TYPE_XSK_BUFF_POOL,
MEM_TYPE_MAX,
};
@@ -52,36 +54,162 @@ struct xdp_mem_info {
struct page_pool;
-struct zero_copy_allocator {
- void (*free)(struct zero_copy_allocator *zca, unsigned long handle);
-};
-
struct xdp_rxq_info {
struct net_device *dev;
u32 queue_index;
u32 reg_state;
struct xdp_mem_info mem;
+ unsigned int napi_id;
+ u32 frag_size;
} ____cacheline_aligned; /* perf critical, avoid false-sharing */
+struct xdp_txq_info {
+ struct net_device *dev;
+};
+
+enum xdp_buff_flags {
+ XDP_FLAGS_HAS_FRAGS = BIT(0), /* non-linear xdp buff */
+ XDP_FLAGS_FRAGS_PF_MEMALLOC = BIT(1), /* xdp paged memory is under
+ * pressure
+ */
+};
+
struct xdp_buff {
void *data;
void *data_end;
void *data_meta;
void *data_hard_start;
- unsigned long handle;
struct xdp_rxq_info *rxq;
+ struct xdp_txq_info *txq;
+ u32 frame_sz; /* frame size to deduce data_hard_end/reserved tailroom*/
+ u32 flags; /* supported values defined in xdp_buff_flags */
};
+static __always_inline bool xdp_buff_has_frags(struct xdp_buff *xdp)
+{
+ return !!(xdp->flags & XDP_FLAGS_HAS_FRAGS);
+}
+
+static __always_inline void xdp_buff_set_frags_flag(struct xdp_buff *xdp)
+{
+ xdp->flags |= XDP_FLAGS_HAS_FRAGS;
+}
+
+static __always_inline void xdp_buff_clear_frags_flag(struct xdp_buff *xdp)
+{
+ xdp->flags &= ~XDP_FLAGS_HAS_FRAGS;
+}
+
+static __always_inline bool xdp_buff_is_frag_pfmemalloc(struct xdp_buff *xdp)
+{
+ return !!(xdp->flags & XDP_FLAGS_FRAGS_PF_MEMALLOC);
+}
+
+static __always_inline void xdp_buff_set_frag_pfmemalloc(struct xdp_buff *xdp)
+{
+ xdp->flags |= XDP_FLAGS_FRAGS_PF_MEMALLOC;
+}
+
+static __always_inline void
+xdp_init_buff(struct xdp_buff *xdp, u32 frame_sz, struct xdp_rxq_info *rxq)
+{
+ xdp->frame_sz = frame_sz;
+ xdp->rxq = rxq;
+ xdp->flags = 0;
+}
+
+static __always_inline void
+xdp_prepare_buff(struct xdp_buff *xdp, unsigned char *hard_start,
+ int headroom, int data_len, const bool meta_valid)
+{
+ unsigned char *data = hard_start + headroom;
+
+ xdp->data_hard_start = hard_start;
+ xdp->data = data;
+ xdp->data_end = data + data_len;
+ xdp->data_meta = meta_valid ? data : data + 1;
+}
+
+/* Reserve memory area at end-of data area.
+ *
+ * This macro reserves tailroom in the XDP buffer by limiting the
+ * XDP/BPF data access to data_hard_end. Notice same area (and size)
+ * is used for XDP_PASS, when constructing the SKB via build_skb().
+ */
+#define xdp_data_hard_end(xdp) \
+ ((xdp)->data_hard_start + (xdp)->frame_sz - \
+ SKB_DATA_ALIGN(sizeof(struct skb_shared_info)))
+
+static inline struct skb_shared_info *
+xdp_get_shared_info_from_buff(struct xdp_buff *xdp)
+{
+ return (struct skb_shared_info *)xdp_data_hard_end(xdp);
+}
+
+static __always_inline unsigned int xdp_get_buff_len(struct xdp_buff *xdp)
+{
+ unsigned int len = xdp->data_end - xdp->data;
+ struct skb_shared_info *sinfo;
+
+ if (likely(!xdp_buff_has_frags(xdp)))
+ goto out;
+
+ sinfo = xdp_get_shared_info_from_buff(xdp);
+ len += sinfo->xdp_frags_size;
+out:
+ return len;
+}
+
struct xdp_frame {
void *data;
u16 len;
u16 headroom;
- u16 metasize;
+ u32 metasize; /* uses lower 8-bits */
/* Lifetime of xdp_rxq_info is limited to NAPI/enqueue time,
* while mem info is valid on remote CPU.
*/
struct xdp_mem_info mem;
struct net_device *dev_rx; /* used by cpumap */
+ u32 frame_sz;
+ u32 flags; /* supported values defined in xdp_buff_flags */
+};
+
+static __always_inline bool xdp_frame_has_frags(struct xdp_frame *frame)
+{
+ return !!(frame->flags & XDP_FLAGS_HAS_FRAGS);
+}
+
+static __always_inline bool xdp_frame_is_frag_pfmemalloc(struct xdp_frame *frame)
+{
+ return !!(frame->flags & XDP_FLAGS_FRAGS_PF_MEMALLOC);
+}
+
+#define XDP_BULK_QUEUE_SIZE 16
+struct xdp_frame_bulk {
+ int count;
+ void *xa;
+ void *q[XDP_BULK_QUEUE_SIZE];
+};
+
+static __always_inline void xdp_frame_bulk_init(struct xdp_frame_bulk *bq)
+{
+ /* bq->count will be zero'ed when bq->xa gets updated */
+ bq->xa = NULL;
+}
+
+static inline struct skb_shared_info *
+xdp_get_shared_info_from_frame(struct xdp_frame *frame)
+{
+ void *data_hard_start = frame->data - frame->headroom - sizeof(*frame);
+
+ return (struct skb_shared_info *)(data_hard_start + frame->frame_sz -
+ SKB_DATA_ALIGN(sizeof(struct skb_shared_info)));
+}
+
+struct xdp_cpumap_stats {
+ unsigned int redirect;
+ unsigned int pass;
+ unsigned int drop;
};
/* Clear kernel pointers in xdp_frame */
@@ -91,33 +219,85 @@ static inline void xdp_scrub_frame(struct xdp_frame *frame)
frame->dev_rx = NULL;
}
+static inline void
+xdp_update_skb_shared_info(struct sk_buff *skb, u8 nr_frags,
+ unsigned int size, unsigned int truesize,
+ bool pfmemalloc)
+{
+ skb_shinfo(skb)->nr_frags = nr_frags;
+
+ skb->len += size;
+ skb->data_len += size;
+ skb->truesize += truesize;
+ skb->pfmemalloc |= pfmemalloc;
+}
+
+/* Avoids inlining WARN macro in fast-path */
+void xdp_warn(const char *msg, const char *func, const int line);
+#define XDP_WARN(msg) xdp_warn(msg, __func__, __LINE__)
+
struct xdp_frame *xdp_convert_zc_to_xdp_frame(struct xdp_buff *xdp);
+struct sk_buff *__xdp_build_skb_from_frame(struct xdp_frame *xdpf,
+ struct sk_buff *skb,
+ struct net_device *dev);
+struct sk_buff *xdp_build_skb_from_frame(struct xdp_frame *xdpf,
+ struct net_device *dev);
+int xdp_alloc_skb_bulk(void **skbs, int n_skb, gfp_t gfp);
+struct xdp_frame *xdpf_clone(struct xdp_frame *xdpf);
-/* Convert xdp_buff to xdp_frame */
static inline
-struct xdp_frame *convert_to_xdp_frame(struct xdp_buff *xdp)
+void xdp_convert_frame_to_buff(struct xdp_frame *frame, struct xdp_buff *xdp)
{
- struct xdp_frame *xdp_frame;
- int metasize;
- int headroom;
+ xdp->data_hard_start = frame->data - frame->headroom - sizeof(*frame);
+ xdp->data = frame->data;
+ xdp->data_end = frame->data + frame->len;
+ xdp->data_meta = frame->data - frame->metasize;
+ xdp->frame_sz = frame->frame_sz;
+ xdp->flags = frame->flags;
+}
- if (xdp->rxq->mem.type == MEM_TYPE_ZERO_COPY)
- return xdp_convert_zc_to_xdp_frame(xdp);
+static inline
+int xdp_update_frame_from_buff(struct xdp_buff *xdp,
+ struct xdp_frame *xdp_frame)
+{
+ int metasize, headroom;
/* Assure headroom is available for storing info */
headroom = xdp->data - xdp->data_hard_start;
metasize = xdp->data - xdp->data_meta;
metasize = metasize > 0 ? metasize : 0;
if (unlikely((headroom - metasize) < sizeof(*xdp_frame)))
- return NULL;
+ return -ENOSPC;
- /* Store info in top of packet */
- xdp_frame = xdp->data_hard_start;
+ /* Catch if driver didn't reserve tailroom for skb_shared_info */
+ if (unlikely(xdp->data_end > xdp_data_hard_end(xdp))) {
+ XDP_WARN("Driver BUG: missing reserved tailroom");
+ return -ENOSPC;
+ }
xdp_frame->data = xdp->data;
xdp_frame->len = xdp->data_end - xdp->data;
xdp_frame->headroom = headroom - sizeof(*xdp_frame);
xdp_frame->metasize = metasize;
+ xdp_frame->frame_sz = xdp->frame_sz;
+ xdp_frame->flags = xdp->flags;
+
+ return 0;
+}
+
+/* Convert xdp_buff to xdp_frame */
+static inline
+struct xdp_frame *xdp_convert_buff_to_frame(struct xdp_buff *xdp)
+{
+ struct xdp_frame *xdp_frame;
+
+ if (xdp->rxq->mem.type == MEM_TYPE_XSK_BUFF_POOL)
+ return xdp_convert_zc_to_xdp_frame(xdp);
+
+ /* Store info in top of packet */
+ xdp_frame = xdp->data_hard_start;
+ if (unlikely(xdp_update_frame_from_buff(xdp, xdp_frame) < 0))
+ return NULL;
/* rxq only valid until napi_schedule ends, convert to xdp_mem_info */
xdp_frame->mem = xdp->rxq->mem;
@@ -125,9 +305,14 @@ struct xdp_frame *convert_to_xdp_frame(struct xdp_buff *xdp)
return xdp_frame;
}
+void __xdp_return(void *data, struct xdp_mem_info *mem, bool napi_direct,
+ struct xdp_buff *xdp);
void xdp_return_frame(struct xdp_frame *xdpf);
void xdp_return_frame_rx_napi(struct xdp_frame *xdpf);
void xdp_return_buff(struct xdp_buff *xdp);
+void xdp_flush_frame_bulk(struct xdp_frame_bulk *bq);
+void xdp_return_frame_bulk(struct xdp_frame *xdpf,
+ struct xdp_frame_bulk *bq);
/* When sending xdp_frame into the network stack, then there is no
* return point callback, which is needed to release e.g. DMA-mapping
@@ -138,20 +323,60 @@ void __xdp_release_frame(void *data, struct xdp_mem_info *mem);
static inline void xdp_release_frame(struct xdp_frame *xdpf)
{
struct xdp_mem_info *mem = &xdpf->mem;
+ struct skb_shared_info *sinfo;
+ int i;
/* Curr only page_pool needs this */
- if (mem->type == MEM_TYPE_PAGE_POOL)
- __xdp_release_frame(xdpf->data, mem);
+ if (mem->type != MEM_TYPE_PAGE_POOL)
+ return;
+
+ if (likely(!xdp_frame_has_frags(xdpf)))
+ goto out;
+
+ sinfo = xdp_get_shared_info_from_frame(xdpf);
+ for (i = 0; i < sinfo->nr_frags; i++) {
+ struct page *page = skb_frag_page(&sinfo->frags[i]);
+
+ __xdp_release_frame(page_address(page), mem);
+ }
+out:
+ __xdp_release_frame(xdpf->data, mem);
+}
+
+static __always_inline unsigned int xdp_get_frame_len(struct xdp_frame *xdpf)
+{
+ struct skb_shared_info *sinfo;
+ unsigned int len = xdpf->len;
+
+ if (likely(!xdp_frame_has_frags(xdpf)))
+ goto out;
+
+ sinfo = xdp_get_shared_info_from_frame(xdpf);
+ len += sinfo->xdp_frags_size;
+out:
+ return len;
+}
+
+int __xdp_rxq_info_reg(struct xdp_rxq_info *xdp_rxq,
+ struct net_device *dev, u32 queue_index,
+ unsigned int napi_id, u32 frag_size);
+static inline int
+xdp_rxq_info_reg(struct xdp_rxq_info *xdp_rxq,
+ struct net_device *dev, u32 queue_index,
+ unsigned int napi_id)
+{
+ return __xdp_rxq_info_reg(xdp_rxq, dev, queue_index, napi_id, 0);
}
-int xdp_rxq_info_reg(struct xdp_rxq_info *xdp_rxq,
- struct net_device *dev, u32 queue_index);
void xdp_rxq_info_unreg(struct xdp_rxq_info *xdp_rxq);
void xdp_rxq_info_unused(struct xdp_rxq_info *xdp_rxq);
bool xdp_rxq_info_is_reg(struct xdp_rxq_info *xdp_rxq);
int xdp_rxq_info_reg_mem_model(struct xdp_rxq_info *xdp_rxq,
enum xdp_mem_type type, void *allocator);
void xdp_rxq_info_unreg_mem_model(struct xdp_rxq_info *xdp_rxq);
+int xdp_reg_mem_model(struct xdp_mem_info *mem,
+ enum xdp_mem_type type, void *allocator);
+void xdp_unreg_mem_model(struct xdp_mem_info *mem);
/* Drivers not supporting XDP metadata can use this helper, which
* rejects any room expansion for metadata as a result.
@@ -168,17 +393,20 @@ xdp_data_meta_unsupported(const struct xdp_buff *xdp)
return unlikely(xdp->data_meta > xdp->data);
}
+static inline bool xdp_metalen_invalid(unsigned long metalen)
+{
+ return (metalen & (sizeof(__u32) - 1)) || (metalen > 32);
+}
+
struct xdp_attachment_info {
struct bpf_prog *prog;
u32 flags;
};
struct netdev_bpf;
-int xdp_attachment_query(struct xdp_attachment_info *info,
- struct netdev_bpf *bpf);
-bool xdp_attachment_flags_ok(struct xdp_attachment_info *info,
- struct netdev_bpf *bpf);
void xdp_attachment_setup(struct xdp_attachment_info *info,
struct netdev_bpf *bpf);
+#define DEV_MAP_BULK_SIZE XDP_BULK_QUEUE_SIZE
+
#endif /* __LINUX_NET_XDP_H__ */
diff --git a/include/net/xdp_priv.h b/include/net/xdp_priv.h
index a9d5b7603b89..c9df68d5f258 100644
--- a/include/net/xdp_priv.h
+++ b/include/net/xdp_priv.h
@@ -3,6 +3,7 @@
#define __LINUX_NET_XDP_PRIV_H__
#include <linux/rhashtable.h>
+#include <net/xdp.h>
/* Private to net/core/xdp.c, but used by trace/events/xdp.h */
struct xdp_mem_allocator {
@@ -10,7 +11,6 @@ struct xdp_mem_allocator {
union {
void *allocator;
struct page_pool *page_pool;
- struct zero_copy_allocator *zc_alloc;
};
struct rhash_head node;
struct rcu_head rcu;
diff --git a/include/net/xdp_sock.h b/include/net/xdp_sock.h
index e86ec48ef627..3057e1a4a11c 100644
--- a/include/net/xdp_sock.h
+++ b/include/net/xdp_sock.h
@@ -6,6 +6,7 @@
#ifndef _LINUX_XDP_SOCK_H
#define _LINUX_XDP_SOCK_H
+#include <linux/bpf.h>
#include <linux/workqueue.h>
#include <linux/if_xdp.h>
#include <linux/mutex.h>
@@ -15,80 +16,39 @@
struct net_device;
struct xsk_queue;
-
-/* Masks for xdp_umem_page flags.
- * The low 12-bits of the addr will be 0 since this is the page address, so we
- * can use them for flags.
- */
-#define XSK_NEXT_PG_CONTIG_SHIFT 0
-#define XSK_NEXT_PG_CONTIG_MASK (1ULL << XSK_NEXT_PG_CONTIG_SHIFT)
-
-struct xdp_umem_page {
- void *addr;
- dma_addr_t dma;
-};
-
-struct xdp_umem_fq_reuse {
- u32 nentries;
- u32 length;
- u64 handles[];
-};
-
-/* Flags for the umem flags field.
- *
- * The NEED_WAKEUP flag is 1 due to the reuse of the flags field for public
- * flags. See inlude/uapi/include/linux/if_xdp.h.
- */
-#define XDP_UMEM_USES_NEED_WAKEUP (1 << 1)
+struct xdp_buff;
struct xdp_umem {
- struct xsk_queue *fq;
- struct xsk_queue *cq;
- struct xdp_umem_page *pages;
- u64 chunk_mask;
+ void *addrs;
u64 size;
u32 headroom;
- u32 chunk_size_nohr;
+ u32 chunk_size;
+ u32 chunks;
+ u32 npgs;
struct user_struct *user;
- unsigned long address;
refcount_t users;
- struct work_struct work;
- struct page **pgs;
- u32 npgs;
- u16 queue_id;
- u8 need_wakeup;
u8 flags;
- int id;
- struct net_device *dev;
- struct xdp_umem_fq_reuse *fq_reuse;
bool zc;
- spinlock_t xsk_list_lock;
- struct list_head xsk_list;
+ struct page **pgs;
+ int id;
+ struct list_head xsk_dma_list;
+ struct work_struct work;
};
-/* Nodes are linked in the struct xdp_sock map_list field, and used to
- * track which maps a certain socket reside in.
- */
-
struct xsk_map {
struct bpf_map map;
spinlock_t lock; /* Synchronize map updates */
- struct xdp_sock *xsk_map[];
-};
-
-struct xsk_map_node {
- struct list_head node;
- struct xsk_map *map;
- struct xdp_sock **map_entry;
+ struct xdp_sock __rcu *xsk_map[];
};
struct xdp_sock {
/* struct sock must be the first member of struct xdp_sock */
struct sock sk;
- struct xsk_queue *rx;
+ struct xsk_queue *rx ____cacheline_aligned_in_smp;
struct net_device *dev;
struct xdp_umem *umem;
struct list_head flush_node;
+ struct xsk_buff_pool *pool;
u16 queue_id;
bool zc;
enum {
@@ -96,277 +56,38 @@ struct xdp_sock {
XSK_BOUND,
XSK_UNBOUND,
} state;
- /* Protects multiple processes in the control path */
- struct mutex mutex;
+
struct xsk_queue *tx ____cacheline_aligned_in_smp;
- struct list_head list;
- /* Mutual exclusion of NAPI TX thread and sendmsg error paths
- * in the SKB destructor callback.
- */
- spinlock_t tx_completion_lock;
+ struct list_head tx_list;
/* Protects generic receive. */
spinlock_t rx_lock;
+
+ /* Statistics */
u64 rx_dropped;
+ u64 rx_queue_full;
+
struct list_head map_list;
/* Protects map_list */
spinlock_t map_list_lock;
+ /* Protects multiple processes in the control path */
+ struct mutex mutex;
+ struct xsk_queue *fq_tmp; /* Only as tmp storage before bind */
+ struct xsk_queue *cq_tmp; /* Only as tmp storage before bind */
};
-struct xdp_buff;
#ifdef CONFIG_XDP_SOCKETS
-int xsk_generic_rcv(struct xdp_sock *xs, struct xdp_buff *xdp);
-bool xsk_is_setup_for_bpf_map(struct xdp_sock *xs);
-/* Used from netdev driver */
-bool xsk_umem_has_addrs(struct xdp_umem *umem, u32 cnt);
-bool xsk_umem_peek_addr(struct xdp_umem *umem, u64 *addr);
-void xsk_umem_release_addr(struct xdp_umem *umem);
-void xsk_umem_complete_tx(struct xdp_umem *umem, u32 nb_entries);
-bool xsk_umem_consume_tx(struct xdp_umem *umem, struct xdp_desc *desc);
-void xsk_umem_consume_tx_done(struct xdp_umem *umem);
-struct xdp_umem_fq_reuse *xsk_reuseq_prepare(u32 nentries);
-struct xdp_umem_fq_reuse *xsk_reuseq_swap(struct xdp_umem *umem,
- struct xdp_umem_fq_reuse *newq);
-void xsk_reuseq_free(struct xdp_umem_fq_reuse *rq);
-struct xdp_umem *xdp_get_umem_from_qid(struct net_device *dev, u16 queue_id);
-void xsk_set_rx_need_wakeup(struct xdp_umem *umem);
-void xsk_set_tx_need_wakeup(struct xdp_umem *umem);
-void xsk_clear_rx_need_wakeup(struct xdp_umem *umem);
-void xsk_clear_tx_need_wakeup(struct xdp_umem *umem);
-bool xsk_umem_uses_need_wakeup(struct xdp_umem *umem);
-void xsk_map_try_sock_delete(struct xsk_map *map, struct xdp_sock *xs,
- struct xdp_sock **map_entry);
-int xsk_map_inc(struct xsk_map *map);
-void xsk_map_put(struct xsk_map *map);
+int xsk_generic_rcv(struct xdp_sock *xs, struct xdp_buff *xdp);
int __xsk_map_redirect(struct xdp_sock *xs, struct xdp_buff *xdp);
void __xsk_map_flush(void);
-static inline struct xdp_sock *__xsk_map_lookup_elem(struct bpf_map *map,
- u32 key)
-{
- struct xsk_map *m = container_of(map, struct xsk_map, map);
- struct xdp_sock *xs;
-
- if (key >= map->max_entries)
- return NULL;
-
- xs = READ_ONCE(m->xsk_map[key]);
- return xs;
-}
-
-static inline u64 xsk_umem_extract_addr(u64 addr)
-{
- return addr & XSK_UNALIGNED_BUF_ADDR_MASK;
-}
-
-static inline u64 xsk_umem_extract_offset(u64 addr)
-{
- return addr >> XSK_UNALIGNED_BUF_OFFSET_SHIFT;
-}
-
-static inline u64 xsk_umem_add_offset_to_addr(u64 addr)
-{
- return xsk_umem_extract_addr(addr) + xsk_umem_extract_offset(addr);
-}
-
-static inline char *xdp_umem_get_data(struct xdp_umem *umem, u64 addr)
-{
- unsigned long page_addr;
-
- addr = xsk_umem_add_offset_to_addr(addr);
- page_addr = (unsigned long)umem->pages[addr >> PAGE_SHIFT].addr;
-
- return (char *)(page_addr & PAGE_MASK) + (addr & ~PAGE_MASK);
-}
-
-static inline dma_addr_t xdp_umem_get_dma(struct xdp_umem *umem, u64 addr)
-{
- addr = xsk_umem_add_offset_to_addr(addr);
-
- return umem->pages[addr >> PAGE_SHIFT].dma + (addr & ~PAGE_MASK);
-}
-
-/* Reuse-queue aware version of FILL queue helpers */
-static inline bool xsk_umem_has_addrs_rq(struct xdp_umem *umem, u32 cnt)
-{
- struct xdp_umem_fq_reuse *rq = umem->fq_reuse;
-
- if (rq->length >= cnt)
- return true;
-
- return xsk_umem_has_addrs(umem, cnt - rq->length);
-}
-
-static inline bool xsk_umem_peek_addr_rq(struct xdp_umem *umem, u64 *addr)
-{
- struct xdp_umem_fq_reuse *rq = umem->fq_reuse;
-
- if (!rq->length)
- return xsk_umem_peek_addr(umem, addr);
-
- *addr = rq->handles[rq->length - 1];
- return addr;
-}
-
-static inline void xsk_umem_release_addr_rq(struct xdp_umem *umem)
-{
- struct xdp_umem_fq_reuse *rq = umem->fq_reuse;
-
- if (!rq->length)
- xsk_umem_release_addr(umem);
- else
- rq->length--;
-}
-
-static inline void xsk_umem_fq_reuse(struct xdp_umem *umem, u64 addr)
-{
- struct xdp_umem_fq_reuse *rq = umem->fq_reuse;
-
- rq->handles[rq->length++] = addr;
-}
-
-/* Handle the offset appropriately depending on aligned or unaligned mode.
- * For unaligned mode, we store the offset in the upper 16-bits of the address.
- * For aligned mode, we simply add the offset to the address.
- */
-static inline u64 xsk_umem_adjust_offset(struct xdp_umem *umem, u64 address,
- u64 offset)
-{
- if (umem->flags & XDP_UMEM_UNALIGNED_CHUNK_FLAG)
- return address + (offset << XSK_UNALIGNED_BUF_OFFSET_SHIFT);
- else
- return address + offset;
-}
#else
+
static inline int xsk_generic_rcv(struct xdp_sock *xs, struct xdp_buff *xdp)
{
return -ENOTSUPP;
}
-static inline bool xsk_is_setup_for_bpf_map(struct xdp_sock *xs)
-{
- return false;
-}
-
-static inline bool xsk_umem_has_addrs(struct xdp_umem *umem, u32 cnt)
-{
- return false;
-}
-
-static inline u64 *xsk_umem_peek_addr(struct xdp_umem *umem, u64 *addr)
-{
- return NULL;
-}
-
-static inline void xsk_umem_release_addr(struct xdp_umem *umem)
-{
-}
-
-static inline void xsk_umem_complete_tx(struct xdp_umem *umem, u32 nb_entries)
-{
-}
-
-static inline bool xsk_umem_consume_tx(struct xdp_umem *umem,
- struct xdp_desc *desc)
-{
- return false;
-}
-
-static inline void xsk_umem_consume_tx_done(struct xdp_umem *umem)
-{
-}
-
-static inline struct xdp_umem_fq_reuse *xsk_reuseq_prepare(u32 nentries)
-{
- return NULL;
-}
-
-static inline struct xdp_umem_fq_reuse *xsk_reuseq_swap(
- struct xdp_umem *umem,
- struct xdp_umem_fq_reuse *newq)
-{
- return NULL;
-}
-static inline void xsk_reuseq_free(struct xdp_umem_fq_reuse *rq)
-{
-}
-
-static inline struct xdp_umem *xdp_get_umem_from_qid(struct net_device *dev,
- u16 queue_id)
-{
- return NULL;
-}
-
-static inline u64 xsk_umem_extract_addr(u64 addr)
-{
- return 0;
-}
-
-static inline u64 xsk_umem_extract_offset(u64 addr)
-{
- return 0;
-}
-
-static inline u64 xsk_umem_add_offset_to_addr(u64 addr)
-{
- return 0;
-}
-
-static inline char *xdp_umem_get_data(struct xdp_umem *umem, u64 addr)
-{
- return NULL;
-}
-
-static inline dma_addr_t xdp_umem_get_dma(struct xdp_umem *umem, u64 addr)
-{
- return 0;
-}
-
-static inline bool xsk_umem_has_addrs_rq(struct xdp_umem *umem, u32 cnt)
-{
- return false;
-}
-
-static inline u64 *xsk_umem_peek_addr_rq(struct xdp_umem *umem, u64 *addr)
-{
- return NULL;
-}
-
-static inline void xsk_umem_release_addr_rq(struct xdp_umem *umem)
-{
-}
-
-static inline void xsk_umem_fq_reuse(struct xdp_umem *umem, u64 addr)
-{
-}
-
-static inline void xsk_set_rx_need_wakeup(struct xdp_umem *umem)
-{
-}
-
-static inline void xsk_set_tx_need_wakeup(struct xdp_umem *umem)
-{
-}
-
-static inline void xsk_clear_rx_need_wakeup(struct xdp_umem *umem)
-{
-}
-
-static inline void xsk_clear_tx_need_wakeup(struct xdp_umem *umem)
-{
-}
-
-static inline bool xsk_umem_uses_need_wakeup(struct xdp_umem *umem)
-{
- return false;
-}
-
-static inline u64 xsk_umem_adjust_offset(struct xdp_umem *umem, u64 handle,
- u64 offset)
-{
- return 0;
-}
-
static inline int __xsk_map_redirect(struct xdp_sock *xs, struct xdp_buff *xdp)
{
return -EOPNOTSUPP;
@@ -376,11 +97,6 @@ static inline void __xsk_map_flush(void)
{
}
-static inline struct xdp_sock *__xsk_map_lookup_elem(struct bpf_map *map,
- u32 key)
-{
- return NULL;
-}
#endif /* CONFIG_XDP_SOCKETS */
#endif /* _LINUX_XDP_SOCK_H */
diff --git a/include/net/xdp_sock_drv.h b/include/net/xdp_sock_drv.h
new file mode 100644
index 000000000000..9c0d860609ba
--- /dev/null
+++ b/include/net/xdp_sock_drv.h
@@ -0,0 +1,289 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/* Interface for implementing AF_XDP zero-copy support in drivers.
+ * Copyright(c) 2020 Intel Corporation.
+ */
+
+#ifndef _LINUX_XDP_SOCK_DRV_H
+#define _LINUX_XDP_SOCK_DRV_H
+
+#include <net/xdp_sock.h>
+#include <net/xsk_buff_pool.h>
+
+#define XDP_UMEM_MIN_CHUNK_SHIFT 11
+#define XDP_UMEM_MIN_CHUNK_SIZE (1 << XDP_UMEM_MIN_CHUNK_SHIFT)
+
+#ifdef CONFIG_XDP_SOCKETS
+
+void xsk_tx_completed(struct xsk_buff_pool *pool, u32 nb_entries);
+bool xsk_tx_peek_desc(struct xsk_buff_pool *pool, struct xdp_desc *desc);
+u32 xsk_tx_peek_release_desc_batch(struct xsk_buff_pool *pool, u32 max);
+void xsk_tx_release(struct xsk_buff_pool *pool);
+struct xsk_buff_pool *xsk_get_pool_from_qid(struct net_device *dev,
+ u16 queue_id);
+void xsk_set_rx_need_wakeup(struct xsk_buff_pool *pool);
+void xsk_set_tx_need_wakeup(struct xsk_buff_pool *pool);
+void xsk_clear_rx_need_wakeup(struct xsk_buff_pool *pool);
+void xsk_clear_tx_need_wakeup(struct xsk_buff_pool *pool);
+bool xsk_uses_need_wakeup(struct xsk_buff_pool *pool);
+
+static inline u32 xsk_pool_get_headroom(struct xsk_buff_pool *pool)
+{
+ return XDP_PACKET_HEADROOM + pool->headroom;
+}
+
+static inline u32 xsk_pool_get_chunk_size(struct xsk_buff_pool *pool)
+{
+ return pool->chunk_size;
+}
+
+static inline u32 xsk_pool_get_rx_frame_size(struct xsk_buff_pool *pool)
+{
+ return xsk_pool_get_chunk_size(pool) - xsk_pool_get_headroom(pool);
+}
+
+static inline void xsk_pool_set_rxq_info(struct xsk_buff_pool *pool,
+ struct xdp_rxq_info *rxq)
+{
+ xp_set_rxq_info(pool, rxq);
+}
+
+static inline unsigned int xsk_pool_get_napi_id(struct xsk_buff_pool *pool)
+{
+#ifdef CONFIG_NET_RX_BUSY_POLL
+ return pool->heads[0].xdp.rxq->napi_id;
+#else
+ return 0;
+#endif
+}
+
+static inline void xsk_pool_dma_unmap(struct xsk_buff_pool *pool,
+ unsigned long attrs)
+{
+ xp_dma_unmap(pool, attrs);
+}
+
+static inline int xsk_pool_dma_map(struct xsk_buff_pool *pool,
+ struct device *dev, unsigned long attrs)
+{
+ struct xdp_umem *umem = pool->umem;
+
+ return xp_dma_map(pool, dev, attrs, umem->pgs, umem->npgs);
+}
+
+static inline dma_addr_t xsk_buff_xdp_get_dma(struct xdp_buff *xdp)
+{
+ struct xdp_buff_xsk *xskb = container_of(xdp, struct xdp_buff_xsk, xdp);
+
+ return xp_get_dma(xskb);
+}
+
+static inline dma_addr_t xsk_buff_xdp_get_frame_dma(struct xdp_buff *xdp)
+{
+ struct xdp_buff_xsk *xskb = container_of(xdp, struct xdp_buff_xsk, xdp);
+
+ return xp_get_frame_dma(xskb);
+}
+
+static inline struct xdp_buff *xsk_buff_alloc(struct xsk_buff_pool *pool)
+{
+ return xp_alloc(pool);
+}
+
+/* Returns as many entries as possible up to max. 0 <= N <= max. */
+static inline u32 xsk_buff_alloc_batch(struct xsk_buff_pool *pool, struct xdp_buff **xdp, u32 max)
+{
+ return xp_alloc_batch(pool, xdp, max);
+}
+
+static inline bool xsk_buff_can_alloc(struct xsk_buff_pool *pool, u32 count)
+{
+ return xp_can_alloc(pool, count);
+}
+
+static inline void xsk_buff_free(struct xdp_buff *xdp)
+{
+ struct xdp_buff_xsk *xskb = container_of(xdp, struct xdp_buff_xsk, xdp);
+
+ xp_free(xskb);
+}
+
+static inline void xsk_buff_set_size(struct xdp_buff *xdp, u32 size)
+{
+ xdp->data = xdp->data_hard_start + XDP_PACKET_HEADROOM;
+ xdp->data_meta = xdp->data;
+ xdp->data_end = xdp->data + size;
+}
+
+static inline dma_addr_t xsk_buff_raw_get_dma(struct xsk_buff_pool *pool,
+ u64 addr)
+{
+ return xp_raw_get_dma(pool, addr);
+}
+
+static inline void *xsk_buff_raw_get_data(struct xsk_buff_pool *pool, u64 addr)
+{
+ return xp_raw_get_data(pool, addr);
+}
+
+static inline void xsk_buff_dma_sync_for_cpu(struct xdp_buff *xdp, struct xsk_buff_pool *pool)
+{
+ struct xdp_buff_xsk *xskb = container_of(xdp, struct xdp_buff_xsk, xdp);
+
+ if (!pool->dma_need_sync)
+ return;
+
+ xp_dma_sync_for_cpu(xskb);
+}
+
+static inline void xsk_buff_raw_dma_sync_for_device(struct xsk_buff_pool *pool,
+ dma_addr_t dma,
+ size_t size)
+{
+ xp_dma_sync_for_device(pool, dma, size);
+}
+
+#else
+
+static inline void xsk_tx_completed(struct xsk_buff_pool *pool, u32 nb_entries)
+{
+}
+
+static inline bool xsk_tx_peek_desc(struct xsk_buff_pool *pool,
+ struct xdp_desc *desc)
+{
+ return false;
+}
+
+static inline u32 xsk_tx_peek_release_desc_batch(struct xsk_buff_pool *pool, u32 max)
+{
+ return 0;
+}
+
+static inline void xsk_tx_release(struct xsk_buff_pool *pool)
+{
+}
+
+static inline struct xsk_buff_pool *
+xsk_get_pool_from_qid(struct net_device *dev, u16 queue_id)
+{
+ return NULL;
+}
+
+static inline void xsk_set_rx_need_wakeup(struct xsk_buff_pool *pool)
+{
+}
+
+static inline void xsk_set_tx_need_wakeup(struct xsk_buff_pool *pool)
+{
+}
+
+static inline void xsk_clear_rx_need_wakeup(struct xsk_buff_pool *pool)
+{
+}
+
+static inline void xsk_clear_tx_need_wakeup(struct xsk_buff_pool *pool)
+{
+}
+
+static inline bool xsk_uses_need_wakeup(struct xsk_buff_pool *pool)
+{
+ return false;
+}
+
+static inline u32 xsk_pool_get_headroom(struct xsk_buff_pool *pool)
+{
+ return 0;
+}
+
+static inline u32 xsk_pool_get_chunk_size(struct xsk_buff_pool *pool)
+{
+ return 0;
+}
+
+static inline u32 xsk_pool_get_rx_frame_size(struct xsk_buff_pool *pool)
+{
+ return 0;
+}
+
+static inline void xsk_pool_set_rxq_info(struct xsk_buff_pool *pool,
+ struct xdp_rxq_info *rxq)
+{
+}
+
+static inline unsigned int xsk_pool_get_napi_id(struct xsk_buff_pool *pool)
+{
+ return 0;
+}
+
+static inline void xsk_pool_dma_unmap(struct xsk_buff_pool *pool,
+ unsigned long attrs)
+{
+}
+
+static inline int xsk_pool_dma_map(struct xsk_buff_pool *pool,
+ struct device *dev, unsigned long attrs)
+{
+ return 0;
+}
+
+static inline dma_addr_t xsk_buff_xdp_get_dma(struct xdp_buff *xdp)
+{
+ return 0;
+}
+
+static inline dma_addr_t xsk_buff_xdp_get_frame_dma(struct xdp_buff *xdp)
+{
+ return 0;
+}
+
+static inline struct xdp_buff *xsk_buff_alloc(struct xsk_buff_pool *pool)
+{
+ return NULL;
+}
+
+static inline u32 xsk_buff_alloc_batch(struct xsk_buff_pool *pool, struct xdp_buff **xdp, u32 max)
+{
+ return 0;
+}
+
+static inline bool xsk_buff_can_alloc(struct xsk_buff_pool *pool, u32 count)
+{
+ return false;
+}
+
+static inline void xsk_buff_free(struct xdp_buff *xdp)
+{
+}
+
+static inline void xsk_buff_discard(struct xdp_buff *xdp)
+{
+}
+
+static inline void xsk_buff_set_size(struct xdp_buff *xdp, u32 size)
+{
+}
+
+static inline dma_addr_t xsk_buff_raw_get_dma(struct xsk_buff_pool *pool,
+ u64 addr)
+{
+ return 0;
+}
+
+static inline void *xsk_buff_raw_get_data(struct xsk_buff_pool *pool, u64 addr)
+{
+ return NULL;
+}
+
+static inline void xsk_buff_dma_sync_for_cpu(struct xdp_buff *xdp, struct xsk_buff_pool *pool)
+{
+}
+
+static inline void xsk_buff_raw_dma_sync_for_device(struct xsk_buff_pool *pool,
+ dma_addr_t dma,
+ size_t size)
+{
+}
+
+#endif /* CONFIG_XDP_SOCKETS */
+
+#endif /* _LINUX_XDP_SOCK_DRV_H */
diff --git a/include/net/xfrm.h b/include/net/xfrm.h
index 8f71c111e65a..dbc81f5eb553 100644
--- a/include/net/xfrm.h
+++ b/include/net/xfrm.h
@@ -15,6 +15,7 @@
#include <linux/audit.h>
#include <linux/slab.h>
#include <linux/refcount.h>
+#include <linux/sockptr.h>
#include <net/sock.h>
#include <net/dst.h>
@@ -125,11 +126,17 @@ struct xfrm_state_walk {
struct xfrm_address_filter *filter;
};
-struct xfrm_state_offload {
+enum {
+ XFRM_DEV_OFFLOAD_IN = 1,
+ XFRM_DEV_OFFLOAD_OUT,
+};
+
+struct xfrm_dev_offload {
struct net_device *dev;
+ netdevice_tracker dev_tracker;
+ struct net_device *real_dev;
unsigned long offload_handle;
- unsigned int num_exthdrs;
- u8 flags;
+ u8 dir : 2;
};
struct xfrm_mode {
@@ -143,6 +150,12 @@ enum {
XFRM_MODE_FLAG_TUNNEL = 1,
};
+enum xfrm_replay_mode {
+ XFRM_REPLAY_MODE_LEGACY,
+ XFRM_REPLAY_MODE_BMP,
+ XFRM_REPLAY_MODE_ESN,
+};
+
/* Full description of state of transformer. */
struct xfrm_state {
possible_net_t xs_net;
@@ -152,6 +165,7 @@ struct xfrm_state {
};
struct hlist_node bysrc;
struct hlist_node byspi;
+ struct hlist_node byseq;
refcount_t refcnt;
spinlock_t lock;
@@ -191,6 +205,11 @@ struct xfrm_state {
struct xfrm_algo_aead *aead;
const char *geniv;
+ /* mapping change rate limiting */
+ __be16 new_mapping_sport;
+ u32 new_mapping; /* seconds */
+ u32 mapping_maxage; /* seconds for input SA */
+
/* Data for encapsulator */
struct xfrm_encap_tmpl *encap;
struct sock __rcu *encap_sk;
@@ -212,9 +231,8 @@ struct xfrm_state {
struct xfrm_replay_state preplay;
struct xfrm_replay_state_esn *preplay_esn;
- /* The functions for replay detection. */
- const struct xfrm_replay *repl;
-
+ /* replay detection mode */
+ enum xfrm_replay_mode repl_mode;
/* internal flag that only holds state for delayed aevent at the
* moment
*/
@@ -233,7 +251,7 @@ struct xfrm_state {
struct xfrm_lifetime_cur curlft;
struct hrtimer mtimer;
- struct xfrm_state_offload xso;
+ struct xfrm_dev_offload xso;
/* used to fix curlft->add_time when changing date */
long saved_tmo;
@@ -294,21 +312,15 @@ struct km_event {
struct net *net;
};
-struct xfrm_replay {
- void (*advance)(struct xfrm_state *x, __be32 net_seq);
- int (*check)(struct xfrm_state *x,
- struct sk_buff *skb,
- __be32 net_seq);
- int (*recheck)(struct xfrm_state *x,
- struct sk_buff *skb,
- __be32 net_seq);
- void (*notify)(struct xfrm_state *x, int event);
- int (*overflow)(struct xfrm_state *x, struct sk_buff *skb);
+struct xfrm_if_decode_session_result {
+ struct net *net;
+ u32 if_id;
};
struct xfrm_if_cb {
- struct xfrm_if *(*decode_session)(struct sk_buff *skb,
- unsigned short family);
+ bool (*decode_session)(struct sk_buff *skb,
+ unsigned short family,
+ struct xfrm_if_decode_session_result *res);
};
void xfrm_if_register_cb(const struct xfrm_if_cb *ifcb);
@@ -361,11 +373,6 @@ struct xfrm_state_afinfo {
const struct xfrm_type *type_dstopts;
int (*output)(struct net *net, struct sock *sk, struct sk_buff *skb);
- int (*output_finish)(struct sock *sk, struct sk_buff *skb);
- int (*extract_input)(struct xfrm_state *x,
- struct sk_buff *skb);
- int (*extract_output)(struct xfrm_state *x,
- struct sk_buff *skb);
int (*transport_finish)(struct sk_buff *skb,
int async);
void (*local_error)(struct sk_buff *skb, u32 mtu);
@@ -377,7 +384,8 @@ struct xfrm_state_afinfo *xfrm_state_get_afinfo(unsigned int family);
struct xfrm_state_afinfo *xfrm_state_afinfo_get_rcu(unsigned int family);
struct xfrm_input_afinfo {
- unsigned int family;
+ u8 family;
+ bool is_ipip;
int (*callback)(struct sk_buff *skb, u8 protocol,
int err);
};
@@ -389,7 +397,6 @@ void xfrm_flush_gc(void);
void xfrm_state_delete_tunnel(struct xfrm_state *x);
struct xfrm_type {
- char *description;
struct module *owner;
u8 proto;
u8 flags;
@@ -398,20 +405,19 @@ struct xfrm_type {
#define XFRM_TYPE_LOCAL_COADDR 4
#define XFRM_TYPE_REMOTE_COADDR 8
- int (*init_state)(struct xfrm_state *x);
+ int (*init_state)(struct xfrm_state *x,
+ struct netlink_ext_ack *extack);
void (*destructor)(struct xfrm_state *);
int (*input)(struct xfrm_state *, struct sk_buff *skb);
int (*output)(struct xfrm_state *, struct sk_buff *pskb);
int (*reject)(struct xfrm_state *, struct sk_buff *,
const struct flowi *);
- int (*hdr_offset)(struct xfrm_state *, struct sk_buff *, u8 **);
};
int xfrm_register_type(const struct xfrm_type *type, unsigned short family);
void xfrm_unregister_type(const struct xfrm_type *type, unsigned short family);
struct xfrm_type_offload {
- char *description;
struct module *owner;
u8 proto;
void (*encap)(struct xfrm_state *, struct sk_buff *pskb);
@@ -584,8 +590,8 @@ struct xfrm_mgr {
bool (*is_alive)(const struct km_event *c);
};
-int xfrm_register_km(struct xfrm_mgr *km);
-int xfrm_unregister_km(struct xfrm_mgr *km);
+void xfrm_register_km(struct xfrm_mgr *km);
+void xfrm_unregister_km(struct xfrm_mgr *km);
struct xfrm_tunnel_skb_cb {
union {
@@ -946,7 +952,7 @@ struct xfrm_dst {
static inline struct dst_entry *xfrm_dst_path(const struct dst_entry *dst)
{
#ifdef CONFIG_XFRM
- if (dst->xfrm) {
+ if (dst->xfrm || (dst->flags & DST_XFRM_QUEUE)) {
const struct xfrm_dst *xdst = (const struct xfrm_dst *) dst;
return xdst->path;
@@ -958,7 +964,7 @@ static inline struct dst_entry *xfrm_dst_path(const struct dst_entry *dst)
static inline struct dst_entry *xfrm_dst_child(const struct dst_entry *dst)
{
#ifdef CONFIG_XFRM
- if (dst->xfrm) {
+ if (dst->xfrm || (dst->flags & DST_XFRM_QUEUE)) {
struct xfrm_dst *xdst = (struct xfrm_dst *) dst;
return xdst->child;
}
@@ -986,6 +992,7 @@ void xfrm_dst_ifdown(struct dst_entry *dst, struct net_device *dev);
struct xfrm_if_parms {
int link; /* ifindex of underlying L2 interface */
u32 if_id; /* interface identifyer */
+ bool collect_md;
};
struct xfrm_if {
@@ -1011,8 +1018,9 @@ struct xfrm_offload {
#define CRYPTO_FALLBACK 8
#define XFRM_GSO_SEGMENT 16
#define XFRM_GRO 32
-#define XFRM_ESP_NO_TRAILER 64
+/* 64 is free */
#define XFRM_DEV_RESUME 128
+#define XFRM_XMIT 256
__u32 status;
#define CRYPTO_SUCCESS 1
@@ -1025,6 +1033,7 @@ struct xfrm_offload {
#define CRYPTO_INVALID_PROTOCOL 128
__u8 proto;
+ __u8 inner_ipproto;
};
struct sec_path {
@@ -1087,6 +1096,27 @@ xfrm_state_addr_cmp(const struct xfrm_tmpl *tmpl, const struct xfrm_state *x, un
int __xfrm_policy_check(struct sock *, int dir, struct sk_buff *skb,
unsigned short family);
+static inline bool __xfrm_check_nopolicy(struct net *net, struct sk_buff *skb,
+ int dir)
+{
+ if (!net->xfrm.policy_count[dir] && !secpath_exists(skb))
+ return net->xfrm.policy_default[dir] == XFRM_USERPOLICY_ACCEPT;
+
+ return false;
+}
+
+static inline bool __xfrm_check_dev_nopolicy(struct sk_buff *skb,
+ int dir, unsigned short family)
+{
+ if (dir != XFRM_POLICY_OUT && family == AF_INET) {
+ /* same dst may be used for traffic originating from
+ * devices with different policy settings.
+ */
+ return IPCB(skb)->flags & IPSKB_NOPOLICY;
+ }
+ return skb_dst(skb) && (skb_dst(skb)->flags & DST_NOPOLICY);
+}
+
static inline int __xfrm_policy_check2(struct sock *sk, int dir,
struct sk_buff *skb,
unsigned int family, int reverse)
@@ -1097,9 +1127,9 @@ static inline int __xfrm_policy_check2(struct sock *sk, int dir,
if (sk && sk->sk_policy[XFRM_POLICY_IN])
return __xfrm_policy_check(sk, ndir, skb, family);
- return (!net->xfrm.policy_count[dir] && !secpath_exists(skb)) ||
- (skb_dst(skb)->flags & DST_NOPOLICY) ||
- __xfrm_policy_check(sk, ndir, skb, family);
+ return __xfrm_check_nopolicy(net, skb, dir) ||
+ __xfrm_check_dev_nopolicy(skb, dir, family) ||
+ __xfrm_policy_check(sk, ndir, skb, family);
}
static inline int xfrm_policy_check(struct sock *sk, int dir, struct sk_buff *skb, unsigned short family)
@@ -1151,9 +1181,12 @@ static inline int xfrm_route_forward(struct sk_buff *skb, unsigned short family)
{
struct net *net = dev_net(skb->dev);
- return !net->xfrm.policy_count[XFRM_POLICY_OUT] ||
- (skb_dst(skb)->flags & DST_NOXFRM) ||
- __xfrm_route_forward(skb, family);
+ if (!net->xfrm.policy_count[XFRM_POLICY_OUT] &&
+ net->xfrm.policy_default[XFRM_POLICY_OUT] == XFRM_USERPOLICY_ACCEPT)
+ return true;
+
+ return (skb_dst(skb)->flags & DST_NOXFRM) ||
+ __xfrm_route_forward(skb, family);
}
static inline int xfrm4_route_forward(struct sk_buff *skb)
@@ -1170,6 +1203,8 @@ int __xfrm_sk_clone_policy(struct sock *sk, const struct sock *osk);
static inline int xfrm_sk_clone_policy(struct sock *sk, const struct sock *osk)
{
+ if (!sk_fullsock(osk))
+ return 0;
sk->sk_policy[0] = NULL;
sk->sk_policy[1] = NULL;
if (unlikely(osk->sk_policy[0] || osk->sk_policy[1]))
@@ -1406,6 +1441,8 @@ struct xfrm4_protocol {
struct xfrm6_protocol {
int (*handler)(struct sk_buff *skb);
+ int (*input_handler)(struct sk_buff *skb, int nexthdr, __be32 spi,
+ int encap_type);
int (*cb_handler)(struct sk_buff *skb, int err);
int (*err_handler)(struct sk_buff *skb, struct inet6_skb_parm *opt,
u8 type, u8 code, int offset, __be32 info);
@@ -1417,6 +1454,7 @@ struct xfrm6_protocol {
/* XFRM tunnel handlers. */
struct xfrm_tunnel {
int (*handler)(struct sk_buff *skb);
+ int (*cb_handler)(struct sk_buff *skb, int err);
int (*err_handler)(struct sk_buff *skb, u32 info);
struct xfrm_tunnel __rcu *next;
@@ -1425,6 +1463,7 @@ struct xfrm_tunnel {
struct xfrm6_tunnel {
int (*handler)(struct sk_buff *skb);
+ int (*cb_handler)(struct sk_buff *skb, int err);
int (*err_handler)(struct sk_buff *skb, struct inet6_skb_parm *opt,
u8 type, u8 code, int offset, __be32 info);
struct xfrm6_tunnel __rcu *next;
@@ -1542,9 +1581,10 @@ int xfrm_dev_state_flush(struct net *net, struct net_device *dev, bool task_vali
void xfrm_sad_getinfo(struct net *net, struct xfrmk_sadinfo *si);
void xfrm_spd_getinfo(struct net *net, struct xfrmk_spdinfo *si);
u32 xfrm_replay_seqhi(struct xfrm_state *x, __be32 net_seq);
-int xfrm_init_replay(struct xfrm_state *x);
+int xfrm_init_replay(struct xfrm_state *x, struct netlink_ext_ack *extack);
u32 xfrm_state_mtu(struct xfrm_state *x, int mtu);
-int __xfrm_init_state(struct xfrm_state *x, bool init_replay, bool offload);
+int __xfrm_init_state(struct xfrm_state *x, bool init_replay, bool offload,
+ struct netlink_ext_ack *extack);
int xfrm_init_state(struct xfrm_state *x);
int xfrm_input(struct sk_buff *skb, int nexthdr, __be32 spi, int encap_type);
int xfrm_input_resume(struct sk_buff *skb, int nexthdr);
@@ -1554,7 +1594,7 @@ int xfrm_trans_queue_net(struct net *net, struct sk_buff *skb,
int xfrm_trans_queue(struct sk_buff *skb,
int (*finish)(struct net *, struct sock *,
struct sk_buff *));
-int xfrm_output_resume(struct sk_buff *skb, int err);
+int xfrm_output_resume(struct sock *sk, struct sk_buff *skb, int err);
int xfrm_output(struct sock *sk, struct sk_buff *skb);
#if IS_ENABLED(CONFIG_NET_PKTGEN)
@@ -1562,13 +1602,11 @@ int pktgen_xfrm_outer_mode_output(struct xfrm_state *x, struct sk_buff *skb);
#endif
void xfrm_local_error(struct sk_buff *skb, int mtu);
-int xfrm4_extract_header(struct sk_buff *skb);
int xfrm4_extract_input(struct xfrm_state *x, struct sk_buff *skb);
int xfrm4_rcv_encap(struct sk_buff *skb, int nexthdr, __be32 spi,
int encap_type);
int xfrm4_transport_finish(struct sk_buff *skb, int async);
int xfrm4_rcv(struct sk_buff *skb);
-int xfrm_parse_spi(struct sk_buff *skb, u8 nexthdr, __be32 *spi, __be32 *seq);
static inline int xfrm4_rcv_spi(struct sk_buff *skb, int nexthdr, __be32 spi)
{
@@ -1578,18 +1616,17 @@ static inline int xfrm4_rcv_spi(struct sk_buff *skb, int nexthdr, __be32 spi)
return xfrm_input(skb, nexthdr, spi, 0);
}
-int xfrm4_extract_output(struct xfrm_state *x, struct sk_buff *skb);
int xfrm4_output(struct net *net, struct sock *sk, struct sk_buff *skb);
-int xfrm4_output_finish(struct sock *sk, struct sk_buff *skb);
int xfrm4_protocol_register(struct xfrm4_protocol *handler, unsigned char protocol);
int xfrm4_protocol_deregister(struct xfrm4_protocol *handler, unsigned char protocol);
int xfrm4_tunnel_register(struct xfrm_tunnel *handler, unsigned short family);
int xfrm4_tunnel_deregister(struct xfrm_tunnel *handler, unsigned short family);
void xfrm4_local_error(struct sk_buff *skb, u32 mtu);
-int xfrm6_extract_header(struct sk_buff *skb);
int xfrm6_extract_input(struct xfrm_state *x, struct sk_buff *skb);
int xfrm6_rcv_spi(struct sk_buff *skb, int nexthdr, __be32 spi,
struct ip6_tnl *t);
+int xfrm6_rcv_encap(struct sk_buff *skb, int nexthdr, __be32 spi,
+ int encap_type);
int xfrm6_transport_finish(struct sk_buff *skb, int async);
int xfrm6_rcv_tnl(struct sk_buff *skb, struct ip6_tnl *t);
int xfrm6_rcv(struct sk_buff *skb);
@@ -1602,18 +1639,17 @@ int xfrm6_tunnel_register(struct xfrm6_tunnel *handler, unsigned short family);
int xfrm6_tunnel_deregister(struct xfrm6_tunnel *handler, unsigned short family);
__be32 xfrm6_tunnel_alloc_spi(struct net *net, xfrm_address_t *saddr);
__be32 xfrm6_tunnel_spi_lookup(struct net *net, const xfrm_address_t *saddr);
-int xfrm6_extract_output(struct xfrm_state *x, struct sk_buff *skb);
int xfrm6_output(struct net *net, struct sock *sk, struct sk_buff *skb);
-int xfrm6_output_finish(struct sock *sk, struct sk_buff *skb);
-int xfrm6_find_1stfragopt(struct xfrm_state *x, struct sk_buff *skb,
- u8 **prevhdr);
#ifdef CONFIG_XFRM
+void xfrm6_local_rxpmtu(struct sk_buff *skb, u32 mtu);
int xfrm4_udp_encap_rcv(struct sock *sk, struct sk_buff *skb);
-int xfrm_user_policy(struct sock *sk, int optname,
- u8 __user *optval, int optlen);
+int xfrm6_udp_encap_rcv(struct sock *sk, struct sk_buff *skb);
+int xfrm_user_policy(struct sock *sk, int optname, sockptr_t optval,
+ int optlen);
#else
-static inline int xfrm_user_policy(struct sock *sk, int optname, u8 __user *optval, int optlen)
+static inline int xfrm_user_policy(struct sock *sk, int optname,
+ sockptr_t optval, int optlen)
{
return -ENOPROTOOPT;
}
@@ -1632,13 +1668,16 @@ int xfrm_policy_walk(struct net *net, struct xfrm_policy_walk *walk,
void *);
void xfrm_policy_walk_done(struct xfrm_policy_walk *walk, struct net *net);
int xfrm_policy_insert(int dir, struct xfrm_policy *policy, int excl);
-struct xfrm_policy *xfrm_policy_bysel_ctx(struct net *net, u32 mark, u32 if_id,
- u8 type, int dir,
+struct xfrm_policy *xfrm_policy_bysel_ctx(struct net *net,
+ const struct xfrm_mark *mark,
+ u32 if_id, u8 type, int dir,
struct xfrm_selector *sel,
struct xfrm_sec_ctx *ctx, int delete,
int *err);
-struct xfrm_policy *xfrm_policy_byid(struct net *net, u32 mark, u32 if_id, u8,
- int dir, u32 id, int delete, int *err);
+struct xfrm_policy *xfrm_policy_byid(struct net *net,
+ const struct xfrm_mark *mark, u32 if_id,
+ u8 type, int dir, u32 id, int delete,
+ int *err);
int xfrm_policy_flush(struct net *net, u8 type, bool task_valid);
void xfrm_policy_hash_rebuild(struct net *net);
u32 xfrm_get_acqseq(void);
@@ -1656,14 +1695,15 @@ int km_migrate(const struct xfrm_selector *sel, u8 dir, u8 type,
const struct xfrm_migrate *m, int num_bundles,
const struct xfrm_kmaddress *k,
const struct xfrm_encap_tmpl *encap);
-struct xfrm_state *xfrm_migrate_state_find(struct xfrm_migrate *m, struct net *net);
+struct xfrm_state *xfrm_migrate_state_find(struct xfrm_migrate *m, struct net *net,
+ u32 if_id);
struct xfrm_state *xfrm_state_migrate(struct xfrm_state *x,
struct xfrm_migrate *m,
struct xfrm_encap_tmpl *encap);
int xfrm_migrate(const struct xfrm_selector *sel, u8 dir, u8 type,
struct xfrm_migrate *m, int num_bundles,
struct xfrm_kmaddress *k, struct net *net,
- struct xfrm_encap_tmpl *encap);
+ struct xfrm_encap_tmpl *encap, u32 if_id);
#endif
int km_new_mapping(struct xfrm_state *x, xfrm_address_t *ipaddr, __be16 sport);
@@ -1714,6 +1754,12 @@ static inline int xfrm_policy_id2dir(u32 index)
}
#ifdef CONFIG_XFRM
+void xfrm_replay_advance(struct xfrm_state *x, __be32 net_seq);
+int xfrm_replay_check(struct xfrm_state *x, struct sk_buff *skb, __be32 net_seq);
+void xfrm_replay_notify(struct xfrm_state *x, int event);
+int xfrm_replay_overflow(struct xfrm_state *x, struct sk_buff *skb);
+int xfrm_replay_recheck(struct xfrm_state *x, struct sk_buff *skb, __be32 net_seq);
+
static inline int xfrm_aevent_is_on(struct net *net)
{
struct sock *nlsk;
@@ -1766,21 +1812,17 @@ static inline unsigned int xfrm_replay_state_esn_len(struct xfrm_replay_state_es
static inline int xfrm_replay_clone(struct xfrm_state *x,
struct xfrm_state *orig)
{
- x->replay_esn = kzalloc(xfrm_replay_state_esn_len(orig->replay_esn),
+
+ x->replay_esn = kmemdup(orig->replay_esn,
+ xfrm_replay_state_esn_len(orig->replay_esn),
GFP_KERNEL);
if (!x->replay_esn)
return -ENOMEM;
-
- x->replay_esn->bmp_len = orig->replay_esn->bmp_len;
- x->replay_esn->replay_window = orig->replay_esn->replay_window;
-
- x->preplay_esn = kmemdup(x->replay_esn,
- xfrm_replay_state_esn_len(x->replay_esn),
+ x->preplay_esn = kmemdup(orig->preplay_esn,
+ xfrm_replay_state_esn_len(orig->preplay_esn),
GFP_KERNEL);
- if (!x->preplay_esn) {
- kfree(x->replay_esn);
+ if (!x->preplay_esn)
return -ENOMEM;
- }
return 0;
}
@@ -1846,12 +1888,13 @@ void xfrm_dev_resume(struct sk_buff *skb);
void xfrm_dev_backlog(struct softnet_data *sd);
struct sk_buff *validate_xmit_xfrm(struct sk_buff *skb, netdev_features_t features, bool *again);
int xfrm_dev_state_add(struct net *net, struct xfrm_state *x,
- struct xfrm_user_offload *xuo);
+ struct xfrm_user_offload *xuo,
+ struct netlink_ext_ack *extack);
bool xfrm_dev_offload_ok(struct sk_buff *skb, struct xfrm_state *x);
static inline void xfrm_dev_state_advance_esn(struct xfrm_state *x)
{
- struct xfrm_state_offload *xso = &x->xso;
+ struct xfrm_dev_offload *xso = &x->xso;
if (xso->dev && xso->dev->xfrmdev_ops->xdo_dev_state_advance_esn)
xso->dev->xfrmdev_ops->xdo_dev_state_advance_esn(x);
@@ -1877,7 +1920,7 @@ static inline bool xfrm_dst_offload_ok(struct dst_entry *dst)
static inline void xfrm_dev_state_delete(struct xfrm_state *x)
{
- struct xfrm_state_offload *xso = &x->xso;
+ struct xfrm_dev_offload *xso = &x->xso;
if (xso->dev)
xso->dev->xfrmdev_ops->xdo_dev_state_delete(x);
@@ -1885,14 +1928,14 @@ static inline void xfrm_dev_state_delete(struct xfrm_state *x)
static inline void xfrm_dev_state_free(struct xfrm_state *x)
{
- struct xfrm_state_offload *xso = &x->xso;
+ struct xfrm_dev_offload *xso = &x->xso;
struct net_device *dev = xso->dev;
if (dev && dev->xfrmdev_ops) {
if (dev->xfrmdev_ops->xdo_dev_state_free)
dev->xfrmdev_ops->xdo_dev_state_free(x);
xso->dev = NULL;
- dev_put(dev);
+ netdev_put(dev, &xso->dev_tracker);
}
}
#else
@@ -1909,7 +1952,7 @@ static inline struct sk_buff *validate_xmit_xfrm(struct sk_buff *skb, netdev_fea
return skb;
}
-static inline int xfrm_dev_state_add(struct net *net, struct xfrm_state *x, struct xfrm_user_offload *xuo)
+static inline int xfrm_dev_state_add(struct net *net, struct xfrm_state *x, struct xfrm_user_offload *xuo, struct netlink_ext_ack *extack)
{
return 0;
}
@@ -1992,4 +2035,53 @@ static inline int xfrm_tunnel_check(struct sk_buff *skb, struct xfrm_state *x,
return 0;
}
+
+extern const int xfrm_msg_min[XFRM_NR_MSGTYPES];
+extern const struct nla_policy xfrma_policy[XFRMA_MAX+1];
+
+struct xfrm_translator {
+ /* Allocate frag_list and put compat translation there */
+ int (*alloc_compat)(struct sk_buff *skb, const struct nlmsghdr *src);
+
+ /* Allocate nlmsg with 64-bit translaton of received 32-bit message */
+ struct nlmsghdr *(*rcv_msg_compat)(const struct nlmsghdr *nlh,
+ int maxtype, const struct nla_policy *policy,
+ struct netlink_ext_ack *extack);
+
+ /* Translate 32-bit user_policy from sockptr */
+ int (*xlate_user_policy_sockptr)(u8 **pdata32, int optlen);
+
+ struct module *owner;
+};
+
+#if IS_ENABLED(CONFIG_XFRM_USER_COMPAT)
+extern int xfrm_register_translator(struct xfrm_translator *xtr);
+extern int xfrm_unregister_translator(struct xfrm_translator *xtr);
+extern struct xfrm_translator *xfrm_get_translator(void);
+extern void xfrm_put_translator(struct xfrm_translator *xtr);
+#else
+static inline struct xfrm_translator *xfrm_get_translator(void)
+{
+ return NULL;
+}
+static inline void xfrm_put_translator(struct xfrm_translator *xtr)
+{
+}
+#endif
+
+#if IS_ENABLED(CONFIG_IPV6)
+static inline bool xfrm6_local_dontfrag(const struct sock *sk)
+{
+ int proto;
+
+ if (!sk || sk->sk_family != AF_INET6)
+ return false;
+
+ proto = sk->sk_protocol;
+ if (proto == IPPROTO_UDP || proto == IPPROTO_RAW)
+ return inet6_sk(sk)->dontfrag;
+
+ return false;
+}
+#endif
#endif /* _NET_XFRM_H */
diff --git a/include/net/xsk_buff_pool.h b/include/net/xsk_buff_pool.h
new file mode 100644
index 000000000000..f787c3f524b0
--- /dev/null
+++ b/include/net/xsk_buff_pool.h
@@ -0,0 +1,229 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/* Copyright(c) 2020 Intel Corporation. */
+
+#ifndef XSK_BUFF_POOL_H_
+#define XSK_BUFF_POOL_H_
+
+#include <linux/if_xdp.h>
+#include <linux/types.h>
+#include <linux/dma-mapping.h>
+#include <linux/bpf.h>
+#include <net/xdp.h>
+
+struct xsk_buff_pool;
+struct xdp_rxq_info;
+struct xsk_queue;
+struct xdp_desc;
+struct xdp_umem;
+struct xdp_sock;
+struct device;
+struct page;
+
+struct xdp_buff_xsk {
+ struct xdp_buff xdp;
+ dma_addr_t dma;
+ dma_addr_t frame_dma;
+ struct xsk_buff_pool *pool;
+ u64 orig_addr;
+ struct list_head free_list_node;
+};
+
+struct xsk_dma_map {
+ dma_addr_t *dma_pages;
+ struct device *dev;
+ struct net_device *netdev;
+ refcount_t users;
+ struct list_head list; /* Protected by the RTNL_LOCK */
+ u32 dma_pages_cnt;
+ bool dma_need_sync;
+};
+
+struct xsk_buff_pool {
+ /* Members only used in the control path first. */
+ struct device *dev;
+ struct net_device *netdev;
+ struct list_head xsk_tx_list;
+ /* Protects modifications to the xsk_tx_list */
+ spinlock_t xsk_tx_list_lock;
+ refcount_t users;
+ struct xdp_umem *umem;
+ struct work_struct work;
+ struct list_head free_list;
+ u32 heads_cnt;
+ u16 queue_id;
+
+ /* Data path members as close to free_heads at the end as possible. */
+ struct xsk_queue *fq ____cacheline_aligned_in_smp;
+ struct xsk_queue *cq;
+ /* For performance reasons, each buff pool has its own array of dma_pages
+ * even when they are identical.
+ */
+ dma_addr_t *dma_pages;
+ struct xdp_buff_xsk *heads;
+ struct xdp_desc *tx_descs;
+ u64 chunk_mask;
+ u64 addrs_cnt;
+ u32 free_list_cnt;
+ u32 dma_pages_cnt;
+ u32 free_heads_cnt;
+ u32 headroom;
+ u32 chunk_size;
+ u32 chunk_shift;
+ u32 frame_len;
+ u8 cached_need_wakeup;
+ bool uses_need_wakeup;
+ bool dma_need_sync;
+ bool unaligned;
+ void *addrs;
+ /* Mutual exclusion of the completion ring in the SKB mode. Two cases to protect:
+ * NAPI TX thread and sendmsg error paths in the SKB destructor callback and when
+ * sockets share a single cq when the same netdev and queue id is shared.
+ */
+ spinlock_t cq_lock;
+ struct xdp_buff_xsk *free_heads[];
+};
+
+/* Masks for xdp_umem_page flags.
+ * The low 12-bits of the addr will be 0 since this is the page address, so we
+ * can use them for flags.
+ */
+#define XSK_NEXT_PG_CONTIG_SHIFT 0
+#define XSK_NEXT_PG_CONTIG_MASK BIT_ULL(XSK_NEXT_PG_CONTIG_SHIFT)
+
+/* AF_XDP core. */
+struct xsk_buff_pool *xp_create_and_assign_umem(struct xdp_sock *xs,
+ struct xdp_umem *umem);
+int xp_assign_dev(struct xsk_buff_pool *pool, struct net_device *dev,
+ u16 queue_id, u16 flags);
+int xp_assign_dev_shared(struct xsk_buff_pool *pool, struct xdp_sock *umem_xs,
+ struct net_device *dev, u16 queue_id);
+int xp_alloc_tx_descs(struct xsk_buff_pool *pool, struct xdp_sock *xs);
+void xp_destroy(struct xsk_buff_pool *pool);
+void xp_get_pool(struct xsk_buff_pool *pool);
+bool xp_put_pool(struct xsk_buff_pool *pool);
+void xp_clear_dev(struct xsk_buff_pool *pool);
+void xp_add_xsk(struct xsk_buff_pool *pool, struct xdp_sock *xs);
+void xp_del_xsk(struct xsk_buff_pool *pool, struct xdp_sock *xs);
+
+/* AF_XDP, and XDP core. */
+void xp_free(struct xdp_buff_xsk *xskb);
+
+static inline void xp_init_xskb_addr(struct xdp_buff_xsk *xskb, struct xsk_buff_pool *pool,
+ u64 addr)
+{
+ xskb->orig_addr = addr;
+ xskb->xdp.data_hard_start = pool->addrs + addr + pool->headroom;
+}
+
+static inline void xp_init_xskb_dma(struct xdp_buff_xsk *xskb, struct xsk_buff_pool *pool,
+ dma_addr_t *dma_pages, u64 addr)
+{
+ xskb->frame_dma = (dma_pages[addr >> PAGE_SHIFT] & ~XSK_NEXT_PG_CONTIG_MASK) +
+ (addr & ~PAGE_MASK);
+ xskb->dma = xskb->frame_dma + pool->headroom + XDP_PACKET_HEADROOM;
+}
+
+/* AF_XDP ZC drivers, via xdp_sock_buff.h */
+void xp_set_rxq_info(struct xsk_buff_pool *pool, struct xdp_rxq_info *rxq);
+int xp_dma_map(struct xsk_buff_pool *pool, struct device *dev,
+ unsigned long attrs, struct page **pages, u32 nr_pages);
+void xp_dma_unmap(struct xsk_buff_pool *pool, unsigned long attrs);
+struct xdp_buff *xp_alloc(struct xsk_buff_pool *pool);
+u32 xp_alloc_batch(struct xsk_buff_pool *pool, struct xdp_buff **xdp, u32 max);
+bool xp_can_alloc(struct xsk_buff_pool *pool, u32 count);
+void *xp_raw_get_data(struct xsk_buff_pool *pool, u64 addr);
+dma_addr_t xp_raw_get_dma(struct xsk_buff_pool *pool, u64 addr);
+static inline dma_addr_t xp_get_dma(struct xdp_buff_xsk *xskb)
+{
+ return xskb->dma;
+}
+
+static inline dma_addr_t xp_get_frame_dma(struct xdp_buff_xsk *xskb)
+{
+ return xskb->frame_dma;
+}
+
+void xp_dma_sync_for_cpu_slow(struct xdp_buff_xsk *xskb);
+static inline void xp_dma_sync_for_cpu(struct xdp_buff_xsk *xskb)
+{
+ xp_dma_sync_for_cpu_slow(xskb);
+}
+
+void xp_dma_sync_for_device_slow(struct xsk_buff_pool *pool, dma_addr_t dma,
+ size_t size);
+static inline void xp_dma_sync_for_device(struct xsk_buff_pool *pool,
+ dma_addr_t dma, size_t size)
+{
+ if (!pool->dma_need_sync)
+ return;
+
+ xp_dma_sync_for_device_slow(pool, dma, size);
+}
+
+/* Masks for xdp_umem_page flags.
+ * The low 12-bits of the addr will be 0 since this is the page address, so we
+ * can use them for flags.
+ */
+#define XSK_NEXT_PG_CONTIG_SHIFT 0
+#define XSK_NEXT_PG_CONTIG_MASK BIT_ULL(XSK_NEXT_PG_CONTIG_SHIFT)
+
+static inline bool xp_desc_crosses_non_contig_pg(struct xsk_buff_pool *pool,
+ u64 addr, u32 len)
+{
+ bool cross_pg = (addr & (PAGE_SIZE - 1)) + len > PAGE_SIZE;
+
+ if (likely(!cross_pg))
+ return false;
+
+ if (pool->dma_pages_cnt) {
+ return !(pool->dma_pages[addr >> PAGE_SHIFT] &
+ XSK_NEXT_PG_CONTIG_MASK);
+ }
+
+ /* skb path */
+ return addr + len > pool->addrs_cnt;
+}
+
+static inline u64 xp_aligned_extract_addr(struct xsk_buff_pool *pool, u64 addr)
+{
+ return addr & pool->chunk_mask;
+}
+
+static inline u64 xp_unaligned_extract_addr(u64 addr)
+{
+ return addr & XSK_UNALIGNED_BUF_ADDR_MASK;
+}
+
+static inline u64 xp_unaligned_extract_offset(u64 addr)
+{
+ return addr >> XSK_UNALIGNED_BUF_OFFSET_SHIFT;
+}
+
+static inline u64 xp_unaligned_add_offset_to_addr(u64 addr)
+{
+ return xp_unaligned_extract_addr(addr) +
+ xp_unaligned_extract_offset(addr);
+}
+
+static inline u32 xp_aligned_extract_idx(struct xsk_buff_pool *pool, u64 addr)
+{
+ return xp_aligned_extract_addr(pool, addr) >> pool->chunk_shift;
+}
+
+static inline void xp_release(struct xdp_buff_xsk *xskb)
+{
+ if (xskb->pool->unaligned)
+ xskb->pool->free_heads[xskb->pool->free_heads_cnt++] = xskb;
+}
+
+static inline u64 xp_get_handle(struct xdp_buff_xsk *xskb)
+{
+ u64 offset = xskb->xdp.data - xskb->xdp.data_hard_start;
+
+ offset += xskb->pool->headroom;
+ if (!xskb->pool->unaligned)
+ return xskb->orig_addr + offset;
+ return xskb->orig_addr + (offset << XSK_UNALIGNED_BUF_OFFSET_SHIFT);
+}
+
+#endif /* XSK_BUFF_POOL_H_ */