diff options
Diffstat (limited to 'net')
227 files changed, 5126 insertions, 2385 deletions
diff --git a/net/appletalk/ddp.c b/net/appletalk/ddp.c index bf5736c1d458..a06f4d4a6f47 100644 --- a/net/appletalk/ddp.c +++ b/net/appletalk/ddp.c @@ -1753,8 +1753,7 @@ static int atalk_recvmsg(struct socket *sock, struct msghdr *msg, size_t size, int err = 0; struct sk_buff *skb; - skb = skb_recv_datagram(sk, flags & ~MSG_DONTWAIT, - flags & MSG_DONTWAIT, &err); + skb = skb_recv_datagram(sk, flags, &err); lock_sock(sk); if (!skb) diff --git a/net/atm/common.c b/net/atm/common.c index 1cfa9bf1d187..f7019df41c3e 100644 --- a/net/atm/common.c +++ b/net/atm/common.c @@ -540,7 +540,7 @@ int vcc_recvmsg(struct socket *sock, struct msghdr *msg, size_t size, !test_bit(ATM_VF_READY, &vcc->flags)) return 0; - skb = skb_recv_datagram(sk, flags, flags & MSG_DONTWAIT, &error); + skb = skb_recv_datagram(sk, flags, &error); if (!skb) return error; @@ -553,7 +553,7 @@ int vcc_recvmsg(struct socket *sock, struct msghdr *msg, size_t size, error = skb_copy_datagram_msg(skb, 0, msg, copied); if (error) return error; - sock_recv_ts_and_drops(msg, sk, skb); + sock_recv_cmsgs(msg, sk, skb); if (!(flags & MSG_PEEK)) { pr_debug("%d -= %d\n", atomic_read(&sk->sk_rmem_alloc), diff --git a/net/ax25/af_ax25.c b/net/ax25/af_ax25.c index 363d47f94532..116481e4da82 100644 --- a/net/ax25/af_ax25.c +++ b/net/ax25/af_ax25.c @@ -1669,8 +1669,7 @@ static int ax25_recvmsg(struct socket *sock, struct msghdr *msg, size_t size, } /* Now we can treat all alike */ - skb = skb_recv_datagram(sk, flags & ~MSG_DONTWAIT, - flags & MSG_DONTWAIT, &err); + skb = skb_recv_datagram(sk, flags, &err); if (skb == NULL) goto out; diff --git a/net/bluetooth/af_bluetooth.c b/net/bluetooth/af_bluetooth.c index a0cb2e3da8d4..b506409bb498 100644 --- a/net/bluetooth/af_bluetooth.c +++ b/net/bluetooth/af_bluetooth.c @@ -251,7 +251,6 @@ EXPORT_SYMBOL(bt_accept_dequeue); int bt_sock_recvmsg(struct socket *sock, struct msghdr *msg, size_t len, int flags) { - int noblock = flags & MSG_DONTWAIT; struct sock *sk = sock->sk; struct sk_buff *skb; size_t copied; @@ -263,7 +262,7 @@ int bt_sock_recvmsg(struct socket *sock, struct msghdr *msg, size_t len, if (flags & MSG_OOB) return -EOPNOTSUPP; - skb = skb_recv_datagram(sk, flags, noblock, &err); + skb = skb_recv_datagram(sk, flags, &err); if (!skb) { if (sk->sk_shutdown & RCV_SHUTDOWN) return 0; @@ -281,7 +280,7 @@ int bt_sock_recvmsg(struct socket *sock, struct msghdr *msg, size_t len, skb_reset_transport_header(skb); err = skb_copy_datagram_msg(skb, 0, msg, copied); if (err == 0) { - sock_recv_ts_and_drops(msg, sk, skb); + sock_recv_cmsgs(msg, sk, skb); if (msg->msg_name && bt_sk(sk)->skb_msg_name) bt_sk(sk)->skb_msg_name(skb, msg->msg_name, @@ -385,7 +384,7 @@ int bt_sock_stream_recvmsg(struct socket *sock, struct msghdr *msg, copied += chunk; size -= chunk; - sock_recv_ts_and_drops(msg, sk, skb); + sock_recv_cmsgs(msg, sk, skb); if (!(flags & MSG_PEEK)) { int skb_len = skb_headlen(skb); diff --git a/net/bluetooth/hci_sock.c b/net/bluetooth/hci_sock.c index 33b3c0ffc339..189e3115c8c6 100644 --- a/net/bluetooth/hci_sock.c +++ b/net/bluetooth/hci_sock.c @@ -1453,7 +1453,6 @@ static void hci_sock_cmsg(struct sock *sk, struct msghdr *msg, static int hci_sock_recvmsg(struct socket *sock, struct msghdr *msg, size_t len, int flags) { - int noblock = flags & MSG_DONTWAIT; struct sock *sk = sock->sk; struct sk_buff *skb; int copied, err; @@ -1470,7 +1469,7 @@ static int hci_sock_recvmsg(struct socket *sock, struct msghdr *msg, if (sk->sk_state == BT_CLOSED) return 0; - skb = skb_recv_datagram(sk, flags, noblock, &err); + skb = skb_recv_datagram(sk, flags, &err); if (!skb) return err; diff --git a/net/bpf/test_run.c b/net/bpf/test_run.c index af709c182674..8d54fef9a568 100644 --- a/net/bpf/test_run.c +++ b/net/bpf/test_run.c @@ -551,8 +551,13 @@ struct sock * noinline bpf_kfunc_call_test3(struct sock *sk) return sk; } +struct prog_test_member1 { + int a; +}; + struct prog_test_member { - u64 c; + struct prog_test_member1 m; + int c; }; struct prog_test_ref_kfunc { @@ -577,6 +582,12 @@ bpf_kfunc_call_test_acquire(unsigned long *scalar_ptr) return &prog_test_struct; } +noinline struct prog_test_member * +bpf_kfunc_call_memb_acquire(void) +{ + return &prog_test_struct.memb; +} + noinline void bpf_kfunc_call_test_release(struct prog_test_ref_kfunc *p) { } @@ -585,6 +596,16 @@ noinline void bpf_kfunc_call_memb_release(struct prog_test_member *p) { } +noinline void bpf_kfunc_call_memb1_release(struct prog_test_member1 *p) +{ +} + +noinline struct prog_test_ref_kfunc * +bpf_kfunc_call_test_kptr_get(struct prog_test_ref_kfunc **p, int a, int b) +{ + return &prog_test_struct; +} + struct prog_test_pass1 { int x0; struct { @@ -668,8 +689,11 @@ BTF_ID(func, bpf_kfunc_call_test1) BTF_ID(func, bpf_kfunc_call_test2) BTF_ID(func, bpf_kfunc_call_test3) BTF_ID(func, bpf_kfunc_call_test_acquire) +BTF_ID(func, bpf_kfunc_call_memb_acquire) BTF_ID(func, bpf_kfunc_call_test_release) BTF_ID(func, bpf_kfunc_call_memb_release) +BTF_ID(func, bpf_kfunc_call_memb1_release) +BTF_ID(func, bpf_kfunc_call_test_kptr_get) BTF_ID(func, bpf_kfunc_call_test_pass_ctx) BTF_ID(func, bpf_kfunc_call_test_pass1) BTF_ID(func, bpf_kfunc_call_test_pass2) @@ -683,17 +707,26 @@ BTF_SET_END(test_sk_check_kfunc_ids) BTF_SET_START(test_sk_acquire_kfunc_ids) BTF_ID(func, bpf_kfunc_call_test_acquire) +BTF_ID(func, bpf_kfunc_call_memb_acquire) +BTF_ID(func, bpf_kfunc_call_test_kptr_get) BTF_SET_END(test_sk_acquire_kfunc_ids) BTF_SET_START(test_sk_release_kfunc_ids) BTF_ID(func, bpf_kfunc_call_test_release) BTF_ID(func, bpf_kfunc_call_memb_release) +BTF_ID(func, bpf_kfunc_call_memb1_release) BTF_SET_END(test_sk_release_kfunc_ids) BTF_SET_START(test_sk_ret_null_kfunc_ids) BTF_ID(func, bpf_kfunc_call_test_acquire) +BTF_ID(func, bpf_kfunc_call_memb_acquire) +BTF_ID(func, bpf_kfunc_call_test_kptr_get) BTF_SET_END(test_sk_ret_null_kfunc_ids) +BTF_SET_START(test_sk_kptr_acquire_kfunc_ids) +BTF_ID(func, bpf_kfunc_call_test_kptr_get) +BTF_SET_END(test_sk_kptr_acquire_kfunc_ids) + static void *bpf_test_init(const union bpf_attr *kattr, u32 user_size, u32 size, u32 headroom, u32 tailroom) { @@ -1580,14 +1613,36 @@ out: static const struct btf_kfunc_id_set bpf_prog_test_kfunc_set = { .owner = THIS_MODULE, - .check_set = &test_sk_check_kfunc_ids, - .acquire_set = &test_sk_acquire_kfunc_ids, - .release_set = &test_sk_release_kfunc_ids, - .ret_null_set = &test_sk_ret_null_kfunc_ids, + .check_set = &test_sk_check_kfunc_ids, + .acquire_set = &test_sk_acquire_kfunc_ids, + .release_set = &test_sk_release_kfunc_ids, + .ret_null_set = &test_sk_ret_null_kfunc_ids, + .kptr_acquire_set = &test_sk_kptr_acquire_kfunc_ids }; +BTF_ID_LIST(bpf_prog_test_dtor_kfunc_ids) +BTF_ID(struct, prog_test_ref_kfunc) +BTF_ID(func, bpf_kfunc_call_test_release) +BTF_ID(struct, prog_test_member) +BTF_ID(func, bpf_kfunc_call_memb_release) + static int __init bpf_prog_test_run_init(void) { - return register_btf_kfunc_id_set(BPF_PROG_TYPE_SCHED_CLS, &bpf_prog_test_kfunc_set); + const struct btf_id_dtor_kfunc bpf_prog_test_dtor_kfunc[] = { + { + .btf_id = bpf_prog_test_dtor_kfunc_ids[0], + .kfunc_btf_id = bpf_prog_test_dtor_kfunc_ids[1] + }, + { + .btf_id = bpf_prog_test_dtor_kfunc_ids[2], + .kfunc_btf_id = bpf_prog_test_dtor_kfunc_ids[3], + }, + }; + int ret; + + ret = register_btf_kfunc_id_set(BPF_PROG_TYPE_SCHED_CLS, &bpf_prog_test_kfunc_set); + return ret ?: register_btf_id_dtor_kfuncs(bpf_prog_test_dtor_kfunc, + ARRAY_SIZE(bpf_prog_test_dtor_kfunc), + THIS_MODULE); } late_initcall(bpf_prog_test_run_init); diff --git a/net/bridge/br_device.c b/net/bridge/br_device.c index 8d6bab244c4a..58a4f70e01e3 100644 --- a/net/bridge/br_device.c +++ b/net/bridge/br_device.c @@ -465,6 +465,7 @@ static const struct net_device_ops br_netdev_ops = { .ndo_fix_features = br_fix_features, .ndo_fdb_add = br_fdb_add, .ndo_fdb_del = br_fdb_delete, + .ndo_fdb_del_bulk = br_fdb_delete_bulk, .ndo_fdb_dump = br_fdb_dump, .ndo_fdb_get = br_fdb_get, .ndo_bridge_getlink = br_getlink, diff --git a/net/bridge/br_fdb.c b/net/bridge/br_fdb.c index 6ccda68bd473..1a3d583fbc8e 100644 --- a/net/bridge/br_fdb.c +++ b/net/bridge/br_fdb.c @@ -558,18 +558,161 @@ void br_fdb_cleanup(struct work_struct *work) mod_delayed_work(system_long_wq, &br->gc_work, work_delay); } -/* Completely flush all dynamic entries in forwarding database.*/ -void br_fdb_flush(struct net_bridge *br) +static bool __fdb_flush_matches(const struct net_bridge *br, + const struct net_bridge_fdb_entry *f, + const struct net_bridge_fdb_flush_desc *desc) +{ + const struct net_bridge_port *dst = READ_ONCE(f->dst); + int port_ifidx = dst ? dst->dev->ifindex : br->dev->ifindex; + + if (desc->vlan_id && desc->vlan_id != f->key.vlan_id) + return false; + if (desc->port_ifindex && desc->port_ifindex != port_ifidx) + return false; + if (desc->flags_mask && (f->flags & desc->flags_mask) != desc->flags) + return false; + + return true; +} + +/* Flush forwarding database entries matching the description */ +void br_fdb_flush(struct net_bridge *br, + const struct net_bridge_fdb_flush_desc *desc) { struct net_bridge_fdb_entry *f; - struct hlist_node *tmp; - spin_lock_bh(&br->hash_lock); - hlist_for_each_entry_safe(f, tmp, &br->fdb_list, fdb_node) { - if (!test_bit(BR_FDB_STATIC, &f->flags)) + rcu_read_lock(); + hlist_for_each_entry_rcu(f, &br->fdb_list, fdb_node) { + if (!__fdb_flush_matches(br, f, desc)) + continue; + + spin_lock_bh(&br->hash_lock); + if (!hlist_unhashed(&f->fdb_node)) fdb_delete(br, f, true); + spin_unlock_bh(&br->hash_lock); } - spin_unlock_bh(&br->hash_lock); + rcu_read_unlock(); +} + +static unsigned long __ndm_state_to_fdb_flags(u16 ndm_state) +{ + unsigned long flags = 0; + + if (ndm_state & NUD_PERMANENT) + __set_bit(BR_FDB_LOCAL, &flags); + if (ndm_state & NUD_NOARP) + __set_bit(BR_FDB_STATIC, &flags); + + return flags; +} + +static unsigned long __ndm_flags_to_fdb_flags(u8 ndm_flags) +{ + unsigned long flags = 0; + + if (ndm_flags & NTF_USE) + __set_bit(BR_FDB_ADDED_BY_USER, &flags); + if (ndm_flags & NTF_EXT_LEARNED) + __set_bit(BR_FDB_ADDED_BY_EXT_LEARN, &flags); + if (ndm_flags & NTF_OFFLOADED) + __set_bit(BR_FDB_OFFLOADED, &flags); + if (ndm_flags & NTF_STICKY) + __set_bit(BR_FDB_STICKY, &flags); + + return flags; +} + +static int __fdb_flush_validate_ifindex(const struct net_bridge *br, + int ifindex, + struct netlink_ext_ack *extack) +{ + const struct net_device *dev; + + dev = __dev_get_by_index(dev_net(br->dev), ifindex); + if (!dev) { + NL_SET_ERR_MSG_MOD(extack, "Unknown flush device ifindex"); + return -ENODEV; + } + if (!netif_is_bridge_master(dev) && !netif_is_bridge_port(dev)) { + NL_SET_ERR_MSG_MOD(extack, "Flush device is not a bridge or bridge port"); + return -EINVAL; + } + if (netif_is_bridge_master(dev) && dev != br->dev) { + NL_SET_ERR_MSG_MOD(extack, + "Flush bridge device does not match target bridge device"); + return -EINVAL; + } + if (netif_is_bridge_port(dev)) { + struct net_bridge_port *p = br_port_get_rtnl(dev); + + if (p->br != br) { + NL_SET_ERR_MSG_MOD(extack, "Port belongs to a different bridge device"); + return -EINVAL; + } + } + + return 0; +} + +int br_fdb_delete_bulk(struct ndmsg *ndm, struct nlattr *tb[], + struct net_device *dev, u16 vid, + struct netlink_ext_ack *extack) +{ + u8 ndm_flags = ndm->ndm_flags & ~FDB_FLUSH_IGNORED_NDM_FLAGS; + struct net_bridge_fdb_flush_desc desc = { .vlan_id = vid }; + struct net_bridge_port *p = NULL; + struct net_bridge *br; + + if (netif_is_bridge_master(dev)) { + br = netdev_priv(dev); + } else { + p = br_port_get_rtnl(dev); + if (!p) { + NL_SET_ERR_MSG_MOD(extack, "Device is not a bridge port"); + return -EINVAL; + } + br = p->br; + } + + if (ndm_flags & ~FDB_FLUSH_ALLOWED_NDM_FLAGS) { + NL_SET_ERR_MSG(extack, "Unsupported fdb flush ndm flag bits set"); + return -EINVAL; + } + if (ndm->ndm_state & ~FDB_FLUSH_ALLOWED_NDM_STATES) { + NL_SET_ERR_MSG(extack, "Unsupported fdb flush ndm state bits set"); + return -EINVAL; + } + + desc.flags |= __ndm_state_to_fdb_flags(ndm->ndm_state); + desc.flags |= __ndm_flags_to_fdb_flags(ndm_flags); + if (tb[NDA_NDM_STATE_MASK]) { + u16 ndm_state_mask = nla_get_u16(tb[NDA_NDM_STATE_MASK]); + + desc.flags_mask |= __ndm_state_to_fdb_flags(ndm_state_mask); + } + if (tb[NDA_NDM_FLAGS_MASK]) { + u8 ndm_flags_mask = nla_get_u8(tb[NDA_NDM_FLAGS_MASK]); + + desc.flags_mask |= __ndm_flags_to_fdb_flags(ndm_flags_mask); + } + if (tb[NDA_IFINDEX]) { + int err, ifidx = nla_get_s32(tb[NDA_IFINDEX]); + + err = __fdb_flush_validate_ifindex(br, ifidx, extack); + if (err) + return err; + desc.port_ifindex = ifidx; + } else if (p) { + /* flush was invoked with port device and NTF_MASTER */ + desc.port_ifindex = p->dev->ifindex; + } + + br_debug(br, "flushing port ifindex: %d vlan id: %u flags: 0x%lx flags mask: 0x%lx\n", + desc.port_ifindex, desc.vlan_id, desc.flags, desc.flags_mask); + + br_fdb_flush(br, &desc); + + return 0; } /* Flush all entries referring to a specific port. diff --git a/net/bridge/br_mdb.c b/net/bridge/br_mdb.c index 4556d913955b..fdcc641fc89a 100644 --- a/net/bridge/br_mdb.c +++ b/net/bridge/br_mdb.c @@ -251,14 +251,16 @@ static int __mdb_fill_info(struct sk_buff *skb, __mdb_entry_fill_flags(&e, flags); e.ifindex = ifindex; e.vid = mp->addr.vid; - if (mp->addr.proto == htons(ETH_P_IP)) + if (mp->addr.proto == htons(ETH_P_IP)) { e.addr.u.ip4 = mp->addr.dst.ip4; #if IS_ENABLED(CONFIG_IPV6) - else if (mp->addr.proto == htons(ETH_P_IPV6)) + } else if (mp->addr.proto == htons(ETH_P_IPV6)) { e.addr.u.ip6 = mp->addr.dst.ip6; #endif - else + } else { ether_addr_copy(e.addr.u.mac_addr, mp->addr.dst.mac_addr); + e.state = MDB_PG_FLAGS_PERMANENT; + } e.addr.proto = mp->addr.proto; nest_ent = nla_nest_start_noflag(skb, MDBA_MDB_ENTRY_INFO); @@ -873,8 +875,8 @@ static int br_mdb_add_group(struct net_bridge *br, struct net_bridge_port *port, return -EINVAL; /* host join errors which can happen before creating the group */ - if (!port) { - /* don't allow any flags for host-joined groups */ + if (!port && !br_group_is_l2(&group)) { + /* don't allow any flags for host-joined IP groups */ if (entry->state) { NL_SET_ERR_MSG_MOD(extack, "Flags are not allowed for host groups"); return -EINVAL; diff --git a/net/bridge/br_netlink.c b/net/bridge/br_netlink.c index 200ad05b296f..bb01776d2d88 100644 --- a/net/bridge/br_netlink.c +++ b/net/bridge/br_netlink.c @@ -1326,8 +1326,13 @@ static int br_changelink(struct net_device *brdev, struct nlattr *tb[], br_recalculate_fwd_mask(br); } - if (data[IFLA_BR_FDB_FLUSH]) - br_fdb_flush(br); + if (data[IFLA_BR_FDB_FLUSH]) { + struct net_bridge_fdb_flush_desc desc = { + .flags_mask = BR_FDB_STATIC + }; + + br_fdb_flush(br, &desc); + } #ifdef CONFIG_BRIDGE_IGMP_SNOOPING if (data[IFLA_BR_MCAST_ROUTER]) { diff --git a/net/bridge/br_private.h b/net/bridge/br_private.h index 18ccc3d5d296..6ae882cfae1c 100644 --- a/net/bridge/br_private.h +++ b/net/bridge/br_private.h @@ -274,6 +274,13 @@ struct net_bridge_fdb_entry { struct rcu_head rcu; }; +struct net_bridge_fdb_flush_desc { + unsigned long flags; + unsigned long flags_mask; + int port_ifindex; + u16 vlan_id; +}; + #define MDB_PG_FLAGS_PERMANENT BIT(0) #define MDB_PG_FLAGS_OFFLOAD BIT(1) #define MDB_PG_FLAGS_FAST_LEAVE BIT(2) @@ -755,11 +762,17 @@ static inline void br_netpoll_disable(struct net_bridge_port *p) #endif /* br_fdb.c */ +#define FDB_FLUSH_IGNORED_NDM_FLAGS (NTF_MASTER | NTF_SELF) +#define FDB_FLUSH_ALLOWED_NDM_STATES (NUD_PERMANENT | NUD_NOARP) +#define FDB_FLUSH_ALLOWED_NDM_FLAGS (NTF_USE | NTF_EXT_LEARNED | \ + NTF_STICKY | NTF_OFFLOADED) + int br_fdb_init(void); void br_fdb_fini(void); int br_fdb_hash_init(struct net_bridge *br); void br_fdb_hash_fini(struct net_bridge *br); -void br_fdb_flush(struct net_bridge *br); +void br_fdb_flush(struct net_bridge *br, + const struct net_bridge_fdb_flush_desc *desc); void br_fdb_find_delete_local(struct net_bridge *br, const struct net_bridge_port *p, const unsigned char *addr, u16 vid); @@ -781,6 +794,9 @@ void br_fdb_update(struct net_bridge *br, struct net_bridge_port *source, int br_fdb_delete(struct ndmsg *ndm, struct nlattr *tb[], struct net_device *dev, const unsigned char *addr, u16 vid); +int br_fdb_delete_bulk(struct ndmsg *ndm, struct nlattr *tb[], + struct net_device *dev, u16 vid, + struct netlink_ext_ack *extack); int br_fdb_add(struct ndmsg *nlh, struct nlattr *tb[], struct net_device *dev, const unsigned char *addr, u16 vid, u16 nlh_flags, struct netlink_ext_ack *extack); diff --git a/net/bridge/br_switchdev.c b/net/bridge/br_switchdev.c index 18affda2b522..8f3d76c751dd 100644 --- a/net/bridge/br_switchdev.c +++ b/net/bridge/br_switchdev.c @@ -72,7 +72,8 @@ bool nbp_switchdev_allowed_egress(const struct net_bridge_port *p, /* Flags that can be offloaded to hardware */ #define BR_PORT_FLAGS_HW_OFFLOAD (BR_LEARNING | BR_FLOOD | \ - BR_MCAST_FLOOD | BR_BCAST_FLOOD | BR_PORT_LOCKED) + BR_MCAST_FLOOD | BR_BCAST_FLOOD | BR_PORT_LOCKED | \ + BR_HAIRPIN_MODE | BR_ISOLATED | BR_MULTICAST_TO_UNICAST) int br_switchdev_set_port_flag(struct net_bridge_port *p, unsigned long flags, diff --git a/net/bridge/br_sysfs_br.c b/net/bridge/br_sysfs_br.c index 3f7ca88c2aa3..612e367fff20 100644 --- a/net/bridge/br_sysfs_br.c +++ b/net/bridge/br_sysfs_br.c @@ -344,7 +344,11 @@ static DEVICE_ATTR_RW(group_addr); static int set_flush(struct net_bridge *br, unsigned long val, struct netlink_ext_ack *extack) { - br_fdb_flush(br); + struct net_bridge_fdb_flush_desc desc = { + .flags_mask = BR_FDB_STATIC + }; + + br_fdb_flush(br, &desc); return 0; } diff --git a/net/caif/caif_socket.c b/net/caif/caif_socket.c index 2b8892d502f7..251e666ba9a2 100644 --- a/net/caif/caif_socket.c +++ b/net/caif/caif_socket.c @@ -282,7 +282,7 @@ static int caif_seqpkt_recvmsg(struct socket *sock, struct msghdr *m, if (flags & MSG_OOB) goto read_error; - skb = skb_recv_datagram(sk, flags, 0 , &ret); + skb = skb_recv_datagram(sk, flags, &ret); if (!skb) goto read_error; copylen = skb->len; diff --git a/net/can/bcm.c b/net/can/bcm.c index 95d209b52e6a..65ee1b784a30 100644 --- a/net/can/bcm.c +++ b/net/can/bcm.c @@ -1632,12 +1632,9 @@ static int bcm_recvmsg(struct socket *sock, struct msghdr *msg, size_t size, struct sock *sk = sock->sk; struct sk_buff *skb; int error = 0; - int noblock; int err; - noblock = flags & MSG_DONTWAIT; - flags &= ~MSG_DONTWAIT; - skb = skb_recv_datagram(sk, flags, noblock, &error); + skb = skb_recv_datagram(sk, flags, &error); if (!skb) return error; @@ -1650,7 +1647,7 @@ static int bcm_recvmsg(struct socket *sock, struct msghdr *msg, size_t size, return err; } - sock_recv_ts_and_drops(msg, sk, skb); + sock_recv_cmsgs(msg, sk, skb); if (msg->msg_name) { __sockaddr_check_size(BCM_MIN_NAMELEN); diff --git a/net/can/isotp.c b/net/can/isotp.c index 1e7c6a460ef9..35a1ae61744c 100644 --- a/net/can/isotp.c +++ b/net/can/isotp.c @@ -1055,7 +1055,6 @@ static int isotp_recvmsg(struct socket *sock, struct msghdr *msg, size_t size, struct sock *sk = sock->sk; struct sk_buff *skb; struct isotp_sock *so = isotp_sk(sk); - int noblock = flags & MSG_DONTWAIT; int ret = 0; if (flags & ~(MSG_DONTWAIT | MSG_TRUNC | MSG_PEEK)) @@ -1064,8 +1063,7 @@ static int isotp_recvmsg(struct socket *sock, struct msghdr *msg, size_t size, if (!so->bound) return -EADDRNOTAVAIL; - flags &= ~MSG_DONTWAIT; - skb = skb_recv_datagram(sk, flags, noblock, &ret); + skb = skb_recv_datagram(sk, flags, &ret); if (!skb) return ret; diff --git a/net/can/j1939/socket.c b/net/can/j1939/socket.c index 6dff4510687a..f5ecfdcf57b2 100644 --- a/net/can/j1939/socket.c +++ b/net/can/j1939/socket.c @@ -802,7 +802,7 @@ static int j1939_sk_recvmsg(struct socket *sock, struct msghdr *msg, return sock_recv_errqueue(sock->sk, msg, size, SOL_CAN_J1939, SCM_J1939_ERRQUEUE); - skb = skb_recv_datagram(sk, flags, 0, &ret); + skb = skb_recv_datagram(sk, flags, &ret); if (!skb) return ret; @@ -841,7 +841,7 @@ static int j1939_sk_recvmsg(struct socket *sock, struct msghdr *msg, paddr->can_addr.j1939.pgn = skcb->addr.pgn; } - sock_recv_ts_and_drops(msg, sk, skb); + sock_recv_cmsgs(msg, sk, skb); msg->msg_flags |= skcb->msg_flags; skb_free_datagram(sk, skb); diff --git a/net/can/raw.c b/net/can/raw.c index 7105fa4824e4..b7dbb57557f3 100644 --- a/net/can/raw.c +++ b/net/can/raw.c @@ -846,16 +846,12 @@ static int raw_recvmsg(struct socket *sock, struct msghdr *msg, size_t size, struct sock *sk = sock->sk; struct sk_buff *skb; int err = 0; - int noblock; - - noblock = flags & MSG_DONTWAIT; - flags &= ~MSG_DONTWAIT; if (flags & MSG_ERRQUEUE) return sock_recv_errqueue(sk, msg, size, SOL_CAN_RAW, SCM_CAN_RAW_ERRQUEUE); - skb = skb_recv_datagram(sk, flags, noblock, &err); + skb = skb_recv_datagram(sk, flags, &err); if (!skb) return err; @@ -870,7 +866,7 @@ static int raw_recvmsg(struct socket *sock, struct msghdr *msg, size_t size, return err; } - sock_recv_ts_and_drops(msg, sk, skb); + sock_recv_cmsgs(msg, sk, skb); if (msg->msg_name) { __sockaddr_check_size(RAW_MIN_NAMELEN); diff --git a/net/core/bpf_sk_storage.c b/net/core/bpf_sk_storage.c index e3ac36380520..a25ec93729b9 100644 --- a/net/core/bpf_sk_storage.c +++ b/net/core/bpf_sk_storage.c @@ -40,7 +40,7 @@ static int bpf_sk_storage_del(struct sock *sk, struct bpf_map *map) if (!sdata) return -ENOENT; - bpf_selem_unlink(SELEM(sdata)); + bpf_selem_unlink(SELEM(sdata), true); return 0; } @@ -75,8 +75,8 @@ void bpf_sk_storage_free(struct sock *sk) * sk_storage. */ bpf_selem_unlink_map(selem); - free_sk_storage = bpf_selem_unlink_storage_nolock(sk_storage, - selem, true); + free_sk_storage = bpf_selem_unlink_storage_nolock( + sk_storage, selem, true, false); } raw_spin_unlock_bh(&sk_storage->lock); rcu_read_unlock(); @@ -338,7 +338,7 @@ bpf_sk_storage_ptr(void *owner) return &sk->sk_bpf_storage; } -static int sk_storage_map_btf_id; +BTF_ID_LIST_SINGLE(sk_storage_map_btf_ids, struct, bpf_local_storage_map) const struct bpf_map_ops sk_storage_map_ops = { .map_meta_equal = bpf_map_meta_equal, .map_alloc_check = bpf_local_storage_map_alloc_check, @@ -349,8 +349,7 @@ const struct bpf_map_ops sk_storage_map_ops = { .map_update_elem = bpf_fd_sk_storage_update_elem, .map_delete_elem = bpf_fd_sk_storage_delete_elem, .map_check_btf = bpf_local_storage_map_check_btf, - .map_btf_name = "bpf_local_storage_map", - .map_btf_id = &sk_storage_map_btf_id, + .map_btf_id = &sk_storage_map_btf_ids[0], .map_local_storage_charge = bpf_sk_storage_charge, .map_local_storage_uncharge = bpf_sk_storage_uncharge, .map_owner_storage_ptr = bpf_sk_storage_ptr, diff --git a/net/core/datagram.c b/net/core/datagram.c index ee290776c661..50f4faeea76c 100644 --- a/net/core/datagram.c +++ b/net/core/datagram.c @@ -62,8 +62,6 @@ #include <trace/events/skb.h> #include <net/busy_poll.h> -#include "datagram.h" - /* * Is a socket 'connection oriented' ? */ @@ -310,12 +308,11 @@ struct sk_buff *__skb_recv_datagram(struct sock *sk, EXPORT_SYMBOL(__skb_recv_datagram); struct sk_buff *skb_recv_datagram(struct sock *sk, unsigned int flags, - int noblock, int *err) + int *err) { int off = 0; - return __skb_recv_datagram(sk, &sk->sk_receive_queue, - flags | (noblock ? MSG_DONTWAIT : 0), + return __skb_recv_datagram(sk, &sk->sk_receive_queue, flags, &off, err); } EXPORT_SYMBOL(skb_recv_datagram); diff --git a/net/core/datagram.h b/net/core/datagram.h deleted file mode 100644 index bcfb75bfa3b2..000000000000 --- a/net/core/datagram.h +++ /dev/null @@ -1,15 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ - -#ifndef _NET_CORE_DATAGRAM_H_ -#define _NET_CORE_DATAGRAM_H_ - -#include <linux/types.h> - -struct sock; -struct sk_buff; -struct iov_iter; - -int __zerocopy_sg_from_iter(struct sock *sk, struct sk_buff *skb, - struct iov_iter *from, size_t length); - -#endif /* _NET_CORE_DATAGRAM_H_ */ diff --git a/net/core/dev.c b/net/core/dev.c index 1461c2d9dec8..c2d73595a7c3 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -151,6 +151,7 @@ #include <linux/prandom.h> #include <linux/once_lite.h> +#include "dev.h" #include "net-sysfs.h" @@ -701,6 +702,10 @@ int dev_fill_forward_path(const struct net_device *dev, const u8 *daddr, if (WARN_ON_ONCE(last_dev == ctx.dev)) return -1; } + + if (!ctx.dev) + return ret; + path = dev_fwd_path(stack); if (!path) return -1; @@ -3920,6 +3925,25 @@ sch_handle_egress(struct sk_buff *skb, int *ret, struct net_device *dev) return skb; } + +static struct netdev_queue * +netdev_tx_queue_mapping(struct net_device *dev, struct sk_buff *skb) +{ + int qm = skb_get_queue_mapping(skb); + + return netdev_get_tx_queue(dev, netdev_cap_txqueue(dev, qm)); +} + +static bool netdev_xmit_txqueue_skipped(void) +{ + return __this_cpu_read(softnet_data.xmit.skip_txqueue); +} + +void netdev_xmit_skip_txqueue(bool skip) +{ + __this_cpu_write(softnet_data.xmit.skip_txqueue, skip); +} +EXPORT_SYMBOL_GPL(netdev_xmit_skip_txqueue); #endif /* CONFIG_NET_EGRESS */ #ifdef CONFIG_XPS @@ -4087,10 +4111,10 @@ struct netdev_queue *netdev_core_pick_tx(struct net_device *dev, * the BH enable code must have IRQs enabled so that it will not deadlock. * --BLG */ -static int __dev_queue_xmit(struct sk_buff *skb, struct net_device *sb_dev) +int __dev_queue_xmit(struct sk_buff *skb, struct net_device *sb_dev) { struct net_device *dev = skb->dev; - struct netdev_queue *txq; + struct netdev_queue *txq = NULL; struct Qdisc *q; int rc = -ENOMEM; bool again = false; @@ -4118,11 +4142,17 @@ static int __dev_queue_xmit(struct sk_buff *skb, struct net_device *sb_dev) if (!skb) goto out; } + + netdev_xmit_skip_txqueue(false); + nf_skip_egress(skb, true); skb = sch_handle_egress(skb, &rc, dev); if (!skb) goto out; nf_skip_egress(skb, false); + + if (netdev_xmit_txqueue_skipped()) + txq = netdev_tx_queue_mapping(dev, skb); } #endif /* If device/qdisc don't need skb->dst, release it right now while @@ -4133,7 +4163,9 @@ static int __dev_queue_xmit(struct sk_buff *skb, struct net_device *sb_dev) else skb_dst_force(skb); - txq = netdev_core_pick_tx(dev, skb, sb_dev); + if (!txq) + txq = netdev_core_pick_tx(dev, skb, sb_dev); + q = rcu_dereference_bh(txq->qdisc); trace_net_dev_queue(skb); @@ -4203,18 +4235,7 @@ out: rcu_read_unlock_bh(); return rc; } - -int dev_queue_xmit(struct sk_buff *skb) -{ - return __dev_queue_xmit(skb, NULL); -} -EXPORT_SYMBOL(dev_queue_xmit); - -int dev_queue_xmit_accel(struct sk_buff *skb, struct net_device *sb_dev) -{ - return __dev_queue_xmit(skb, sb_dev); -} -EXPORT_SYMBOL(dev_queue_xmit_accel); +EXPORT_SYMBOL(__dev_queue_xmit); int __dev_direct_xmit(struct sk_buff *skb, u16 queue_id) { @@ -4513,6 +4534,12 @@ static void rps_trigger_softirq(void *data) #endif /* CONFIG_RPS */ +/* Called from hardirq (IPI) context */ +static void trigger_rx_softirq(void *data __always_unused) +{ + __raise_softirq_irqoff(NET_RX_SOFTIRQ); +} + /* * Check if this softnet_data structure is another cpu one * If yes, queue it to our IPI list and return 1 @@ -5370,13 +5397,11 @@ check_vlan_id: *ppt_prev = pt_prev; } else { drop: - if (!deliver_exact) { + if (!deliver_exact) dev_core_stats_rx_dropped_inc(skb->dev); - kfree_skb_reason(skb, SKB_DROP_REASON_PTYPE_ABSENT); - } else { + else dev_core_stats_rx_nohandler_inc(skb->dev); - kfree_skb(skb); - } + kfree_skb_reason(skb, SKB_DROP_REASON_UNHANDLED_PROTO); /* Jamal, now you will not able to escape explaining * me how you were going to use this. :-) */ @@ -6278,8 +6303,8 @@ int dev_set_threaded(struct net_device *dev, bool threaded) } EXPORT_SYMBOL(dev_set_threaded); -void netif_napi_add(struct net_device *dev, struct napi_struct *napi, - int (*poll)(struct napi_struct *, int), int weight) +void netif_napi_add_weight(struct net_device *dev, struct napi_struct *napi, + int (*poll)(struct napi_struct *, int), int weight) { if (WARN_ON(test_and_set_bit(NAPI_STATE_LISTED, &napi->state))) return; @@ -6312,7 +6337,7 @@ void netif_napi_add(struct net_device *dev, struct napi_struct *napi, if (dev->threaded && napi_kthread_create(napi)) dev->threaded = 0; } -EXPORT_SYMBOL(netif_napi_add); +EXPORT_SYMBOL(netif_napi_add_weight); void napi_disable(struct napi_struct *n) { @@ -6541,6 +6566,28 @@ static int napi_threaded_poll(void *data) return 0; } +static void skb_defer_free_flush(struct softnet_data *sd) +{ + struct sk_buff *skb, *next; + unsigned long flags; + + /* Paired with WRITE_ONCE() in skb_attempt_defer_free() */ + if (!READ_ONCE(sd->defer_list)) + return; + + spin_lock_irqsave(&sd->defer_lock, flags); + skb = sd->defer_list; + sd->defer_list = NULL; + sd->defer_count = 0; + spin_unlock_irqrestore(&sd->defer_lock, flags); + + while (skb != NULL) { + next = skb->next; + __kfree_skb(skb); + skb = next; + } +} + static __latent_entropy void net_rx_action(struct softirq_action *h) { struct softnet_data *sd = this_cpu_ptr(&softnet_data); @@ -6559,7 +6606,7 @@ static __latent_entropy void net_rx_action(struct softirq_action *h) if (list_empty(&list)) { if (!sd_has_rps_ipi_waiting(sd) && list_empty(&repoll)) - return; + goto end; break; } @@ -6586,6 +6633,8 @@ static __latent_entropy void net_rx_action(struct softirq_action *h) __raise_softirq_irqoff(NET_RX_SOFTIRQ); net_rps_action_and_irq_enable(sd); +end: + skb_defer_free_flush(sd); } struct netdev_adjacent { @@ -8641,7 +8690,6 @@ void dev_set_group(struct net_device *dev, int new_group) { dev->group = new_group; } -EXPORT_SYMBOL(dev_set_group); /** * dev_pre_changeaddr_notify - Call NETDEV_PRE_CHANGEADDR. @@ -8756,7 +8804,6 @@ int dev_change_carrier(struct net_device *dev, bool new_carrier) return -ENODEV; return ops->ndo_change_carrier(dev, new_carrier); } -EXPORT_SYMBOL(dev_change_carrier); /** * dev_get_phys_port_id - Get device physical port ID @@ -8774,7 +8821,6 @@ int dev_get_phys_port_id(struct net_device *dev, return -EOPNOTSUPP; return ops->ndo_get_phys_port_id(dev, ppid); } -EXPORT_SYMBOL(dev_get_phys_port_id); /** * dev_get_phys_port_name - Get device physical port name @@ -8797,7 +8843,6 @@ int dev_get_phys_port_name(struct net_device *dev, } return devlink_compat_phys_port_name_get(dev, name, len); } -EXPORT_SYMBOL(dev_get_phys_port_name); /** * dev_get_port_parent_id - Get the device's port parent identifier @@ -8879,7 +8924,6 @@ int dev_change_proto_down(struct net_device *dev, bool proto_down) dev->proto_down = proto_down; return 0; } -EXPORT_SYMBOL(dev_change_proto_down); /** * dev_change_proto_down_reason - proto down reason @@ -8904,7 +8948,6 @@ void dev_change_proto_down_reason(struct net_device *dev, unsigned long mask, } } } -EXPORT_SYMBOL(dev_change_proto_down_reason); struct bpf_xdp_link { struct bpf_link link; @@ -9431,7 +9474,7 @@ static int dev_new_index(struct net *net) } /* Delayed registration/unregisteration */ -static LIST_HEAD(net_todo_list); +LIST_HEAD(net_todo_list); DECLARE_WAIT_QUEUE_HEAD(netdev_unregistering_wq); static void net_set_todo(struct net_device *dev) @@ -10355,6 +10398,7 @@ struct rtnl_link_stats64 *dev_get_stats(struct net_device *dev, storage->rx_dropped += READ_ONCE(core_stats->rx_dropped); storage->tx_dropped += READ_ONCE(core_stats->tx_dropped); storage->rx_nohandler += READ_ONCE(core_stats->rx_nohandler); + storage->rx_otherhost_dropped += READ_ONCE(core_stats->rx_otherhost_dropped); } } return storage; @@ -11297,6 +11341,8 @@ static int __init net_dev_init(void) INIT_CSD(&sd->csd, rps_trigger_softirq, sd); sd->cpu = i; #endif + INIT_CSD(&sd->defer_csd, trigger_rx_softirq, NULL); + spin_lock_init(&sd->defer_lock); init_gro_hash(&sd->backlog); sd->backlog.poll = process_backlog; diff --git a/net/core/dev.h b/net/core/dev.h new file mode 100644 index 000000000000..27923df00637 --- /dev/null +++ b/net/core/dev.h @@ -0,0 +1,91 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ +#ifndef _NET_CORE_DEV_H +#define _NET_CORE_DEV_H + +#include <linux/types.h> + +struct net; +struct net_device; +struct netdev_bpf; +struct netdev_phys_item_id; +struct netlink_ext_ack; + +/* Random bits of netdevice that don't need to be exposed */ +#define FLOW_LIMIT_HISTORY (1 << 7) /* must be ^2 and !overflow buckets */ +struct sd_flow_limit { + u64 count; + unsigned int num_buckets; + unsigned int history_head; + u16 history[FLOW_LIMIT_HISTORY]; + u8 buckets[]; +}; + +extern int netdev_flow_limit_table_len; + +#ifdef CONFIG_PROC_FS +int __init dev_proc_init(void); +#else +#define dev_proc_init() 0 +#endif + +void linkwatch_init_dev(struct net_device *dev); +void linkwatch_forget_dev(struct net_device *dev); +void linkwatch_run_queue(void); + +void dev_addr_flush(struct net_device *dev); +int dev_addr_init(struct net_device *dev); +void dev_addr_check(struct net_device *dev); + +/* sysctls not referred to from outside net/core/ */ +extern int netdev_budget; +extern unsigned int netdev_budget_usecs; + +extern int netdev_tstamp_prequeue; +extern int netdev_unregister_timeout_secs; +extern int weight_p; +extern int dev_weight_rx_bias; +extern int dev_weight_tx_bias; + +/* rtnl helpers */ +extern struct list_head net_todo_list; +void netdev_run_todo(void); + +/* netdev management, shared between various uAPI entry points */ +struct netdev_name_node { + struct hlist_node hlist; + struct list_head list; + struct net_device *dev; + const char *name; +}; + +int netdev_get_name(struct net *net, char *name, int ifindex); +int dev_change_name(struct net_device *dev, const char *newname); + +int netdev_name_node_alt_create(struct net_device *dev, const char *name); +int netdev_name_node_alt_destroy(struct net_device *dev, const char *name); + +int dev_validate_mtu(struct net_device *dev, int mtu, + struct netlink_ext_ack *extack); +int dev_set_mtu_ext(struct net_device *dev, int mtu, + struct netlink_ext_ack *extack); + +int dev_get_phys_port_id(struct net_device *dev, + struct netdev_phys_item_id *ppid); +int dev_get_phys_port_name(struct net_device *dev, + char *name, size_t len); + +int dev_change_proto_down(struct net_device *dev, bool proto_down); +void dev_change_proto_down_reason(struct net_device *dev, unsigned long mask, + u32 value); + +typedef int (*bpf_op_t)(struct net_device *dev, struct netdev_bpf *bpf); +int dev_change_xdp_fd(struct net_device *dev, struct netlink_ext_ack *extack, + int fd, int expected_fd, u32 flags); + +int dev_change_tx_queue_len(struct net_device *dev, unsigned long new_len); +void dev_set_group(struct net_device *dev, int new_group); +int dev_change_carrier(struct net_device *dev, bool new_carrier); + +void __dev_set_rx_mode(struct net_device *dev); + +#endif diff --git a/net/core/dev_addr_lists.c b/net/core/dev_addr_lists.c index bead38ca50bd..baa63dee2829 100644 --- a/net/core/dev_addr_lists.c +++ b/net/core/dev_addr_lists.c @@ -12,6 +12,8 @@ #include <linux/export.h> #include <linux/list.h> +#include "dev.h" + /* * General list handling functions */ diff --git a/net/core/dev_ioctl.c b/net/core/dev_ioctl.c index 1b807d119da5..4f6be442ae7e 100644 --- a/net/core/dev_ioctl.c +++ b/net/core/dev_ioctl.c @@ -10,6 +10,8 @@ #include <net/dsa.h> #include <net/wext.h> +#include "dev.h" + /* * Map an interface index to its name (SIOCGIFNAME) */ diff --git a/net/core/devlink.c b/net/core/devlink.c index aeca13b6e57b..5f441a0e34f4 100644 --- a/net/core/devlink.c +++ b/net/core/devlink.c @@ -54,6 +54,8 @@ struct devlink { struct list_head trap_list; struct list_head trap_group_list; struct list_head trap_policer_list; + struct list_head linecard_list; + struct mutex linecards_lock; /* protects linecard_list */ const struct devlink_ops *ops; u64 features; struct xarray snapshot_ids; @@ -70,6 +72,24 @@ struct devlink { char priv[] __aligned(NETDEV_ALIGN); }; +struct devlink_linecard_ops; +struct devlink_linecard_type; + +struct devlink_linecard { + struct list_head list; + struct devlink *devlink; + unsigned int index; + refcount_t refcount; + const struct devlink_linecard_ops *ops; + void *priv; + enum devlink_linecard_state state; + struct mutex state_lock; /* Protects state and device_list */ + const char *type; + struct devlink_linecard_type *types; + unsigned int types_count; + struct list_head device_list; +}; + /** * struct devlink_resource - devlink resource * @name: name of the resource @@ -397,6 +417,58 @@ devlink_rate_get_from_info(struct devlink *devlink, struct genl_info *info) return ERR_PTR(-EINVAL); } +static struct devlink_linecard * +devlink_linecard_get_by_index(struct devlink *devlink, + unsigned int linecard_index) +{ + struct devlink_linecard *devlink_linecard; + + list_for_each_entry(devlink_linecard, &devlink->linecard_list, list) { + if (devlink_linecard->index == linecard_index) + return devlink_linecard; + } + return NULL; +} + +static bool devlink_linecard_index_exists(struct devlink *devlink, + unsigned int linecard_index) +{ + return devlink_linecard_get_by_index(devlink, linecard_index); +} + +static struct devlink_linecard * +devlink_linecard_get_from_attrs(struct devlink *devlink, struct nlattr **attrs) +{ + if (attrs[DEVLINK_ATTR_LINECARD_INDEX]) { + u32 linecard_index = nla_get_u32(attrs[DEVLINK_ATTR_LINECARD_INDEX]); + struct devlink_linecard *linecard; + + mutex_lock(&devlink->linecards_lock); + linecard = devlink_linecard_get_by_index(devlink, linecard_index); + if (linecard) + refcount_inc(&linecard->refcount); + mutex_unlock(&devlink->linecards_lock); + if (!linecard) + return ERR_PTR(-ENODEV); + return linecard; + } + return ERR_PTR(-EINVAL); +} + +static struct devlink_linecard * +devlink_linecard_get_from_info(struct devlink *devlink, struct genl_info *info) +{ + return devlink_linecard_get_from_attrs(devlink, info->attrs); +} + +static void devlink_linecard_put(struct devlink_linecard *linecard) +{ + if (refcount_dec_and_test(&linecard->refcount)) { + mutex_destroy(&linecard->state_lock); + kfree(linecard); + } +} + struct devlink_sb { struct list_head list; unsigned int index; @@ -617,16 +689,18 @@ devlink_region_snapshot_get_by_id(struct devlink_region *region, u32 id) #define DEVLINK_NL_FLAG_NEED_DEVLINK_OR_PORT BIT(1) #define DEVLINK_NL_FLAG_NEED_RATE BIT(2) #define DEVLINK_NL_FLAG_NEED_RATE_NODE BIT(3) +#define DEVLINK_NL_FLAG_NEED_LINECARD BIT(4) /* The per devlink instance lock is taken by default in the pre-doit * operation, yet several commands do not require this. The global * devlink lock is taken and protects from disruption by user-calls. */ -#define DEVLINK_NL_FLAG_NO_LOCK BIT(4) +#define DEVLINK_NL_FLAG_NO_LOCK BIT(5) static int devlink_nl_pre_doit(const struct genl_ops *ops, struct sk_buff *skb, struct genl_info *info) { + struct devlink_linecard *linecard; struct devlink_port *devlink_port; struct devlink *devlink; int err; @@ -669,6 +743,13 @@ static int devlink_nl_pre_doit(const struct genl_ops *ops, goto unlock; } info->user_ptr[1] = rate_node; + } else if (ops->internal_flags & DEVLINK_NL_FLAG_NEED_LINECARD) { + linecard = devlink_linecard_get_from_info(devlink, info); + if (IS_ERR(linecard)) { + err = PTR_ERR(linecard); + goto unlock; + } + info->user_ptr[1] = linecard; } return 0; @@ -683,9 +764,14 @@ unlock: static void devlink_nl_post_doit(const struct genl_ops *ops, struct sk_buff *skb, struct genl_info *info) { + struct devlink_linecard *linecard; struct devlink *devlink; devlink = info->user_ptr[0]; + if (ops->internal_flags & DEVLINK_NL_FLAG_NEED_LINECARD) { + linecard = info->user_ptr[1]; + devlink_linecard_put(linecard); + } if (~ops->internal_flags & DEVLINK_NL_FLAG_NO_LOCK) mutex_unlock(&devlink->lock); devlink_put(devlink); @@ -1158,6 +1244,10 @@ static int devlink_nl_port_fill(struct sk_buff *msg, goto nla_put_failure; if (devlink_nl_port_function_attrs_put(msg, devlink_port, extack)) goto nla_put_failure; + if (devlink_port->linecard && + nla_put_u32(msg, DEVLINK_ATTR_LINECARD_INDEX, + devlink_port->linecard->index)) + goto nla_put_failure; genlmsg_end(msg, hdr); return 0; @@ -1964,6 +2054,562 @@ static int devlink_nl_cmd_rate_del_doit(struct sk_buff *skb, return err; } +struct devlink_linecard_type { + const char *type; + const void *priv; +}; + +struct devlink_linecard_device { + struct list_head list; + unsigned int index; + void *priv; +}; + +static int +devlink_nl_linecard_device_fill(struct sk_buff *msg, + struct devlink_linecard_device *linecard_device) +{ + struct nlattr *attr; + + attr = nla_nest_start(msg, DEVLINK_ATTR_LINECARD_DEVICE); + if (!attr) + return -EMSGSIZE; + if (nla_put_u32(msg, DEVLINK_ATTR_LINECARD_DEVICE_INDEX, + linecard_device->index)) { + nla_nest_cancel(msg, attr); + return -EMSGSIZE; + } + nla_nest_end(msg, attr); + + return 0; +} + +static int devlink_nl_linecard_devices_fill(struct sk_buff *msg, + struct devlink_linecard *linecard) +{ + struct devlink_linecard_device *linecard_device; + struct nlattr *attr; + int err; + + if (list_empty(&linecard->device_list)) + return 0; + + attr = nla_nest_start(msg, DEVLINK_ATTR_LINECARD_DEVICE_LIST); + if (!attr) + return -EMSGSIZE; + list_for_each_entry(linecard_device, &linecard->device_list, list) { + err = devlink_nl_linecard_device_fill(msg, linecard_device); + if (err) { + nla_nest_cancel(msg, attr); + return err; + } + } + nla_nest_end(msg, attr); + + return 0; +} + +static int devlink_nl_linecard_fill(struct sk_buff *msg, + struct devlink *devlink, + struct devlink_linecard *linecard, + enum devlink_command cmd, u32 portid, + u32 seq, int flags, + struct netlink_ext_ack *extack) +{ + struct devlink_linecard_type *linecard_type; + struct nlattr *attr; + void *hdr; + int err; + int i; + + hdr = genlmsg_put(msg, portid, seq, &devlink_nl_family, flags, cmd); + if (!hdr) + return -EMSGSIZE; + + if (devlink_nl_put_handle(msg, devlink)) + goto nla_put_failure; + if (nla_put_u32(msg, DEVLINK_ATTR_LINECARD_INDEX, linecard->index)) + goto nla_put_failure; + if (nla_put_u8(msg, DEVLINK_ATTR_LINECARD_STATE, linecard->state)) + goto nla_put_failure; + if (linecard->type && + nla_put_string(msg, DEVLINK_ATTR_LINECARD_TYPE, linecard->type)) + goto nla_put_failure; + + if (linecard->types_count) { + attr = nla_nest_start(msg, + DEVLINK_ATTR_LINECARD_SUPPORTED_TYPES); + if (!attr) + goto nla_put_failure; + for (i = 0; i < linecard->types_count; i++) { + linecard_type = &linecard->types[i]; + if (nla_put_string(msg, DEVLINK_ATTR_LINECARD_TYPE, + linecard_type->type)) { + nla_nest_cancel(msg, attr); + goto nla_put_failure; + } + } + nla_nest_end(msg, attr); + } + + err = devlink_nl_linecard_devices_fill(msg, linecard); + if (err) + goto nla_put_failure; + + genlmsg_end(msg, hdr); + return 0; + +nla_put_failure: + genlmsg_cancel(msg, hdr); + return -EMSGSIZE; +} + +static void devlink_linecard_notify(struct devlink_linecard *linecard, + enum devlink_command cmd) +{ + struct devlink *devlink = linecard->devlink; + struct sk_buff *msg; + int err; + + WARN_ON(cmd != DEVLINK_CMD_LINECARD_NEW && + cmd != DEVLINK_CMD_LINECARD_DEL); + + if (!xa_get_mark(&devlinks, devlink->index, DEVLINK_REGISTERED)) + return; + + msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL); + if (!msg) + return; + + err = devlink_nl_linecard_fill(msg, devlink, linecard, cmd, 0, 0, 0, + NULL); + if (err) { + nlmsg_free(msg); + return; + } + + genlmsg_multicast_netns(&devlink_nl_family, devlink_net(devlink), + msg, 0, DEVLINK_MCGRP_CONFIG, GFP_KERNEL); +} + +static int devlink_nl_cmd_linecard_get_doit(struct sk_buff *skb, + struct genl_info *info) +{ + struct devlink_linecard *linecard = info->user_ptr[1]; + struct devlink *devlink = linecard->devlink; + struct sk_buff *msg; + int err; + + msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL); + if (!msg) + return -ENOMEM; + + mutex_lock(&linecard->state_lock); + err = devlink_nl_linecard_fill(msg, devlink, linecard, + DEVLINK_CMD_LINECARD_NEW, + info->snd_portid, info->snd_seq, 0, + info->extack); + mutex_unlock(&linecard->state_lock); + if (err) { + nlmsg_free(msg); + return err; + } + + return genlmsg_reply(msg, info); +} + +static int devlink_nl_cmd_linecard_get_dumpit(struct sk_buff *msg, + struct netlink_callback *cb) +{ + struct devlink_linecard *linecard; + struct devlink *devlink; + int start = cb->args[0]; + unsigned long index; + int idx = 0; + int err; + + mutex_lock(&devlink_mutex); + xa_for_each_marked(&devlinks, index, devlink, DEVLINK_REGISTERED) { + if (!devlink_try_get(devlink)) + continue; + + if (!net_eq(devlink_net(devlink), sock_net(msg->sk))) + goto retry; + + mutex_lock(&devlink->linecards_lock); + list_for_each_entry(linecard, &devlink->linecard_list, list) { + if (idx < start) { + idx++; + continue; + } + mutex_lock(&linecard->state_lock); + err = devlink_nl_linecard_fill(msg, devlink, linecard, + DEVLINK_CMD_LINECARD_NEW, + NETLINK_CB(cb->skb).portid, + cb->nlh->nlmsg_seq, + NLM_F_MULTI, + cb->extack); + mutex_unlock(&linecard->state_lock); + if (err) { + mutex_unlock(&devlink->linecards_lock); + devlink_put(devlink); + goto out; + } + idx++; + } + mutex_unlock(&devlink->linecards_lock); +retry: + devlink_put(devlink); + } +out: + mutex_unlock(&devlink_mutex); + + cb->args[0] = idx; + return msg->len; +} + +static struct devlink_linecard_type * +devlink_linecard_type_lookup(struct devlink_linecard *linecard, + const char *type) +{ + struct devlink_linecard_type *linecard_type; + int i; + + for (i = 0; i < linecard->types_count; i++) { + linecard_type = &linecard->types[i]; + if (!strcmp(type, linecard_type->type)) + return linecard_type; + } + return NULL; +} + +static int devlink_linecard_type_set(struct devlink_linecard *linecard, + const char *type, + struct netlink_ext_ack *extack) +{ + const struct devlink_linecard_ops *ops = linecard->ops; + struct devlink_linecard_type *linecard_type; + int err; + + mutex_lock(&linecard->state_lock); + if (linecard->state == DEVLINK_LINECARD_STATE_PROVISIONING) { + NL_SET_ERR_MSG_MOD(extack, "Line card is currently being provisioned"); + err = -EBUSY; + goto out; + } + if (linecard->state == DEVLINK_LINECARD_STATE_UNPROVISIONING) { + NL_SET_ERR_MSG_MOD(extack, "Line card is currently being unprovisioned"); + err = -EBUSY; + goto out; + } + + linecard_type = devlink_linecard_type_lookup(linecard, type); + if (!linecard_type) { + NL_SET_ERR_MSG_MOD(extack, "Unsupported line card type provided"); + err = -EINVAL; + goto out; + } + + if (linecard->state != DEVLINK_LINECARD_STATE_UNPROVISIONED && + linecard->state != DEVLINK_LINECARD_STATE_PROVISIONING_FAILED) { + NL_SET_ERR_MSG_MOD(extack, "Line card already provisioned"); + err = -EBUSY; + /* Check if the line card is provisioned in the same + * way the user asks. In case it is, make the operation + * to return success. + */ + if (ops->same_provision && + ops->same_provision(linecard, linecard->priv, + linecard_type->type, + linecard_type->priv)) + err = 0; + goto out; + } + + linecard->state = DEVLINK_LINECARD_STATE_PROVISIONING; + linecard->type = linecard_type->type; + devlink_linecard_notify(linecard, DEVLINK_CMD_LINECARD_NEW); + mutex_unlock(&linecard->state_lock); + err = ops->provision(linecard, linecard->priv, linecard_type->type, + linecard_type->priv, extack); + if (err) { + /* Provisioning failed. Assume the linecard is unprovisioned + * for future operations. + */ + mutex_lock(&linecard->state_lock); + linecard->state = DEVLINK_LINECARD_STATE_UNPROVISIONED; + linecard->type = NULL; + devlink_linecard_notify(linecard, DEVLINK_CMD_LINECARD_NEW); + mutex_unlock(&linecard->state_lock); + } + return err; + +out: + mutex_unlock(&linecard->state_lock); + return err; +} + +static int devlink_linecard_type_unset(struct devlink_linecard *linecard, + struct netlink_ext_ack *extack) +{ + int err; + + mutex_lock(&linecard->state_lock); + if (linecard->state == DEVLINK_LINECARD_STATE_PROVISIONING) { + NL_SET_ERR_MSG_MOD(extack, "Line card is currently being provisioned"); + err = -EBUSY; + goto out; + } + if (linecard->state == DEVLINK_LINECARD_STATE_UNPROVISIONING) { + NL_SET_ERR_MSG_MOD(extack, "Line card is currently being unprovisioned"); + err = -EBUSY; + goto out; + } + if (linecard->state == DEVLINK_LINECARD_STATE_PROVISIONING_FAILED) { + linecard->state = DEVLINK_LINECARD_STATE_UNPROVISIONED; + linecard->type = NULL; + devlink_linecard_notify(linecard, DEVLINK_CMD_LINECARD_NEW); + err = 0; + goto out; + } + + if (linecard->state == DEVLINK_LINECARD_STATE_UNPROVISIONED) { + NL_SET_ERR_MSG_MOD(extack, "Line card is not provisioned"); + err = 0; + goto out; + } + linecard->state = DEVLINK_LINECARD_STATE_UNPROVISIONING; + devlink_linecard_notify(linecard, DEVLINK_CMD_LINECARD_NEW); + mutex_unlock(&linecard->state_lock); + err = linecard->ops->unprovision(linecard, linecard->priv, + extack); + if (err) { + /* Unprovisioning failed. Assume the linecard is unprovisioned + * for future operations. + */ + mutex_lock(&linecard->state_lock); + linecard->state = DEVLINK_LINECARD_STATE_UNPROVISIONED; + linecard->type = NULL; + devlink_linecard_notify(linecard, DEVLINK_CMD_LINECARD_NEW); + mutex_unlock(&linecard->state_lock); + } + return err; + +out: + mutex_unlock(&linecard->state_lock); + return err; +} + +static int devlink_nl_cmd_linecard_set_doit(struct sk_buff *skb, + struct genl_info *info) +{ + struct devlink_linecard *linecard = info->user_ptr[1]; + struct netlink_ext_ack *extack = info->extack; + int err; + + if (info->attrs[DEVLINK_ATTR_LINECARD_TYPE]) { + const char *type; + + type = nla_data(info->attrs[DEVLINK_ATTR_LINECARD_TYPE]); + if (strcmp(type, "")) { + err = devlink_linecard_type_set(linecard, type, extack); + if (err) + return err; + } else { + err = devlink_linecard_type_unset(linecard, extack); + if (err) + return err; + } + } + + return 0; +} + +struct devlink_info_req { + struct sk_buff *msg; +}; + +static int +devlink_nl_linecard_device_info_fill(struct sk_buff *msg, + struct devlink_linecard *linecard, + struct devlink_linecard_device *linecard_device, + struct netlink_ext_ack *extack) +{ + struct nlattr *attr; + + attr = nla_nest_start(msg, DEVLINK_ATTR_LINECARD_DEVICE); + if (!attr) + return -EMSGSIZE; + if (nla_put_u32(msg, DEVLINK_ATTR_LINECARD_DEVICE_INDEX, + linecard_device->index)) { + nla_nest_cancel(msg, attr); + return -EMSGSIZE; + } + if (linecard->ops->device_info_get) { + struct devlink_info_req req; + int err; + + req.msg = msg; + err = linecard->ops->device_info_get(linecard_device, + linecard_device->priv, + &req, extack); + if (err) { + nla_nest_cancel(msg, attr); + return err; + } + } + nla_nest_end(msg, attr); + + return 0; +} + +static int devlink_nl_linecard_devices_info_fill(struct sk_buff *msg, + struct devlink_linecard *linecard, + struct netlink_ext_ack *extack) +{ + struct devlink_linecard_device *linecard_device; + struct nlattr *attr; + int err; + + if (list_empty(&linecard->device_list)) + return 0; + + attr = nla_nest_start(msg, DEVLINK_ATTR_LINECARD_DEVICE_LIST); + if (!attr) + return -EMSGSIZE; + list_for_each_entry(linecard_device, &linecard->device_list, list) { + err = devlink_nl_linecard_device_info_fill(msg, linecard, + linecard_device, + extack); + if (err) { + nla_nest_cancel(msg, attr); + return err; + } + } + nla_nest_end(msg, attr); + + return 0; +} + +static int +devlink_nl_linecard_info_fill(struct sk_buff *msg, struct devlink *devlink, + struct devlink_linecard *linecard, + enum devlink_command cmd, u32 portid, + u32 seq, int flags, struct netlink_ext_ack *extack) +{ + struct devlink_info_req req; + void *hdr; + int err; + + hdr = genlmsg_put(msg, portid, seq, &devlink_nl_family, flags, cmd); + if (!hdr) + return -EMSGSIZE; + + err = -EMSGSIZE; + if (devlink_nl_put_handle(msg, devlink)) + goto nla_put_failure; + if (nla_put_u32(msg, DEVLINK_ATTR_LINECARD_INDEX, linecard->index)) + goto nla_put_failure; + + req.msg = msg; + err = linecard->ops->info_get(linecard, linecard->priv, &req, extack); + if (err) + goto nla_put_failure; + + err = devlink_nl_linecard_devices_info_fill(msg, linecard, extack); + if (err) + goto nla_put_failure; + + genlmsg_end(msg, hdr); + return 0; + +nla_put_failure: + genlmsg_cancel(msg, hdr); + return err; +} + +static int devlink_nl_cmd_linecard_info_get_doit(struct sk_buff *skb, + struct genl_info *info) +{ + struct devlink_linecard *linecard = info->user_ptr[1]; + struct devlink *devlink = linecard->devlink; + struct sk_buff *msg; + int err; + + if (!linecard->ops->info_get) + return -EOPNOTSUPP; + + msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL); + if (!msg) + return -ENOMEM; + + mutex_lock(&linecard->state_lock); + err = devlink_nl_linecard_info_fill(msg, devlink, linecard, + DEVLINK_CMD_LINECARD_INFO_GET, + info->snd_portid, info->snd_seq, 0, + info->extack); + mutex_unlock(&linecard->state_lock); + if (err) { + nlmsg_free(msg); + return err; + } + + return genlmsg_reply(msg, info); +} + +static int devlink_nl_cmd_linecard_info_get_dumpit(struct sk_buff *msg, + struct netlink_callback *cb) +{ + struct devlink_linecard *linecard; + struct devlink *devlink; + int start = cb->args[0]; + unsigned long index; + int idx = 0; + int err = 0; + + mutex_lock(&devlink_mutex); + xa_for_each_marked(&devlinks, index, devlink, DEVLINK_REGISTERED) { + if (!devlink_try_get(devlink)) + continue; + + if (!net_eq(devlink_net(devlink), sock_net(msg->sk))) + goto retry; + + mutex_lock(&devlink->linecards_lock); + list_for_each_entry(linecard, &devlink->linecard_list, list) { + if (idx < start || !linecard->ops->info_get) { + idx++; + continue; + } + mutex_lock(&linecard->state_lock); + err = devlink_nl_linecard_info_fill(msg, devlink, linecard, + DEVLINK_CMD_LINECARD_INFO_GET, + NETLINK_CB(cb->skb).portid, + cb->nlh->nlmsg_seq, + NLM_F_MULTI, + cb->extack); + mutex_unlock(&linecard->state_lock); + if (err) { + mutex_unlock(&devlink->linecards_lock); + devlink_put(devlink); + goto out; + } + idx++; + } + mutex_unlock(&devlink->linecards_lock); +retry: + devlink_put(devlink); + } +out: + mutex_unlock(&devlink_mutex); + + if (err != -EMSGSIZE) + return err; + + cb->args[0] = idx; + return msg->len; +} + static int devlink_nl_sb_fill(struct sk_buff *msg, struct devlink *devlink, struct devlink_sb *devlink_sb, enum devlink_command cmd, u32 portid, @@ -5956,10 +6602,6 @@ out_dev: return err; } -struct devlink_info_req { - struct sk_buff *msg; -}; - int devlink_info_driver_name_put(struct devlink_info_req *req, const char *name) { return nla_put_string(req->msg, DEVLINK_ATTR_INFO_DRIVER_NAME, name); @@ -8589,6 +9231,8 @@ static const struct nla_policy devlink_nl_policy[DEVLINK_ATTR_MAX + 1] = { [DEVLINK_ATTR_RATE_TX_MAX] = { .type = NLA_U64 }, [DEVLINK_ATTR_RATE_NODE_NAME] = { .type = NLA_NUL_STRING }, [DEVLINK_ATTR_RATE_PARENT_NODE_NAME] = { .type = NLA_NUL_STRING }, + [DEVLINK_ATTR_LINECARD_INDEX] = { .type = NLA_U32 }, + [DEVLINK_ATTR_LINECARD_TYPE] = { .type = NLA_NUL_STRING }, }; static const struct genl_small_ops devlink_nl_ops[] = { @@ -8665,6 +9309,26 @@ static const struct genl_small_ops devlink_nl_ops[] = { .internal_flags = DEVLINK_NL_FLAG_NO_LOCK, }, { + .cmd = DEVLINK_CMD_LINECARD_GET, + .doit = devlink_nl_cmd_linecard_get_doit, + .dumpit = devlink_nl_cmd_linecard_get_dumpit, + .internal_flags = DEVLINK_NL_FLAG_NEED_LINECARD, + /* can be retrieved by unprivileged users */ + }, + { + .cmd = DEVLINK_CMD_LINECARD_SET, + .doit = devlink_nl_cmd_linecard_set_doit, + .flags = GENL_ADMIN_PERM, + .internal_flags = DEVLINK_NL_FLAG_NEED_LINECARD, + }, + { + .cmd = DEVLINK_CMD_LINECARD_INFO_GET, + .doit = devlink_nl_cmd_linecard_info_get_doit, + .dumpit = devlink_nl_cmd_linecard_info_get_dumpit, + .internal_flags = DEVLINK_NL_FLAG_NEED_LINECARD, + /* can be retrieved by unprivileged users */ + }, + { .cmd = DEVLINK_CMD_SB_GET, .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .doit = devlink_nl_cmd_sb_get_doit, @@ -9043,6 +9707,7 @@ struct devlink *devlink_alloc_ns(const struct devlink_ops *ops, write_pnet(&devlink->_net, net); INIT_LIST_HEAD(&devlink->port_list); INIT_LIST_HEAD(&devlink->rate_list); + INIT_LIST_HEAD(&devlink->linecard_list); INIT_LIST_HEAD(&devlink->sb_list); INIT_LIST_HEAD_RCU(&devlink->dpipe_table_list); INIT_LIST_HEAD(&devlink->resource_list); @@ -9054,6 +9719,7 @@ struct devlink *devlink_alloc_ns(const struct devlink_ops *ops, INIT_LIST_HEAD(&devlink->trap_policer_list); mutex_init(&devlink->lock); mutex_init(&devlink->reporters_lock); + mutex_init(&devlink->linecards_lock); refcount_set(&devlink->refcount, 1); init_completion(&devlink->comp); @@ -9080,10 +9746,14 @@ static void devlink_notify_register(struct devlink *devlink) struct devlink_param_item *param_item; struct devlink_trap_item *trap_item; struct devlink_port *devlink_port; + struct devlink_linecard *linecard; struct devlink_rate *rate_node; struct devlink_region *region; devlink_notify(devlink, DEVLINK_CMD_NEW); + list_for_each_entry(linecard, &devlink->linecard_list, list) + devlink_linecard_notify(linecard, DEVLINK_CMD_LINECARD_NEW); + list_for_each_entry(devlink_port, &devlink->port_list, list) devlink_port_notify(devlink_port, DEVLINK_CMD_PORT_NEW); @@ -9191,6 +9861,7 @@ void devlink_free(struct devlink *devlink) { ASSERT_DEVLINK_NOT_REGISTERED(devlink); + mutex_destroy(&devlink->linecards_lock); mutex_destroy(&devlink->reporters_lock); mutex_destroy(&devlink->lock); WARN_ON(!list_empty(&devlink->trap_policer_list)); @@ -9203,6 +9874,7 @@ void devlink_free(struct devlink *devlink) WARN_ON(!list_empty(&devlink->dpipe_table_list)); WARN_ON(!list_empty(&devlink->sb_list)); WARN_ON(!list_empty(&devlink->rate_list)); + WARN_ON(!list_empty(&devlink->linecard_list)); WARN_ON(!list_empty(&devlink->port_list)); xa_destroy(&devlink->snapshot_ids); @@ -9681,6 +10353,21 @@ void devlink_rate_nodes_destroy(struct devlink *devlink) } EXPORT_SYMBOL_GPL(devlink_rate_nodes_destroy); +/** + * devlink_port_linecard_set - Link port with a linecard + * + * @devlink_port: devlink port + * @linecard: devlink linecard + */ +void devlink_port_linecard_set(struct devlink_port *devlink_port, + struct devlink_linecard *linecard) +{ + if (WARN_ON(devlink_port->devlink)) + return; + devlink_port->linecard = linecard; +} +EXPORT_SYMBOL_GPL(devlink_port_linecard_set); + static int __devlink_port_phys_port_name_get(struct devlink_port *devlink_port, char *name, size_t len) { @@ -9692,7 +10379,12 @@ static int __devlink_port_phys_port_name_get(struct devlink_port *devlink_port, switch (attrs->flavour) { case DEVLINK_PORT_FLAVOUR_PHYSICAL: - n = snprintf(name, len, "p%u", attrs->phys.port_number); + if (devlink_port->linecard) + n = snprintf(name, len, "l%u", + devlink_port->linecard->index); + if (n < len) + n += snprintf(name + n, len - n, "p%u", + attrs->phys.port_number); if (n < len && attrs->split) n += snprintf(name + n, len - n, "s%u", attrs->phys.split_subport_number); @@ -9747,6 +10439,256 @@ static int __devlink_port_phys_port_name_get(struct devlink_port *devlink_port, return 0; } +static int devlink_linecard_types_init(struct devlink_linecard *linecard) +{ + struct devlink_linecard_type *linecard_type; + unsigned int count; + int i; + + count = linecard->ops->types_count(linecard, linecard->priv); + linecard->types = kmalloc_array(count, sizeof(*linecard_type), + GFP_KERNEL); + if (!linecard->types) + return -ENOMEM; + linecard->types_count = count; + + for (i = 0; i < count; i++) { + linecard_type = &linecard->types[i]; + linecard->ops->types_get(linecard, linecard->priv, i, + &linecard_type->type, + &linecard_type->priv); + } + return 0; +} + +static void devlink_linecard_types_fini(struct devlink_linecard *linecard) +{ + kfree(linecard->types); +} + +/** + * devlink_linecard_create - Create devlink linecard + * + * @devlink: devlink + * @linecard_index: driver-specific numerical identifier of the linecard + * @ops: linecards ops + * @priv: user priv pointer + * + * Create devlink linecard instance with provided linecard index. + * Caller can use any indexing, even hw-related one. + * + * Return: Line card structure or an ERR_PTR() encoded error code. + */ +struct devlink_linecard * +devlink_linecard_create(struct devlink *devlink, unsigned int linecard_index, + const struct devlink_linecard_ops *ops, void *priv) +{ + struct devlink_linecard *linecard; + int err; + + if (WARN_ON(!ops || !ops->provision || !ops->unprovision || + !ops->types_count || !ops->types_get)) + return ERR_PTR(-EINVAL); + + mutex_lock(&devlink->linecards_lock); + if (devlink_linecard_index_exists(devlink, linecard_index)) { + mutex_unlock(&devlink->linecards_lock); + return ERR_PTR(-EEXIST); + } + + linecard = kzalloc(sizeof(*linecard), GFP_KERNEL); + if (!linecard) { + mutex_unlock(&devlink->linecards_lock); + return ERR_PTR(-ENOMEM); + } + + linecard->devlink = devlink; + linecard->index = linecard_index; + linecard->ops = ops; + linecard->priv = priv; + linecard->state = DEVLINK_LINECARD_STATE_UNPROVISIONED; + mutex_init(&linecard->state_lock); + INIT_LIST_HEAD(&linecard->device_list); + + err = devlink_linecard_types_init(linecard); + if (err) { + mutex_destroy(&linecard->state_lock); + kfree(linecard); + mutex_unlock(&devlink->linecards_lock); + return ERR_PTR(err); + } + + list_add_tail(&linecard->list, &devlink->linecard_list); + refcount_set(&linecard->refcount, 1); + mutex_unlock(&devlink->linecards_lock); + devlink_linecard_notify(linecard, DEVLINK_CMD_LINECARD_NEW); + return linecard; +} +EXPORT_SYMBOL_GPL(devlink_linecard_create); + +/** + * devlink_linecard_destroy - Destroy devlink linecard + * + * @linecard: devlink linecard + */ +void devlink_linecard_destroy(struct devlink_linecard *linecard) +{ + struct devlink *devlink = linecard->devlink; + + devlink_linecard_notify(linecard, DEVLINK_CMD_LINECARD_DEL); + WARN_ON(!list_empty(&linecard->device_list)); + mutex_lock(&devlink->linecards_lock); + list_del(&linecard->list); + devlink_linecard_types_fini(linecard); + mutex_unlock(&devlink->linecards_lock); + devlink_linecard_put(linecard); +} +EXPORT_SYMBOL_GPL(devlink_linecard_destroy); + +/** + * devlink_linecard_device_create - Create a device on linecard + * + * @linecard: devlink linecard + * @device_index: index of the linecard device + * @priv: user priv pointer + * + * Return: Line card device structure or an ERR_PTR() encoded error code. + */ +struct devlink_linecard_device * +devlink_linecard_device_create(struct devlink_linecard *linecard, + unsigned int device_index, void *priv) +{ + struct devlink_linecard_device *linecard_device; + + linecard_device = kzalloc(sizeof(*linecard_device), GFP_KERNEL); + if (!linecard_device) + return ERR_PTR(-ENOMEM); + linecard_device->index = device_index; + linecard_device->priv = priv; + mutex_lock(&linecard->state_lock); + list_add_tail(&linecard_device->list, &linecard->device_list); + devlink_linecard_notify(linecard, DEVLINK_CMD_LINECARD_NEW); + mutex_unlock(&linecard->state_lock); + return linecard_device; +} +EXPORT_SYMBOL_GPL(devlink_linecard_device_create); + +/** + * devlink_linecard_device_destroy - Destroy device on linecard + * + * @linecard: devlink linecard + * @linecard_device: devlink linecard device + */ +void +devlink_linecard_device_destroy(struct devlink_linecard *linecard, + struct devlink_linecard_device *linecard_device) +{ + mutex_lock(&linecard->state_lock); + list_del(&linecard_device->list); + devlink_linecard_notify(linecard, DEVLINK_CMD_LINECARD_NEW); + mutex_unlock(&linecard->state_lock); + kfree(linecard_device); +} +EXPORT_SYMBOL_GPL(devlink_linecard_device_destroy); + +/** + * devlink_linecard_provision_set - Set provisioning on linecard + * + * @linecard: devlink linecard + * @type: linecard type + * + * This is either called directly from the provision() op call or + * as a result of the provision() op call asynchronously. + */ +void devlink_linecard_provision_set(struct devlink_linecard *linecard, + const char *type) +{ + mutex_lock(&linecard->state_lock); + WARN_ON(linecard->type && strcmp(linecard->type, type)); + linecard->state = DEVLINK_LINECARD_STATE_PROVISIONED; + linecard->type = type; + devlink_linecard_notify(linecard, DEVLINK_CMD_LINECARD_NEW); + mutex_unlock(&linecard->state_lock); +} +EXPORT_SYMBOL_GPL(devlink_linecard_provision_set); + +/** + * devlink_linecard_provision_clear - Clear provisioning on linecard + * + * @linecard: devlink linecard + * + * This is either called directly from the unprovision() op call or + * as a result of the unprovision() op call asynchronously. + */ +void devlink_linecard_provision_clear(struct devlink_linecard *linecard) +{ + mutex_lock(&linecard->state_lock); + WARN_ON(!list_empty(&linecard->device_list)); + linecard->state = DEVLINK_LINECARD_STATE_UNPROVISIONED; + linecard->type = NULL; + devlink_linecard_notify(linecard, DEVLINK_CMD_LINECARD_NEW); + mutex_unlock(&linecard->state_lock); +} +EXPORT_SYMBOL_GPL(devlink_linecard_provision_clear); + +/** + * devlink_linecard_provision_fail - Fail provisioning on linecard + * + * @linecard: devlink linecard + * + * This is either called directly from the provision() op call or + * as a result of the provision() op call asynchronously. + */ +void devlink_linecard_provision_fail(struct devlink_linecard *linecard) +{ + mutex_lock(&linecard->state_lock); + linecard->state = DEVLINK_LINECARD_STATE_PROVISIONING_FAILED; + devlink_linecard_notify(linecard, DEVLINK_CMD_LINECARD_NEW); + mutex_unlock(&linecard->state_lock); +} +EXPORT_SYMBOL_GPL(devlink_linecard_provision_fail); + +/** + * devlink_linecard_activate - Set linecard active + * + * @linecard: devlink linecard + */ +void devlink_linecard_activate(struct devlink_linecard *linecard) +{ + mutex_lock(&linecard->state_lock); + WARN_ON(linecard->state != DEVLINK_LINECARD_STATE_PROVISIONED); + linecard->state = DEVLINK_LINECARD_STATE_ACTIVE; + devlink_linecard_notify(linecard, DEVLINK_CMD_LINECARD_NEW); + mutex_unlock(&linecard->state_lock); +} +EXPORT_SYMBOL_GPL(devlink_linecard_activate); + +/** + * devlink_linecard_deactivate - Set linecard inactive + * + * @linecard: devlink linecard + */ +void devlink_linecard_deactivate(struct devlink_linecard *linecard) +{ + mutex_lock(&linecard->state_lock); + switch (linecard->state) { + case DEVLINK_LINECARD_STATE_ACTIVE: + linecard->state = DEVLINK_LINECARD_STATE_PROVISIONED; + devlink_linecard_notify(linecard, DEVLINK_CMD_LINECARD_NEW); + break; + case DEVLINK_LINECARD_STATE_UNPROVISIONING: + /* Line card is being deactivated as part + * of unprovisioning flow. + */ + break; + default: + WARN_ON(1); + break; + } + mutex_unlock(&linecard->state_lock); +} +EXPORT_SYMBOL_GPL(devlink_linecard_deactivate); + int devlink_sb_register(struct devlink *devlink, unsigned int sb_index, u32 size, u16 ingress_pools_count, u16 egress_pools_count, u16 ingress_tc_count, diff --git a/net/core/filter.c b/net/core/filter.c index 64470a727ef7..b741b9f7e6a9 100644 --- a/net/core/filter.c +++ b/net/core/filter.c @@ -1687,7 +1687,7 @@ BPF_CALL_5(bpf_skb_store_bytes, struct sk_buff *, skb, u32, offset, if (unlikely(flags & ~(BPF_F_RECOMPUTE_CSUM | BPF_F_INVALIDATE_HASH))) return -EINVAL; - if (unlikely(offset > 0xffff)) + if (unlikely(offset > INT_MAX)) return -EFAULT; if (unlikely(bpf_try_make_writable(skb, offset + len))) return -EFAULT; @@ -1722,7 +1722,7 @@ BPF_CALL_4(bpf_skb_load_bytes, const struct sk_buff *, skb, u32, offset, { void *ptr; - if (unlikely(offset > 0xffff)) + if (unlikely(offset > INT_MAX)) goto err_clear; ptr = skb_header_pointer(skb, offset, len, to); @@ -5173,7 +5173,7 @@ static int _bpf_setsockopt(struct sock *sk, int level, int optname, if (val <= 0 || tp->data_segs_out > tp->syn_data) ret = -EINVAL; else - tp->snd_cwnd = val; + tcp_snd_cwnd_set(tp, val); break; case TCP_BPF_SNDCWND_CLAMP: if (val <= 0) { @@ -6621,7 +6621,7 @@ static const struct bpf_func_proto bpf_sk_release_proto = { .func = bpf_sk_release, .gpl_only = false, .ret_type = RET_INTEGER, - .arg1_type = ARG_PTR_TO_BTF_ID_SOCK_COMMON, + .arg1_type = ARG_PTR_TO_BTF_ID_SOCK_COMMON | OBJ_RELEASE, }; BPF_CALL_5(bpf_xdp_sk_lookup_udp, struct xdp_buff *, ctx, @@ -7099,7 +7099,7 @@ BPF_CALL_5(bpf_tcp_gen_syncookie, struct sock *, sk, void *, iph, u32, iph_len, */ switch (((struct iphdr *)iph)->version) { case 4: - if (sk->sk_family == AF_INET6 && sk->sk_ipv6only) + if (sk->sk_family == AF_INET6 && ipv6_only_sock(sk)) return -EINVAL; mss = tcp_v4_get_syncookie(sk, iph, th, &cookie); diff --git a/net/core/flow_dissector.c b/net/core/flow_dissector.c index 6f7ec72016dc..6aee04f75e3e 100644 --- a/net/core/flow_dissector.c +++ b/net/core/flow_dissector.c @@ -1035,6 +1035,16 @@ bool __skb_flow_dissect(const struct net *net, memcpy(key_eth_addrs, eth, sizeof(*key_eth_addrs)); } + if (dissector_uses_key(flow_dissector, + FLOW_DISSECTOR_KEY_NUM_OF_VLANS)) { + struct flow_dissector_key_num_of_vlans *key_num_of_vlans; + + key_num_of_vlans = skb_flow_dissector_target(flow_dissector, + FLOW_DISSECTOR_KEY_NUM_OF_VLANS, + target_container); + key_num_of_vlans->num_of_vlans = 0; + } + proto_again: fdret = FLOW_DISSECT_RET_CONTINUE; @@ -1158,6 +1168,16 @@ proto_again: nhoff += sizeof(*vlan); } + if (dissector_uses_key(flow_dissector, + FLOW_DISSECTOR_KEY_NUM_OF_VLANS)) { + struct flow_dissector_key_num_of_vlans *key_nvs; + + key_nvs = skb_flow_dissector_target(flow_dissector, + FLOW_DISSECTOR_KEY_NUM_OF_VLANS, + target_container); + key_nvs->num_of_vlans++; + } + if (dissector_vlan == FLOW_DISSECTOR_KEY_MAX) { dissector_vlan = FLOW_DISSECTOR_KEY_VLAN; } else if (dissector_vlan == FLOW_DISSECTOR_KEY_VLAN) { diff --git a/net/core/link_watch.c b/net/core/link_watch.c index 95098d1a49bd..a244d3bade7d 100644 --- a/net/core/link_watch.c +++ b/net/core/link_watch.c @@ -18,6 +18,7 @@ #include <linux/bitops.h> #include <linux/types.h> +#include "dev.h" enum lw_bits { LW_URGENT = 0, diff --git a/net/core/neighbour.c b/net/core/neighbour.c index f64ebd050f6c..47b6c1f0fdbb 100644 --- a/net/core/neighbour.c +++ b/net/core/neighbour.c @@ -3728,7 +3728,7 @@ int neigh_sysctl_register(struct net_device *dev, struct neigh_parms *p, char neigh_path[ sizeof("net//neigh/") + IFNAMSIZ + IFNAMSIZ ]; char *p_name; - t = kmemdup(&neigh_sysctl_template, sizeof(*t), GFP_KERNEL); + t = kmemdup(&neigh_sysctl_template, sizeof(*t), GFP_KERNEL_ACCOUNT); if (!t) goto err; diff --git a/net/core/net-procfs.c b/net/core/net-procfs.c index 88cc0ad7d386..1ec23bf8b05c 100644 --- a/net/core/net-procfs.c +++ b/net/core/net-procfs.c @@ -4,6 +4,8 @@ #include <linux/seq_file.h> #include <net/wext.h> +#include "dev.h" + #define BUCKET_SPACE (32 - NETDEV_HASHBITS - 1) #define get_bucket(x) ((x) >> BUCKET_SPACE) diff --git a/net/core/net-sysfs.c b/net/core/net-sysfs.c index 9cbc1c8289bc..4980c3a50475 100644 --- a/net/core/net-sysfs.c +++ b/net/core/net-sysfs.c @@ -24,6 +24,7 @@ #include <linux/of_net.h> #include <linux/cpu.h> +#include "dev.h" #include "net-sysfs.h" #ifdef CONFIG_SYSFS diff --git a/net/core/page_pool.c b/net/core/page_pool.c index 1943c0f0307d..bdbadfaee867 100644 --- a/net/core/page_pool.c +++ b/net/core/page_pool.c @@ -18,6 +18,7 @@ #include <linux/page-flags.h> #include <linux/mm.h> /* for __put_page() */ #include <linux/poison.h> +#include <linux/ethtool.h> #include <trace/events/page_pool.h> @@ -36,6 +37,26 @@ this_cpu_inc(s->__stat); \ } while (0) +#define recycle_stat_add(pool, __stat, val) \ + do { \ + struct page_pool_recycle_stats __percpu *s = pool->recycle_stats; \ + this_cpu_add(s->__stat, val); \ + } while (0) + +static const char pp_stats[][ETH_GSTRING_LEN] = { + "rx_pp_alloc_fast", + "rx_pp_alloc_slow", + "rx_pp_alloc_slow_ho", + "rx_pp_alloc_empty", + "rx_pp_alloc_refill", + "rx_pp_alloc_waive", + "rx_pp_recycle_cached", + "rx_pp_recycle_cache_full", + "rx_pp_recycle_ring", + "rx_pp_recycle_ring_full", + "rx_pp_recycle_released_ref", +}; + bool page_pool_get_stats(struct page_pool *pool, struct page_pool_stats *stats) { @@ -44,7 +65,13 @@ bool page_pool_get_stats(struct page_pool *pool, if (!stats) return false; - memcpy(&stats->alloc_stats, &pool->alloc_stats, sizeof(pool->alloc_stats)); + /* The caller is responsible to initialize stats. */ + stats->alloc_stats.fast += pool->alloc_stats.fast; + stats->alloc_stats.slow += pool->alloc_stats.slow; + stats->alloc_stats.slow_high_order += pool->alloc_stats.slow_high_order; + stats->alloc_stats.empty += pool->alloc_stats.empty; + stats->alloc_stats.refill += pool->alloc_stats.refill; + stats->alloc_stats.waive += pool->alloc_stats.waive; for_each_possible_cpu(cpu) { const struct page_pool_recycle_stats *pcpu = @@ -60,9 +87,50 @@ bool page_pool_get_stats(struct page_pool *pool, return true; } EXPORT_SYMBOL(page_pool_get_stats); + +u8 *page_pool_ethtool_stats_get_strings(u8 *data) +{ + int i; + + for (i = 0; i < ARRAY_SIZE(pp_stats); i++) { + memcpy(data, pp_stats[i], ETH_GSTRING_LEN); + data += ETH_GSTRING_LEN; + } + + return data; +} +EXPORT_SYMBOL(page_pool_ethtool_stats_get_strings); + +int page_pool_ethtool_stats_get_count(void) +{ + return ARRAY_SIZE(pp_stats); +} +EXPORT_SYMBOL(page_pool_ethtool_stats_get_count); + +u64 *page_pool_ethtool_stats_get(u64 *data, void *stats) +{ + struct page_pool_stats *pool_stats = stats; + + *data++ = pool_stats->alloc_stats.fast; + *data++ = pool_stats->alloc_stats.slow; + *data++ = pool_stats->alloc_stats.slow_high_order; + *data++ = pool_stats->alloc_stats.empty; + *data++ = pool_stats->alloc_stats.refill; + *data++ = pool_stats->alloc_stats.waive; + *data++ = pool_stats->recycle_stats.cached; + *data++ = pool_stats->recycle_stats.cache_full; + *data++ = pool_stats->recycle_stats.ring; + *data++ = pool_stats->recycle_stats.ring_full; + *data++ = pool_stats->recycle_stats.released_refcnt; + + return data; +} +EXPORT_SYMBOL(page_pool_ethtool_stats_get); + #else #define alloc_stat_inc(pool, __stat) #define recycle_stat_inc(pool, __stat) +#define recycle_stat_add(pool, __stat, val) #endif static int page_pool_init(struct page_pool *pool, @@ -566,9 +634,13 @@ void page_pool_put_page_bulk(struct page_pool *pool, void **data, /* Bulk producer into ptr_ring page_pool cache */ page_pool_ring_lock(pool); for (i = 0; i < bulk_len; i++) { - if (__ptr_ring_produce(&pool->ring, data[i])) - break; /* ring full */ + if (__ptr_ring_produce(&pool->ring, data[i])) { + /* ring full */ + recycle_stat_inc(pool, ring_full); + break; + } } + recycle_stat_add(pool, ring, i); page_pool_ring_unlock(pool); /* Hopefully all pages was return into ptr_ring */ diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c index d1381ea6d52e..eea5ed09e1bb 100644 --- a/net/core/rtnetlink.c +++ b/net/core/rtnetlink.c @@ -54,6 +54,8 @@ #include <net/rtnetlink.h> #include <net/net_namespace.h> +#include "dev.h" + #define RTNL_MAX_TYPE 50 #define RTNL_SLAVE_MAX_TYPE 40 @@ -95,6 +97,39 @@ void __rtnl_unlock(void) defer_kfree_skb_list = NULL; + /* Ensure that we didn't actually add any TODO item when __rtnl_unlock() + * is used. In some places, e.g. in cfg80211, we have code that will do + * something like + * rtnl_lock() + * wiphy_lock() + * ... + * rtnl_unlock() + * + * and because netdev_run_todo() acquires the RTNL for items on the list + * we could cause a situation such as this: + * Thread 1 Thread 2 + * rtnl_lock() + * unregister_netdevice() + * __rtnl_unlock() + * rtnl_lock() + * wiphy_lock() + * rtnl_unlock() + * netdev_run_todo() + * __rtnl_unlock() + * + * // list not empty now + * // because of thread 2 + * rtnl_lock() + * while (!list_empty(...)) + * rtnl_lock() + * wiphy_lock() + * **** DEADLOCK **** + * + * However, usage of __rtnl_unlock() is rare, and so we can ensure that + * it's not used in cases where something is added to do the list. + */ + WARN_ON(!list_empty(&net_todo_list)); + mutex_unlock(&rtnl_mutex); while (head) { @@ -214,6 +249,8 @@ static int rtnl_register_internal(struct module *owner, if (dumpit) link->dumpit = dumpit; + WARN_ON(rtnl_msgtype_kind(msgtype) != RTNL_KIND_DEL && + (flags & RTNL_FLAG_BULK_DEL_SUPPORTED)); link->flags |= flags; /* publish protocol:msgtype */ @@ -2607,17 +2644,23 @@ static int do_set_proto_down(struct net_device *dev, static int do_setlink(const struct sk_buff *skb, struct net_device *dev, struct ifinfomsg *ifm, struct netlink_ext_ack *extack, - struct nlattr **tb, char *ifname, int status) + struct nlattr **tb, int status) { const struct net_device_ops *ops = dev->netdev_ops; + char ifname[IFNAMSIZ]; int err; err = validate_linkmsg(dev, tb, extack); if (err < 0) return err; + if (tb[IFLA_IFNAME]) + nla_strscpy(ifname, tb[IFLA_IFNAME], IFNAMSIZ); + else + ifname[0] = '\0'; + if (tb[IFLA_NET_NS_PID] || tb[IFLA_NET_NS_FD] || tb[IFLA_TARGET_NETNSID]) { - const char *pat = ifname && ifname[0] ? ifname : NULL; + const char *pat = ifname[0] ? ifname : NULL; struct net *net; int new_ifindex; @@ -2973,21 +3016,16 @@ errout: } static struct net_device *rtnl_dev_get(struct net *net, - struct nlattr *ifname_attr, - struct nlattr *altifname_attr, - char *ifname) -{ - char buffer[ALTIFNAMSIZ]; - - if (!ifname) { - ifname = buffer; - if (ifname_attr) - nla_strscpy(ifname, ifname_attr, IFNAMSIZ); - else if (altifname_attr) - nla_strscpy(ifname, altifname_attr, ALTIFNAMSIZ); - else - return NULL; - } + struct nlattr *tb[]) +{ + char ifname[ALTIFNAMSIZ]; + + if (tb[IFLA_IFNAME]) + nla_strscpy(ifname, tb[IFLA_IFNAME], IFNAMSIZ); + else if (tb[IFLA_ALT_IFNAME]) + nla_strscpy(ifname, tb[IFLA_ALT_IFNAME], ALTIFNAMSIZ); + else + return NULL; return __dev_get_by_name(net, ifname); } @@ -3000,7 +3038,6 @@ static int rtnl_setlink(struct sk_buff *skb, struct nlmsghdr *nlh, struct net_device *dev; int err; struct nlattr *tb[IFLA_MAX+1]; - char ifname[IFNAMSIZ]; err = nlmsg_parse_deprecated(nlh, sizeof(*ifm), tb, IFLA_MAX, ifla_policy, extack); @@ -3011,17 +3048,12 @@ static int rtnl_setlink(struct sk_buff *skb, struct nlmsghdr *nlh, if (err < 0) goto errout; - if (tb[IFLA_IFNAME]) - nla_strscpy(ifname, tb[IFLA_IFNAME], IFNAMSIZ); - else - ifname[0] = '\0'; - err = -EINVAL; ifm = nlmsg_data(nlh); if (ifm->ifi_index > 0) dev = __dev_get_by_index(net, ifm->ifi_index); else if (tb[IFLA_IFNAME] || tb[IFLA_ALT_IFNAME]) - dev = rtnl_dev_get(net, NULL, tb[IFLA_ALT_IFNAME], ifname); + dev = rtnl_dev_get(net, tb); else goto errout; @@ -3030,7 +3062,7 @@ static int rtnl_setlink(struct sk_buff *skb, struct nlmsghdr *nlh, goto errout; } - err = do_setlink(skb, dev, ifm, extack, tb, ifname, 0); + err = do_setlink(skb, dev, ifm, extack, tb, 0); errout: return err; } @@ -3119,15 +3151,14 @@ static int rtnl_dellink(struct sk_buff *skb, struct nlmsghdr *nlh, if (ifm->ifi_index > 0) dev = __dev_get_by_index(tgt_net, ifm->ifi_index); else if (tb[IFLA_IFNAME] || tb[IFLA_ALT_IFNAME]) - dev = rtnl_dev_get(net, tb[IFLA_IFNAME], - tb[IFLA_ALT_IFNAME], NULL); + dev = rtnl_dev_get(net, tb); else if (tb[IFLA_GROUP]) err = rtnl_group_dellink(tgt_net, nla_get_u32(tb[IFLA_GROUP])); else goto out; if (!dev) { - if (tb[IFLA_IFNAME] || ifm->ifi_index > 0) + if (tb[IFLA_IFNAME] || tb[IFLA_ALT_IFNAME] || ifm->ifi_index > 0) err = -ENODEV; goto out; @@ -3262,7 +3293,7 @@ static int rtnl_group_changelink(const struct sk_buff *skb, for_each_netdev_safe(net, dev, aux) { if (dev->group == group) { - err = do_setlink(skb, dev, ifm, extack, tb, NULL, 0); + err = do_setlink(skb, dev, ifm, extack, tb, 0); if (err < 0) return err; } @@ -3271,24 +3302,118 @@ static int rtnl_group_changelink(const struct sk_buff *skb, return 0; } -static int __rtnl_newlink(struct sk_buff *skb, struct nlmsghdr *nlh, - struct nlattr **attr, struct netlink_ext_ack *extack) +static int rtnl_newlink_create(struct sk_buff *skb, struct ifinfomsg *ifm, + const struct rtnl_link_ops *ops, + struct nlattr **tb, struct nlattr **data, + struct netlink_ext_ack *extack) { - struct nlattr *slave_attr[RTNL_SLAVE_MAX_TYPE + 1]; unsigned char name_assign_type = NET_NAME_USER; + struct net *net = sock_net(skb->sk); + struct net *dest_net, *link_net; + struct net_device *dev; + char ifname[IFNAMSIZ]; + int err; + + if (!ops->alloc && !ops->setup) + return -EOPNOTSUPP; + + if (tb[IFLA_IFNAME]) { + nla_strscpy(ifname, tb[IFLA_IFNAME], IFNAMSIZ); + } else { + snprintf(ifname, IFNAMSIZ, "%s%%d", ops->kind); + name_assign_type = NET_NAME_ENUM; + } + + dest_net = rtnl_link_get_net_capable(skb, net, tb, CAP_NET_ADMIN); + if (IS_ERR(dest_net)) + return PTR_ERR(dest_net); + + if (tb[IFLA_LINK_NETNSID]) { + int id = nla_get_s32(tb[IFLA_LINK_NETNSID]); + + link_net = get_net_ns_by_id(dest_net, id); + if (!link_net) { + NL_SET_ERR_MSG(extack, "Unknown network namespace id"); + err = -EINVAL; + goto out; + } + err = -EPERM; + if (!netlink_ns_capable(skb, link_net->user_ns, CAP_NET_ADMIN)) + goto out; + } else { + link_net = NULL; + } + + dev = rtnl_create_link(link_net ? : dest_net, ifname, + name_assign_type, ops, tb, extack); + if (IS_ERR(dev)) { + err = PTR_ERR(dev); + goto out; + } + + dev->ifindex = ifm->ifi_index; + + if (ops->newlink) + err = ops->newlink(link_net ? : net, dev, tb, data, extack); + else + err = register_netdevice(dev); + if (err < 0) { + free_netdev(dev); + goto out; + } + + err = rtnl_configure_link(dev, ifm); + if (err < 0) + goto out_unregister; + if (link_net) { + err = dev_change_net_namespace(dev, dest_net, ifname); + if (err < 0) + goto out_unregister; + } + if (tb[IFLA_MASTER]) { + err = do_set_master(dev, nla_get_u32(tb[IFLA_MASTER]), extack); + if (err) + goto out_unregister; + } +out: + if (link_net) + put_net(link_net); + put_net(dest_net); + return err; +out_unregister: + if (ops->newlink) { + LIST_HEAD(list_kill); + + ops->dellink(dev, &list_kill); + unregister_netdevice_many(&list_kill); + } else { + unregister_netdevice(dev); + } + goto out; +} + +struct rtnl_newlink_tbs { + struct nlattr *tb[IFLA_MAX + 1]; + struct nlattr *attr[RTNL_MAX_TYPE + 1]; + struct nlattr *slave_attr[RTNL_SLAVE_MAX_TYPE + 1]; +}; + +static int __rtnl_newlink(struct sk_buff *skb, struct nlmsghdr *nlh, + struct rtnl_newlink_tbs *tbs, + struct netlink_ext_ack *extack) +{ struct nlattr *linkinfo[IFLA_INFO_MAX + 1]; + struct nlattr ** const tb = tbs->tb; const struct rtnl_link_ops *m_ops; struct net_device *master_dev; struct net *net = sock_net(skb->sk); const struct rtnl_link_ops *ops; - struct nlattr *tb[IFLA_MAX + 1]; - struct net *dest_net, *link_net; struct nlattr **slave_data; char kind[MODULE_NAME_LEN]; struct net_device *dev; struct ifinfomsg *ifm; - char ifname[IFNAMSIZ]; struct nlattr **data; + bool link_specified; int err; #ifdef CONFIG_MODULES @@ -3303,18 +3428,17 @@ replay: if (err < 0) return err; - if (tb[IFLA_IFNAME]) - nla_strscpy(ifname, tb[IFLA_IFNAME], IFNAMSIZ); - else - ifname[0] = '\0'; - ifm = nlmsg_data(nlh); - if (ifm->ifi_index > 0) + if (ifm->ifi_index > 0) { + link_specified = true; dev = __dev_get_by_index(net, ifm->ifi_index); - else if (tb[IFLA_IFNAME] || tb[IFLA_ALT_IFNAME]) - dev = rtnl_dev_get(net, NULL, tb[IFLA_ALT_IFNAME], ifname); - else + } else if (tb[IFLA_IFNAME] || tb[IFLA_ALT_IFNAME]) { + link_specified = true; + dev = rtnl_dev_get(net, tb); + } else { + link_specified = false; dev = NULL; + } master_dev = NULL; m_ops = NULL; @@ -3351,12 +3475,12 @@ replay: return -EINVAL; if (ops->maxtype && linkinfo[IFLA_INFO_DATA]) { - err = nla_parse_nested_deprecated(attr, ops->maxtype, + err = nla_parse_nested_deprecated(tbs->attr, ops->maxtype, linkinfo[IFLA_INFO_DATA], ops->policy, extack); if (err < 0) return err; - data = attr; + data = tbs->attr; } if (ops->validate) { err = ops->validate(tb, data, extack); @@ -3372,14 +3496,14 @@ replay: if (m_ops->slave_maxtype && linkinfo[IFLA_INFO_SLAVE_DATA]) { - err = nla_parse_nested_deprecated(slave_attr, + err = nla_parse_nested_deprecated(tbs->slave_attr, m_ops->slave_maxtype, linkinfo[IFLA_INFO_SLAVE_DATA], m_ops->slave_policy, extack); if (err < 0) return err; - slave_data = slave_attr; + slave_data = tbs->slave_attr; } } @@ -3413,11 +3537,16 @@ replay: status |= DO_SETLINK_NOTIFY; } - return do_setlink(skb, dev, ifm, extack, tb, ifname, status); + return do_setlink(skb, dev, ifm, extack, tb, status); } if (!(nlh->nlmsg_flags & NLM_F_CREATE)) { - if (ifm->ifi_index == 0 && tb[IFLA_GROUP]) + /* No dev found and NLM_F_CREATE not set. Requested dev does not exist, + * or it's for a group + */ + if (link_specified) + return -ENODEV; + if (tb[IFLA_GROUP]) return rtnl_group_changelink(skb, net, nla_get_u32(tb[IFLA_GROUP]), ifm, extack, tb); @@ -3442,94 +3571,21 @@ replay: return -EOPNOTSUPP; } - if (!ops->alloc && !ops->setup) - return -EOPNOTSUPP; - - if (!ifname[0]) { - snprintf(ifname, IFNAMSIZ, "%s%%d", ops->kind); - name_assign_type = NET_NAME_ENUM; - } - - dest_net = rtnl_link_get_net_capable(skb, net, tb, CAP_NET_ADMIN); - if (IS_ERR(dest_net)) - return PTR_ERR(dest_net); - - if (tb[IFLA_LINK_NETNSID]) { - int id = nla_get_s32(tb[IFLA_LINK_NETNSID]); - - link_net = get_net_ns_by_id(dest_net, id); - if (!link_net) { - NL_SET_ERR_MSG(extack, "Unknown network namespace id"); - err = -EINVAL; - goto out; - } - err = -EPERM; - if (!netlink_ns_capable(skb, link_net->user_ns, CAP_NET_ADMIN)) - goto out; - } else { - link_net = NULL; - } - - dev = rtnl_create_link(link_net ? : dest_net, ifname, - name_assign_type, ops, tb, extack); - if (IS_ERR(dev)) { - err = PTR_ERR(dev); - goto out; - } - - dev->ifindex = ifm->ifi_index; - - if (ops->newlink) - err = ops->newlink(link_net ? : net, dev, tb, data, extack); - else - err = register_netdevice(dev); - if (err < 0) { - free_netdev(dev); - goto out; - } - - err = rtnl_configure_link(dev, ifm); - if (err < 0) - goto out_unregister; - if (link_net) { - err = dev_change_net_namespace(dev, dest_net, ifname); - if (err < 0) - goto out_unregister; - } - if (tb[IFLA_MASTER]) { - err = do_set_master(dev, nla_get_u32(tb[IFLA_MASTER]), extack); - if (err) - goto out_unregister; - } -out: - if (link_net) - put_net(link_net); - put_net(dest_net); - return err; -out_unregister: - if (ops->newlink) { - LIST_HEAD(list_kill); - - ops->dellink(dev, &list_kill); - unregister_netdevice_many(&list_kill); - } else { - unregister_netdevice(dev); - } - goto out; + return rtnl_newlink_create(skb, ifm, ops, tb, data, extack); } static int rtnl_newlink(struct sk_buff *skb, struct nlmsghdr *nlh, struct netlink_ext_ack *extack) { - struct nlattr **attr; + struct rtnl_newlink_tbs *tbs; int ret; - attr = kmalloc_array(RTNL_MAX_TYPE + 1, sizeof(*attr), GFP_KERNEL); - if (!attr) + tbs = kmalloc(sizeof(*tbs), GFP_KERNEL); + if (!tbs) return -ENOMEM; - ret = __rtnl_newlink(skb, nlh, attr, extack); - kfree(attr); + ret = __rtnl_newlink(skb, nlh, tbs, extack); + kfree(tbs); return ret; } @@ -3617,8 +3673,7 @@ static int rtnl_getlink(struct sk_buff *skb, struct nlmsghdr *nlh, if (ifm->ifi_index > 0) dev = __dev_get_by_index(tgt_net, ifm->ifi_index); else if (tb[IFLA_IFNAME] || tb[IFLA_ALT_IFNAME]) - dev = rtnl_dev_get(tgt_net, tb[IFLA_IFNAME], - tb[IFLA_ALT_IFNAME], NULL); + dev = rtnl_dev_get(tgt_net, tb); else goto out; @@ -3713,8 +3768,7 @@ static int rtnl_linkprop(int cmd, struct sk_buff *skb, struct nlmsghdr *nlh, if (ifm->ifi_index > 0) dev = __dev_get_by_index(net, ifm->ifi_index); else if (tb[IFLA_IFNAME] || tb[IFLA_ALT_IFNAME]) - dev = rtnl_dev_get(net, tb[IFLA_IFNAME], - tb[IFLA_ALT_IFNAME], NULL); + dev = rtnl_dev_get(net, tb); else return -EINVAL; @@ -4132,22 +4186,36 @@ int ndo_dflt_fdb_del(struct ndmsg *ndm, } EXPORT_SYMBOL(ndo_dflt_fdb_del); +static const struct nla_policy fdb_del_bulk_policy[NDA_MAX + 1] = { + [NDA_VLAN] = { .type = NLA_U16 }, + [NDA_IFINDEX] = NLA_POLICY_MIN(NLA_S32, 1), + [NDA_NDM_STATE_MASK] = { .type = NLA_U16 }, + [NDA_NDM_FLAGS_MASK] = { .type = NLA_U8 }, +}; + static int rtnl_fdb_del(struct sk_buff *skb, struct nlmsghdr *nlh, struct netlink_ext_ack *extack) { + bool del_bulk = !!(nlh->nlmsg_flags & NLM_F_BULK); struct net *net = sock_net(skb->sk); + const struct net_device_ops *ops; struct ndmsg *ndm; struct nlattr *tb[NDA_MAX+1]; struct net_device *dev; - __u8 *addr; + __u8 *addr = NULL; int err; u16 vid; if (!netlink_capable(skb, CAP_NET_ADMIN)) return -EPERM; - err = nlmsg_parse_deprecated(nlh, sizeof(*ndm), tb, NDA_MAX, NULL, - extack); + if (!del_bulk) { + err = nlmsg_parse_deprecated(nlh, sizeof(*ndm), tb, NDA_MAX, + NULL, extack); + } else { + err = nlmsg_parse(nlh, sizeof(*ndm), tb, NDA_MAX, + fdb_del_bulk_policy, extack); + } if (err < 0) return err; @@ -4163,9 +4231,12 @@ static int rtnl_fdb_del(struct sk_buff *skb, struct nlmsghdr *nlh, return -ENODEV; } - if (!tb[NDA_LLADDR] || nla_len(tb[NDA_LLADDR]) != ETH_ALEN) { - NL_SET_ERR_MSG(extack, "invalid address"); - return -EINVAL; + if (!del_bulk) { + if (!tb[NDA_LLADDR] || nla_len(tb[NDA_LLADDR]) != ETH_ALEN) { + NL_SET_ERR_MSG(extack, "invalid address"); + return -EINVAL; + } + addr = nla_data(tb[NDA_LLADDR]); } if (dev->type != ARPHRD_ETHER) { @@ -4173,8 +4244,6 @@ static int rtnl_fdb_del(struct sk_buff *skb, struct nlmsghdr *nlh, return -EINVAL; } - addr = nla_data(tb[NDA_LLADDR]); - err = fdb_vid_parse(tb[NDA_VLAN], &vid, extack); if (err) return err; @@ -4185,10 +4254,16 @@ static int rtnl_fdb_del(struct sk_buff *skb, struct nlmsghdr *nlh, if ((!ndm->ndm_flags || ndm->ndm_flags & NTF_MASTER) && netif_is_bridge_port(dev)) { struct net_device *br_dev = netdev_master_upper_dev_get(dev); - const struct net_device_ops *ops = br_dev->netdev_ops; - if (ops->ndo_fdb_del) - err = ops->ndo_fdb_del(ndm, tb, dev, addr, vid); + ops = br_dev->netdev_ops; + if (!del_bulk) { + if (ops->ndo_fdb_del) + err = ops->ndo_fdb_del(ndm, tb, dev, addr, vid); + } else { + if (ops->ndo_fdb_del_bulk) + err = ops->ndo_fdb_del_bulk(ndm, tb, dev, vid, + extack); + } if (err) goto out; @@ -4198,15 +4273,24 @@ static int rtnl_fdb_del(struct sk_buff *skb, struct nlmsghdr *nlh, /* Embedded bridge, macvlan, and any other device support */ if (ndm->ndm_flags & NTF_SELF) { - if (dev->netdev_ops->ndo_fdb_del) - err = dev->netdev_ops->ndo_fdb_del(ndm, tb, dev, addr, - vid); - else - err = ndo_dflt_fdb_del(ndm, tb, dev, addr, vid); + ops = dev->netdev_ops; + if (!del_bulk) { + if (ops->ndo_fdb_del) + err = ops->ndo_fdb_del(ndm, tb, dev, addr, vid); + else + err = ndo_dflt_fdb_del(ndm, tb, dev, addr, vid); + } else { + /* in case err was cleared by NTF_MASTER call */ + err = -EOPNOTSUPP; + if (ops->ndo_fdb_del_bulk) + err = ops->ndo_fdb_del_bulk(ndm, tb, dev, vid, + extack); + } if (!err) { - rtnl_fdb_notify(dev, addr, vid, RTM_DELNEIGH, - ndm->ndm_state); + if (!del_bulk) + rtnl_fdb_notify(dev, addr, vid, RTM_DELNEIGH, + ndm->ndm_state); ndm->ndm_flags &= ~NTF_SELF; } } @@ -5896,11 +5980,11 @@ static int rtnetlink_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh, { struct net *net = sock_net(skb->sk); struct rtnl_link *link; + enum rtnl_kinds kind; struct module *owner; int err = -EOPNOTSUPP; rtnl_doit_func doit; unsigned int flags; - int kind; int family; int type; @@ -5915,13 +5999,13 @@ static int rtnetlink_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh, return 0; family = ((struct rtgenmsg *)nlmsg_data(nlh))->rtgen_family; - kind = type&3; + kind = rtnl_msgtype_kind(type); - if (kind != 2 && !netlink_net_capable(skb, CAP_NET_ADMIN)) + if (kind != RTNL_KIND_GET && !netlink_net_capable(skb, CAP_NET_ADMIN)) return -EPERM; rcu_read_lock(); - if (kind == 2 && nlh->nlmsg_flags&NLM_F_DUMP) { + if (kind == RTNL_KIND_GET && (nlh->nlmsg_flags & NLM_F_DUMP)) { struct sock *rtnl; rtnl_dumpit_func dumpit; u32 min_dump_alloc = 0; @@ -5977,6 +6061,12 @@ static int rtnetlink_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh, } flags = link->flags; + if (kind == RTNL_KIND_DEL && (nlh->nlmsg_flags & NLM_F_BULK) && + !(flags & RTNL_FLAG_BULK_DEL_SUPPORTED)) { + NL_SET_ERR_MSG(extack, "Bulk delete is not supported"); + goto err_unlock; + } + if (flags & RTNL_FLAG_DOIT_UNLOCKED) { doit = link->doit; rcu_read_unlock(); @@ -6105,7 +6195,8 @@ void __init rtnetlink_init(void) rtnl_register(PF_UNSPEC, RTM_DELLINKPROP, rtnl_dellinkprop, NULL, 0); rtnl_register(PF_BRIDGE, RTM_NEWNEIGH, rtnl_fdb_add, NULL, 0); - rtnl_register(PF_BRIDGE, RTM_DELNEIGH, rtnl_fdb_del, NULL, 0); + rtnl_register(PF_BRIDGE, RTM_DELNEIGH, rtnl_fdb_del, NULL, + RTNL_FLAG_BULK_DEL_SUPPORTED); rtnl_register(PF_BRIDGE, RTM_GETNEIGH, rtnl_fdb_get, rtnl_fdb_dump, 0); rtnl_register(PF_BRIDGE, RTM_GETLINK, NULL, rtnl_bridge_getlink, 0); diff --git a/net/core/skbuff.c b/net/core/skbuff.c index 30b523fa4ad2..475183f37891 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c @@ -80,7 +80,6 @@ #include <linux/user_namespace.h> #include <linux/indirect_call_wrapper.h> -#include "datagram.h" #include "sock_destructor.h" struct kmem_cache *skbuff_head_cache __ro_after_init; @@ -204,7 +203,7 @@ static void __build_skb_around(struct sk_buff *skb, void *data, skb_set_end_offset(skb, size); skb->mac_header = (typeof(skb->mac_header))~0U; skb->transport_header = (typeof(skb->transport_header))~0U; - + skb->alloc_cpu = raw_smp_processor_id(); /* make sure we initialize shinfo sequentially */ shinfo = skb_shinfo(skb); memset(shinfo, 0, offsetof(struct skb_shared_info, dataref)); @@ -1037,6 +1036,7 @@ static void __copy_skb_header(struct sk_buff *new, const struct sk_buff *old) #ifdef CONFIG_NET_RX_BUSY_POLL CHECK_SKB_FIELD(napi_id); #endif + CHECK_SKB_FIELD(alloc_cpu); #ifdef CONFIG_XPS CHECK_SKB_FIELD(sender_cpu); #endif @@ -1339,18 +1339,11 @@ void msg_zerocopy_put_abort(struct ubuf_info *uarg, bool have_uref) } EXPORT_SYMBOL_GPL(msg_zerocopy_put_abort); -int skb_zerocopy_iter_dgram(struct sk_buff *skb, struct msghdr *msg, int len) -{ - return __zerocopy_sg_from_iter(skb->sk, skb, &msg->msg_iter, len); -} -EXPORT_SYMBOL_GPL(skb_zerocopy_iter_dgram); - int skb_zerocopy_iter_stream(struct sock *sk, struct sk_buff *skb, struct msghdr *msg, int len, struct ubuf_info *uarg) { struct ubuf_info *orig_uarg = skb_zcopy(skb); - struct iov_iter orig_iter = msg->msg_iter; int err, orig_len = skb->len; /* An skb can only point to one uarg. This edge case happens when @@ -1364,7 +1357,7 @@ int skb_zerocopy_iter_stream(struct sock *sk, struct sk_buff *skb, struct sock *save_sk = skb->sk; /* Streams do not free skb on error. Reset to prev state. */ - msg->msg_iter = orig_iter; + iov_iter_revert(&msg->msg_iter, skb->len - orig_len); skb->sk = sk; ___pskb_trim(skb, orig_len); skb->sk = save_sk; @@ -5601,7 +5594,7 @@ err_free: } EXPORT_SYMBOL(skb_vlan_untag); -int skb_ensure_writable(struct sk_buff *skb, int write_len) +int skb_ensure_writable(struct sk_buff *skb, unsigned int write_len) { if (!pskb_may_pull(skb, write_len)) return -ENOMEM; @@ -6486,3 +6479,51 @@ free_now: } EXPORT_SYMBOL(__skb_ext_put); #endif /* CONFIG_SKB_EXTENSIONS */ + +/** + * skb_attempt_defer_free - queue skb for remote freeing + * @skb: buffer + * + * Put @skb in a per-cpu list, using the cpu which + * allocated the skb/pages to reduce false sharing + * and memory zone spinlock contention. + */ +void skb_attempt_defer_free(struct sk_buff *skb) +{ + int cpu = skb->alloc_cpu; + struct softnet_data *sd; + unsigned long flags; + bool kick; + + if (WARN_ON_ONCE(cpu >= nr_cpu_ids) || + !cpu_online(cpu) || + cpu == raw_smp_processor_id()) { + __kfree_skb(skb); + return; + } + + sd = &per_cpu(softnet_data, cpu); + /* We do not send an IPI or any signal. + * Remote cpu will eventually call skb_defer_free_flush() + */ + spin_lock_irqsave(&sd->defer_lock, flags); + skb->next = sd->defer_list; + /* Paired with READ_ONCE() in skb_defer_free_flush() */ + WRITE_ONCE(sd->defer_list, skb); + sd->defer_count++; + + /* kick every time queue length reaches 128. + * This should avoid blocking in smp_call_function_single_async(). + * This condition should hardly be bit under normal conditions, + * unless cpu suddenly stopped to receive NIC interrupts. + */ + kick = sd->defer_count == 128; + + spin_unlock_irqrestore(&sd->defer_lock, flags); + + /* Make sure to trigger NET_RX_SOFTIRQ on the remote CPU + * if we are unlucky enough (this seems very unlikely). + */ + if (unlikely(kick)) + smp_call_function_single_async(cpu, &sd->defer_csd); +} diff --git a/net/core/sock.c b/net/core/sock.c index 1180a0cb0110..be20a1af20e5 100644 --- a/net/core/sock.c +++ b/net/core/sock.c @@ -141,9 +141,14 @@ #include <linux/ethtool.h> +#include "dev.h" + static DEFINE_MUTEX(proto_list_mutex); static LIST_HEAD(proto_list); +static void sock_def_write_space_wfree(struct sock *sk); +static void sock_def_write_space(struct sock *sk); + /** * sk_ns_capable - General socket capability test * @sk: Socket to use a capability on or through @@ -503,17 +508,35 @@ int __sock_queue_rcv_skb(struct sock *sk, struct sk_buff *skb) } EXPORT_SYMBOL(__sock_queue_rcv_skb); -int sock_queue_rcv_skb(struct sock *sk, struct sk_buff *skb) +int sock_queue_rcv_skb_reason(struct sock *sk, struct sk_buff *skb, + enum skb_drop_reason *reason) { + enum skb_drop_reason drop_reason; int err; err = sk_filter(sk, skb); - if (err) - return err; - - return __sock_queue_rcv_skb(sk, skb); + if (err) { + drop_reason = SKB_DROP_REASON_SOCKET_FILTER; + goto out; + } + err = __sock_queue_rcv_skb(sk, skb); + switch (err) { + case -ENOMEM: + drop_reason = SKB_DROP_REASON_SOCKET_RCVBUFF; + break; + case -ENOBUFS: + drop_reason = SKB_DROP_REASON_PROTO_MEM; + break; + default: + drop_reason = SKB_NOT_DROPPED_YET; + break; + } +out: + if (reason) + *reason = drop_reason; + return err; } -EXPORT_SYMBOL(sock_queue_rcv_skb); +EXPORT_SYMBOL(sock_queue_rcv_skb_reason); int __sk_receive_skb(struct sock *sk, struct sk_buff *skb, const int nested, unsigned int trim_cap, bool refcounted) @@ -1291,6 +1314,9 @@ set_sndbuf: __sock_set_mark(sk, val); break; + case SO_RCVMARK: + sock_valbool_flag(sk, SOCK_RCVMARK, valbool); + break; case SO_RXQ_OVFL: sock_valbool_flag(sk, SOCK_RXQ_OVFL, valbool); @@ -1717,6 +1743,10 @@ int sock_getsockopt(struct socket *sock, int level, int optname, v.val = sk->sk_mark; break; + case SO_RCVMARK: + v.val = sock_flag(sk, SOCK_RCVMARK); + break; + case SO_RXQ_OVFL: v.val = sock_flag(sk, SOCK_RXQ_OVFL); break; @@ -2062,9 +2092,6 @@ void sk_destruct(struct sock *sk) { bool use_call_rcu = sock_flag(sk, SOCK_RCU_FREE); - WARN_ON_ONCE(!llist_empty(&sk->defer_list)); - sk_defer_free_flush(sk); - if (rcu_access_pointer(sk->sk_reuseport_cb)) { reuseport_detach_sock(sk); use_call_rcu = true; @@ -2300,8 +2327,20 @@ void sock_wfree(struct sk_buff *skb) { struct sock *sk = skb->sk; unsigned int len = skb->truesize; + bool free; if (!sock_flag(sk, SOCK_USE_WRITE_QUEUE)) { + if (sock_flag(sk, SOCK_RCU_FREE) && + sk->sk_write_space == sock_def_write_space) { + rcu_read_lock(); + free = refcount_sub_and_test(len, &sk->sk_wmem_alloc); + sock_def_write_space_wfree(sk); + rcu_read_unlock(); + if (unlikely(free)) + __sk_free(sk); + return; + } + /* * Keep a reference on sk_wmem_alloc, this will be released * after sk_write_space() call @@ -2611,13 +2650,6 @@ failure: } EXPORT_SYMBOL(sock_alloc_send_pskb); -struct sk_buff *sock_alloc_send_skb(struct sock *sk, unsigned long size, - int noblock, int *errcode) -{ - return sock_alloc_send_pskb(sk, size, 0, noblock, errcode, 0); -} -EXPORT_SYMBOL(sock_alloc_send_skb); - int __sock_cmsg_send(struct sock *sk, struct msghdr *msg, struct cmsghdr *cmsg, struct sockcm_cookie *sockc) { @@ -3174,20 +3206,42 @@ static void sock_def_write_space(struct sock *sk) /* Do not wake up a writer until he can make "significant" * progress. --DaveM */ - if ((refcount_read(&sk->sk_wmem_alloc) << 1) <= READ_ONCE(sk->sk_sndbuf)) { + if (sock_writeable(sk)) { wq = rcu_dereference(sk->sk_wq); if (skwq_has_sleeper(wq)) wake_up_interruptible_sync_poll(&wq->wait, EPOLLOUT | EPOLLWRNORM | EPOLLWRBAND); /* Should agree with poll, otherwise some programs break */ - if (sock_writeable(sk)) - sk_wake_async(sk, SOCK_WAKE_SPACE, POLL_OUT); + sk_wake_async(sk, SOCK_WAKE_SPACE, POLL_OUT); } rcu_read_unlock(); } +/* An optimised version of sock_def_write_space(), should only be called + * for SOCK_RCU_FREE sockets under RCU read section and after putting + * ->sk_wmem_alloc. + */ +static void sock_def_write_space_wfree(struct sock *sk) +{ + /* Do not wake up a writer until he can make "significant" + * progress. --DaveM + */ + if (sock_writeable(sk)) { + struct socket_wq *wq = rcu_dereference(sk->sk_wq); + + /* rely on refcount_sub from sock_wfree() */ + smp_mb__after_atomic(); + if (wq && waitqueue_active(&wq->wait)) + wake_up_interruptible_sync_poll(&wq->wait, EPOLLOUT | + EPOLLWRNORM | EPOLLWRBAND); + + /* Should agree with poll, otherwise some programs break */ + sk_wake_async(sk, SOCK_WAKE_SPACE, POLL_OUT); + } +} + static void sock_def_destruct(struct sock *sk) { } @@ -3486,8 +3540,7 @@ int sock_common_recvmsg(struct socket *sock, struct msghdr *msg, size_t size, int addr_len = 0; int err; - err = sk->sk_prot->recvmsg(sk, msg, size, flags & MSG_DONTWAIT, - flags & ~MSG_DONTWAIT, &addr_len); + err = sk->sk_prot->recvmsg(sk, msg, size, flags, &addr_len); if (err >= 0) msg->msg_namelen = addr_len; return err; diff --git a/net/core/sock_map.c b/net/core/sock_map.c index 2d213c4011db..81d4b4756a02 100644 --- a/net/core/sock_map.c +++ b/net/core/sock_map.c @@ -793,7 +793,7 @@ static const struct bpf_iter_seq_info sock_map_iter_seq_info = { .seq_priv_size = sizeof(struct sock_map_seq_info), }; -static int sock_map_btf_id; +BTF_ID_LIST_SINGLE(sock_map_btf_ids, struct, bpf_stab) const struct bpf_map_ops sock_map_ops = { .map_meta_equal = bpf_map_meta_equal, .map_alloc = sock_map_alloc, @@ -805,8 +805,7 @@ const struct bpf_map_ops sock_map_ops = { .map_lookup_elem = sock_map_lookup, .map_release_uref = sock_map_release_progs, .map_check_btf = map_check_no_btf, - .map_btf_name = "bpf_stab", - .map_btf_id = &sock_map_btf_id, + .map_btf_id = &sock_map_btf_ids[0], .iter_seq_info = &sock_map_iter_seq_info, }; @@ -1385,7 +1384,7 @@ static const struct bpf_iter_seq_info sock_hash_iter_seq_info = { .seq_priv_size = sizeof(struct sock_hash_seq_info), }; -static int sock_hash_map_btf_id; +BTF_ID_LIST_SINGLE(sock_hash_map_btf_ids, struct, bpf_shtab) const struct bpf_map_ops sock_hash_ops = { .map_meta_equal = bpf_map_meta_equal, .map_alloc = sock_hash_alloc, @@ -1397,8 +1396,7 @@ const struct bpf_map_ops sock_hash_ops = { .map_lookup_elem_sys_only = sock_hash_lookup_sys, .map_release_uref = sock_hash_release_progs, .map_check_btf = map_check_no_btf, - .map_btf_name = "bpf_shtab", - .map_btf_id = &sock_hash_map_btf_id, + .map_btf_id = &sock_hash_map_btf_ids[0], .iter_seq_info = &sock_hash_iter_seq_info, }; diff --git a/net/core/sysctl_net_core.c b/net/core/sysctl_net_core.c index 7123fe7feeac..195ca5c28771 100644 --- a/net/core/sysctl_net_core.c +++ b/net/core/sysctl_net_core.c @@ -23,13 +23,12 @@ #include <net/busy_poll.h> #include <net/pkt_sched.h> -static int two = 2; -static int three = 3; +#include "dev.h" + static int int_3600 = 3600; static int min_sndbuf = SOCK_MIN_SNDBUF; static int min_rcvbuf = SOCK_MIN_RCVBUF; static int max_skb_frags = MAX_SKB_FRAGS; -static long long_one __maybe_unused = 1; static long long_max __maybe_unused = LONG_MAX; static int net_msg_warn; /* Unused, but still a sysctl */ @@ -388,7 +387,7 @@ static struct ctl_table net_core_table[] = { .extra2 = SYSCTL_ONE, # else .extra1 = SYSCTL_ZERO, - .extra2 = &two, + .extra2 = SYSCTL_TWO, # endif }, # ifdef CONFIG_HAVE_EBPF_JIT @@ -399,7 +398,7 @@ static struct ctl_table net_core_table[] = { .mode = 0600, .proc_handler = proc_dointvec_minmax_bpf_restricted, .extra1 = SYSCTL_ZERO, - .extra2 = &two, + .extra2 = SYSCTL_TWO, }, { .procname = "bpf_jit_kallsyms", @@ -417,7 +416,7 @@ static struct ctl_table net_core_table[] = { .maxlen = sizeof(long), .mode = 0600, .proc_handler = proc_dolongvec_minmax_bpf_restricted, - .extra1 = &long_one, + .extra1 = SYSCTL_LONG_ONE, .extra2 = &bpf_jit_limit_max, }, #endif @@ -544,7 +543,7 @@ static struct ctl_table net_core_table[] = { .mode = 0644, .proc_handler = proc_dointvec_minmax, .extra1 = SYSCTL_ZERO, - .extra2 = &two, + .extra2 = SYSCTL_TWO, }, { .procname = "devconf_inherit_init_net", @@ -553,7 +552,7 @@ static struct ctl_table net_core_table[] = { .mode = 0644, .proc_handler = proc_dointvec_minmax, .extra1 = SYSCTL_ZERO, - .extra2 = &three, + .extra2 = SYSCTL_THREE, }, { .procname = "high_order_alloc_disable", diff --git a/net/dccp/dccp.h b/net/dccp/dccp.h index 671c377f0889..7dfc00c9fb32 100644 --- a/net/dccp/dccp.h +++ b/net/dccp/dccp.h @@ -293,8 +293,8 @@ int dccp_setsockopt(struct sock *sk, int level, int optname, sockptr_t optval, unsigned int optlen); int dccp_ioctl(struct sock *sk, int cmd, unsigned long arg); int dccp_sendmsg(struct sock *sk, struct msghdr *msg, size_t size); -int dccp_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, int nonblock, - int flags, int *addr_len); +int dccp_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, int flags, + int *addr_len); void dccp_shutdown(struct sock *sk, int how); int inet_dccp_listen(struct socket *sock, int backlog); __poll_t dccp_poll(struct file *file, struct socket *sock, diff --git a/net/dccp/ipv4.c b/net/dccp/ipv4.c index ae662567a6cb..82696ab86f74 100644 --- a/net/dccp/ipv4.c +++ b/net/dccp/ipv4.c @@ -76,9 +76,8 @@ int dccp_v4_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len) orig_dport = usin->sin_port; fl4 = &inet->cork.fl.u.ip4; rt = ip_route_connect(fl4, nexthop, inet->inet_saddr, - RT_CONN_FLAGS(sk), sk->sk_bound_dev_if, - IPPROTO_DCCP, - orig_sport, orig_dport, sk); + sk->sk_bound_dev_if, IPPROTO_DCCP, orig_sport, + orig_dport, sk); if (IS_ERR(rt)) return PTR_ERR(rt); diff --git a/net/dccp/ipv6.c b/net/dccp/ipv6.c index eab3bd1ee9a0..4d95b6400915 100644 --- a/net/dccp/ipv6.c +++ b/net/dccp/ipv6.c @@ -892,7 +892,7 @@ static int dccp_v6_connect(struct sock *sk, struct sockaddr *uaddr, SOCK_DEBUG(sk, "connect: ipv4 mapped\n"); - if (__ipv6_only_sock(sk)) + if (ipv6_only_sock(sk)) return -ENETUNREACH; sin.sin_family = AF_INET; diff --git a/net/dccp/proto.c b/net/dccp/proto.c index a976b4d29892..58421f94427e 100644 --- a/net/dccp/proto.c +++ b/net/dccp/proto.c @@ -791,8 +791,8 @@ out_discard: EXPORT_SYMBOL_GPL(dccp_sendmsg); -int dccp_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, int nonblock, - int flags, int *addr_len) +int dccp_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, int flags, + int *addr_len) { const struct dccp_hdr *dh; long timeo; @@ -804,7 +804,7 @@ int dccp_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, int nonblock, goto out; } - timeo = sock_rcvtimeo(sk, nonblock); + timeo = sock_rcvtimeo(sk, flags & MSG_DONTWAIT); do { struct sk_buff *skb = skb_peek(&sk->sk_receive_queue); diff --git a/net/dsa/dsa.c b/net/dsa/dsa.c index 89c6c86e746f..0c6ae32742ec 100644 --- a/net/dsa/dsa.c +++ b/net/dsa/dsa.c @@ -7,19 +7,10 @@ #include <linux/device.h> #include <linux/list.h> -#include <linux/platform_device.h> -#include <linux/slab.h> #include <linux/module.h> -#include <linux/notifier.h> -#include <linux/of.h> -#include <linux/of_mdio.h> -#include <linux/of_platform.h> -#include <linux/of_net.h> #include <linux/netdevice.h> #include <linux/sysfs.h> -#include <linux/phy_fixed.h> #include <linux/ptp_classify.h> -#include <linux/etherdevice.h> #include "dsa_priv.h" diff --git a/net/dsa/dsa_priv.h b/net/dsa/dsa_priv.h index 5d3f4a67dce1..7c9abd5a0ab9 100644 --- a/net/dsa/dsa_priv.h +++ b/net/dsa/dsa_priv.h @@ -54,18 +54,15 @@ struct dsa_notifier_ageing_time_info { /* DSA_NOTIFIER_BRIDGE_* */ struct dsa_notifier_bridge_info { + const struct dsa_port *dp; struct dsa_bridge bridge; - int tree_index; - int sw_index; - int port; bool tx_fwd_offload; struct netlink_ext_ack *extack; }; /* DSA_NOTIFIER_FDB_* */ struct dsa_notifier_fdb_info { - int sw_index; - int port; + const struct dsa_port *dp; const unsigned char *addr; u16 vid; struct dsa_db db; @@ -81,34 +78,28 @@ struct dsa_notifier_lag_fdb_info { /* DSA_NOTIFIER_MDB_* */ struct dsa_notifier_mdb_info { + const struct dsa_port *dp; const struct switchdev_obj_port_mdb *mdb; - int sw_index; - int port; struct dsa_db db; }; /* DSA_NOTIFIER_LAG_* */ struct dsa_notifier_lag_info { + const struct dsa_port *dp; struct dsa_lag lag; - int sw_index; - int port; - struct netdev_lag_upper_info *info; }; /* DSA_NOTIFIER_VLAN_* */ struct dsa_notifier_vlan_info { + const struct dsa_port *dp; const struct switchdev_obj_port_vlan *vlan; - int sw_index; - int port; struct netlink_ext_ack *extack; }; /* DSA_NOTIFIER_MTU */ struct dsa_notifier_mtu_info { - bool targeted_match; - int sw_index; - int port; + const struct dsa_port *dp; int mtu; }; @@ -119,9 +110,7 @@ struct dsa_notifier_tag_proto_info { /* DSA_NOTIFIER_TAG_8021Q_VLAN_* */ struct dsa_notifier_tag_8021q_vlan_info { - int tree_index; - int sw_index; - int port; + const struct dsa_port *dp; u16 vid; }; @@ -241,8 +230,7 @@ int dsa_port_mst_enable(struct dsa_port *dp, bool on, struct netlink_ext_ack *extack); int dsa_port_vlan_msti(struct dsa_port *dp, const struct switchdev_vlan_msti *msti); -int dsa_port_mtu_change(struct dsa_port *dp, int new_mtu, - bool targeted_match); +int dsa_port_mtu_change(struct dsa_port *dp, int new_mtu); int dsa_port_fdb_add(struct dsa_port *dp, const unsigned char *addr, u16 vid); int dsa_port_fdb_del(struct dsa_port *dp, const unsigned char *addr, diff --git a/net/dsa/port.c b/net/dsa/port.c index cdc56ba11f52..48e5a309ca5c 100644 --- a/net/dsa/port.c +++ b/net/dsa/port.c @@ -242,6 +242,59 @@ void dsa_port_disable(struct dsa_port *dp) rtnl_unlock(); } +static void dsa_port_reset_vlan_filtering(struct dsa_port *dp, + struct dsa_bridge bridge) +{ + struct netlink_ext_ack extack = {0}; + bool change_vlan_filtering = false; + struct dsa_switch *ds = dp->ds; + bool vlan_filtering; + int err; + + if (ds->needs_standalone_vlan_filtering && + !br_vlan_enabled(bridge.dev)) { + change_vlan_filtering = true; + vlan_filtering = true; + } else if (!ds->needs_standalone_vlan_filtering && + br_vlan_enabled(bridge.dev)) { + change_vlan_filtering = true; + vlan_filtering = false; + } + + /* If the bridge was vlan_filtering, the bridge core doesn't trigger an + * event for changing vlan_filtering setting upon slave ports leaving + * it. That is a good thing, because that lets us handle it and also + * handle the case where the switch's vlan_filtering setting is global + * (not per port). When that happens, the correct moment to trigger the + * vlan_filtering callback is only when the last port leaves the last + * VLAN-aware bridge. + */ + if (change_vlan_filtering && ds->vlan_filtering_is_global) { + dsa_switch_for_each_port(dp, ds) { + struct net_device *br = dsa_port_bridge_dev_get(dp); + + if (br && br_vlan_enabled(br)) { + change_vlan_filtering = false; + break; + } + } + } + + if (!change_vlan_filtering) + return; + + err = dsa_port_vlan_filtering(dp, vlan_filtering, &extack); + if (extack._msg) { + dev_err(ds->dev, "port %d: %s\n", dp->index, + extack._msg); + } + if (err && err != -EOPNOTSUPP) { + dev_err(ds->dev, + "port %d failed to reset VLAN filtering to %d: %pe\n", + dp->index, vlan_filtering, ERR_PTR(err)); + } +} + static int dsa_port_inherit_brport_flags(struct dsa_port *dp, struct netlink_ext_ack *extack) { @@ -313,7 +366,8 @@ static int dsa_port_switchdev_sync_attrs(struct dsa_port *dp, return 0; } -static void dsa_port_switchdev_unsync_attrs(struct dsa_port *dp) +static void dsa_port_switchdev_unsync_attrs(struct dsa_port *dp, + struct dsa_bridge bridge) { /* Configure the port for standalone mode (no address learning, * flood everything). @@ -333,7 +387,7 @@ static void dsa_port_switchdev_unsync_attrs(struct dsa_port *dp) */ dsa_port_set_state_now(dp, BR_STATE_FORWARDING, true); - /* VLAN filtering is handled by dsa_switch_bridge_leave */ + dsa_port_reset_vlan_filtering(dp, bridge); /* Ageing time may be global to the switch chip, so don't change it * here because we have no good reason (or value) to change it to. @@ -405,9 +459,7 @@ int dsa_port_bridge_join(struct dsa_port *dp, struct net_device *br, struct netlink_ext_ack *extack) { struct dsa_notifier_bridge_info info = { - .tree_index = dp->ds->dst->index, - .sw_index = dp->ds->index, - .port = dp->index, + .dp = dp, .extack = extack, }; struct net_device *dev = dp->slave; @@ -476,9 +528,7 @@ void dsa_port_pre_bridge_leave(struct dsa_port *dp, struct net_device *br) void dsa_port_bridge_leave(struct dsa_port *dp, struct net_device *br) { struct dsa_notifier_bridge_info info = { - .tree_index = dp->ds->dst->index, - .sw_index = dp->ds->index, - .port = dp->index, + .dp = dp, }; int err; @@ -501,15 +551,14 @@ void dsa_port_bridge_leave(struct dsa_port *dp, struct net_device *br) "port %d failed to notify DSA_NOTIFIER_BRIDGE_LEAVE: %pe\n", dp->index, ERR_PTR(err)); - dsa_port_switchdev_unsync_attrs(dp); + dsa_port_switchdev_unsync_attrs(dp, info.bridge); } int dsa_port_lag_change(struct dsa_port *dp, struct netdev_lag_lower_state_info *linfo) { struct dsa_notifier_lag_info info = { - .sw_index = dp->ds->index, - .port = dp->index, + .dp = dp, }; bool tx_enabled; @@ -578,8 +627,7 @@ int dsa_port_lag_join(struct dsa_port *dp, struct net_device *lag_dev, struct netlink_ext_ack *extack) { struct dsa_notifier_lag_info info = { - .sw_index = dp->ds->index, - .port = dp->index, + .dp = dp, .info = uinfo, }; struct net_device *bridge_dev; @@ -624,8 +672,7 @@ void dsa_port_lag_leave(struct dsa_port *dp, struct net_device *lag_dev) { struct net_device *br = dsa_port_bridge_dev_get(dp); struct dsa_notifier_lag_info info = { - .sw_index = dp->ds->index, - .port = dp->index, + .dp = dp, }; int err; @@ -883,13 +930,10 @@ int dsa_port_vlan_msti(struct dsa_port *dp, return ds->ops->vlan_msti_set(ds, *dp->bridge, msti); } -int dsa_port_mtu_change(struct dsa_port *dp, int new_mtu, - bool targeted_match) +int dsa_port_mtu_change(struct dsa_port *dp, int new_mtu) { struct dsa_notifier_mtu_info info = { - .sw_index = dp->ds->index, - .targeted_match = targeted_match, - .port = dp->index, + .dp = dp, .mtu = new_mtu, }; @@ -900,8 +944,7 @@ int dsa_port_fdb_add(struct dsa_port *dp, const unsigned char *addr, u16 vid) { struct dsa_notifier_fdb_info info = { - .sw_index = dp->ds->index, - .port = dp->index, + .dp = dp, .addr = addr, .vid = vid, .db = { @@ -924,8 +967,7 @@ int dsa_port_fdb_del(struct dsa_port *dp, const unsigned char *addr, u16 vid) { struct dsa_notifier_fdb_info info = { - .sw_index = dp->ds->index, - .port = dp->index, + .dp = dp, .addr = addr, .vid = vid, .db = { @@ -945,8 +987,7 @@ static int dsa_port_host_fdb_add(struct dsa_port *dp, struct dsa_db db) { struct dsa_notifier_fdb_info info = { - .sw_index = dp->ds->index, - .port = dp->index, + .dp = dp, .addr = addr, .vid = vid, .db = db, @@ -997,8 +1038,7 @@ static int dsa_port_host_fdb_del(struct dsa_port *dp, struct dsa_db db) { struct dsa_notifier_fdb_info info = { - .sw_index = dp->ds->index, - .port = dp->index, + .dp = dp, .addr = addr, .vid = vid, .db = db, @@ -1093,8 +1133,7 @@ int dsa_port_mdb_add(const struct dsa_port *dp, const struct switchdev_obj_port_mdb *mdb) { struct dsa_notifier_mdb_info info = { - .sw_index = dp->ds->index, - .port = dp->index, + .dp = dp, .mdb = mdb, .db = { .type = DSA_DB_BRIDGE, @@ -1112,8 +1151,7 @@ int dsa_port_mdb_del(const struct dsa_port *dp, const struct switchdev_obj_port_mdb *mdb) { struct dsa_notifier_mdb_info info = { - .sw_index = dp->ds->index, - .port = dp->index, + .dp = dp, .mdb = mdb, .db = { .type = DSA_DB_BRIDGE, @@ -1132,8 +1170,7 @@ static int dsa_port_host_mdb_add(const struct dsa_port *dp, struct dsa_db db) { struct dsa_notifier_mdb_info info = { - .sw_index = dp->ds->index, - .port = dp->index, + .dp = dp, .mdb = mdb, .db = db, }; @@ -1177,8 +1214,7 @@ static int dsa_port_host_mdb_del(const struct dsa_port *dp, struct dsa_db db) { struct dsa_notifier_mdb_info info = { - .sw_index = dp->ds->index, - .port = dp->index, + .dp = dp, .mdb = mdb, .db = db, }; @@ -1222,8 +1258,7 @@ int dsa_port_vlan_add(struct dsa_port *dp, struct netlink_ext_ack *extack) { struct dsa_notifier_vlan_info info = { - .sw_index = dp->ds->index, - .port = dp->index, + .dp = dp, .vlan = vlan, .extack = extack, }; @@ -1235,8 +1270,7 @@ int dsa_port_vlan_del(struct dsa_port *dp, const struct switchdev_obj_port_vlan *vlan) { struct dsa_notifier_vlan_info info = { - .sw_index = dp->ds->index, - .port = dp->index, + .dp = dp, .vlan = vlan, }; @@ -1248,8 +1282,7 @@ int dsa_port_host_vlan_add(struct dsa_port *dp, struct netlink_ext_ack *extack) { struct dsa_notifier_vlan_info info = { - .sw_index = dp->ds->index, - .port = dp->index, + .dp = dp, .vlan = vlan, .extack = extack, }; @@ -1269,8 +1302,7 @@ int dsa_port_host_vlan_del(struct dsa_port *dp, const struct switchdev_obj_port_vlan *vlan) { struct dsa_notifier_vlan_info info = { - .sw_index = dp->ds->index, - .port = dp->index, + .dp = dp, .vlan = vlan, }; struct dsa_port *cpu_dp = dp->cpu_dp; @@ -1691,9 +1723,7 @@ void dsa_port_hsr_leave(struct dsa_port *dp, struct net_device *hsr) int dsa_port_tag_8021q_vlan_add(struct dsa_port *dp, u16 vid, bool broadcast) { struct dsa_notifier_tag_8021q_vlan_info info = { - .tree_index = dp->ds->dst->index, - .sw_index = dp->ds->index, - .port = dp->index, + .dp = dp, .vid = vid, }; @@ -1706,9 +1736,7 @@ int dsa_port_tag_8021q_vlan_add(struct dsa_port *dp, u16 vid, bool broadcast) void dsa_port_tag_8021q_vlan_del(struct dsa_port *dp, u16 vid, bool broadcast) { struct dsa_notifier_tag_8021q_vlan_info info = { - .tree_index = dp->ds->dst->index, - .sw_index = dp->ds->index, - .port = dp->index, + .dp = dp, .vid = vid, }; int err; diff --git a/net/dsa/slave.c b/net/dsa/slave.c index 8022d50584db..5ee0aced9410 100644 --- a/net/dsa/slave.c +++ b/net/dsa/slave.c @@ -1806,11 +1806,9 @@ int dsa_slave_change_mtu(struct net_device *dev, int new_mtu) { struct net_device *master = dsa_slave_to_master(dev); struct dsa_port *dp = dsa_slave_to_port(dev); - struct dsa_slave_priv *p = netdev_priv(dev); - struct dsa_switch *ds = p->dp->ds; - struct dsa_port *dp_iter; - struct dsa_port *cpu_dp; - int port = p->dp->index; + struct dsa_port *cpu_dp = dp->cpu_dp; + struct dsa_switch *ds = dp->ds; + struct dsa_port *other_dp; int largest_mtu = 0; int new_master_mtu; int old_master_mtu; @@ -1821,33 +1819,28 @@ int dsa_slave_change_mtu(struct net_device *dev, int new_mtu) if (!ds->ops->port_change_mtu) return -EOPNOTSUPP; - list_for_each_entry(dp_iter, &ds->dst->ports, list) { + dsa_tree_for_each_user_port(other_dp, ds->dst) { int slave_mtu; - if (!dsa_port_is_user(dp_iter)) - continue; - /* During probe, this function will be called for each slave * device, while not all of them have been allocated. That's * ok, it doesn't change what the maximum is, so ignore it. */ - if (!dp_iter->slave) + if (!other_dp->slave) continue; /* Pretend that we already applied the setting, which we * actually haven't (still haven't done all integrity checks) */ - if (dp_iter == dp) + if (dp == other_dp) slave_mtu = new_mtu; else - slave_mtu = dp_iter->slave->mtu; + slave_mtu = other_dp->slave->mtu; if (largest_mtu < slave_mtu) largest_mtu = slave_mtu; } - cpu_dp = dsa_to_port(ds, port)->cpu_dp; - mtu_limit = min_t(int, master->max_mtu, dev->max_mtu); old_master_mtu = master->mtu; new_master_mtu = largest_mtu + dsa_tag_protocol_overhead(cpu_dp->tag_ops); @@ -1866,15 +1859,14 @@ int dsa_slave_change_mtu(struct net_device *dev, int new_mtu) goto out_master_failed; /* We only need to propagate the MTU of the CPU port to - * upstream switches, so create a non-targeted notifier which - * updates all switches. + * upstream switches, so emit a notifier which updates them. */ - err = dsa_port_mtu_change(cpu_dp, cpu_mtu, false); + err = dsa_port_mtu_change(cpu_dp, cpu_mtu); if (err) goto out_cpu_failed; } - err = dsa_port_mtu_change(dp, new_mtu, true); + err = ds->ops->port_change_mtu(ds, dp->index, new_mtu); if (err) goto out_port_failed; @@ -1887,8 +1879,7 @@ int dsa_slave_change_mtu(struct net_device *dev, int new_mtu) out_port_failed: if (new_master_mtu != old_master_mtu) dsa_port_mtu_change(cpu_dp, old_master_mtu - - dsa_tag_protocol_overhead(cpu_dp->tag_ops), - false); + dsa_tag_protocol_overhead(cpu_dp->tag_ops)); out_cpu_failed: if (new_master_mtu != old_master_mtu) dev_set_mtu(master, old_master_mtu); diff --git a/net/dsa/switch.c b/net/dsa/switch.c index d25cd1da3eb3..704975e5c1c2 100644 --- a/net/dsa/switch.c +++ b/net/dsa/switch.c @@ -49,19 +49,7 @@ static int dsa_switch_ageing_time(struct dsa_switch *ds, static bool dsa_port_mtu_match(struct dsa_port *dp, struct dsa_notifier_mtu_info *info) { - if (dp->ds->index == info->sw_index && dp->index == info->port) - return true; - - /* Do not propagate to other switches in the tree if the notifier was - * targeted for a single switch. - */ - if (info->targeted_match) - return false; - - if (dsa_port_is_dsa(dp) || dsa_port_is_cpu(dp)) - return true; - - return false; + return dp == info->dp || dsa_port_is_dsa(dp) || dsa_port_is_cpu(dp); } static int dsa_switch_mtu(struct dsa_switch *ds, @@ -88,25 +76,26 @@ static int dsa_switch_mtu(struct dsa_switch *ds, static int dsa_switch_bridge_join(struct dsa_switch *ds, struct dsa_notifier_bridge_info *info) { - struct dsa_switch_tree *dst = ds->dst; int err; - if (dst->index == info->tree_index && ds->index == info->sw_index) { + if (info->dp->ds == ds) { if (!ds->ops->port_bridge_join) return -EOPNOTSUPP; - err = ds->ops->port_bridge_join(ds, info->port, info->bridge, + err = ds->ops->port_bridge_join(ds, info->dp->index, + info->bridge, &info->tx_fwd_offload, info->extack); if (err) return err; } - if ((dst->index != info->tree_index || ds->index != info->sw_index) && - ds->ops->crosschip_bridge_join) { - err = ds->ops->crosschip_bridge_join(ds, info->tree_index, - info->sw_index, - info->port, info->bridge, + if (info->dp->ds != ds && ds->ops->crosschip_bridge_join) { + err = ds->ops->crosschip_bridge_join(ds, + info->dp->ds->dst->index, + info->dp->ds->index, + info->dp->index, + info->bridge, info->extack); if (err) return err; @@ -115,79 +104,18 @@ static int dsa_switch_bridge_join(struct dsa_switch *ds, return 0; } -static int dsa_switch_sync_vlan_filtering(struct dsa_switch *ds, - struct dsa_notifier_bridge_info *info) -{ - struct netlink_ext_ack extack = {0}; - bool change_vlan_filtering = false; - bool vlan_filtering; - struct dsa_port *dp; - int err; - - if (ds->needs_standalone_vlan_filtering && - !br_vlan_enabled(info->bridge.dev)) { - change_vlan_filtering = true; - vlan_filtering = true; - } else if (!ds->needs_standalone_vlan_filtering && - br_vlan_enabled(info->bridge.dev)) { - change_vlan_filtering = true; - vlan_filtering = false; - } - - /* If the bridge was vlan_filtering, the bridge core doesn't trigger an - * event for changing vlan_filtering setting upon slave ports leaving - * it. That is a good thing, because that lets us handle it and also - * handle the case where the switch's vlan_filtering setting is global - * (not per port). When that happens, the correct moment to trigger the - * vlan_filtering callback is only when the last port leaves the last - * VLAN-aware bridge. - */ - if (change_vlan_filtering && ds->vlan_filtering_is_global) { - dsa_switch_for_each_port(dp, ds) { - struct net_device *br = dsa_port_bridge_dev_get(dp); - - if (br && br_vlan_enabled(br)) { - change_vlan_filtering = false; - break; - } - } - } - - if (change_vlan_filtering) { - err = dsa_port_vlan_filtering(dsa_to_port(ds, info->port), - vlan_filtering, &extack); - if (extack._msg) - dev_err(ds->dev, "port %d: %s\n", info->port, - extack._msg); - if (err && err != -EOPNOTSUPP) - return err; - } - - return 0; -} - static int dsa_switch_bridge_leave(struct dsa_switch *ds, struct dsa_notifier_bridge_info *info) { - struct dsa_switch_tree *dst = ds->dst; - int err; + if (info->dp->ds == ds && ds->ops->port_bridge_leave) + ds->ops->port_bridge_leave(ds, info->dp->index, info->bridge); - if (dst->index == info->tree_index && ds->index == info->sw_index && - ds->ops->port_bridge_leave) - ds->ops->port_bridge_leave(ds, info->port, info->bridge); - - if ((dst->index != info->tree_index || ds->index != info->sw_index) && - ds->ops->crosschip_bridge_leave) - ds->ops->crosschip_bridge_leave(ds, info->tree_index, - info->sw_index, info->port, + if (info->dp->ds != ds && ds->ops->crosschip_bridge_leave) + ds->ops->crosschip_bridge_leave(ds, info->dp->ds->dst->index, + info->dp->ds->index, + info->dp->index, info->bridge); - if (ds->dst->index == info->tree_index && ds->index == info->sw_index) { - err = dsa_switch_sync_vlan_filtering(ds, info); - if (err) - return err; - } - return 0; } @@ -196,16 +124,11 @@ static int dsa_switch_bridge_leave(struct dsa_switch *ds, * emitted and its dedicated CPU port. */ static bool dsa_port_host_address_match(struct dsa_port *dp, - int info_sw_index, int info_port) + const struct dsa_port *targeted_dp) { - struct dsa_port *targeted_dp, *cpu_dp; - struct dsa_switch *targeted_ds; - - targeted_ds = dsa_switch_find(dp->ds->dst->index, info_sw_index); - targeted_dp = dsa_to_port(targeted_ds, info_port); - cpu_dp = targeted_dp->cpu_dp; + struct dsa_port *cpu_dp = targeted_dp->cpu_dp; - if (dsa_switch_is_upstream_of(dp->ds, targeted_ds)) + if (dsa_switch_is_upstream_of(dp->ds, targeted_dp->ds)) return dp->index == dsa_towards_port(dp->ds, cpu_dp->ds->index, cpu_dp->index); @@ -473,8 +396,7 @@ static int dsa_switch_host_fdb_add(struct dsa_switch *ds, return -EOPNOTSUPP; dsa_switch_for_each_port(dp, ds) { - if (dsa_port_host_address_match(dp, info->sw_index, - info->port)) { + if (dsa_port_host_address_match(dp, info->dp)) { err = dsa_port_do_fdb_add(dp, info->addr, info->vid, info->db); if (err) @@ -495,8 +417,7 @@ static int dsa_switch_host_fdb_del(struct dsa_switch *ds, return -EOPNOTSUPP; dsa_switch_for_each_port(dp, ds) { - if (dsa_port_host_address_match(dp, info->sw_index, - info->port)) { + if (dsa_port_host_address_match(dp, info->dp)) { err = dsa_port_do_fdb_del(dp, info->addr, info->vid, info->db); if (err) @@ -510,7 +431,7 @@ static int dsa_switch_host_fdb_del(struct dsa_switch *ds, static int dsa_switch_fdb_add(struct dsa_switch *ds, struct dsa_notifier_fdb_info *info) { - int port = dsa_towards_port(ds, info->sw_index, info->port); + int port = dsa_towards_port(ds, info->dp->ds->index, info->dp->index); struct dsa_port *dp = dsa_to_port(ds, port); if (!ds->ops->port_fdb_add) @@ -522,7 +443,7 @@ static int dsa_switch_fdb_add(struct dsa_switch *ds, static int dsa_switch_fdb_del(struct dsa_switch *ds, struct dsa_notifier_fdb_info *info) { - int port = dsa_towards_port(ds, info->sw_index, info->port); + int port = dsa_towards_port(ds, info->dp->ds->index, info->dp->index); struct dsa_port *dp = dsa_to_port(ds, port); if (!ds->ops->port_fdb_del) @@ -570,12 +491,12 @@ static int dsa_switch_lag_fdb_del(struct dsa_switch *ds, static int dsa_switch_lag_change(struct dsa_switch *ds, struct dsa_notifier_lag_info *info) { - if (ds->index == info->sw_index && ds->ops->port_lag_change) - return ds->ops->port_lag_change(ds, info->port); + if (info->dp->ds == ds && ds->ops->port_lag_change) + return ds->ops->port_lag_change(ds, info->dp->index); - if (ds->index != info->sw_index && ds->ops->crosschip_lag_change) - return ds->ops->crosschip_lag_change(ds, info->sw_index, - info->port); + if (info->dp->ds != ds && ds->ops->crosschip_lag_change) + return ds->ops->crosschip_lag_change(ds, info->dp->ds->index, + info->dp->index); return 0; } @@ -583,13 +504,13 @@ static int dsa_switch_lag_change(struct dsa_switch *ds, static int dsa_switch_lag_join(struct dsa_switch *ds, struct dsa_notifier_lag_info *info) { - if (ds->index == info->sw_index && ds->ops->port_lag_join) - return ds->ops->port_lag_join(ds, info->port, info->lag, + if (info->dp->ds == ds && ds->ops->port_lag_join) + return ds->ops->port_lag_join(ds, info->dp->index, info->lag, info->info); - if (ds->index != info->sw_index && ds->ops->crosschip_lag_join) - return ds->ops->crosschip_lag_join(ds, info->sw_index, - info->port, info->lag, + if (info->dp->ds != ds && ds->ops->crosschip_lag_join) + return ds->ops->crosschip_lag_join(ds, info->dp->ds->index, + info->dp->index, info->lag, info->info); return -EOPNOTSUPP; @@ -598,12 +519,12 @@ static int dsa_switch_lag_join(struct dsa_switch *ds, static int dsa_switch_lag_leave(struct dsa_switch *ds, struct dsa_notifier_lag_info *info) { - if (ds->index == info->sw_index && ds->ops->port_lag_leave) - return ds->ops->port_lag_leave(ds, info->port, info->lag); + if (info->dp->ds == ds && ds->ops->port_lag_leave) + return ds->ops->port_lag_leave(ds, info->dp->index, info->lag); - if (ds->index != info->sw_index && ds->ops->crosschip_lag_leave) - return ds->ops->crosschip_lag_leave(ds, info->sw_index, - info->port, info->lag); + if (info->dp->ds != ds && ds->ops->crosschip_lag_leave) + return ds->ops->crosschip_lag_leave(ds, info->dp->ds->index, + info->dp->index, info->lag); return -EOPNOTSUPP; } @@ -611,7 +532,7 @@ static int dsa_switch_lag_leave(struct dsa_switch *ds, static int dsa_switch_mdb_add(struct dsa_switch *ds, struct dsa_notifier_mdb_info *info) { - int port = dsa_towards_port(ds, info->sw_index, info->port); + int port = dsa_towards_port(ds, info->dp->ds->index, info->dp->index); struct dsa_port *dp = dsa_to_port(ds, port); if (!ds->ops->port_mdb_add) @@ -623,7 +544,7 @@ static int dsa_switch_mdb_add(struct dsa_switch *ds, static int dsa_switch_mdb_del(struct dsa_switch *ds, struct dsa_notifier_mdb_info *info) { - int port = dsa_towards_port(ds, info->sw_index, info->port); + int port = dsa_towards_port(ds, info->dp->ds->index, info->dp->index); struct dsa_port *dp = dsa_to_port(ds, port); if (!ds->ops->port_mdb_del) @@ -642,8 +563,7 @@ static int dsa_switch_host_mdb_add(struct dsa_switch *ds, return -EOPNOTSUPP; dsa_switch_for_each_port(dp, ds) { - if (dsa_port_host_address_match(dp, info->sw_index, - info->port)) { + if (dsa_port_host_address_match(dp, info->dp)) { err = dsa_port_do_mdb_add(dp, info->mdb, info->db); if (err) break; @@ -663,8 +583,7 @@ static int dsa_switch_host_mdb_del(struct dsa_switch *ds, return -EOPNOTSUPP; dsa_switch_for_each_port(dp, ds) { - if (dsa_port_host_address_match(dp, info->sw_index, - info->port)) { + if (dsa_port_host_address_match(dp, info->dp)) { err = dsa_port_do_mdb_del(dp, info->mdb, info->db); if (err) break; @@ -678,29 +597,18 @@ static int dsa_switch_host_mdb_del(struct dsa_switch *ds, static bool dsa_port_vlan_match(struct dsa_port *dp, struct dsa_notifier_vlan_info *info) { - if (dp->ds->index == info->sw_index && dp->index == info->port) - return true; - - if (dsa_port_is_dsa(dp)) - return true; - - return false; + return dsa_port_is_dsa(dp) || dp == info->dp; } /* Host VLANs match on the targeted port's CPU port, and on all DSA ports * (upstream and downstream) of that switch and its upstream switches. */ static bool dsa_port_host_vlan_match(struct dsa_port *dp, - struct dsa_notifier_vlan_info *info) + const struct dsa_port *targeted_dp) { - struct dsa_port *targeted_dp, *cpu_dp; - struct dsa_switch *targeted_ds; - - targeted_ds = dsa_switch_find(dp->ds->dst->index, info->sw_index); - targeted_dp = dsa_to_port(targeted_ds, info->port); - cpu_dp = targeted_dp->cpu_dp; + struct dsa_port *cpu_dp = targeted_dp->cpu_dp; - if (dsa_switch_is_upstream_of(dp->ds, targeted_ds)) + if (dsa_switch_is_upstream_of(dp->ds, targeted_dp->ds)) return dsa_port_is_dsa(dp) || dp == cpu_dp; return false; @@ -858,7 +766,7 @@ static int dsa_switch_host_vlan_add(struct dsa_switch *ds, return -EOPNOTSUPP; dsa_switch_for_each_port(dp, ds) { - if (dsa_port_host_vlan_match(dp, info)) { + if (dsa_port_host_vlan_match(dp, info->dp)) { err = dsa_port_do_vlan_add(dp, info->vlan, info->extack); if (err) @@ -879,7 +787,7 @@ static int dsa_switch_host_vlan_del(struct dsa_switch *ds, return -EOPNOTSUPP; dsa_switch_for_each_port(dp, ds) { - if (dsa_port_host_vlan_match(dp, info)) { + if (dsa_port_host_vlan_match(dp, info->dp)) { err = dsa_port_do_vlan_del(dp, info->vlan); if (err) return err; diff --git a/net/dsa/tag_8021q.c b/net/dsa/tag_8021q.c index a786569203f0..01a427800797 100644 --- a/net/dsa/tag_8021q.c +++ b/net/dsa/tag_8021q.c @@ -196,15 +196,7 @@ static bool dsa_port_tag_8021q_vlan_match(struct dsa_port *dp, struct dsa_notifier_tag_8021q_vlan_info *info) { - struct dsa_switch *ds = dp->ds; - - if (dsa_port_is_dsa(dp) || dsa_port_is_cpu(dp)) - return true; - - if (ds->dst->index == info->tree_index && ds->index == info->sw_index) - return dp->index == info->port; - - return false; + return dsa_port_is_dsa(dp) || dsa_port_is_cpu(dp) || dp == info->dp; } int dsa_switch_tag_8021q_vlan_add(struct dsa_switch *ds, diff --git a/net/ethernet/eth.c b/net/ethernet/eth.c index ebcc812735a4..62b89d6f54fd 100644 --- a/net/ethernet/eth.c +++ b/net/ethernet/eth.c @@ -391,7 +391,7 @@ EXPORT_SYMBOL(ether_setup); struct net_device *alloc_etherdev_mqs(int sizeof_priv, unsigned int txqs, unsigned int rxqs) { - return alloc_netdev_mqs(sizeof_priv, "eth%d", NET_NAME_UNKNOWN, + return alloc_netdev_mqs(sizeof_priv, "eth%d", NET_NAME_ENUM, ether_setup, txqs, rxqs); } EXPORT_SYMBOL(alloc_etherdev_mqs); diff --git a/net/ethtool/common.c b/net/ethtool/common.c index 0c5210015911..566adf85e658 100644 --- a/net/ethtool/common.c +++ b/net/ethtool/common.c @@ -201,6 +201,7 @@ const char link_mode_names[][ETH_GSTRING_LEN] = { __DEFINE_LINK_MODE_NAME(400000, CR4, Full), __DEFINE_LINK_MODE_NAME(100, FX, Half), __DEFINE_LINK_MODE_NAME(100, FX, Full), + __DEFINE_LINK_MODE_NAME(10, T1L, Full), }; static_assert(ARRAY_SIZE(link_mode_names) == __ETHTOOL_LINK_MODE_MASK_NBITS); @@ -236,6 +237,7 @@ static_assert(ARRAY_SIZE(link_mode_names) == __ETHTOOL_LINK_MODE_MASK_NBITS); #define __LINK_MODE_LANES_T1 1 #define __LINK_MODE_LANES_X 1 #define __LINK_MODE_LANES_FX 1 +#define __LINK_MODE_LANES_T1L 1 #define __DEFINE_LINK_MODE_PARAMS(_speed, _type, _duplex) \ [ETHTOOL_LINK_MODE(_speed, _type, _duplex)] = { \ @@ -349,6 +351,7 @@ const struct link_mode_info link_mode_params[] = { __DEFINE_LINK_MODE_PARAMS(400000, CR4, Full), __DEFINE_LINK_MODE_PARAMS(100, FX, Half), __DEFINE_LINK_MODE_PARAMS(100, FX, Full), + __DEFINE_LINK_MODE_PARAMS(10, T1L, Full), }; static_assert(ARRAY_SIZE(link_mode_params) == __ETHTOOL_LINK_MODE_MASK_NBITS); diff --git a/net/ethtool/netlink.h b/net/ethtool/netlink.h index 29d01662a48b..7919ddb2371c 100644 --- a/net/ethtool/netlink.h +++ b/net/ethtool/netlink.h @@ -363,7 +363,7 @@ extern const struct nla_policy ethnl_features_set_policy[ETHTOOL_A_FEATURES_WANT extern const struct nla_policy ethnl_privflags_get_policy[ETHTOOL_A_PRIVFLAGS_HEADER + 1]; extern const struct nla_policy ethnl_privflags_set_policy[ETHTOOL_A_PRIVFLAGS_FLAGS + 1]; extern const struct nla_policy ethnl_rings_get_policy[ETHTOOL_A_RINGS_HEADER + 1]; -extern const struct nla_policy ethnl_rings_set_policy[ETHTOOL_A_RINGS_CQE_SIZE + 1]; +extern const struct nla_policy ethnl_rings_set_policy[ETHTOOL_A_RINGS_TX_PUSH + 1]; extern const struct nla_policy ethnl_channels_get_policy[ETHTOOL_A_CHANNELS_HEADER + 1]; extern const struct nla_policy ethnl_channels_set_policy[ETHTOOL_A_CHANNELS_COMBINED_COUNT + 1]; extern const struct nla_policy ethnl_coalesce_get_policy[ETHTOOL_A_COALESCE_HEADER + 1]; diff --git a/net/ethtool/rings.c b/net/ethtool/rings.c index 9f33c9689b56..fa3ec8d438f7 100644 --- a/net/ethtool/rings.c +++ b/net/ethtool/rings.c @@ -55,7 +55,8 @@ static int rings_reply_size(const struct ethnl_req_info *req_base, nla_total_size(sizeof(u32)) + /* _RINGS_TX */ nla_total_size(sizeof(u32)) + /* _RINGS_RX_BUF_LEN */ nla_total_size(sizeof(u8)) + /* _RINGS_TCP_DATA_SPLIT */ - nla_total_size(sizeof(u32)); /* _RINGS_CQE_SIZE */ + nla_total_size(sizeof(u32) + /* _RINGS_CQE_SIZE */ + nla_total_size(sizeof(u8))); /* _RINGS_TX_PUSH */ } static int rings_fill_reply(struct sk_buff *skb, @@ -94,7 +95,8 @@ static int rings_fill_reply(struct sk_buff *skb, (nla_put_u8(skb, ETHTOOL_A_RINGS_TCP_DATA_SPLIT, kr->tcp_data_split))) || (kr->cqe_size && - (nla_put_u32(skb, ETHTOOL_A_RINGS_CQE_SIZE, kr->cqe_size)))) + (nla_put_u32(skb, ETHTOOL_A_RINGS_CQE_SIZE, kr->cqe_size))) || + nla_put_u8(skb, ETHTOOL_A_RINGS_TX_PUSH, !!kr->tx_push)) return -EMSGSIZE; return 0; @@ -123,6 +125,7 @@ const struct nla_policy ethnl_rings_set_policy[] = { [ETHTOOL_A_RINGS_TX] = { .type = NLA_U32 }, [ETHTOOL_A_RINGS_RX_BUF_LEN] = NLA_POLICY_MIN(NLA_U32, 1), [ETHTOOL_A_RINGS_CQE_SIZE] = NLA_POLICY_MIN(NLA_U32, 1), + [ETHTOOL_A_RINGS_TX_PUSH] = NLA_POLICY_MAX(NLA_U8, 1), }; int ethnl_set_rings(struct sk_buff *skb, struct genl_info *info) @@ -149,6 +152,33 @@ int ethnl_set_rings(struct sk_buff *skb, struct genl_info *info) if (!ops->get_ringparam || !ops->set_ringparam) goto out_dev; + if (tb[ETHTOOL_A_RINGS_RX_BUF_LEN] && + !(ops->supported_ring_params & ETHTOOL_RING_USE_RX_BUF_LEN)) { + ret = -EOPNOTSUPP; + NL_SET_ERR_MSG_ATTR(info->extack, + tb[ETHTOOL_A_RINGS_RX_BUF_LEN], + "setting rx buf len not supported"); + goto out_dev; + } + + if (tb[ETHTOOL_A_RINGS_CQE_SIZE] && + !(ops->supported_ring_params & ETHTOOL_RING_USE_CQE_SIZE)) { + ret = -EOPNOTSUPP; + NL_SET_ERR_MSG_ATTR(info->extack, + tb[ETHTOOL_A_RINGS_CQE_SIZE], + "setting cqe size not supported"); + goto out_dev; + } + + if (tb[ETHTOOL_A_RINGS_TX_PUSH] && + !(ops->supported_ring_params & ETHTOOL_RING_USE_TX_PUSH)) { + ret = -EOPNOTSUPP; + NL_SET_ERR_MSG_ATTR(info->extack, + tb[ETHTOOL_A_RINGS_TX_PUSH], + "setting tx push not supported"); + goto out_dev; + } + rtnl_lock(); ret = ethnl_ops_begin(dev); if (ret < 0) @@ -165,6 +195,8 @@ int ethnl_set_rings(struct sk_buff *skb, struct genl_info *info) tb[ETHTOOL_A_RINGS_RX_BUF_LEN], &mod); ethnl_update_u32(&kernel_ringparam.cqe_size, tb[ETHTOOL_A_RINGS_CQE_SIZE], &mod); + ethnl_update_u8(&kernel_ringparam.tx_push, + tb[ETHTOOL_A_RINGS_TX_PUSH], &mod); ret = 0; if (!mod) goto out_ops; @@ -187,24 +219,6 @@ int ethnl_set_rings(struct sk_buff *skb, struct genl_info *info) goto out_ops; } - if (kernel_ringparam.rx_buf_len != 0 && - !(ops->supported_ring_params & ETHTOOL_RING_USE_RX_BUF_LEN)) { - ret = -EOPNOTSUPP; - NL_SET_ERR_MSG_ATTR(info->extack, - tb[ETHTOOL_A_RINGS_RX_BUF_LEN], - "setting rx buf len not supported"); - goto out_ops; - } - - if (kernel_ringparam.cqe_size && - !(ops->supported_ring_params & ETHTOOL_RING_USE_CQE_SIZE)) { - ret = -EOPNOTSUPP; - NL_SET_ERR_MSG_ATTR(info->extack, - tb[ETHTOOL_A_RINGS_CQE_SIZE], - "setting cqe size not supported"); - goto out_ops; - } - ret = dev->ethtool_ops->set_ringparam(dev, &ringparam, &kernel_ringparam, info->extack); if (ret < 0) diff --git a/net/ieee802154/socket.c b/net/ieee802154/socket.c index 3b2366a88c3c..718fb77bb372 100644 --- a/net/ieee802154/socket.c +++ b/net/ieee802154/socket.c @@ -308,13 +308,13 @@ out: } static int raw_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, - int noblock, int flags, int *addr_len) + int flags, int *addr_len) { size_t copied = 0; int err = -EOPNOTSUPP; struct sk_buff *skb; - skb = skb_recv_datagram(sk, flags, noblock, &err); + skb = skb_recv_datagram(sk, flags, &err); if (!skb) goto out; @@ -328,7 +328,7 @@ static int raw_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, if (err) goto done; - sock_recv_ts_and_drops(msg, sk, skb); + sock_recv_cmsgs(msg, sk, skb); if (flags & MSG_TRUNC) copied = skb->len; @@ -695,7 +695,7 @@ out: } static int dgram_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, - int noblock, int flags, int *addr_len) + int flags, int *addr_len) { size_t copied = 0; int err = -EOPNOTSUPP; @@ -703,7 +703,7 @@ static int dgram_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, struct dgram_sock *ro = dgram_sk(sk); DECLARE_SOCKADDR(struct sockaddr_ieee802154 *, saddr, msg->msg_name); - skb = skb_recv_datagram(sk, flags, noblock, &err); + skb = skb_recv_datagram(sk, flags, &err); if (!skb) goto out; @@ -718,7 +718,7 @@ static int dgram_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, if (err) goto done; - sock_recv_ts_and_drops(msg, sk, skb); + sock_recv_cmsgs(msg, sk, skb); if (saddr) { /* Clear the implicit padding in struct sockaddr_ieee802154 diff --git a/net/ipv4/Kconfig b/net/ipv4/Kconfig index 87983e70f03f..e983bb0c5012 100644 --- a/net/ipv4/Kconfig +++ b/net/ipv4/Kconfig @@ -321,7 +321,6 @@ config NET_UDP_TUNNEL config NET_FOU tristate "IP: Foo (IP protocols) over UDP" - select XFRM select NET_UDP_TUNNEL help Foo over UDP allows any IP protocol to be directly encapsulated diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c index 72fde2888ad2..93da9f783bec 100644 --- a/net/ipv4/af_inet.c +++ b/net/ipv4/af_inet.c @@ -836,7 +836,7 @@ ssize_t inet_sendpage(struct socket *sock, struct page *page, int offset, EXPORT_SYMBOL(inet_sendpage); INDIRECT_CALLABLE_DECLARE(int udp_recvmsg(struct sock *, struct msghdr *, - size_t, int, int, int *)); + size_t, int, int *)); int inet_recvmsg(struct socket *sock, struct msghdr *msg, size_t size, int flags) { @@ -848,8 +848,7 @@ int inet_recvmsg(struct socket *sock, struct msghdr *msg, size_t size, sock_rps_record_flow(sk); err = INDIRECT_CALL_2(sk->sk_prot->recvmsg, tcp_recvmsg, udp_recvmsg, - sk, msg, size, flags & MSG_DONTWAIT, - flags & ~MSG_DONTWAIT, &addr_len); + sk, msg, size, flags, &addr_len); if (err >= 0) msg->msg_namelen = addr_len; return err; @@ -1234,9 +1233,9 @@ static int inet_sk_reselect_saddr(struct sock *sk) /* Query new route. */ fl4 = &inet->cork.fl.u.ip4; - rt = ip_route_connect(fl4, daddr, 0, RT_CONN_FLAGS(sk), - sk->sk_bound_dev_if, sk->sk_protocol, - inet->inet_sport, inet->inet_dport, sk); + rt = ip_route_connect(fl4, daddr, 0, sk->sk_bound_dev_if, + sk->sk_protocol, inet->inet_sport, + inet->inet_dport, sk); if (IS_ERR(rt)) return PTR_ERR(rt); diff --git a/net/ipv4/arp.c b/net/ipv4/arp.c index 2d0c05ca9c6f..ab4a5601c82a 100644 --- a/net/ipv4/arp.c +++ b/net/ipv4/arp.c @@ -1304,9 +1304,9 @@ static struct packet_type arp_packet_type __read_mostly = { .func = arp_rcv, }; +#ifdef CONFIG_PROC_FS #if IS_ENABLED(CONFIG_AX25) -/* ------------------------------------------------------------------------ */ /* * ax25 -> ASCII conversion */ @@ -1412,16 +1412,13 @@ static void *arp_seq_start(struct seq_file *seq, loff_t *pos) return neigh_seq_start(seq, pos, &arp_tbl, NEIGH_SEQ_SKIP_NOARP); } -/* ------------------------------------------------------------------------ */ - static const struct seq_operations arp_seq_ops = { .start = arp_seq_start, .next = neigh_seq_next, .stop = neigh_seq_stop, .show = arp_seq_show, }; - -/* ------------------------------------------------------------------------ */ +#endif /* CONFIG_PROC_FS */ static int __net_init arp_net_init(struct net *net) { diff --git a/net/ipv4/datagram.c b/net/ipv4/datagram.c index 48f337ccf949..ffd57523331f 100644 --- a/net/ipv4/datagram.c +++ b/net/ipv4/datagram.c @@ -44,10 +44,9 @@ int __ip4_datagram_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len saddr = inet->mc_addr; } fl4 = &inet->cork.fl.u.ip4; - rt = ip_route_connect(fl4, usin->sin_addr.s_addr, saddr, - RT_CONN_FLAGS(sk), oif, - sk->sk_protocol, - inet->inet_sport, usin->sin_port, sk); + rt = ip_route_connect(fl4, usin->sin_addr.s_addr, saddr, oif, + sk->sk_protocol, inet->inet_sport, + usin->sin_port, sk); if (IS_ERR(rt)) { err = PTR_ERR(rt); if (err == -ENETUNREACH) diff --git a/net/ipv4/devinet.c b/net/ipv4/devinet.c index 53a6b14dc50a..89141ba5e9ee 100644 --- a/net/ipv4/devinet.c +++ b/net/ipv4/devinet.c @@ -2573,7 +2573,7 @@ static int __devinet_sysctl_register(struct net *net, char *dev_name, struct devinet_sysctl_table *t; char path[sizeof("net/ipv4/conf/") + IFNAMSIZ]; - t = kmemdup(&devinet_sysctl, sizeof(*t), GFP_KERNEL); + t = kmemdup(&devinet_sysctl, sizeof(*t), GFP_KERNEL_ACCOUNT); if (!t) goto out; diff --git a/net/ipv4/fib_frontend.c b/net/ipv4/fib_frontend.c index af8209f912ab..f361d3d56be2 100644 --- a/net/ipv4/fib_frontend.c +++ b/net/ipv4/fib_frontend.c @@ -1384,7 +1384,7 @@ static void nl_fib_input(struct sk_buff *skb) return; nlh = nlmsg_hdr(skb); - frn = (struct fib_result_nl *) nlmsg_data(nlh); + frn = nlmsg_data(nlh); nl_fib_lookup(net, frn); portid = NETLINK_CB(skb).portid; /* netlink portid */ @@ -1425,7 +1425,7 @@ static void fib_disable_ip(struct net_device *dev, unsigned long event, static int fib_inetaddr_event(struct notifier_block *this, unsigned long event, void *ptr) { - struct in_ifaddr *ifa = (struct in_ifaddr *)ptr; + struct in_ifaddr *ifa = ptr; struct net_device *dev = ifa->ifa_dev->dev; struct net *net = dev_net(dev); diff --git a/net/ipv4/fib_rules.c b/net/ipv4/fib_rules.c index 001fea394bde..513f475c6a53 100644 --- a/net/ipv4/fib_rules.c +++ b/net/ipv4/fib_rules.c @@ -145,7 +145,7 @@ INDIRECT_CALLABLE_SCOPE bool fib4_rule_suppress(struct fib_rule *rule, int flags, struct fib_lookup_arg *arg) { - struct fib_result *result = (struct fib_result *) arg->result; + struct fib_result *result = arg->result; struct net_device *dev = NULL; if (result->fi) { diff --git a/net/ipv4/fib_semantics.c b/net/ipv4/fib_semantics.c index ccb62038f6a4..a57ba23571c9 100644 --- a/net/ipv4/fib_semantics.c +++ b/net/ipv4/fib_semantics.c @@ -524,7 +524,7 @@ void rtmsg_fib(int event, __be32 key, struct fib_alias *fa, fri.tb_id = tb_id; fri.dst = key; fri.dst_len = dst_len; - fri.tos = inet_dscp_to_dsfield(fa->fa_dscp); + fri.dscp = fa->fa_dscp; fri.type = fa->fa_type; fri.offload = READ_ONCE(fa->offload); fri.trap = READ_ONCE(fa->trap); @@ -1781,7 +1781,7 @@ int fib_dump_info(struct sk_buff *skb, u32 portid, u32 seq, int event, rtm->rtm_family = AF_INET; rtm->rtm_dst_len = fri->dst_len; rtm->rtm_src_len = 0; - rtm->rtm_tos = fri->tos; + rtm->rtm_tos = inet_dscp_to_dsfield(fri->dscp); if (tb_id < 256) rtm->rtm_table = tb_id; else diff --git a/net/ipv4/fib_trie.c b/net/ipv4/fib_trie.c index fb0e49c36c2e..2734c3af7e24 100644 --- a/net/ipv4/fib_trie.c +++ b/net/ipv4/fib_trie.c @@ -82,7 +82,7 @@ static int call_fib_entry_notifier(struct notifier_block *nb, .dst = dst, .dst_len = dst_len, .fi = fa->fa_info, - .tos = inet_dscp_to_dsfield(fa->fa_dscp), + .dscp = fa->fa_dscp, .type = fa->fa_type, .tb_id = fa->tb_id, }; @@ -99,7 +99,7 @@ static int call_fib_entry_notifiers(struct net *net, .dst = dst, .dst_len = dst_len, .fi = fa->fa_info, - .tos = inet_dscp_to_dsfield(fa->fa_dscp), + .dscp = fa->fa_dscp, .type = fa->fa_type, .tb_id = fa->tb_id, }; @@ -1032,8 +1032,8 @@ fib_find_matching_alias(struct net *net, const struct fib_rt_info *fri) hlist_for_each_entry_rcu(fa, &l->leaf, fa_list) { if (fa->fa_slen == slen && fa->tb_id == fri->tb_id && - fa->fa_dscp == inet_dsfield_to_dscp(fri->tos) && - fa->fa_info == fri->fi && fa->fa_type == fri->type) + fa->fa_dscp == fri->dscp && fa->fa_info == fri->fi && + fa->fa_type == fri->type) return fa; } @@ -2305,7 +2305,7 @@ static int fn_trie_dump_leaf(struct key_vector *l, struct fib_table *tb, fri.tb_id = tb->tb_id; fri.dst = xkey; fri.dst_len = KEYLENGTH - fa->fa_slen; - fri.tos = inet_dscp_to_dsfield(fa->fa_dscp); + fri.dscp = fa->fa_dscp; fri.type = fa->fa_type; fri.offload = READ_ONCE(fa->offload); fri.trap = READ_ONCE(fa->trap); @@ -2625,7 +2625,7 @@ static void fib_table_print(struct seq_file *seq, struct fib_table *tb) static int fib_triestat_seq_show(struct seq_file *seq, void *v) { - struct net *net = (struct net *)seq->private; + struct net *net = seq->private; unsigned int h; seq_printf(seq, diff --git a/net/ipv4/fou.c b/net/ipv4/fou.c index 0d085cc8d96c..025a33c1b04d 100644 --- a/net/ipv4/fou.c +++ b/net/ipv4/fou.c @@ -16,7 +16,6 @@ #include <net/protocol.h> #include <net/udp.h> #include <net/udp_tunnel.h> -#include <net/xfrm.h> #include <uapi/linux/fou.h> #include <uapi/linux/genetlink.h> diff --git a/net/ipv4/icmp.c b/net/ipv4/icmp.c index 72a375c7f417..efea0e796f06 100644 --- a/net/ipv4/icmp.c +++ b/net/ipv4/icmp.c @@ -186,7 +186,7 @@ EXPORT_SYMBOL(icmp_err_convert); */ struct icmp_control { - bool (*handler)(struct sk_buff *skb); + enum skb_drop_reason (*handler)(struct sk_buff *skb); short error; /* This ICMP is classed as an error message */ }; @@ -342,7 +342,7 @@ void icmp_out_count(struct net *net, unsigned char type) static int icmp_glue_bits(void *from, char *to, int offset, int len, int odd, struct sk_buff *skb) { - struct icmp_bxm *icmp_param = (struct icmp_bxm *)from; + struct icmp_bxm *icmp_param = from; __wsum csum; csum = skb_copy_and_csum_bits(icmp_param->skb, @@ -839,8 +839,9 @@ static bool icmp_tag_validation(int proto) * ICMP_PARAMETERPROB. */ -static bool icmp_unreach(struct sk_buff *skb) +static enum skb_drop_reason icmp_unreach(struct sk_buff *skb) { + enum skb_drop_reason reason = SKB_NOT_DROPPED_YET; const struct iphdr *iph; struct icmphdr *icmph; struct net *net; @@ -860,8 +861,10 @@ static bool icmp_unreach(struct sk_buff *skb) icmph = icmp_hdr(skb); iph = (const struct iphdr *)skb->data; - if (iph->ihl < 5) /* Mangled header, drop. */ + if (iph->ihl < 5) { /* Mangled header, drop. */ + reason = SKB_DROP_REASON_IP_INHDR; goto out_err; + } switch (icmph->type) { case ICMP_DEST_UNREACH: @@ -941,10 +944,10 @@ static bool icmp_unreach(struct sk_buff *skb) icmp_socket_deliver(skb, info); out: - return true; + return reason; out_err: __ICMP_INC_STATS(net, ICMP_MIB_INERRORS); - return false; + return reason ?: SKB_DROP_REASON_NOT_SPECIFIED; } @@ -952,20 +955,20 @@ out_err: * Handle ICMP_REDIRECT. */ -static bool icmp_redirect(struct sk_buff *skb) +static enum skb_drop_reason icmp_redirect(struct sk_buff *skb) { if (skb->len < sizeof(struct iphdr)) { __ICMP_INC_STATS(dev_net(skb->dev), ICMP_MIB_INERRORS); - return false; + return SKB_DROP_REASON_PKT_TOO_SMALL; } if (!pskb_may_pull(skb, sizeof(struct iphdr))) { /* there aught to be a stat */ - return false; + return SKB_DROP_REASON_NOMEM; } icmp_socket_deliver(skb, ntohl(icmp_hdr(skb)->un.gateway)); - return true; + return SKB_NOT_DROPPED_YET; } /* @@ -982,7 +985,7 @@ static bool icmp_redirect(struct sk_buff *skb) * See also WRT handling of options once they are done and working. */ -static bool icmp_echo(struct sk_buff *skb) +static enum skb_drop_reason icmp_echo(struct sk_buff *skb) { struct icmp_bxm icmp_param; struct net *net; @@ -990,7 +993,7 @@ static bool icmp_echo(struct sk_buff *skb) net = dev_net(skb_dst(skb)->dev); /* should there be an ICMP stat for ignored echos? */ if (net->ipv4.sysctl_icmp_echo_ignore_all) - return true; + return SKB_NOT_DROPPED_YET; icmp_param.data.icmph = *icmp_hdr(skb); icmp_param.skb = skb; @@ -1001,10 +1004,10 @@ static bool icmp_echo(struct sk_buff *skb) if (icmp_param.data.icmph.type == ICMP_ECHO) icmp_param.data.icmph.type = ICMP_ECHOREPLY; else if (!icmp_build_probe(skb, &icmp_param.data.icmph)) - return true; + return SKB_NOT_DROPPED_YET; icmp_reply(&icmp_param, skb); - return true; + return SKB_NOT_DROPPED_YET; } /* Helper for icmp_echo and icmpv6_echo_reply. @@ -1122,7 +1125,7 @@ EXPORT_SYMBOL_GPL(icmp_build_probe); * MUST be accurate to a few minutes. * MUST be updated at least at 15Hz. */ -static bool icmp_timestamp(struct sk_buff *skb) +static enum skb_drop_reason icmp_timestamp(struct sk_buff *skb) { struct icmp_bxm icmp_param; /* @@ -1147,17 +1150,17 @@ static bool icmp_timestamp(struct sk_buff *skb) icmp_param.data_len = 0; icmp_param.head_len = sizeof(struct icmphdr) + 12; icmp_reply(&icmp_param, skb); - return true; + return SKB_NOT_DROPPED_YET; out_err: __ICMP_INC_STATS(dev_net(skb_dst(skb)->dev), ICMP_MIB_INERRORS); - return false; + return SKB_DROP_REASON_PKT_TOO_SMALL; } -static bool icmp_discard(struct sk_buff *skb) +static enum skb_drop_reason icmp_discard(struct sk_buff *skb) { /* pretend it was a success */ - return true; + return SKB_NOT_DROPPED_YET; } /* @@ -1165,18 +1168,20 @@ static bool icmp_discard(struct sk_buff *skb) */ int icmp_rcv(struct sk_buff *skb) { - struct icmphdr *icmph; + enum skb_drop_reason reason = SKB_DROP_REASON_NOT_SPECIFIED; struct rtable *rt = skb_rtable(skb); struct net *net = dev_net(rt->dst.dev); - bool success; + struct icmphdr *icmph; if (!xfrm4_policy_check(NULL, XFRM_POLICY_IN, skb)) { struct sec_path *sp = skb_sec_path(skb); int nh; if (!(sp && sp->xvec[sp->len - 1]->props.flags & - XFRM_STATE_ICMP)) + XFRM_STATE_ICMP)) { + reason = SKB_DROP_REASON_XFRM_POLICY; goto drop; + } if (!pskb_may_pull(skb, sizeof(*icmph) + sizeof(struct iphdr))) goto drop; @@ -1184,8 +1189,11 @@ int icmp_rcv(struct sk_buff *skb) nh = skb_network_offset(skb); skb_set_network_header(skb, sizeof(*icmph)); - if (!xfrm4_policy_check_reverse(NULL, XFRM_POLICY_IN, skb)) + if (!xfrm4_policy_check_reverse(NULL, XFRM_POLICY_IN, + skb)) { + reason = SKB_DROP_REASON_XFRM_POLICY; goto drop; + } skb_set_network_header(skb, nh); } @@ -1207,13 +1215,13 @@ int icmp_rcv(struct sk_buff *skb) /* We can't use icmp_pointers[].handler() because it is an array of * size NR_ICMP_TYPES + 1 (19 elements) and PROBE has code 42. */ - success = icmp_echo(skb); - goto success_check; + reason = icmp_echo(skb); + goto reason_check; } if (icmph->type == ICMP_EXT_ECHOREPLY) { - success = ping_rcv(skb); - goto success_check; + reason = ping_rcv(skb); + goto reason_check; } /* @@ -1222,8 +1230,10 @@ int icmp_rcv(struct sk_buff *skb) * RFC 1122: 3.2.2 Unknown ICMP messages types MUST be silently * discarded. */ - if (icmph->type > NR_ICMP_TYPES) + if (icmph->type > NR_ICMP_TYPES) { + reason = SKB_DROP_REASON_UNHANDLED_PROTO; goto error; + } /* * Parse the ICMP message @@ -1239,27 +1249,30 @@ int icmp_rcv(struct sk_buff *skb) if ((icmph->type == ICMP_ECHO || icmph->type == ICMP_TIMESTAMP) && net->ipv4.sysctl_icmp_echo_ignore_broadcasts) { + reason = SKB_DROP_REASON_INVALID_PROTO; goto error; } if (icmph->type != ICMP_ECHO && icmph->type != ICMP_TIMESTAMP && icmph->type != ICMP_ADDRESS && icmph->type != ICMP_ADDRESSREPLY) { + reason = SKB_DROP_REASON_INVALID_PROTO; goto error; } } - success = icmp_pointers[icmph->type].handler(skb); -success_check: - if (success) { + reason = icmp_pointers[icmph->type].handler(skb); +reason_check: + if (!reason) { consume_skb(skb); return NET_RX_SUCCESS; } drop: - kfree_skb(skb); + kfree_skb_reason(skb, reason); return NET_RX_DROP; csum_error: + reason = SKB_DROP_REASON_ICMP_CSUM; __ICMP_INC_STATS(net, ICMP_MIB_CSUMERRORS); error: __ICMP_INC_STATS(net, ICMP_MIB_INERRORS); diff --git a/net/ipv4/igmp.c b/net/ipv4/igmp.c index 1d9e6d5e9a76..b65d074d9620 100644 --- a/net/ipv4/igmp.c +++ b/net/ipv4/igmp.c @@ -2839,7 +2839,7 @@ static int igmp_mc_seq_show(struct seq_file *seq, void *v) seq_puts(seq, "Idx\tDevice : Count Querier\tGroup Users Timer\tReporter\n"); else { - struct ip_mc_list *im = (struct ip_mc_list *)v; + struct ip_mc_list *im = v; struct igmp_mc_iter_state *state = igmp_mc_seq_private(seq); char *querier; long delta; @@ -2983,7 +2983,7 @@ static void igmp_mcf_seq_stop(struct seq_file *seq, void *v) static int igmp_mcf_seq_show(struct seq_file *seq, void *v) { - struct ip_sf_list *psf = (struct ip_sf_list *)v; + struct ip_sf_list *psf = v; struct igmp_mcf_iter_state *state = igmp_mcf_seq_private(seq); if (v == SEQ_START_TOKEN) { diff --git a/net/ipv4/inet_fragment.c b/net/ipv4/inet_fragment.c index 63948f6aeca0..c9f9ac5013a7 100644 --- a/net/ipv4/inet_fragment.c +++ b/net/ipv4/inet_fragment.c @@ -510,7 +510,7 @@ EXPORT_SYMBOL(inet_frag_reasm_prepare); void inet_frag_reasm_finish(struct inet_frag_queue *q, struct sk_buff *head, void *reasm_data, bool try_coalesce) { - struct sk_buff **nextp = (struct sk_buff **)reasm_data; + struct sk_buff **nextp = reasm_data; struct rb_node *rbn; struct sk_buff *fp; int sum_truesize; diff --git a/net/ipv4/ip_forward.c b/net/ipv4/ip_forward.c index 92ba3350274b..e3aa436a1bdf 100644 --- a/net/ipv4/ip_forward.c +++ b/net/ipv4/ip_forward.c @@ -90,6 +90,7 @@ int ip_forward(struct sk_buff *skb) struct rtable *rt; /* Route we use */ struct ip_options *opt = &(IPCB(skb)->opt); struct net *net; + SKB_DR(reason); /* that should never happen */ if (skb->pkt_type != PACKET_HOST) @@ -101,8 +102,10 @@ int ip_forward(struct sk_buff *skb) if (skb_warn_if_lro(skb)) goto drop; - if (!xfrm4_policy_check(NULL, XFRM_POLICY_FWD, skb)) + if (!xfrm4_policy_check(NULL, XFRM_POLICY_FWD, skb)) { + SKB_DR_SET(reason, XFRM_POLICY); goto drop; + } if (IPCB(skb)->opt.router_alert && ip_call_ra_chain(skb)) return NET_RX_SUCCESS; @@ -118,8 +121,10 @@ int ip_forward(struct sk_buff *skb) if (ip_hdr(skb)->ttl <= 1) goto too_many_hops; - if (!xfrm4_route_forward(skb)) + if (!xfrm4_route_forward(skb)) { + SKB_DR_SET(reason, XFRM_POLICY); goto drop; + } rt = skb_rtable(skb); @@ -132,6 +137,7 @@ int ip_forward(struct sk_buff *skb) IP_INC_STATS(net, IPSTATS_MIB_FRAGFAILS); icmp_send(skb, ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED, htonl(mtu)); + SKB_DR_SET(reason, PKT_TOO_BIG); goto drop; } @@ -169,7 +175,8 @@ too_many_hops: /* Tell the sender its packet died... */ __IP_INC_STATS(net, IPSTATS_MIB_INHDRERRORS); icmp_send(skb, ICMP_TIME_EXCEEDED, ICMP_EXC_TTL, 0); + SKB_DR_SET(reason, IP_INHDR); drop: - kfree_skb(skb); + kfree_skb_reason(skb, reason); return NET_RX_DROP; } diff --git a/net/ipv4/ip_gre.c b/net/ipv4/ip_gre.c index aacee9dd771b..7e474a85deaf 100644 --- a/net/ipv4/ip_gre.c +++ b/net/ipv4/ip_gre.c @@ -748,6 +748,7 @@ free_skb: static void ipgre_link_update(struct net_device *dev, bool set_mtu) { struct ip_tunnel *tunnel = netdev_priv(dev); + __be16 flags; int len; len = tunnel->tun_hlen; @@ -763,19 +764,15 @@ static void ipgre_link_update(struct net_device *dev, bool set_mtu) if (set_mtu) dev->mtu = max_t(int, dev->mtu - len, 68); - if (!(tunnel->parms.o_flags & TUNNEL_SEQ)) { - if (!(tunnel->parms.o_flags & TUNNEL_CSUM) || - tunnel->encap.type == TUNNEL_ENCAP_NONE) { - dev->features |= NETIF_F_GSO_SOFTWARE; - dev->hw_features |= NETIF_F_GSO_SOFTWARE; - } else { - dev->features &= ~NETIF_F_GSO_SOFTWARE; - dev->hw_features &= ~NETIF_F_GSO_SOFTWARE; - } - dev->features |= NETIF_F_LLTX; - } else { + flags = tunnel->parms.o_flags; + + if (flags & TUNNEL_SEQ || + (flags & TUNNEL_CSUM && tunnel->encap.type != TUNNEL_ENCAP_NONE)) { + dev->features &= ~NETIF_F_GSO_SOFTWARE; dev->hw_features &= ~NETIF_F_GSO_SOFTWARE; - dev->features &= ~(NETIF_F_LLTX | NETIF_F_GSO_SOFTWARE); + } else { + dev->features |= NETIF_F_GSO_SOFTWARE; + dev->hw_features |= NETIF_F_GSO_SOFTWARE; } } @@ -949,6 +946,7 @@ static void ipgre_tunnel_setup(struct net_device *dev) static void __gre_tunnel_init(struct net_device *dev) { struct ip_tunnel *tunnel; + __be16 flags; tunnel = netdev_priv(dev); tunnel->tun_hlen = gre_calc_hlen(tunnel->parms.o_flags); @@ -957,25 +955,21 @@ static void __gre_tunnel_init(struct net_device *dev) tunnel->hlen = tunnel->tun_hlen + tunnel->encap_hlen; dev->needed_headroom = tunnel->hlen + sizeof(tunnel->parms.iph); - dev->features |= GRE_FEATURES; + dev->features |= GRE_FEATURES | NETIF_F_LLTX; dev->hw_features |= GRE_FEATURES; - if (!(tunnel->parms.o_flags & TUNNEL_SEQ)) { - /* TCP offload with GRE SEQ is not supported, nor - * can we support 2 levels of outer headers requiring - * an update. - */ - if (!(tunnel->parms.o_flags & TUNNEL_CSUM) || - (tunnel->encap.type == TUNNEL_ENCAP_NONE)) { - dev->features |= NETIF_F_GSO_SOFTWARE; - dev->hw_features |= NETIF_F_GSO_SOFTWARE; - } + flags = tunnel->parms.o_flags; - /* Can use a lockless transmit, unless we generate - * output sequences - */ - dev->features |= NETIF_F_LLTX; - } + /* TCP offload with GRE SEQ is not supported, nor can we support 2 + * levels of outer headers requiring an update. + */ + if (flags & TUNNEL_SEQ) + return; + if (flags & TUNNEL_CSUM && tunnel->encap.type != TUNNEL_ENCAP_NONE) + return; + + dev->features |= NETIF_F_GSO_SOFTWARE; + dev->hw_features |= NETIF_F_GSO_SOFTWARE; } static int ipgre_tunnel_init(struct net_device *dev) diff --git a/net/ipv4/ip_input.c b/net/ipv4/ip_input.c index 95f7bb052784..b1165f717cd1 100644 --- a/net/ipv4/ip_input.c +++ b/net/ipv4/ip_input.c @@ -451,6 +451,7 @@ static struct sk_buff *ip_rcv_core(struct sk_buff *skb, struct net *net) * that it receives, do not try to analyse it. */ if (skb->pkt_type == PACKET_OTHERHOST) { + dev_core_stats_rx_otherhost_dropped_inc(skb->dev); drop_reason = SKB_DROP_REASON_OTHERHOST; goto drop; } diff --git a/net/ipv4/ipmr.c b/net/ipv4/ipmr.c index c860519d57ee..13e6329784fb 100644 --- a/net/ipv4/ipmr.c +++ b/net/ipv4/ipmr.c @@ -356,7 +356,7 @@ static inline int ipmr_hash_cmp(struct rhashtable_compare_arg *arg, const void *ptr) { const struct mfc_cache_cmp_arg *cmparg = arg->key; - struct mfc_cache *c = (struct mfc_cache *)ptr; + const struct mfc_cache *c = ptr; return cmparg->mfc_mcastgrp != c->mfc_mcastgrp || cmparg->mfc_origin != c->mfc_origin; diff --git a/net/ipv4/netfilter/nft_fib_ipv4.c b/net/ipv4/netfilter/nft_fib_ipv4.c index 4151eb1262dd..b75cac69bd7e 100644 --- a/net/ipv4/netfilter/nft_fib_ipv4.c +++ b/net/ipv4/netfilter/nft_fib_ipv4.c @@ -112,6 +112,10 @@ void nft_fib4_eval(const struct nft_expr *expr, struct nft_regs *regs, fl4.daddr = iph->daddr; fl4.saddr = get_saddr(iph->saddr); } else { + if (nft_hook(pkt) == NF_INET_FORWARD && + priv->flags & NFTA_FIB_F_IIF) + fl4.flowi4_iif = nft_out(pkt)->ifindex; + fl4.daddr = iph->saddr; fl4.saddr = get_saddr(iph->daddr); } diff --git a/net/ipv4/ping.c b/net/ipv4/ping.c index 3ee947557b88..5f8cad2978b3 100644 --- a/net/ipv4/ping.c +++ b/net/ipv4/ping.c @@ -590,7 +590,7 @@ EXPORT_SYMBOL_GPL(ping_err); int ping_getfrag(void *from, char *to, int offset, int fraglen, int odd, struct sk_buff *skb) { - struct pingfakehdr *pfh = (struct pingfakehdr *)from; + struct pingfakehdr *pfh = from; if (offset == 0) { fraglen -= sizeof(struct icmphdr); @@ -844,8 +844,8 @@ do_confirm: goto out; } -int ping_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, int noblock, - int flags, int *addr_len) +int ping_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, int flags, + int *addr_len) { struct inet_sock *isk = inet_sk(sk); int family = sk->sk_family; @@ -861,7 +861,7 @@ int ping_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, int noblock, if (flags & MSG_ERRQUEUE) return inet_recv_error(sk, msg, len, addr_len); - skb = skb_recv_datagram(sk, flags, noblock, &err); + skb = skb_recv_datagram(sk, flags, &err); if (!skb) goto out; @@ -934,16 +934,24 @@ out: } EXPORT_SYMBOL_GPL(ping_recvmsg); -int ping_queue_rcv_skb(struct sock *sk, struct sk_buff *skb) +static enum skb_drop_reason __ping_queue_rcv_skb(struct sock *sk, + struct sk_buff *skb) { + enum skb_drop_reason reason; + pr_debug("ping_queue_rcv_skb(sk=%p,sk->num=%d,skb=%p)\n", inet_sk(sk), inet_sk(sk)->inet_num, skb); - if (sock_queue_rcv_skb(sk, skb) < 0) { - kfree_skb(skb); + if (sock_queue_rcv_skb_reason(sk, skb, &reason) < 0) { + kfree_skb_reason(skb, reason); pr_debug("ping_queue_rcv_skb -> failed\n"); - return -1; + return reason; } - return 0; + return SKB_NOT_DROPPED_YET; +} + +int ping_queue_rcv_skb(struct sock *sk, struct sk_buff *skb) +{ + return __ping_queue_rcv_skb(sk, skb) ? -1 : 0; } EXPORT_SYMBOL_GPL(ping_queue_rcv_skb); @@ -952,12 +960,12 @@ EXPORT_SYMBOL_GPL(ping_queue_rcv_skb); * All we need to do is get the socket. */ -bool ping_rcv(struct sk_buff *skb) +enum skb_drop_reason ping_rcv(struct sk_buff *skb) { + enum skb_drop_reason reason = SKB_DROP_REASON_NO_SOCKET; struct sock *sk; struct net *net = dev_net(skb->dev); struct icmphdr *icmph = icmp_hdr(skb); - bool rc = false; /* We assume the packet has already been checked by icmp_rcv */ @@ -972,15 +980,17 @@ bool ping_rcv(struct sk_buff *skb) struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC); pr_debug("rcv on socket %p\n", sk); - if (skb2 && !ping_queue_rcv_skb(sk, skb2)) - rc = true; + if (skb2) + reason = __ping_queue_rcv_skb(sk, skb2); + else + reason = SKB_DROP_REASON_NOMEM; sock_put(sk); } - if (!rc) + if (reason) pr_debug("no socket, dropping\n"); - return rc; + return reason; } EXPORT_SYMBOL_GPL(ping_rcv); diff --git a/net/ipv4/raw.c b/net/ipv4/raw.c index 9f97b9cbf7b3..bbd717805b10 100644 --- a/net/ipv4/raw.c +++ b/net/ipv4/raw.c @@ -753,7 +753,7 @@ out: */ static int raw_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, - int noblock, int flags, int *addr_len) + int flags, int *addr_len) { struct inet_sock *inet = inet_sk(sk); size_t copied = 0; @@ -769,7 +769,7 @@ static int raw_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, goto out; } - skb = skb_recv_datagram(sk, flags, noblock, &err); + skb = skb_recv_datagram(sk, flags, &err); if (!skb) goto out; @@ -783,7 +783,7 @@ static int raw_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, if (err) goto done; - sock_recv_ts_and_drops(msg, sk, skb); + sock_recv_cmsgs(msg, sk, skb); /* Copy the address. */ if (sin) { diff --git a/net/ipv4/route.c b/net/ipv4/route.c index 98c6f3429593..ffbe2e4f8c89 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c @@ -503,28 +503,29 @@ static void ip_rt_fix_tos(struct flowi4 *fl4) __u8 tos = RT_FL_TOS(fl4); fl4->flowi4_tos = tos & IPTOS_RT_MASK; - fl4->flowi4_scope = tos & RTO_ONLINK ? - RT_SCOPE_LINK : RT_SCOPE_UNIVERSE; + if (tos & RTO_ONLINK) + fl4->flowi4_scope = RT_SCOPE_LINK; } static void __build_flow_key(const struct net *net, struct flowi4 *fl4, - const struct sock *sk, - const struct iphdr *iph, - int oif, u8 tos, - u8 prot, u32 mark, int flow_flags) + const struct sock *sk, const struct iphdr *iph, + int oif, __u8 tos, u8 prot, u32 mark, + int flow_flags) { + __u8 scope = RT_SCOPE_UNIVERSE; + if (sk) { const struct inet_sock *inet = inet_sk(sk); oif = sk->sk_bound_dev_if; mark = sk->sk_mark; - tos = RT_CONN_FLAGS(sk); + tos = ip_sock_rt_tos(sk); + scope = ip_sock_rt_scope(sk); prot = inet->hdrincl ? IPPROTO_RAW : sk->sk_protocol; } - flowi4_init_output(fl4, oif, mark, tos, - RT_SCOPE_UNIVERSE, prot, - flow_flags, - iph->daddr, iph->saddr, 0, 0, + + flowi4_init_output(fl4, oif, mark, tos & IPTOS_RT_MASK, scope, + prot, flow_flags, iph->daddr, iph->saddr, 0, 0, sock_net_uid(net, sk)); } @@ -534,9 +535,9 @@ static void build_skb_flow_key(struct flowi4 *fl4, const struct sk_buff *skb, const struct net *net = dev_net(skb->dev); const struct iphdr *iph = ip_hdr(skb); int oif = skb->dev->ifindex; - u8 tos = RT_TOS(iph->tos); u8 prot = iph->protocol; u32 mark = skb->mark; + __u8 tos = iph->tos; __build_flow_key(net, fl4, sk, iph, oif, tos, prot, mark, 0); } @@ -552,7 +553,8 @@ static void build_sk_flow_key(struct flowi4 *fl4, const struct sock *sk) if (inet_opt && inet_opt->opt.srr) daddr = inet_opt->opt.faddr; flowi4_init_output(fl4, sk->sk_bound_dev_if, sk->sk_mark, - RT_CONN_FLAGS(sk), RT_SCOPE_UNIVERSE, + ip_sock_rt_tos(sk) & IPTOS_RT_MASK, + ip_sock_rt_scope(sk), inet->hdrincl ? IPPROTO_RAW : sk->sk_protocol, inet_sk_flowi_flags(sk), daddr, inet->inet_saddr, 0, 0, sk->sk_uid); @@ -825,14 +827,13 @@ static void ip_do_redirect(struct dst_entry *dst, struct sock *sk, struct sk_buf const struct iphdr *iph = (const struct iphdr *) skb->data; struct net *net = dev_net(skb->dev); int oif = skb->dev->ifindex; - u8 tos = RT_TOS(iph->tos); u8 prot = iph->protocol; u32 mark = skb->mark; + __u8 tos = iph->tos; rt = (struct rtable *) dst; __build_flow_key(net, &fl4, sk, iph, oif, tos, prot, mark, 0); - ip_rt_fix_tos(&fl4); __ip_do_redirect(rt, skb, &fl4, true); } @@ -945,6 +946,7 @@ static int ip_error(struct sk_buff *skb) struct inet_peer *peer; unsigned long now; struct net *net; + SKB_DR(reason); bool send; int code; @@ -964,10 +966,12 @@ static int ip_error(struct sk_buff *skb) if (!IN_DEV_FORWARD(in_dev)) { switch (rt->dst.error) { case EHOSTUNREACH: + SKB_DR_SET(reason, IP_INADDRERRORS); __IP_INC_STATS(net, IPSTATS_MIB_INADDRERRORS); break; case ENETUNREACH: + SKB_DR_SET(reason, IP_INNOROUTES); __IP_INC_STATS(net, IPSTATS_MIB_INNOROUTES); break; } @@ -983,6 +987,7 @@ static int ip_error(struct sk_buff *skb) break; case ENETUNREACH: code = ICMP_NET_UNREACH; + SKB_DR_SET(reason, IP_INNOROUTES); __IP_INC_STATS(net, IPSTATS_MIB_INNOROUTES); break; case EACCES: @@ -1009,7 +1014,7 @@ static int ip_error(struct sk_buff *skb) if (send) icmp_send(skb, ICMP_DEST_UNREACH, code, 0); -out: kfree_skb(skb); +out: kfree_skb_reason(skb, reason); return 0; } @@ -1057,7 +1062,6 @@ static void ip_rt_update_pmtu(struct dst_entry *dst, struct sock *sk, struct flowi4 fl4; ip_rt_build_flow_key(&fl4, sk, skb); - ip_rt_fix_tos(&fl4); /* Don't make lookup fail for bridged encapsulations */ if (skb && netif_is_any_bridge_port(skb->dev)) @@ -1074,8 +1078,8 @@ void ipv4_update_pmtu(struct sk_buff *skb, struct net *net, u32 mtu, struct rtable *rt; u32 mark = IP4_REPLY_MARK(net, skb->mark); - __build_flow_key(net, &fl4, NULL, iph, oif, - RT_TOS(iph->tos), protocol, mark, 0); + __build_flow_key(net, &fl4, NULL, iph, oif, iph->tos, protocol, mark, + 0); rt = __ip_route_output_key(net, &fl4); if (!IS_ERR(rt)) { __ip_rt_update_pmtu(rt, &fl4, mtu); @@ -1132,8 +1136,6 @@ void ipv4_sk_update_pmtu(struct sk_buff *skb, struct sock *sk, u32 mtu) goto out; new = true; - } else { - ip_rt_fix_tos(&fl4); } __ip_rt_update_pmtu((struct rtable *)xfrm_dst_path(&rt->dst), &fl4, mtu); @@ -1165,8 +1167,7 @@ void ipv4_redirect(struct sk_buff *skb, struct net *net, struct flowi4 fl4; struct rtable *rt; - __build_flow_key(net, &fl4, NULL, iph, oif, - RT_TOS(iph->tos), protocol, 0, 0); + __build_flow_key(net, &fl4, NULL, iph, oif, iph->tos, protocol, 0, 0); rt = __ip_route_output_key(net, &fl4); if (!IS_ERR(rt)) { __ip_do_redirect(rt, skb, &fl4, false); @@ -3394,7 +3395,7 @@ static int inet_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr *nlh, fri.tb_id = table_id; fri.dst = res.prefix; fri.dst_len = res.prefixlen; - fri.tos = fl4.flowi4_tos; + fri.dscp = inet_dsfield_to_dscp(fl4.flowi4_tos); fri.type = rt->rt_type; fri.offload = 0; fri.trap = 0; @@ -3407,7 +3408,7 @@ static int inet_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr *nlh, if (fa->fa_slen == slen && fa->tb_id == fri.tb_id && - fa->fa_dscp == inet_dsfield_to_dscp(fri.tos) && + fa->fa_dscp == fri.dscp && fa->fa_info == res.fi && fa->fa_type == fri.type) { fri.offload = READ_ONCE(fa->offload); diff --git a/net/ipv4/sysctl_net_ipv4.c b/net/ipv4/sysctl_net_ipv4.c index ad80d180b60b..cd448cdd3b38 100644 --- a/net/ipv4/sysctl_net_ipv4.c +++ b/net/ipv4/sysctl_net_ipv4.c @@ -20,10 +20,6 @@ #include <net/protocol.h> #include <net/netevent.h> -static int two = 2; -static int three __maybe_unused = 3; -static int four = 4; -static int thousand = 1000; static int tcp_retr1_max = 255; static int ip_local_port_range_min[] = { 1, 1 }; static int ip_local_port_range_max[] = { 65535, 65535 }; @@ -1006,7 +1002,7 @@ static struct ctl_table ipv4_net_table[] = { .mode = 0644, .proc_handler = proc_dou8vec_minmax, .extra1 = SYSCTL_ZERO, - .extra2 = &two, + .extra2 = SYSCTL_TWO, }, { .procname = "tcp_max_syn_backlog", @@ -1059,7 +1055,7 @@ static struct ctl_table ipv4_net_table[] = { .mode = 0644, .proc_handler = proc_fib_multipath_hash_policy, .extra1 = SYSCTL_ZERO, - .extra2 = &three, + .extra2 = SYSCTL_THREE, }, { .procname = "fib_multipath_hash_fields", @@ -1117,7 +1113,7 @@ static struct ctl_table ipv4_net_table[] = { .mode = 0644, .proc_handler = proc_dou8vec_minmax, .extra1 = SYSCTL_ZERO, - .extra2 = &four, + .extra2 = SYSCTL_FOUR, }, { .procname = "tcp_recovery", @@ -1310,7 +1306,7 @@ static struct ctl_table ipv4_net_table[] = { .mode = 0644, .proc_handler = proc_dointvec_minmax, .extra1 = SYSCTL_ZERO, - .extra2 = &thousand, + .extra2 = SYSCTL_ONE_THOUSAND, }, { .procname = "tcp_pacing_ca_ratio", @@ -1319,7 +1315,7 @@ static struct ctl_table ipv4_net_table[] = { .mode = 0644, .proc_handler = proc_dointvec_minmax, .extra1 = SYSCTL_ZERO, - .extra2 = &thousand, + .extra2 = SYSCTL_ONE_THOUSAND, }, { .procname = "tcp_wmem", @@ -1391,7 +1387,7 @@ static struct ctl_table ipv4_net_table[] = { .mode = 0644, .proc_handler = proc_dou8vec_minmax, .extra1 = SYSCTL_ZERO, - .extra2 = &two, + .extra2 = SYSCTL_TWO, }, { } }; diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index cf18fbcbf123..b44fde435bd1 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -429,7 +429,7 @@ void tcp_init_sock(struct sock *sk) * algorithms that we must have the following bandaid to talk * efficiently to them. -DaveM */ - tp->snd_cwnd = TCP_INIT_CWND; + tcp_snd_cwnd_set(tp, TCP_INIT_CWND); /* There's a bubble in the pipe until at least the first ACK. */ tp->app_limited = ~0U; @@ -843,7 +843,6 @@ ssize_t tcp_splice_read(struct socket *sock, loff_t *ppos, } release_sock(sk); - sk_defer_free_flush(sk); if (spliced) return spliced; @@ -1589,20 +1588,6 @@ void tcp_cleanup_rbuf(struct sock *sk, int copied) tcp_send_ack(sk); } -void __sk_defer_free_flush(struct sock *sk) -{ - struct llist_node *head; - struct sk_buff *skb, *n; - - head = llist_del_all(&sk->defer_list); - llist_for_each_entry_safe(skb, n, head, ll_node) { - prefetch(n); - skb_mark_not_on_list(skb); - __kfree_skb(skb); - } -} -EXPORT_SYMBOL(__sk_defer_free_flush); - static void tcp_eat_recv_skb(struct sock *sk, struct sk_buff *skb) { __skb_unlink(skb, &sk->sk_receive_queue); @@ -1610,11 +1595,7 @@ static void tcp_eat_recv_skb(struct sock *sk, struct sk_buff *skb) sock_rfree(skb); skb->destructor = NULL; skb->sk = NULL; - if (!skb_queue_empty(&sk->sk_receive_queue) || - !llist_empty(&sk->defer_list)) { - llist_add(&skb->ll_node, &sk->defer_list); - return; - } + return skb_attempt_defer_free(skb); } __kfree_skb(skb); } @@ -1877,8 +1858,7 @@ static void tcp_zerocopy_set_hint_for_skb(struct sock *sk, } static int tcp_recvmsg_locked(struct sock *sk, struct msghdr *msg, size_t len, - int nonblock, int flags, - struct scm_timestamping_internal *tss, + int flags, struct scm_timestamping_internal *tss, int *cmsg_flags); static int receive_fallback_to_copy(struct sock *sk, struct tcp_zerocopy_receive *zc, int inq, @@ -1900,7 +1880,7 @@ static int receive_fallback_to_copy(struct sock *sk, if (err) return err; - err = tcp_recvmsg_locked(sk, &msg, inq, /*nonblock=*/1, /*flags=*/0, + err = tcp_recvmsg_locked(sk, &msg, inq, MSG_DONTWAIT, tss, &zc->msg_flags); if (err < 0) return err; @@ -2316,8 +2296,7 @@ static int tcp_inq_hint(struct sock *sk) */ static int tcp_recvmsg_locked(struct sock *sk, struct msghdr *msg, size_t len, - int nonblock, int flags, - struct scm_timestamping_internal *tss, + int flags, struct scm_timestamping_internal *tss, int *cmsg_flags) { struct tcp_sock *tp = tcp_sk(sk); @@ -2335,9 +2314,11 @@ static int tcp_recvmsg_locked(struct sock *sk, struct msghdr *msg, size_t len, if (sk->sk_state == TCP_LISTEN) goto out; - if (tp->recvmsg_inq) + if (tp->recvmsg_inq) { *cmsg_flags = TCP_CMSG_INQ; - timeo = sock_rcvtimeo(sk, nonblock); + msg->msg_get_inq = 1; + } + timeo = sock_rcvtimeo(sk, flags & MSG_DONTWAIT); /* Urgent data needs to be handled specially. */ if (flags & MSG_OOB) @@ -2455,7 +2436,6 @@ static int tcp_recvmsg_locked(struct sock *sk, struct msghdr *msg, size_t len, __sk_flush_backlog(sk); } else { tcp_cleanup_rbuf(sk, copied); - sk_defer_free_flush(sk); sk_wait_data(sk, &timeo, last); } @@ -2556,10 +2536,10 @@ recv_sndq: goto out; } -int tcp_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, int nonblock, - int flags, int *addr_len) +int tcp_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, int flags, + int *addr_len) { - int cmsg_flags = 0, ret, inq; + int cmsg_flags = 0, ret; struct scm_timestamping_internal tss; if (unlikely(flags & MSG_ERRQUEUE)) @@ -2568,20 +2548,20 @@ int tcp_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, int nonblock, if (sk_can_busy_loop(sk) && skb_queue_empty_lockless(&sk->sk_receive_queue) && sk->sk_state == TCP_ESTABLISHED) - sk_busy_loop(sk, nonblock); + sk_busy_loop(sk, flags & MSG_DONTWAIT); lock_sock(sk); - ret = tcp_recvmsg_locked(sk, msg, len, nonblock, flags, &tss, - &cmsg_flags); + ret = tcp_recvmsg_locked(sk, msg, len, flags, &tss, &cmsg_flags); release_sock(sk); - sk_defer_free_flush(sk); - if (cmsg_flags && ret >= 0) { + if ((cmsg_flags || msg->msg_get_inq) && ret >= 0) { if (cmsg_flags & TCP_CMSG_TS) tcp_recv_timestamp(msg, sk, &tss); - if (cmsg_flags & TCP_CMSG_INQ) { - inq = tcp_inq_hint(sk); - put_cmsg(msg, SOL_TCP, TCP_CM_INQ, sizeof(inq), &inq); + if (msg->msg_get_inq) { + msg->msg_inq = tcp_inq_hint(sk); + if (cmsg_flags & TCP_CMSG_INQ) + put_cmsg(msg, SOL_TCP, TCP_CM_INQ, + sizeof(msg->msg_inq), &msg->msg_inq); } } return ret; @@ -3033,7 +3013,7 @@ int tcp_disconnect(struct sock *sk, int flags) icsk->icsk_rto_min = TCP_RTO_MIN; icsk->icsk_delack_max = TCP_DELACK_MAX; tp->snd_ssthresh = TCP_INFINITE_SSTHRESH; - tp->snd_cwnd = TCP_INIT_CWND; + tcp_snd_cwnd_set(tp, TCP_INIT_CWND); tp->snd_cwnd_cnt = 0; tp->window_clamp = 0; tp->delivered = 0; @@ -3099,7 +3079,6 @@ int tcp_disconnect(struct sock *sk, int flags) sk->sk_frag.page = NULL; sk->sk_frag.offset = 0; } - sk_defer_free_flush(sk); sk_error_report(sk); return 0; } @@ -3744,7 +3723,7 @@ void tcp_get_info(struct sock *sk, struct tcp_info *info) info->tcpi_max_pacing_rate = rate64; info->tcpi_reordering = tp->reordering; - info->tcpi_snd_cwnd = tp->snd_cwnd; + info->tcpi_snd_cwnd = tcp_snd_cwnd(tp); if (info->tcpi_state == TCP_LISTEN) { /* listeners aliased fields : @@ -3915,7 +3894,7 @@ struct sk_buff *tcp_get_timestamping_opt_stats(const struct sock *sk, rate64 = tcp_compute_delivery_rate(tp); nla_put_u64_64bit(stats, TCP_NLA_DELIVERY_RATE, rate64, TCP_NLA_PAD); - nla_put_u32(stats, TCP_NLA_SND_CWND, tp->snd_cwnd); + nla_put_u32(stats, TCP_NLA_SND_CWND, tcp_snd_cwnd(tp)); nla_put_u32(stats, TCP_NLA_REORDERING, tp->reordering); nla_put_u32(stats, TCP_NLA_MIN_RTT, tcp_min_rtt(tp)); @@ -4228,7 +4207,6 @@ static int do_tcp_getsockopt(struct sock *sk, int level, err = BPF_CGROUP_RUN_PROG_GETSOCKOPT_KERN(sk, level, optname, &zc, &len, err); release_sock(sk); - sk_defer_free_flush(sk); if (len >= offsetofend(struct tcp_zerocopy_receive, msg_flags)) goto zerocopy_rcv_cmsg; switch (len) { diff --git a/net/ipv4/tcp_bbr.c b/net/ipv4/tcp_bbr.c index 02e8626ccb27..c7d30a3bbd81 100644 --- a/net/ipv4/tcp_bbr.c +++ b/net/ipv4/tcp_bbr.c @@ -276,7 +276,7 @@ static void bbr_init_pacing_rate_from_rtt(struct sock *sk) } else { /* no RTT sample yet */ rtt_us = USEC_PER_MSEC; /* use nominal default RTT */ } - bw = (u64)tp->snd_cwnd * BW_UNIT; + bw = (u64)tcp_snd_cwnd(tp) * BW_UNIT; do_div(bw, rtt_us); sk->sk_pacing_rate = bbr_bw_to_pacing_rate(sk, bw, bbr_high_gain); } @@ -323,9 +323,9 @@ static void bbr_save_cwnd(struct sock *sk) struct bbr *bbr = inet_csk_ca(sk); if (bbr->prev_ca_state < TCP_CA_Recovery && bbr->mode != BBR_PROBE_RTT) - bbr->prior_cwnd = tp->snd_cwnd; /* this cwnd is good enough */ + bbr->prior_cwnd = tcp_snd_cwnd(tp); /* this cwnd is good enough */ else /* loss recovery or BBR_PROBE_RTT have temporarily cut cwnd */ - bbr->prior_cwnd = max(bbr->prior_cwnd, tp->snd_cwnd); + bbr->prior_cwnd = max(bbr->prior_cwnd, tcp_snd_cwnd(tp)); } static void bbr_cwnd_event(struct sock *sk, enum tcp_ca_event event) @@ -482,7 +482,7 @@ static bool bbr_set_cwnd_to_recover_or_restore( struct tcp_sock *tp = tcp_sk(sk); struct bbr *bbr = inet_csk_ca(sk); u8 prev_state = bbr->prev_ca_state, state = inet_csk(sk)->icsk_ca_state; - u32 cwnd = tp->snd_cwnd; + u32 cwnd = tcp_snd_cwnd(tp); /* An ACK for P pkts should release at most 2*P packets. We do this * in two steps. First, here we deduct the number of lost packets. @@ -520,7 +520,7 @@ static void bbr_set_cwnd(struct sock *sk, const struct rate_sample *rs, { struct tcp_sock *tp = tcp_sk(sk); struct bbr *bbr = inet_csk_ca(sk); - u32 cwnd = tp->snd_cwnd, target_cwnd = 0; + u32 cwnd = tcp_snd_cwnd(tp), target_cwnd = 0; if (!acked) goto done; /* no packet fully ACKed; just apply caps */ @@ -544,9 +544,9 @@ static void bbr_set_cwnd(struct sock *sk, const struct rate_sample *rs, cwnd = max(cwnd, bbr_cwnd_min_target); done: - tp->snd_cwnd = min(cwnd, tp->snd_cwnd_clamp); /* apply global cap */ + tcp_snd_cwnd_set(tp, min(cwnd, tp->snd_cwnd_clamp)); /* apply global cap */ if (bbr->mode == BBR_PROBE_RTT) /* drain queue, refresh min_rtt */ - tp->snd_cwnd = min(tp->snd_cwnd, bbr_cwnd_min_target); + tcp_snd_cwnd_set(tp, min(tcp_snd_cwnd(tp), bbr_cwnd_min_target)); } /* End cycle phase if it's time and/or we hit the phase's in-flight target. */ @@ -856,7 +856,7 @@ static void bbr_update_ack_aggregation(struct sock *sk, bbr->ack_epoch_acked = min_t(u32, 0xFFFFF, bbr->ack_epoch_acked + rs->acked_sacked); extra_acked = bbr->ack_epoch_acked - expected_acked; - extra_acked = min(extra_acked, tp->snd_cwnd); + extra_acked = min(extra_acked, tcp_snd_cwnd(tp)); if (extra_acked > bbr->extra_acked[bbr->extra_acked_win_idx]) bbr->extra_acked[bbr->extra_acked_win_idx] = extra_acked; } @@ -914,7 +914,7 @@ static void bbr_check_probe_rtt_done(struct sock *sk) return; bbr->min_rtt_stamp = tcp_jiffies32; /* wait a while until PROBE_RTT */ - tp->snd_cwnd = max(tp->snd_cwnd, bbr->prior_cwnd); + tcp_snd_cwnd_set(tp, max(tcp_snd_cwnd(tp), bbr->prior_cwnd)); bbr_reset_mode(sk); } @@ -1093,7 +1093,7 @@ static u32 bbr_undo_cwnd(struct sock *sk) bbr->full_bw = 0; /* spurious slow-down; reset full pipe detection */ bbr->full_bw_cnt = 0; bbr_reset_lt_bw_sampling(sk); - return tcp_sk(sk)->snd_cwnd; + return tcp_snd_cwnd(tcp_sk(sk)); } /* Entering loss recovery, so save cwnd for when we exit or undo recovery. */ diff --git a/net/ipv4/tcp_bic.c b/net/ipv4/tcp_bic.c index f5f588b1f6e9..58358bf92e1b 100644 --- a/net/ipv4/tcp_bic.c +++ b/net/ipv4/tcp_bic.c @@ -150,7 +150,7 @@ static void bictcp_cong_avoid(struct sock *sk, u32 ack, u32 acked) if (!acked) return; } - bictcp_update(ca, tp->snd_cwnd); + bictcp_update(ca, tcp_snd_cwnd(tp)); tcp_cong_avoid_ai(tp, ca->cnt, acked); } @@ -166,16 +166,16 @@ static u32 bictcp_recalc_ssthresh(struct sock *sk) ca->epoch_start = 0; /* end of epoch */ /* Wmax and fast convergence */ - if (tp->snd_cwnd < ca->last_max_cwnd && fast_convergence) - ca->last_max_cwnd = (tp->snd_cwnd * (BICTCP_BETA_SCALE + beta)) + if (tcp_snd_cwnd(tp) < ca->last_max_cwnd && fast_convergence) + ca->last_max_cwnd = (tcp_snd_cwnd(tp) * (BICTCP_BETA_SCALE + beta)) / (2 * BICTCP_BETA_SCALE); else - ca->last_max_cwnd = tp->snd_cwnd; + ca->last_max_cwnd = tcp_snd_cwnd(tp); - if (tp->snd_cwnd <= low_window) - return max(tp->snd_cwnd >> 1U, 2U); + if (tcp_snd_cwnd(tp) <= low_window) + return max(tcp_snd_cwnd(tp) >> 1U, 2U); else - return max((tp->snd_cwnd * beta) / BICTCP_BETA_SCALE, 2U); + return max((tcp_snd_cwnd(tp) * beta) / BICTCP_BETA_SCALE, 2U); } static void bictcp_state(struct sock *sk, u8 new_state) diff --git a/net/ipv4/tcp_bpf.c b/net/ipv4/tcp_bpf.c index 1cdcb4df0eb7..be3947e70fec 100644 --- a/net/ipv4/tcp_bpf.c +++ b/net/ipv4/tcp_bpf.c @@ -174,7 +174,6 @@ static int tcp_msg_wait_data(struct sock *sk, struct sk_psock *psock, static int tcp_bpf_recvmsg_parser(struct sock *sk, struct msghdr *msg, size_t len, - int nonblock, int flags, int *addr_len) { @@ -186,7 +185,7 @@ static int tcp_bpf_recvmsg_parser(struct sock *sk, psock = sk_psock_get(sk); if (unlikely(!psock)) - return tcp_recvmsg(sk, msg, len, nonblock, flags, addr_len); + return tcp_recvmsg(sk, msg, len, flags, addr_len); lock_sock(sk); msg_bytes_ready: @@ -211,7 +210,7 @@ msg_bytes_ready: goto out; } - timeo = sock_rcvtimeo(sk, nonblock); + timeo = sock_rcvtimeo(sk, flags & MSG_DONTWAIT); if (!timeo) { copied = -EAGAIN; goto out; @@ -234,7 +233,7 @@ out: } static int tcp_bpf_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, - int nonblock, int flags, int *addr_len) + int flags, int *addr_len) { struct sk_psock *psock; int copied, ret; @@ -244,11 +243,11 @@ static int tcp_bpf_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, psock = sk_psock_get(sk); if (unlikely(!psock)) - return tcp_recvmsg(sk, msg, len, nonblock, flags, addr_len); + return tcp_recvmsg(sk, msg, len, flags, addr_len); if (!skb_queue_empty(&sk->sk_receive_queue) && sk_psock_queue_empty(psock)) { sk_psock_put(sk, psock); - return tcp_recvmsg(sk, msg, len, nonblock, flags, addr_len); + return tcp_recvmsg(sk, msg, len, flags, addr_len); } lock_sock(sk); msg_bytes_ready: @@ -257,14 +256,14 @@ msg_bytes_ready: long timeo; int data; - timeo = sock_rcvtimeo(sk, nonblock); + timeo = sock_rcvtimeo(sk, flags & MSG_DONTWAIT); data = tcp_msg_wait_data(sk, psock, timeo); if (data) { if (!sk_psock_queue_empty(psock)) goto msg_bytes_ready; release_sock(sk); sk_psock_put(sk, psock); - return tcp_recvmsg(sk, msg, len, nonblock, flags, addr_len); + return tcp_recvmsg(sk, msg, len, flags, addr_len); } copied = -EAGAIN; } diff --git a/net/ipv4/tcp_cdg.c b/net/ipv4/tcp_cdg.c index 709d23801823..ddc7ba0554bd 100644 --- a/net/ipv4/tcp_cdg.c +++ b/net/ipv4/tcp_cdg.c @@ -161,8 +161,8 @@ static void tcp_cdg_hystart_update(struct sock *sk) LINUX_MIB_TCPHYSTARTTRAINDETECT); NET_ADD_STATS(sock_net(sk), LINUX_MIB_TCPHYSTARTTRAINCWND, - tp->snd_cwnd); - tp->snd_ssthresh = tp->snd_cwnd; + tcp_snd_cwnd(tp)); + tp->snd_ssthresh = tcp_snd_cwnd(tp); return; } } @@ -180,8 +180,8 @@ static void tcp_cdg_hystart_update(struct sock *sk) LINUX_MIB_TCPHYSTARTDELAYDETECT); NET_ADD_STATS(sock_net(sk), LINUX_MIB_TCPHYSTARTDELAYCWND, - tp->snd_cwnd); - tp->snd_ssthresh = tp->snd_cwnd; + tcp_snd_cwnd(tp)); + tp->snd_ssthresh = tcp_snd_cwnd(tp); } } } @@ -252,7 +252,7 @@ static bool tcp_cdg_backoff(struct sock *sk, u32 grad) return false; } - ca->shadow_wnd = max(ca->shadow_wnd, tp->snd_cwnd); + ca->shadow_wnd = max(ca->shadow_wnd, tcp_snd_cwnd(tp)); ca->state = CDG_BACKOFF; tcp_enter_cwr(sk); return true; @@ -285,14 +285,14 @@ static void tcp_cdg_cong_avoid(struct sock *sk, u32 ack, u32 acked) } if (!tcp_is_cwnd_limited(sk)) { - ca->shadow_wnd = min(ca->shadow_wnd, tp->snd_cwnd); + ca->shadow_wnd = min(ca->shadow_wnd, tcp_snd_cwnd(tp)); return; } - prior_snd_cwnd = tp->snd_cwnd; + prior_snd_cwnd = tcp_snd_cwnd(tp); tcp_reno_cong_avoid(sk, ack, acked); - incr = tp->snd_cwnd - prior_snd_cwnd; + incr = tcp_snd_cwnd(tp) - prior_snd_cwnd; ca->shadow_wnd = max(ca->shadow_wnd, ca->shadow_wnd + incr); } @@ -331,15 +331,15 @@ static u32 tcp_cdg_ssthresh(struct sock *sk) struct tcp_sock *tp = tcp_sk(sk); if (ca->state == CDG_BACKOFF) - return max(2U, (tp->snd_cwnd * min(1024U, backoff_beta)) >> 10); + return max(2U, (tcp_snd_cwnd(tp) * min(1024U, backoff_beta)) >> 10); if (ca->state == CDG_NONFULL && use_tolerance) - return tp->snd_cwnd; + return tcp_snd_cwnd(tp); - ca->shadow_wnd = min(ca->shadow_wnd >> 1, tp->snd_cwnd); + ca->shadow_wnd = min(ca->shadow_wnd >> 1, tcp_snd_cwnd(tp)); if (use_shadow) - return max3(2U, ca->shadow_wnd, tp->snd_cwnd >> 1); - return max(2U, tp->snd_cwnd >> 1); + return max3(2U, ca->shadow_wnd, tcp_snd_cwnd(tp) >> 1); + return max(2U, tcp_snd_cwnd(tp) >> 1); } static void tcp_cdg_cwnd_event(struct sock *sk, const enum tcp_ca_event ev) @@ -357,7 +357,7 @@ static void tcp_cdg_cwnd_event(struct sock *sk, const enum tcp_ca_event ev) ca->gradients = gradients; ca->rtt_seq = tp->snd_nxt; - ca->shadow_wnd = tp->snd_cwnd; + ca->shadow_wnd = tcp_snd_cwnd(tp); break; case CA_EVENT_COMPLETE_CWR: ca->state = CDG_UNKNOWN; @@ -380,7 +380,7 @@ static void tcp_cdg_init(struct sock *sk) ca->gradients = kcalloc(window, sizeof(ca->gradients[0]), GFP_NOWAIT | __GFP_NOWARN); ca->rtt_seq = tp->snd_nxt; - ca->shadow_wnd = tp->snd_cwnd; + ca->shadow_wnd = tcp_snd_cwnd(tp); } static void tcp_cdg_release(struct sock *sk) diff --git a/net/ipv4/tcp_cong.c b/net/ipv4/tcp_cong.c index dc95572163df..d3cae40749e8 100644 --- a/net/ipv4/tcp_cong.c +++ b/net/ipv4/tcp_cong.c @@ -16,6 +16,7 @@ #include <linux/gfp.h> #include <linux/jhash.h> #include <net/tcp.h> +#include <trace/events/tcp.h> static DEFINE_SPINLOCK(tcp_cong_list_lock); static LIST_HEAD(tcp_cong_list); @@ -33,6 +34,17 @@ struct tcp_congestion_ops *tcp_ca_find(const char *name) return NULL; } +void tcp_set_ca_state(struct sock *sk, const u8 ca_state) +{ + struct inet_connection_sock *icsk = inet_csk(sk); + + trace_tcp_cong_state_set(sk, ca_state); + + if (icsk->icsk_ca_ops->set_state) + icsk->icsk_ca_ops->set_state(sk, ca_state); + icsk->icsk_ca_state = ca_state; +} + /* Must be called with rcu lock held */ static struct tcp_congestion_ops *tcp_ca_find_autoload(struct net *net, const char *name) @@ -393,10 +405,10 @@ int tcp_set_congestion_control(struct sock *sk, const char *name, bool load, */ u32 tcp_slow_start(struct tcp_sock *tp, u32 acked) { - u32 cwnd = min(tp->snd_cwnd + acked, tp->snd_ssthresh); + u32 cwnd = min(tcp_snd_cwnd(tp) + acked, tp->snd_ssthresh); - acked -= cwnd - tp->snd_cwnd; - tp->snd_cwnd = min(cwnd, tp->snd_cwnd_clamp); + acked -= cwnd - tcp_snd_cwnd(tp); + tcp_snd_cwnd_set(tp, min(cwnd, tp->snd_cwnd_clamp)); return acked; } @@ -410,7 +422,7 @@ void tcp_cong_avoid_ai(struct tcp_sock *tp, u32 w, u32 acked) /* If credits accumulated at a higher w, apply them gently now. */ if (tp->snd_cwnd_cnt >= w) { tp->snd_cwnd_cnt = 0; - tp->snd_cwnd++; + tcp_snd_cwnd_set(tp, tcp_snd_cwnd(tp) + 1); } tp->snd_cwnd_cnt += acked; @@ -418,9 +430,9 @@ void tcp_cong_avoid_ai(struct tcp_sock *tp, u32 w, u32 acked) u32 delta = tp->snd_cwnd_cnt / w; tp->snd_cwnd_cnt -= delta * w; - tp->snd_cwnd += delta; + tcp_snd_cwnd_set(tp, tcp_snd_cwnd(tp) + delta); } - tp->snd_cwnd = min(tp->snd_cwnd, tp->snd_cwnd_clamp); + tcp_snd_cwnd_set(tp, min(tcp_snd_cwnd(tp), tp->snd_cwnd_clamp)); } EXPORT_SYMBOL_GPL(tcp_cong_avoid_ai); @@ -445,7 +457,7 @@ void tcp_reno_cong_avoid(struct sock *sk, u32 ack, u32 acked) return; } /* In dangerous area, increase slowly. */ - tcp_cong_avoid_ai(tp, tp->snd_cwnd, acked); + tcp_cong_avoid_ai(tp, tcp_snd_cwnd(tp), acked); } EXPORT_SYMBOL_GPL(tcp_reno_cong_avoid); @@ -454,7 +466,7 @@ u32 tcp_reno_ssthresh(struct sock *sk) { const struct tcp_sock *tp = tcp_sk(sk); - return max(tp->snd_cwnd >> 1U, 2U); + return max(tcp_snd_cwnd(tp) >> 1U, 2U); } EXPORT_SYMBOL_GPL(tcp_reno_ssthresh); @@ -462,7 +474,7 @@ u32 tcp_reno_undo_cwnd(struct sock *sk) { const struct tcp_sock *tp = tcp_sk(sk); - return max(tp->snd_cwnd, tp->prior_cwnd); + return max(tcp_snd_cwnd(tp), tp->prior_cwnd); } EXPORT_SYMBOL_GPL(tcp_reno_undo_cwnd); diff --git a/net/ipv4/tcp_cubic.c b/net/ipv4/tcp_cubic.c index 24d562dd6225..b0918839bee7 100644 --- a/net/ipv4/tcp_cubic.c +++ b/net/ipv4/tcp_cubic.c @@ -334,7 +334,7 @@ static void cubictcp_cong_avoid(struct sock *sk, u32 ack, u32 acked) if (!acked) return; } - bictcp_update(ca, tp->snd_cwnd, acked); + bictcp_update(ca, tcp_snd_cwnd(tp), acked); tcp_cong_avoid_ai(tp, ca->cnt, acked); } @@ -346,13 +346,13 @@ static u32 cubictcp_recalc_ssthresh(struct sock *sk) ca->epoch_start = 0; /* end of epoch */ /* Wmax and fast convergence */ - if (tp->snd_cwnd < ca->last_max_cwnd && fast_convergence) - ca->last_max_cwnd = (tp->snd_cwnd * (BICTCP_BETA_SCALE + beta)) + if (tcp_snd_cwnd(tp) < ca->last_max_cwnd && fast_convergence) + ca->last_max_cwnd = (tcp_snd_cwnd(tp) * (BICTCP_BETA_SCALE + beta)) / (2 * BICTCP_BETA_SCALE); else - ca->last_max_cwnd = tp->snd_cwnd; + ca->last_max_cwnd = tcp_snd_cwnd(tp); - return max((tp->snd_cwnd * beta) / BICTCP_BETA_SCALE, 2U); + return max((tcp_snd_cwnd(tp) * beta) / BICTCP_BETA_SCALE, 2U); } static void cubictcp_state(struct sock *sk, u8 new_state) @@ -413,13 +413,13 @@ static void hystart_update(struct sock *sk, u32 delay) ca->found = 1; pr_debug("hystart_ack_train (%u > %u) delay_min %u (+ ack_delay %u) cwnd %u\n", now - ca->round_start, threshold, - ca->delay_min, hystart_ack_delay(sk), tp->snd_cwnd); + ca->delay_min, hystart_ack_delay(sk), tcp_snd_cwnd(tp)); NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPHYSTARTTRAINDETECT); NET_ADD_STATS(sock_net(sk), LINUX_MIB_TCPHYSTARTTRAINCWND, - tp->snd_cwnd); - tp->snd_ssthresh = tp->snd_cwnd; + tcp_snd_cwnd(tp)); + tp->snd_ssthresh = tcp_snd_cwnd(tp); } } } @@ -438,8 +438,8 @@ static void hystart_update(struct sock *sk, u32 delay) LINUX_MIB_TCPHYSTARTDELAYDETECT); NET_ADD_STATS(sock_net(sk), LINUX_MIB_TCPHYSTARTDELAYCWND, - tp->snd_cwnd); - tp->snd_ssthresh = tp->snd_cwnd; + tcp_snd_cwnd(tp)); + tp->snd_ssthresh = tcp_snd_cwnd(tp); } } } @@ -469,7 +469,7 @@ static void cubictcp_acked(struct sock *sk, const struct ack_sample *sample) /* hystart triggers when cwnd is larger than some threshold */ if (!ca->found && tcp_in_slow_start(tp) && hystart && - tp->snd_cwnd >= hystart_low_window) + tcp_snd_cwnd(tp) >= hystart_low_window) hystart_update(sk, delay); } diff --git a/net/ipv4/tcp_dctcp.c b/net/ipv4/tcp_dctcp.c index 1943a6630341..ab034a4e9324 100644 --- a/net/ipv4/tcp_dctcp.c +++ b/net/ipv4/tcp_dctcp.c @@ -106,8 +106,8 @@ static u32 dctcp_ssthresh(struct sock *sk) struct dctcp *ca = inet_csk_ca(sk); struct tcp_sock *tp = tcp_sk(sk); - ca->loss_cwnd = tp->snd_cwnd; - return max(tp->snd_cwnd - ((tp->snd_cwnd * ca->dctcp_alpha) >> 11U), 2U); + ca->loss_cwnd = tcp_snd_cwnd(tp); + return max(tcp_snd_cwnd(tp) - ((tcp_snd_cwnd(tp) * ca->dctcp_alpha) >> 11U), 2U); } static void dctcp_update_alpha(struct sock *sk, u32 flags) @@ -148,8 +148,8 @@ static void dctcp_react_to_loss(struct sock *sk) struct dctcp *ca = inet_csk_ca(sk); struct tcp_sock *tp = tcp_sk(sk); - ca->loss_cwnd = tp->snd_cwnd; - tp->snd_ssthresh = max(tp->snd_cwnd >> 1U, 2U); + ca->loss_cwnd = tcp_snd_cwnd(tp); + tp->snd_ssthresh = max(tcp_snd_cwnd(tp) >> 1U, 2U); } static void dctcp_state(struct sock *sk, u8 new_state) @@ -211,8 +211,9 @@ static size_t dctcp_get_info(struct sock *sk, u32 ext, int *attr, static u32 dctcp_cwnd_undo(struct sock *sk) { const struct dctcp *ca = inet_csk_ca(sk); + struct tcp_sock *tp = tcp_sk(sk); - return max(tcp_sk(sk)->snd_cwnd, ca->loss_cwnd); + return max(tcp_snd_cwnd(tp), ca->loss_cwnd); } static struct tcp_congestion_ops dctcp __read_mostly = { diff --git a/net/ipv4/tcp_highspeed.c b/net/ipv4/tcp_highspeed.c index 349069d6cd0a..c6de5ce79ad3 100644 --- a/net/ipv4/tcp_highspeed.c +++ b/net/ipv4/tcp_highspeed.c @@ -127,22 +127,22 @@ static void hstcp_cong_avoid(struct sock *sk, u32 ack, u32 acked) * snd_cwnd <= * hstcp_aimd_vals[ca->ai].cwnd */ - if (tp->snd_cwnd > hstcp_aimd_vals[ca->ai].cwnd) { - while (tp->snd_cwnd > hstcp_aimd_vals[ca->ai].cwnd && + if (tcp_snd_cwnd(tp) > hstcp_aimd_vals[ca->ai].cwnd) { + while (tcp_snd_cwnd(tp) > hstcp_aimd_vals[ca->ai].cwnd && ca->ai < HSTCP_AIMD_MAX - 1) ca->ai++; - } else if (ca->ai && tp->snd_cwnd <= hstcp_aimd_vals[ca->ai-1].cwnd) { - while (ca->ai && tp->snd_cwnd <= hstcp_aimd_vals[ca->ai-1].cwnd) + } else if (ca->ai && tcp_snd_cwnd(tp) <= hstcp_aimd_vals[ca->ai-1].cwnd) { + while (ca->ai && tcp_snd_cwnd(tp) <= hstcp_aimd_vals[ca->ai-1].cwnd) ca->ai--; } /* Do additive increase */ - if (tp->snd_cwnd < tp->snd_cwnd_clamp) { + if (tcp_snd_cwnd(tp) < tp->snd_cwnd_clamp) { /* cwnd = cwnd + a(w) / cwnd */ tp->snd_cwnd_cnt += ca->ai + 1; - if (tp->snd_cwnd_cnt >= tp->snd_cwnd) { - tp->snd_cwnd_cnt -= tp->snd_cwnd; - tp->snd_cwnd++; + if (tp->snd_cwnd_cnt >= tcp_snd_cwnd(tp)) { + tp->snd_cwnd_cnt -= tcp_snd_cwnd(tp); + tcp_snd_cwnd_set(tp, tcp_snd_cwnd(tp) + 1); } } } @@ -154,7 +154,7 @@ static u32 hstcp_ssthresh(struct sock *sk) struct hstcp *ca = inet_csk_ca(sk); /* Do multiplicative decrease */ - return max(tp->snd_cwnd - ((tp->snd_cwnd * hstcp_aimd_vals[ca->ai].md) >> 8), 2U); + return max(tcp_snd_cwnd(tp) - ((tcp_snd_cwnd(tp) * hstcp_aimd_vals[ca->ai].md) >> 8), 2U); } static struct tcp_congestion_ops tcp_highspeed __read_mostly = { diff --git a/net/ipv4/tcp_htcp.c b/net/ipv4/tcp_htcp.c index 55adcfcf96fe..52b1f2665dfa 100644 --- a/net/ipv4/tcp_htcp.c +++ b/net/ipv4/tcp_htcp.c @@ -124,7 +124,7 @@ static void measure_achieved_throughput(struct sock *sk, ca->packetcount += sample->pkts_acked; - if (ca->packetcount >= tp->snd_cwnd - (ca->alpha >> 7 ? : 1) && + if (ca->packetcount >= tcp_snd_cwnd(tp) - (ca->alpha >> 7 ? : 1) && now - ca->lasttime >= ca->minRTT && ca->minRTT > 0) { __u32 cur_Bi = ca->packetcount * HZ / (now - ca->lasttime); @@ -225,7 +225,7 @@ static u32 htcp_recalc_ssthresh(struct sock *sk) const struct htcp *ca = inet_csk_ca(sk); htcp_param_update(sk); - return max((tp->snd_cwnd * ca->beta) >> 7, 2U); + return max((tcp_snd_cwnd(tp) * ca->beta) >> 7, 2U); } static void htcp_cong_avoid(struct sock *sk, u32 ack, u32 acked) @@ -242,9 +242,9 @@ static void htcp_cong_avoid(struct sock *sk, u32 ack, u32 acked) /* In dangerous area, increase slowly. * In theory this is tp->snd_cwnd += alpha / tp->snd_cwnd */ - if ((tp->snd_cwnd_cnt * ca->alpha)>>7 >= tp->snd_cwnd) { - if (tp->snd_cwnd < tp->snd_cwnd_clamp) - tp->snd_cwnd++; + if ((tp->snd_cwnd_cnt * ca->alpha)>>7 >= tcp_snd_cwnd(tp)) { + if (tcp_snd_cwnd(tp) < tp->snd_cwnd_clamp) + tcp_snd_cwnd_set(tp, tcp_snd_cwnd(tp) + 1); tp->snd_cwnd_cnt = 0; htcp_alpha_update(ca); } else diff --git a/net/ipv4/tcp_hybla.c b/net/ipv4/tcp_hybla.c index be39327e04e6..abd7d91807e5 100644 --- a/net/ipv4/tcp_hybla.c +++ b/net/ipv4/tcp_hybla.c @@ -54,7 +54,7 @@ static void hybla_init(struct sock *sk) ca->rho2_7ls = 0; ca->snd_cwnd_cents = 0; ca->hybla_en = true; - tp->snd_cwnd = 2; + tcp_snd_cwnd_set(tp, 2); tp->snd_cwnd_clamp = 65535; /* 1st Rho measurement based on initial srtt */ @@ -62,7 +62,7 @@ static void hybla_init(struct sock *sk) /* set minimum rtt as this is the 1st ever seen */ ca->minrtt_us = tp->srtt_us; - tp->snd_cwnd = ca->rho; + tcp_snd_cwnd_set(tp, ca->rho); } static void hybla_state(struct sock *sk, u8 ca_state) @@ -137,31 +137,31 @@ static void hybla_cong_avoid(struct sock *sk, u32 ack, u32 acked) * as long as increment is estimated as (rho<<7)/window * it already is <<7 and we can easily count its fractions. */ - increment = ca->rho2_7ls / tp->snd_cwnd; + increment = ca->rho2_7ls / tcp_snd_cwnd(tp); if (increment < 128) tp->snd_cwnd_cnt++; } odd = increment % 128; - tp->snd_cwnd += increment >> 7; + tcp_snd_cwnd_set(tp, tcp_snd_cwnd(tp) + (increment >> 7)); ca->snd_cwnd_cents += odd; /* check when fractions goes >=128 and increase cwnd by 1. */ while (ca->snd_cwnd_cents >= 128) { - tp->snd_cwnd++; + tcp_snd_cwnd_set(tp, tcp_snd_cwnd(tp) + 1); ca->snd_cwnd_cents -= 128; tp->snd_cwnd_cnt = 0; } /* check when cwnd has not been incremented for a while */ - if (increment == 0 && odd == 0 && tp->snd_cwnd_cnt >= tp->snd_cwnd) { - tp->snd_cwnd++; + if (increment == 0 && odd == 0 && tp->snd_cwnd_cnt >= tcp_snd_cwnd(tp)) { + tcp_snd_cwnd_set(tp, tcp_snd_cwnd(tp) + 1); tp->snd_cwnd_cnt = 0; } /* clamp down slowstart cwnd to ssthresh value. */ if (is_slowstart) - tp->snd_cwnd = min(tp->snd_cwnd, tp->snd_ssthresh); + tcp_snd_cwnd_set(tp, min(tcp_snd_cwnd(tp), tp->snd_ssthresh)); - tp->snd_cwnd = min_t(u32, tp->snd_cwnd, tp->snd_cwnd_clamp); + tcp_snd_cwnd_set(tp, min(tcp_snd_cwnd(tp), tp->snd_cwnd_clamp)); } static struct tcp_congestion_ops tcp_hybla __read_mostly = { diff --git a/net/ipv4/tcp_illinois.c b/net/ipv4/tcp_illinois.c index 00e54873213e..c0c81a2c77fa 100644 --- a/net/ipv4/tcp_illinois.c +++ b/net/ipv4/tcp_illinois.c @@ -224,7 +224,7 @@ static void update_params(struct sock *sk) struct tcp_sock *tp = tcp_sk(sk); struct illinois *ca = inet_csk_ca(sk); - if (tp->snd_cwnd < win_thresh) { + if (tcp_snd_cwnd(tp) < win_thresh) { ca->alpha = ALPHA_BASE; ca->beta = BETA_BASE; } else if (ca->cnt_rtt > 0) { @@ -284,9 +284,9 @@ static void tcp_illinois_cong_avoid(struct sock *sk, u32 ack, u32 acked) * tp->snd_cwnd += alpha/tp->snd_cwnd */ delta = (tp->snd_cwnd_cnt * ca->alpha) >> ALPHA_SHIFT; - if (delta >= tp->snd_cwnd) { - tp->snd_cwnd = min(tp->snd_cwnd + delta / tp->snd_cwnd, - (u32)tp->snd_cwnd_clamp); + if (delta >= tcp_snd_cwnd(tp)) { + tcp_snd_cwnd_set(tp, min(tcp_snd_cwnd(tp) + delta / tcp_snd_cwnd(tp), + (u32)tp->snd_cwnd_clamp)); tp->snd_cwnd_cnt = 0; } } @@ -296,9 +296,11 @@ static u32 tcp_illinois_ssthresh(struct sock *sk) { struct tcp_sock *tp = tcp_sk(sk); struct illinois *ca = inet_csk_ca(sk); + u32 decr; /* Multiplicative decrease */ - return max(tp->snd_cwnd - ((tp->snd_cwnd * ca->beta) >> BETA_SHIFT), 2U); + decr = (tcp_snd_cwnd(tp) * ca->beta) >> BETA_SHIFT; + return max(tcp_snd_cwnd(tp) - decr, 2U); } /* Extract info for Tcp socket info provided via netlink. */ diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index 60f99e9fb6d1..97cfcd85f84e 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -414,7 +414,7 @@ static void tcp_sndbuf_expand(struct sock *sk) per_mss = roundup_pow_of_two(per_mss) + SKB_DATA_ALIGN(sizeof(struct sk_buff)); - nr_segs = max_t(u32, TCP_INIT_CWND, tp->snd_cwnd); + nr_segs = max_t(u32, TCP_INIT_CWND, tcp_snd_cwnd(tp)); nr_segs = max_t(u32, nr_segs, tp->reordering + 1); /* Fast Recovery (RFC 5681 3.2) : @@ -909,12 +909,12 @@ static void tcp_update_pacing_rate(struct sock *sk) * If snd_cwnd >= (tp->snd_ssthresh / 2), we are approaching * end of slow start and should slow down. */ - if (tp->snd_cwnd < tp->snd_ssthresh / 2) + if (tcp_snd_cwnd(tp) < tp->snd_ssthresh / 2) rate *= sock_net(sk)->ipv4.sysctl_tcp_pacing_ss_ratio; else rate *= sock_net(sk)->ipv4.sysctl_tcp_pacing_ca_ratio; - rate *= max(tp->snd_cwnd, tp->packets_out); + rate *= max(tcp_snd_cwnd(tp), tp->packets_out); if (likely(tp->srtt_us)) do_div(rate, tp->srtt_us); @@ -2147,12 +2147,12 @@ void tcp_enter_loss(struct sock *sk) !after(tp->high_seq, tp->snd_una) || (icsk->icsk_ca_state == TCP_CA_Loss && !icsk->icsk_retransmits)) { tp->prior_ssthresh = tcp_current_ssthresh(sk); - tp->prior_cwnd = tp->snd_cwnd; + tp->prior_cwnd = tcp_snd_cwnd(tp); tp->snd_ssthresh = icsk->icsk_ca_ops->ssthresh(sk); tcp_ca_event(sk, CA_EVENT_LOSS); tcp_init_undo(tp); } - tp->snd_cwnd = tcp_packets_in_flight(tp) + 1; + tcp_snd_cwnd_set(tp, tcp_packets_in_flight(tp) + 1); tp->snd_cwnd_cnt = 0; tp->snd_cwnd_stamp = tcp_jiffies32; @@ -2458,7 +2458,7 @@ static void DBGUNDO(struct sock *sk, const char *msg) pr_debug("Undo %s %pI4/%u c%u l%u ss%u/%u p%u\n", msg, &inet->inet_daddr, ntohs(inet->inet_dport), - tp->snd_cwnd, tcp_left_out(tp), + tcp_snd_cwnd(tp), tcp_left_out(tp), tp->snd_ssthresh, tp->prior_ssthresh, tp->packets_out); } @@ -2467,7 +2467,7 @@ static void DBGUNDO(struct sock *sk, const char *msg) pr_debug("Undo %s %pI6/%u c%u l%u ss%u/%u p%u\n", msg, &sk->sk_v6_daddr, ntohs(inet->inet_dport), - tp->snd_cwnd, tcp_left_out(tp), + tcp_snd_cwnd(tp), tcp_left_out(tp), tp->snd_ssthresh, tp->prior_ssthresh, tp->packets_out); } @@ -2492,7 +2492,7 @@ static void tcp_undo_cwnd_reduction(struct sock *sk, bool unmark_loss) if (tp->prior_ssthresh) { const struct inet_connection_sock *icsk = inet_csk(sk); - tp->snd_cwnd = icsk->icsk_ca_ops->undo_cwnd(sk); + tcp_snd_cwnd_set(tp, icsk->icsk_ca_ops->undo_cwnd(sk)); if (tp->prior_ssthresh > tp->snd_ssthresh) { tp->snd_ssthresh = tp->prior_ssthresh; @@ -2599,7 +2599,7 @@ static void tcp_init_cwnd_reduction(struct sock *sk) tp->high_seq = tp->snd_nxt; tp->tlp_high_seq = 0; tp->snd_cwnd_cnt = 0; - tp->prior_cwnd = tp->snd_cwnd; + tp->prior_cwnd = tcp_snd_cwnd(tp); tp->prr_delivered = 0; tp->prr_out = 0; tp->snd_ssthresh = inet_csk(sk)->icsk_ca_ops->ssthresh(sk); @@ -2629,7 +2629,7 @@ void tcp_cwnd_reduction(struct sock *sk, int newly_acked_sacked, int newly_lost, } /* Force a fast retransmit upon entering fast recovery */ sndcnt = max(sndcnt, (tp->prr_out ? 0 : 1)); - tp->snd_cwnd = tcp_packets_in_flight(tp) + sndcnt; + tcp_snd_cwnd_set(tp, tcp_packets_in_flight(tp) + sndcnt); } static inline void tcp_end_cwnd_reduction(struct sock *sk) @@ -2642,7 +2642,7 @@ static inline void tcp_end_cwnd_reduction(struct sock *sk) /* Reset cwnd to ssthresh in CWR or Recovery (unless it's undone) */ if (tp->snd_ssthresh < TCP_INFINITE_SSTHRESH && (inet_csk(sk)->icsk_ca_state == TCP_CA_CWR || tp->undo_marker)) { - tp->snd_cwnd = tp->snd_ssthresh; + tcp_snd_cwnd_set(tp, tp->snd_ssthresh); tp->snd_cwnd_stamp = tcp_jiffies32; } tcp_ca_event(sk, CA_EVENT_COMPLETE_CWR); @@ -2709,9 +2709,9 @@ static void tcp_mtup_probe_success(struct sock *sk) /* FIXME: breaks with very large cwnd */ tp->prior_ssthresh = tcp_current_ssthresh(sk); - tp->snd_cwnd = tp->snd_cwnd * - tcp_mss_to_mtu(sk, tp->mss_cache) / - icsk->icsk_mtup.probe_size; + tcp_snd_cwnd_set(tp, tcp_snd_cwnd(tp) * + tcp_mss_to_mtu(sk, tp->mss_cache) / + icsk->icsk_mtup.probe_size); tp->snd_cwnd_cnt = 0; tp->snd_cwnd_stamp = tcp_jiffies32; tp->snd_ssthresh = tcp_current_ssthresh(sk); @@ -3034,7 +3034,7 @@ static void tcp_fastretrans_alert(struct sock *sk, const u32 prior_snd_una, tp->snd_una == tp->mtu_probe.probe_seq_start) { tcp_mtup_probe_failed(sk); /* Restores the reduction we did in tcp_mtup_probe() */ - tp->snd_cwnd++; + tcp_snd_cwnd_set(tp, tcp_snd_cwnd(tp) + 1); tcp_simple_retransmit(sk); return; } @@ -3766,7 +3766,7 @@ static int tcp_ack(struct sock *sk, const struct sk_buff *skb, int flag) if (before(ack, prior_snd_una - tp->max_window)) { if (!(flag & FLAG_NO_CHALLENGE_ACK)) tcp_send_challenge_ack(sk); - return -1; + return -SKB_DROP_REASON_TCP_TOO_OLD_ACK; } goto old_ack; } @@ -3775,7 +3775,7 @@ static int tcp_ack(struct sock *sk, const struct sk_buff *skb, int flag) * this segment (RFC793 Section 3.9). */ if (after(ack, tp->snd_nxt)) - return -1; + return -SKB_DROP_REASON_TCP_ACK_UNSENT_DATA; if (after(ack, prior_snd_una)) { flag |= FLAG_SND_UNA_ADVANCED; @@ -4675,7 +4675,7 @@ static bool tcp_ooo_try_coalesce(struct sock *sk, { bool res = tcp_try_coalesce(sk, to, from, fragstolen); - /* In case tcp_drop() is called later, update to->gso_segs */ + /* In case tcp_drop_reason() is called later, update to->gso_segs */ if (res) { u32 gso_segs = max_t(u16, 1, skb_shinfo(to)->gso_segs) + max_t(u16, 1, skb_shinfo(from)->gso_segs); @@ -4692,11 +4692,6 @@ static void tcp_drop_reason(struct sock *sk, struct sk_buff *skb, kfree_skb_reason(skb, reason); } -static void tcp_drop(struct sock *sk, struct sk_buff *skb) -{ - tcp_drop_reason(sk, skb, SKB_DROP_REASON_NOT_SPECIFIED); -} - /* This one checks to see if we can put data from the * out_of_order queue into the receive_queue. */ @@ -4724,7 +4719,7 @@ static void tcp_ofo_queue(struct sock *sk) rb_erase(&skb->rbnode, &tp->out_of_order_queue); if (unlikely(!after(TCP_SKB_CB(skb)->end_seq, tp->rcv_nxt))) { - tcp_drop(sk, skb); + tcp_drop_reason(sk, skb, SKB_DROP_REASON_TCP_OFO_DROP); continue; } @@ -5335,7 +5330,8 @@ static bool tcp_prune_ofo_queue(struct sock *sk) prev = rb_prev(node); rb_erase(node, &tp->out_of_order_queue); goal -= rb_to_skb(node)->truesize; - tcp_drop(sk, rb_to_skb(node)); + tcp_drop_reason(sk, rb_to_skb(node), + SKB_DROP_REASON_TCP_OFO_QUEUE_PRUNE); if (!prev || goal <= 0) { sk_mem_reclaim(sk); if (atomic_read(&sk->sk_rmem_alloc) <= sk->sk_rcvbuf && @@ -5437,7 +5433,7 @@ static bool tcp_should_expand_sndbuf(struct sock *sk) return false; /* If we filled the congestion window, do not expand. */ - if (tcp_packets_in_flight(tp) >= tp->snd_cwnd) + if (tcp_packets_in_flight(tp) >= tcp_snd_cwnd(tp)) return false; return true; @@ -5678,7 +5674,7 @@ static bool tcp_validate_incoming(struct sock *sk, struct sk_buff *skb, const struct tcphdr *th, int syn_inerr) { struct tcp_sock *tp = tcp_sk(sk); - bool rst_seq_match = false; + SKB_DR(reason); /* RFC1323: H1. Apply PAWS check first. */ if (tcp_fast_parse_options(sock_net(sk), skb, th, tp) && @@ -5690,6 +5686,7 @@ static bool tcp_validate_incoming(struct sock *sk, struct sk_buff *skb, LINUX_MIB_TCPACKSKIPPEDPAWS, &tp->last_oow_ack_time)) tcp_send_dupack(sk, skb); + SKB_DR_SET(reason, TCP_RFC7323_PAWS); goto discard; } /* Reset is accepted even if it did not pass PAWS. */ @@ -5711,8 +5708,9 @@ static bool tcp_validate_incoming(struct sock *sk, struct sk_buff *skb, &tp->last_oow_ack_time)) tcp_send_dupack(sk, skb); } else if (tcp_reset_check(sk, skb)) { - tcp_reset(sk, skb); + goto reset; } + SKB_DR_SET(reason, TCP_INVALID_SEQUENCE); goto discard; } @@ -5728,9 +5726,10 @@ static bool tcp_validate_incoming(struct sock *sk, struct sk_buff *skb, * Send a challenge ACK */ if (TCP_SKB_CB(skb)->seq == tp->rcv_nxt || - tcp_reset_check(sk, skb)) { - rst_seq_match = true; - } else if (tcp_is_sack(tp) && tp->rx_opt.num_sacks > 0) { + tcp_reset_check(sk, skb)) + goto reset; + + if (tcp_is_sack(tp) && tp->rx_opt.num_sacks > 0) { struct tcp_sack_block *sp = &tp->selective_acks[0]; int max_sack = sp[0].end_seq; int this_sack; @@ -5743,21 +5742,18 @@ static bool tcp_validate_incoming(struct sock *sk, struct sk_buff *skb, } if (TCP_SKB_CB(skb)->seq == max_sack) - rst_seq_match = true; + goto reset; } - if (rst_seq_match) - tcp_reset(sk, skb); - else { - /* Disable TFO if RST is out-of-order - * and no data has been received - * for current active TFO socket - */ - if (tp->syn_fastopen && !tp->data_segs_in && - sk->sk_state == TCP_ESTABLISHED) - tcp_fastopen_active_disable(sk); - tcp_send_challenge_ack(sk); - } + /* Disable TFO if RST is out-of-order + * and no data has been received + * for current active TFO socket + */ + if (tp->syn_fastopen && !tp->data_segs_in && + sk->sk_state == TCP_ESTABLISHED) + tcp_fastopen_active_disable(sk); + tcp_send_challenge_ack(sk); + SKB_DR_SET(reason, TCP_RESET); goto discard; } @@ -5772,6 +5768,7 @@ syn_challenge: TCP_INC_STATS(sock_net(sk), TCP_MIB_INERRS); NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPSYNCHALLENGE); tcp_send_challenge_ack(sk); + SKB_DR_SET(reason, TCP_INVALID_SYN); goto discard; } @@ -5780,7 +5777,12 @@ syn_challenge: return true; discard: - tcp_drop(sk, skb); + tcp_drop_reason(sk, skb, reason); + return false; + +reset: + tcp_reset(sk, skb); + __kfree_skb(skb); return false; } @@ -5926,6 +5928,7 @@ void tcp_rcv_established(struct sock *sk, struct sk_buff *skb) NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPHPHITS); /* Bulk data transfer: receiver */ + skb_dst_drop(skb); __skb_pull(skb, tcp_header_len); eaten = tcp_queue_rcv(sk, skb, &fragstolen); @@ -5967,9 +5970,11 @@ slow_path: return; step5: - if (tcp_ack(sk, skb, FLAG_SLOWPATH | FLAG_UPDATE_TS_RECENT) < 0) + reason = tcp_ack(sk, skb, FLAG_SLOWPATH | FLAG_UPDATE_TS_RECENT); + if ((int)reason < 0) { + reason = -reason; goto discard; - + } tcp_rcv_rtt_measure_ts(sk, skb); /* Process urgent data. */ @@ -6009,9 +6014,9 @@ void tcp_init_transfer(struct sock *sk, int bpf_op, struct sk_buff *skb) * retransmission has occurred. */ if (tp->total_retrans > 1 && tp->undo_marker) - tp->snd_cwnd = 1; + tcp_snd_cwnd_set(tp, 1); else - tp->snd_cwnd = tcp_init_cwnd(tp, __sk_dst_get(sk)); + tcp_snd_cwnd_set(tp, tcp_init_cwnd(tp, __sk_dst_get(sk))); tp->snd_cwnd_stamp = tcp_jiffies32; bpf_skops_established(sk, bpf_op, skb); @@ -6147,6 +6152,7 @@ static int tcp_rcv_synsent_state_process(struct sock *sk, struct sk_buff *skb, struct tcp_fastopen_cookie foc = { .len = -1 }; int saved_clamp = tp->rx_opt.mss_clamp; bool fastopen_fail; + SKB_DR(reason); tcp_parse_options(sock_net(sk), skb, &tp->rx_opt, 0, &foc); if (tp->rx_opt.saw_tstamp && tp->rx_opt.rcv_tsecr) @@ -6189,7 +6195,9 @@ static int tcp_rcv_synsent_state_process(struct sock *sk, struct sk_buff *skb, if (th->rst) { tcp_reset(sk, skb); - goto discard; +consume: + __kfree_skb(skb); + return 0; } /* rfc793: @@ -6199,9 +6207,10 @@ static int tcp_rcv_synsent_state_process(struct sock *sk, struct sk_buff *skb, * See note below! * --ANK(990513) */ - if (!th->syn) + if (!th->syn) { + SKB_DR_SET(reason, TCP_FLAGS); goto discard_and_undo; - + } /* rfc793: * "If the SYN bit is on ... * are acceptable then ... @@ -6278,13 +6287,9 @@ static int tcp_rcv_synsent_state_process(struct sock *sk, struct sk_buff *skb, tcp_enter_quickack_mode(sk, TCP_MAX_QUICKACKS); inet_csk_reset_xmit_timer(sk, ICSK_TIME_DACK, TCP_DELACK_MAX, TCP_RTO_MAX); - -discard: - tcp_drop(sk, skb); - return 0; - } else { - tcp_send_ack(sk); + goto consume; } + tcp_send_ack(sk); return -1; } @@ -6296,15 +6301,16 @@ discard: * * Otherwise (no ACK) drop the segment and return." */ - + SKB_DR_SET(reason, TCP_RESET); goto discard_and_undo; } /* PAWS check. */ if (tp->rx_opt.ts_recent_stamp && tp->rx_opt.saw_tstamp && - tcp_paws_reject(&tp->rx_opt, 0)) + tcp_paws_reject(&tp->rx_opt, 0)) { + SKB_DR_SET(reason, TCP_RFC7323_PAWS); goto discard_and_undo; - + } if (th->syn) { /* We see SYN without ACK. It is attempt of * simultaneous connect with crossed SYNs. @@ -6353,7 +6359,7 @@ discard: */ return -1; #else - goto discard; + goto consume; #endif } /* "fifth, if neither of the SYN or RST bits is set then @@ -6363,7 +6369,8 @@ discard: discard_and_undo: tcp_clear_options(&tp->rx_opt); tp->rx_opt.mss_clamp = saved_clamp; - goto discard; + tcp_drop_reason(sk, skb, reason); + return 0; reset_and_undo: tcp_clear_options(&tp->rx_opt); @@ -6418,21 +6425,26 @@ int tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb) struct request_sock *req; int queued = 0; bool acceptable; + SKB_DR(reason); switch (sk->sk_state) { case TCP_CLOSE: + SKB_DR_SET(reason, TCP_CLOSE); goto discard; case TCP_LISTEN: if (th->ack) return 1; - if (th->rst) + if (th->rst) { + SKB_DR_SET(reason, TCP_RESET); goto discard; - + } if (th->syn) { - if (th->fin) + if (th->fin) { + SKB_DR_SET(reason, TCP_FLAGS); goto discard; + } /* It is possible that we process SYN packets from backlog, * so we need to make sure to disable BH and RCU right there. */ @@ -6447,6 +6459,7 @@ int tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb) consume_skb(skb); return 0; } + SKB_DR_SET(reason, TCP_FLAGS); goto discard; case TCP_SYN_SENT: @@ -6473,13 +6486,16 @@ int tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb) WARN_ON_ONCE(sk->sk_state != TCP_SYN_RECV && sk->sk_state != TCP_FIN_WAIT1); - if (!tcp_check_req(sk, skb, req, true, &req_stolen)) + if (!tcp_check_req(sk, skb, req, true, &req_stolen)) { + SKB_DR_SET(reason, TCP_FASTOPEN); goto discard; + } } - if (!th->ack && !th->rst && !th->syn) + if (!th->ack && !th->rst && !th->syn) { + SKB_DR_SET(reason, TCP_FLAGS); goto discard; - + } if (!tcp_validate_incoming(sk, skb, th, 0)) return 0; @@ -6492,6 +6508,7 @@ int tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb) if (sk->sk_state == TCP_SYN_RECV) return 1; /* send one RST */ tcp_send_challenge_ack(sk); + SKB_DR_SET(reason, TCP_OLD_ACK); goto discard; } switch (sk->sk_state) { @@ -6585,7 +6602,7 @@ int tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb) inet_csk_reset_keepalive_timer(sk, tmo); } else { tcp_time_wait(sk, TCP_FIN_WAIT2, tmo); - goto discard; + goto consume; } break; } @@ -6593,7 +6610,7 @@ int tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb) case TCP_CLOSING: if (tp->snd_una == tp->write_seq) { tcp_time_wait(sk, TCP_TIME_WAIT, 0); - goto discard; + goto consume; } break; @@ -6601,7 +6618,7 @@ int tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb) if (tp->snd_una == tp->write_seq) { tcp_update_metrics(sk); tcp_done(sk); - goto discard; + goto consume; } break; } @@ -6652,9 +6669,13 @@ int tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb) if (!queued) { discard: - tcp_drop(sk, skb); + tcp_drop_reason(sk, skb, reason); } return 0; + +consume: + __kfree_skb(skb); + return 0; } EXPORT_SYMBOL(tcp_rcv_state_process); diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index f9cec624068d..918816ec5dd4 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -229,9 +229,8 @@ int tcp_v4_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len) orig_dport = usin->sin_port; fl4 = &inet->cork.fl.u.ip4; rt = ip_route_connect(fl4, nexthop, inet->inet_saddr, - RT_CONN_FLAGS(sk), sk->sk_bound_dev_if, - IPPROTO_TCP, - orig_sport, orig_dport, sk); + sk->sk_bound_dev_if, IPPROTO_TCP, orig_sport, + orig_dport, sk); if (IS_ERR(rt)) { err = PTR_ERR(rt); if (err == -ENETUNREACH) @@ -2066,7 +2065,6 @@ process: sk_incoming_cpu_update(sk); - sk_defer_free_flush(sk); bh_lock_sock_nested(sk); tcp_segs_in(tcp_sk(sk), skb); ret = 0; @@ -2621,7 +2619,7 @@ static void get_tcp4_sock(struct sock *sk, struct seq_file *f, int i) jiffies_to_clock_t(icsk->icsk_rto), jiffies_to_clock_t(icsk->icsk_ack.ato), (icsk->icsk_ack.quick << 1) | inet_csk_in_pingpong_mode(sk), - tp->snd_cwnd, + tcp_snd_cwnd(tp), state == TCP_LISTEN ? fastopenq->max_qlen : (tcp_in_initial_slowstart(tp) ? -1 : tp->snd_ssthresh)); diff --git a/net/ipv4/tcp_lp.c b/net/ipv4/tcp_lp.c index 82b36ec3f2f8..ae36780977d2 100644 --- a/net/ipv4/tcp_lp.c +++ b/net/ipv4/tcp_lp.c @@ -297,7 +297,7 @@ static void tcp_lp_pkts_acked(struct sock *sk, const struct ack_sample *sample) lp->flag &= ~LP_WITHIN_THR; pr_debug("TCP-LP: %05o|%5u|%5u|%15u|%15u|%15u\n", lp->flag, - tp->snd_cwnd, lp->remote_hz, lp->owd_min, lp->owd_max, + tcp_snd_cwnd(tp), lp->remote_hz, lp->owd_min, lp->owd_max, lp->sowd >> 3); if (lp->flag & LP_WITHIN_THR) @@ -313,12 +313,12 @@ static void tcp_lp_pkts_acked(struct sock *sk, const struct ack_sample *sample) /* happened within inference * drop snd_cwnd into 1 */ if (lp->flag & LP_WITHIN_INF) - tp->snd_cwnd = 1U; + tcp_snd_cwnd_set(tp, 1U); /* happened after inference * cut snd_cwnd into half */ else - tp->snd_cwnd = max(tp->snd_cwnd >> 1U, 1U); + tcp_snd_cwnd_set(tp, max(tcp_snd_cwnd(tp) >> 1U, 1U)); /* record this drop time */ lp->last_drop = now; diff --git a/net/ipv4/tcp_metrics.c b/net/ipv4/tcp_metrics.c index 0588b004ddac..7029b0e98edb 100644 --- a/net/ipv4/tcp_metrics.c +++ b/net/ipv4/tcp_metrics.c @@ -388,15 +388,15 @@ void tcp_update_metrics(struct sock *sk) if (!net->ipv4.sysctl_tcp_no_ssthresh_metrics_save && !tcp_metric_locked(tm, TCP_METRIC_SSTHRESH)) { val = tcp_metric_get(tm, TCP_METRIC_SSTHRESH); - if (val && (tp->snd_cwnd >> 1) > val) + if (val && (tcp_snd_cwnd(tp) >> 1) > val) tcp_metric_set(tm, TCP_METRIC_SSTHRESH, - tp->snd_cwnd >> 1); + tcp_snd_cwnd(tp) >> 1); } if (!tcp_metric_locked(tm, TCP_METRIC_CWND)) { val = tcp_metric_get(tm, TCP_METRIC_CWND); - if (tp->snd_cwnd > val) + if (tcp_snd_cwnd(tp) > val) tcp_metric_set(tm, TCP_METRIC_CWND, - tp->snd_cwnd); + tcp_snd_cwnd(tp)); } } else if (!tcp_in_slow_start(tp) && icsk->icsk_ca_state == TCP_CA_Open) { @@ -404,10 +404,10 @@ void tcp_update_metrics(struct sock *sk) if (!net->ipv4.sysctl_tcp_no_ssthresh_metrics_save && !tcp_metric_locked(tm, TCP_METRIC_SSTHRESH)) tcp_metric_set(tm, TCP_METRIC_SSTHRESH, - max(tp->snd_cwnd >> 1, tp->snd_ssthresh)); + max(tcp_snd_cwnd(tp) >> 1, tp->snd_ssthresh)); if (!tcp_metric_locked(tm, TCP_METRIC_CWND)) { val = tcp_metric_get(tm, TCP_METRIC_CWND); - tcp_metric_set(tm, TCP_METRIC_CWND, (val + tp->snd_cwnd) >> 1); + tcp_metric_set(tm, TCP_METRIC_CWND, (val + tcp_snd_cwnd(tp)) >> 1); } } else { /* Else slow start did not finish, cwnd is non-sense, diff --git a/net/ipv4/tcp_nv.c b/net/ipv4/tcp_nv.c index ab552356bdba..a60662f4bdf9 100644 --- a/net/ipv4/tcp_nv.c +++ b/net/ipv4/tcp_nv.c @@ -197,10 +197,10 @@ static void tcpnv_cong_avoid(struct sock *sk, u32 ack, u32 acked) } if (ca->cwnd_growth_factor < 0) { - cnt = tp->snd_cwnd << -ca->cwnd_growth_factor; + cnt = tcp_snd_cwnd(tp) << -ca->cwnd_growth_factor; tcp_cong_avoid_ai(tp, cnt, acked); } else { - cnt = max(4U, tp->snd_cwnd >> ca->cwnd_growth_factor); + cnt = max(4U, tcp_snd_cwnd(tp) >> ca->cwnd_growth_factor); tcp_cong_avoid_ai(tp, cnt, acked); } } @@ -209,7 +209,7 @@ static u32 tcpnv_recalc_ssthresh(struct sock *sk) { const struct tcp_sock *tp = tcp_sk(sk); - return max((tp->snd_cwnd * nv_loss_dec_factor) >> 10, 2U); + return max((tcp_snd_cwnd(tp) * nv_loss_dec_factor) >> 10, 2U); } static void tcpnv_state(struct sock *sk, u8 new_state) @@ -257,7 +257,7 @@ static void tcpnv_acked(struct sock *sk, const struct ack_sample *sample) return; /* Stop cwnd growth if we were in catch up mode */ - if (ca->nv_catchup && tp->snd_cwnd >= nv_min_cwnd) { + if (ca->nv_catchup && tcp_snd_cwnd(tp) >= nv_min_cwnd) { ca->nv_catchup = 0; ca->nv_allow_cwnd_growth = 0; } @@ -371,7 +371,7 @@ static void tcpnv_acked(struct sock *sk, const struct ack_sample *sample) * if cwnd < max_win, grow cwnd * else leave the same */ - if (tp->snd_cwnd > max_win) { + if (tcp_snd_cwnd(tp) > max_win) { /* there is congestion, check that it is ok * to make a CA decision * 1. We should have at least nv_dec_eval_min_calls @@ -398,20 +398,20 @@ static void tcpnv_acked(struct sock *sk, const struct ack_sample *sample) ca->nv_allow_cwnd_growth = 0; tp->snd_ssthresh = (nv_ssthresh_factor * max_win) >> 3; - if (tp->snd_cwnd - max_win > 2) { + if (tcp_snd_cwnd(tp) - max_win > 2) { /* gap > 2, we do exponential cwnd decrease */ int dec; - dec = max(2U, ((tp->snd_cwnd - max_win) * + dec = max(2U, ((tcp_snd_cwnd(tp) - max_win) * nv_cong_dec_mult) >> 7); - tp->snd_cwnd -= dec; + tcp_snd_cwnd_set(tp, tcp_snd_cwnd(tp) - dec); } else if (nv_cong_dec_mult > 0) { - tp->snd_cwnd = max_win; + tcp_snd_cwnd_set(tp, max_win); } if (ca->cwnd_growth_factor > 0) ca->cwnd_growth_factor = 0; ca->nv_no_cong_cnt = 0; - } else if (tp->snd_cwnd <= max_win - nv_pad_buffer) { + } else if (tcp_snd_cwnd(tp) <= max_win - nv_pad_buffer) { /* There is no congestion, grow cwnd if allowed*/ if (ca->nv_eval_call_cnt < nv_inc_eval_min_calls) return; @@ -444,8 +444,8 @@ static void tcpnv_acked(struct sock *sk, const struct ack_sample *sample) * (it wasn't before, if it is now is because nv * decreased it). */ - if (tp->snd_cwnd < nv_min_cwnd) - tp->snd_cwnd = nv_min_cwnd; + if (tcp_snd_cwnd(tp) < nv_min_cwnd) + tcp_snd_cwnd_set(tp, nv_min_cwnd); } } diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c index 1ca2f28c9981..5f91a9536e00 100644 --- a/net/ipv4/tcp_output.c +++ b/net/ipv4/tcp_output.c @@ -143,7 +143,7 @@ void tcp_cwnd_restart(struct sock *sk, s32 delta) { struct tcp_sock *tp = tcp_sk(sk); u32 restart_cwnd = tcp_init_cwnd(tp, __sk_dst_get(sk)); - u32 cwnd = tp->snd_cwnd; + u32 cwnd = tcp_snd_cwnd(tp); tcp_ca_event(sk, CA_EVENT_CWND_RESTART); @@ -152,7 +152,7 @@ void tcp_cwnd_restart(struct sock *sk, s32 delta) while ((delta -= inet_csk(sk)->icsk_rto) > 0 && cwnd > restart_cwnd) cwnd >>= 1; - tp->snd_cwnd = max(cwnd, restart_cwnd); + tcp_snd_cwnd_set(tp, max(cwnd, restart_cwnd)); tp->snd_cwnd_stamp = tcp_jiffies32; tp->snd_cwnd_used = 0; } @@ -1014,7 +1014,7 @@ static void tcp_tsq_write(struct sock *sk) struct tcp_sock *tp = tcp_sk(sk); if (tp->lost_out > tp->retrans_out && - tp->snd_cwnd > tcp_packets_in_flight(tp)) { + tcp_snd_cwnd(tp) > tcp_packets_in_flight(tp)) { tcp_mstamp_refresh(tp); tcp_xmit_retransmit_queue(sk); } @@ -1861,9 +1861,9 @@ static void tcp_cwnd_application_limited(struct sock *sk) /* Limited by application or receiver window. */ u32 init_win = tcp_init_cwnd(tp, __sk_dst_get(sk)); u32 win_used = max(tp->snd_cwnd_used, init_win); - if (win_used < tp->snd_cwnd) { + if (win_used < tcp_snd_cwnd(tp)) { tp->snd_ssthresh = tcp_current_ssthresh(sk); - tp->snd_cwnd = (tp->snd_cwnd + win_used) >> 1; + tcp_snd_cwnd_set(tp, (tcp_snd_cwnd(tp) + win_used) >> 1); } tp->snd_cwnd_used = 0; } @@ -2044,7 +2044,7 @@ static inline unsigned int tcp_cwnd_test(const struct tcp_sock *tp, return 1; in_flight = tcp_packets_in_flight(tp); - cwnd = tp->snd_cwnd; + cwnd = tcp_snd_cwnd(tp); if (in_flight >= cwnd) return 0; @@ -2197,12 +2197,12 @@ static bool tcp_tso_should_defer(struct sock *sk, struct sk_buff *skb, in_flight = tcp_packets_in_flight(tp); BUG_ON(tcp_skb_pcount(skb) <= 1); - BUG_ON(tp->snd_cwnd <= in_flight); + BUG_ON(tcp_snd_cwnd(tp) <= in_flight); send_win = tcp_wnd_end(tp) - TCP_SKB_CB(skb)->seq; /* From in_flight test above, we know that cwnd > in_flight. */ - cong_win = (tp->snd_cwnd - in_flight) * tp->mss_cache; + cong_win = (tcp_snd_cwnd(tp) - in_flight) * tp->mss_cache; limit = min(send_win, cong_win); @@ -2216,7 +2216,7 @@ static bool tcp_tso_should_defer(struct sock *sk, struct sk_buff *skb, win_divisor = READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_tso_win_divisor); if (win_divisor) { - u32 chunk = min(tp->snd_wnd, tp->snd_cwnd * tp->mss_cache); + u32 chunk = min(tp->snd_wnd, tcp_snd_cwnd(tp) * tp->mss_cache); /* If at least some fraction of a window is available, * just use it. @@ -2346,7 +2346,7 @@ static int tcp_mtu_probe(struct sock *sk) if (likely(!icsk->icsk_mtup.enabled || icsk->icsk_mtup.probe_size || inet_csk(sk)->icsk_ca_state != TCP_CA_Open || - tp->snd_cwnd < 11 || + tcp_snd_cwnd(tp) < 11 || tp->rx_opt.num_sacks || tp->rx_opt.dsack)) return -1; @@ -2382,7 +2382,7 @@ static int tcp_mtu_probe(struct sock *sk) return 0; /* Do we need to wait to drain cwnd? With none in flight, don't stall */ - if (tcp_packets_in_flight(tp) + 2 > tp->snd_cwnd) { + if (tcp_packets_in_flight(tp) + 2 > tcp_snd_cwnd(tp)) { if (!tcp_packets_in_flight(tp)) return -1; else @@ -2451,7 +2451,7 @@ static int tcp_mtu_probe(struct sock *sk) if (!tcp_transmit_skb(sk, nskb, 1, GFP_ATOMIC)) { /* Decrement cwnd here because we are sending * effectively two packets. */ - tp->snd_cwnd--; + tcp_snd_cwnd_set(tp, tcp_snd_cwnd(tp) - 1); tcp_event_new_data_sent(sk, nskb); icsk->icsk_mtup.probe_size = tcp_mss_to_mtu(sk, nskb->len); @@ -2709,7 +2709,7 @@ repair: else tcp_chrono_stop(sk, TCP_CHRONO_RWND_LIMITED); - is_cwnd_limited |= (tcp_packets_in_flight(tp) >= tp->snd_cwnd); + is_cwnd_limited |= (tcp_packets_in_flight(tp) >= tcp_snd_cwnd(tp)); if (likely(sent_pkts || is_cwnd_limited)) tcp_cwnd_validate(sk, is_cwnd_limited); @@ -2819,7 +2819,7 @@ void tcp_send_loss_probe(struct sock *sk) if (unlikely(!skb)) { WARN_ONCE(tp->packets_out, "invalid inflight: %u state %u cwnd %u mss %d\n", - tp->packets_out, sk->sk_state, tp->snd_cwnd, mss); + tp->packets_out, sk->sk_state, tcp_snd_cwnd(tp), mss); inet_csk(sk)->icsk_pending = 0; return; } @@ -3303,7 +3303,7 @@ void tcp_xmit_retransmit_queue(struct sock *sk) if (!hole) tp->retransmit_skb_hint = skb; - segs = tp->snd_cwnd - tcp_packets_in_flight(tp); + segs = tcp_snd_cwnd(tp) - tcp_packets_in_flight(tp); if (segs <= 0) break; sacked = TCP_SKB_CB(skb)->sacked; diff --git a/net/ipv4/tcp_rate.c b/net/ipv4/tcp_rate.c index 9a8e014d9b5b..a8f6d9d06f2e 100644 --- a/net/ipv4/tcp_rate.c +++ b/net/ipv4/tcp_rate.c @@ -200,7 +200,7 @@ void tcp_rate_check_app_limited(struct sock *sk) /* Nothing in sending host's qdisc queues or NIC tx queue. */ sk_wmem_alloc_get(sk) < SKB_TRUESIZE(1) && /* We are not limited by CWND. */ - tcp_packets_in_flight(tp) < tp->snd_cwnd && + tcp_packets_in_flight(tp) < tcp_snd_cwnd(tp) && /* All lost packets have been retransmitted. */ tp->lost_out <= tp->retrans_out) tp->app_limited = diff --git a/net/ipv4/tcp_recovery.c b/net/ipv4/tcp_recovery.c index fd113f6226ef..48f30e7209f2 100644 --- a/net/ipv4/tcp_recovery.c +++ b/net/ipv4/tcp_recovery.c @@ -2,11 +2,6 @@ #include <linux/tcp.h> #include <net/tcp.h> -static bool tcp_rack_sent_after(u64 t1, u64 t2, u32 seq1, u32 seq2) -{ - return t1 > t2 || (t1 == t2 && after(seq1, seq2)); -} - static u32 tcp_rack_reo_wnd(const struct sock *sk) { struct tcp_sock *tp = tcp_sk(sk); @@ -77,9 +72,9 @@ static void tcp_rack_detect_loss(struct sock *sk, u32 *reo_timeout) !(scb->sacked & TCPCB_SACKED_RETRANS)) continue; - if (!tcp_rack_sent_after(tp->rack.mstamp, - tcp_skb_timestamp_us(skb), - tp->rack.end_seq, scb->end_seq)) + if (!tcp_skb_sent_after(tp->rack.mstamp, + tcp_skb_timestamp_us(skb), + tp->rack.end_seq, scb->end_seq)) break; /* A packet is lost if it has not been s/acked beyond @@ -140,8 +135,8 @@ void tcp_rack_advance(struct tcp_sock *tp, u8 sacked, u32 end_seq, } tp->rack.advanced = 1; tp->rack.rtt_us = rtt_us; - if (tcp_rack_sent_after(xmit_time, tp->rack.mstamp, - end_seq, tp->rack.end_seq)) { + if (tcp_skb_sent_after(xmit_time, tp->rack.mstamp, + end_seq, tp->rack.end_seq)) { tp->rack.mstamp = xmit_time; tp->rack.end_seq = end_seq; } diff --git a/net/ipv4/tcp_scalable.c b/net/ipv4/tcp_scalable.c index 5842081bc8a2..862b96248a92 100644 --- a/net/ipv4/tcp_scalable.c +++ b/net/ipv4/tcp_scalable.c @@ -27,7 +27,7 @@ static void tcp_scalable_cong_avoid(struct sock *sk, u32 ack, u32 acked) if (!acked) return; } - tcp_cong_avoid_ai(tp, min(tp->snd_cwnd, TCP_SCALABLE_AI_CNT), + tcp_cong_avoid_ai(tp, min(tcp_snd_cwnd(tp), TCP_SCALABLE_AI_CNT), acked); } @@ -35,7 +35,7 @@ static u32 tcp_scalable_ssthresh(struct sock *sk) { const struct tcp_sock *tp = tcp_sk(sk); - return max(tp->snd_cwnd - (tp->snd_cwnd>>TCP_SCALABLE_MD_SCALE), 2U); + return max(tcp_snd_cwnd(tp) - (tcp_snd_cwnd(tp)>>TCP_SCALABLE_MD_SCALE), 2U); } static struct tcp_congestion_ops tcp_scalable __read_mostly = { diff --git a/net/ipv4/tcp_vegas.c b/net/ipv4/tcp_vegas.c index c8003c8aad2c..786848ad37ea 100644 --- a/net/ipv4/tcp_vegas.c +++ b/net/ipv4/tcp_vegas.c @@ -159,7 +159,7 @@ EXPORT_SYMBOL_GPL(tcp_vegas_cwnd_event); static inline u32 tcp_vegas_ssthresh(struct tcp_sock *tp) { - return min(tp->snd_ssthresh, tp->snd_cwnd); + return min(tp->snd_ssthresh, tcp_snd_cwnd(tp)); } static void tcp_vegas_cong_avoid(struct sock *sk, u32 ack, u32 acked) @@ -217,14 +217,14 @@ static void tcp_vegas_cong_avoid(struct sock *sk, u32 ack, u32 acked) * This is: * (actual rate in segments) * baseRTT */ - target_cwnd = (u64)tp->snd_cwnd * vegas->baseRTT; + target_cwnd = (u64)tcp_snd_cwnd(tp) * vegas->baseRTT; do_div(target_cwnd, rtt); /* Calculate the difference between the window we had, * and the window we would like to have. This quantity * is the "Diff" from the Arizona Vegas papers. */ - diff = tp->snd_cwnd * (rtt-vegas->baseRTT) / vegas->baseRTT; + diff = tcp_snd_cwnd(tp) * (rtt-vegas->baseRTT) / vegas->baseRTT; if (diff > gamma && tcp_in_slow_start(tp)) { /* Going too fast. Time to slow down @@ -238,7 +238,8 @@ static void tcp_vegas_cong_avoid(struct sock *sk, u32 ack, u32 acked) * truncation robs us of full link * utilization. */ - tp->snd_cwnd = min(tp->snd_cwnd, (u32)target_cwnd+1); + tcp_snd_cwnd_set(tp, min(tcp_snd_cwnd(tp), + (u32)target_cwnd + 1)); tp->snd_ssthresh = tcp_vegas_ssthresh(tp); } else if (tcp_in_slow_start(tp)) { @@ -254,14 +255,14 @@ static void tcp_vegas_cong_avoid(struct sock *sk, u32 ack, u32 acked) /* The old window was too fast, so * we slow down. */ - tp->snd_cwnd--; + tcp_snd_cwnd_set(tp, tcp_snd_cwnd(tp) - 1); tp->snd_ssthresh = tcp_vegas_ssthresh(tp); } else if (diff < alpha) { /* We don't have enough extra packets * in the network, so speed up. */ - tp->snd_cwnd++; + tcp_snd_cwnd_set(tp, tcp_snd_cwnd(tp) + 1); } else { /* Sending just as fast as we * should be. @@ -269,10 +270,10 @@ static void tcp_vegas_cong_avoid(struct sock *sk, u32 ack, u32 acked) } } - if (tp->snd_cwnd < 2) - tp->snd_cwnd = 2; - else if (tp->snd_cwnd > tp->snd_cwnd_clamp) - tp->snd_cwnd = tp->snd_cwnd_clamp; + if (tcp_snd_cwnd(tp) < 2) + tcp_snd_cwnd_set(tp, 2); + else if (tcp_snd_cwnd(tp) > tp->snd_cwnd_clamp) + tcp_snd_cwnd_set(tp, tp->snd_cwnd_clamp); tp->snd_ssthresh = tcp_current_ssthresh(sk); } diff --git a/net/ipv4/tcp_veno.c b/net/ipv4/tcp_veno.c index cd50a61c9976..366ff6f214b2 100644 --- a/net/ipv4/tcp_veno.c +++ b/net/ipv4/tcp_veno.c @@ -146,11 +146,11 @@ static void tcp_veno_cong_avoid(struct sock *sk, u32 ack, u32 acked) rtt = veno->minrtt; - target_cwnd = (u64)tp->snd_cwnd * veno->basertt; + target_cwnd = (u64)tcp_snd_cwnd(tp) * veno->basertt; target_cwnd <<= V_PARAM_SHIFT; do_div(target_cwnd, rtt); - veno->diff = (tp->snd_cwnd << V_PARAM_SHIFT) - target_cwnd; + veno->diff = (tcp_snd_cwnd(tp) << V_PARAM_SHIFT) - target_cwnd; if (tcp_in_slow_start(tp)) { /* Slow start. */ @@ -164,15 +164,15 @@ static void tcp_veno_cong_avoid(struct sock *sk, u32 ack, u32 acked) /* In the "non-congestive state", increase cwnd * every rtt. */ - tcp_cong_avoid_ai(tp, tp->snd_cwnd, acked); + tcp_cong_avoid_ai(tp, tcp_snd_cwnd(tp), acked); } else { /* In the "congestive state", increase cwnd * every other rtt. */ - if (tp->snd_cwnd_cnt >= tp->snd_cwnd) { + if (tp->snd_cwnd_cnt >= tcp_snd_cwnd(tp)) { if (veno->inc && - tp->snd_cwnd < tp->snd_cwnd_clamp) { - tp->snd_cwnd++; + tcp_snd_cwnd(tp) < tp->snd_cwnd_clamp) { + tcp_snd_cwnd_set(tp, tcp_snd_cwnd(tp) + 1); veno->inc = 0; } else veno->inc = 1; @@ -181,10 +181,10 @@ static void tcp_veno_cong_avoid(struct sock *sk, u32 ack, u32 acked) tp->snd_cwnd_cnt += acked; } done: - if (tp->snd_cwnd < 2) - tp->snd_cwnd = 2; - else if (tp->snd_cwnd > tp->snd_cwnd_clamp) - tp->snd_cwnd = tp->snd_cwnd_clamp; + if (tcp_snd_cwnd(tp) < 2) + tcp_snd_cwnd_set(tp, 2); + else if (tcp_snd_cwnd(tp) > tp->snd_cwnd_clamp) + tcp_snd_cwnd_set(tp, tp->snd_cwnd_clamp); } /* Wipe the slate clean for the next rtt. */ /* veno->cntrtt = 0; */ @@ -199,10 +199,10 @@ static u32 tcp_veno_ssthresh(struct sock *sk) if (veno->diff < beta) /* in "non-congestive state", cut cwnd by 1/5 */ - return max(tp->snd_cwnd * 4 / 5, 2U); + return max(tcp_snd_cwnd(tp) * 4 / 5, 2U); else /* in "congestive state", cut cwnd by 1/2 */ - return max(tp->snd_cwnd >> 1U, 2U); + return max(tcp_snd_cwnd(tp) >> 1U, 2U); } static struct tcp_congestion_ops tcp_veno __read_mostly = { diff --git a/net/ipv4/tcp_westwood.c b/net/ipv4/tcp_westwood.c index b2e05c4cea00..c6e97141eef2 100644 --- a/net/ipv4/tcp_westwood.c +++ b/net/ipv4/tcp_westwood.c @@ -244,7 +244,8 @@ static void tcp_westwood_event(struct sock *sk, enum tcp_ca_event event) switch (event) { case CA_EVENT_COMPLETE_CWR: - tp->snd_cwnd = tp->snd_ssthresh = tcp_westwood_bw_rttmin(sk); + tp->snd_ssthresh = tcp_westwood_bw_rttmin(sk); + tcp_snd_cwnd_set(tp, tp->snd_ssthresh); break; case CA_EVENT_LOSS: tp->snd_ssthresh = tcp_westwood_bw_rttmin(sk); diff --git a/net/ipv4/tcp_yeah.c b/net/ipv4/tcp_yeah.c index 07c4c93b9fdb..18b07ff5d20e 100644 --- a/net/ipv4/tcp_yeah.c +++ b/net/ipv4/tcp_yeah.c @@ -71,11 +71,11 @@ static void tcp_yeah_cong_avoid(struct sock *sk, u32 ack, u32 acked) if (!yeah->doing_reno_now) { /* Scalable */ - tcp_cong_avoid_ai(tp, min(tp->snd_cwnd, TCP_SCALABLE_AI_CNT), + tcp_cong_avoid_ai(tp, min(tcp_snd_cwnd(tp), TCP_SCALABLE_AI_CNT), acked); } else { /* Reno */ - tcp_cong_avoid_ai(tp, tp->snd_cwnd, acked); + tcp_cong_avoid_ai(tp, tcp_snd_cwnd(tp), acked); } /* The key players are v_vegas.beg_snd_una and v_beg_snd_nxt. @@ -130,7 +130,7 @@ do_vegas: /* Compute excess number of packets above bandwidth * Avoid doing full 64 bit divide. */ - bw = tp->snd_cwnd; + bw = tcp_snd_cwnd(tp); bw *= rtt - yeah->vegas.baseRTT; do_div(bw, rtt); queue = bw; @@ -138,20 +138,20 @@ do_vegas: if (queue > TCP_YEAH_ALPHA || rtt - yeah->vegas.baseRTT > (yeah->vegas.baseRTT / TCP_YEAH_PHY)) { if (queue > TCP_YEAH_ALPHA && - tp->snd_cwnd > yeah->reno_count) { + tcp_snd_cwnd(tp) > yeah->reno_count) { u32 reduction = min(queue / TCP_YEAH_GAMMA , - tp->snd_cwnd >> TCP_YEAH_EPSILON); + tcp_snd_cwnd(tp) >> TCP_YEAH_EPSILON); - tp->snd_cwnd -= reduction; + tcp_snd_cwnd_set(tp, tcp_snd_cwnd(tp) - reduction); - tp->snd_cwnd = max(tp->snd_cwnd, - yeah->reno_count); + tcp_snd_cwnd_set(tp, max(tcp_snd_cwnd(tp), + yeah->reno_count)); - tp->snd_ssthresh = tp->snd_cwnd; + tp->snd_ssthresh = tcp_snd_cwnd(tp); } if (yeah->reno_count <= 2) - yeah->reno_count = max(tp->snd_cwnd>>1, 2U); + yeah->reno_count = max(tcp_snd_cwnd(tp)>>1, 2U); else yeah->reno_count++; @@ -176,7 +176,7 @@ do_vegas: */ yeah->vegas.beg_snd_una = yeah->vegas.beg_snd_nxt; yeah->vegas.beg_snd_nxt = tp->snd_nxt; - yeah->vegas.beg_snd_cwnd = tp->snd_cwnd; + yeah->vegas.beg_snd_cwnd = tcp_snd_cwnd(tp); /* Wipe the slate clean for the next RTT. */ yeah->vegas.cntRTT = 0; @@ -193,16 +193,16 @@ static u32 tcp_yeah_ssthresh(struct sock *sk) if (yeah->doing_reno_now < TCP_YEAH_RHO) { reduction = yeah->lastQ; - reduction = min(reduction, max(tp->snd_cwnd>>1, 2U)); + reduction = min(reduction, max(tcp_snd_cwnd(tp)>>1, 2U)); - reduction = max(reduction, tp->snd_cwnd >> TCP_YEAH_DELTA); + reduction = max(reduction, tcp_snd_cwnd(tp) >> TCP_YEAH_DELTA); } else - reduction = max(tp->snd_cwnd>>1, 2U); + reduction = max(tcp_snd_cwnd(tp)>>1, 2U); yeah->fast_count = 0; yeah->reno_count = max(yeah->reno_count>>1, 2U); - return max_t(int, tp->snd_cwnd - reduction, 2); + return max_t(int, tcp_snd_cwnd(tp) - reduction, 2); } static struct tcp_congestion_ops tcp_yeah __read_mostly = { diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c index 6b4d8361560f..9d5071c79c95 100644 --- a/net/ipv4/udp.c +++ b/net/ipv4/udp.c @@ -1726,7 +1726,7 @@ int udp_ioctl(struct sock *sk, int cmd, unsigned long arg) EXPORT_SYMBOL(udp_ioctl); struct sk_buff *__skb_recv_udp(struct sock *sk, unsigned int flags, - int noblock, int *off, int *err) + int *off, int *err) { struct sk_buff_head *sk_queue = &sk->sk_receive_queue; struct sk_buff_head *queue; @@ -1735,7 +1735,6 @@ struct sk_buff *__skb_recv_udp(struct sock *sk, unsigned int flags, int error; queue = &udp_sk(sk)->reader_queue; - flags |= noblock ? MSG_DONTWAIT : 0; timeo = sock_rcvtimeo(sk, flags & MSG_DONTWAIT); do { struct sk_buff *skb; @@ -1805,7 +1804,7 @@ int udp_read_sock(struct sock *sk, read_descriptor_t *desc, struct sk_buff *skb; int err, used; - skb = skb_recv_udp(sk, 0, 1, &err); + skb = skb_recv_udp(sk, MSG_DONTWAIT, &err); if (!skb) return err; @@ -1843,8 +1842,8 @@ EXPORT_SYMBOL(udp_read_sock); * return it, otherwise we block. */ -int udp_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, int noblock, - int flags, int *addr_len) +int udp_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, int flags, + int *addr_len) { struct inet_sock *inet = inet_sk(sk); DECLARE_SOCKADDR(struct sockaddr_in *, sin, msg->msg_name); @@ -1859,7 +1858,7 @@ int udp_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, int noblock, try_again: off = sk_peek_offset(sk, flags); - skb = __skb_recv_udp(sk, flags, noblock, &off, &err); + skb = __skb_recv_udp(sk, flags, &off, &err); if (!skb) return err; @@ -1910,7 +1909,7 @@ try_again: UDP_INC_STATS(sock_net(sk), UDP_MIB_INDATAGRAMS, is_udplite); - sock_recv_ts_and_drops(msg, sk, skb); + sock_recv_cmsgs(msg, sk, skb); /* Copy the address. */ if (sin) { diff --git a/net/ipv4/udp_bpf.c b/net/ipv4/udp_bpf.c index bbe6569c9ad3..ff15918b7bdc 100644 --- a/net/ipv4/udp_bpf.c +++ b/net/ipv4/udp_bpf.c @@ -11,14 +11,13 @@ static struct proto *udpv6_prot_saved __read_mostly; static int sk_udp_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, - int noblock, int flags, int *addr_len) + int flags, int *addr_len) { #if IS_ENABLED(CONFIG_IPV6) if (sk->sk_family == AF_INET6) - return udpv6_prot_saved->recvmsg(sk, msg, len, noblock, flags, - addr_len); + return udpv6_prot_saved->recvmsg(sk, msg, len, flags, addr_len); #endif - return udp_prot.recvmsg(sk, msg, len, noblock, flags, addr_len); + return udp_prot.recvmsg(sk, msg, len, flags, addr_len); } static bool udp_sk_has_data(struct sock *sk) @@ -61,7 +60,7 @@ static int udp_msg_wait_data(struct sock *sk, struct sk_psock *psock, } static int udp_bpf_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, - int nonblock, int flags, int *addr_len) + int flags, int *addr_len) { struct sk_psock *psock; int copied, ret; @@ -71,10 +70,10 @@ static int udp_bpf_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, psock = sk_psock_get(sk); if (unlikely(!psock)) - return sk_udp_recvmsg(sk, msg, len, nonblock, flags, addr_len); + return sk_udp_recvmsg(sk, msg, len, flags, addr_len); if (!psock_has_data(psock)) { - ret = sk_udp_recvmsg(sk, msg, len, nonblock, flags, addr_len); + ret = sk_udp_recvmsg(sk, msg, len, flags, addr_len); goto out; } @@ -84,12 +83,12 @@ msg_bytes_ready: long timeo; int data; - timeo = sock_rcvtimeo(sk, nonblock); + timeo = sock_rcvtimeo(sk, flags & MSG_DONTWAIT); data = udp_msg_wait_data(sk, psock, timeo); if (data) { if (psock_has_data(psock)) goto msg_bytes_ready; - ret = sk_udp_recvmsg(sk, msg, len, nonblock, flags, addr_len); + ret = sk_udp_recvmsg(sk, msg, len, flags, addr_len); goto out; } copied = -EAGAIN; diff --git a/net/ipv4/udp_impl.h b/net/ipv4/udp_impl.h index 2878d8285caf..4ba7a88a1b1d 100644 --- a/net/ipv4/udp_impl.h +++ b/net/ipv4/udp_impl.h @@ -17,8 +17,8 @@ int udp_setsockopt(struct sock *sk, int level, int optname, sockptr_t optval, int udp_getsockopt(struct sock *sk, int level, int optname, char __user *optval, int __user *optlen); -int udp_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, int noblock, - int flags, int *addr_len); +int udp_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, int flags, + int *addr_len); int udp_sendpage(struct sock *sk, struct page *page, int offset, size_t size, int flags); void udp_destroy_sock(struct sock *sk); diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index b22504176588..cde242dca530 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -335,7 +335,7 @@ static int snmp6_alloc_dev(struct inet6_dev *idev) { int i; - idev->stats.ipv6 = alloc_percpu(struct ipstats_mib); + idev->stats.ipv6 = alloc_percpu_gfp(struct ipstats_mib, GFP_KERNEL_ACCOUNT); if (!idev->stats.ipv6) goto err_ip; @@ -351,7 +351,7 @@ static int snmp6_alloc_dev(struct inet6_dev *idev) if (!idev->stats.icmpv6dev) goto err_icmp; idev->stats.icmpv6msgdev = kzalloc(sizeof(struct icmpv6msg_mib_device), - GFP_KERNEL); + GFP_KERNEL_ACCOUNT); if (!idev->stats.icmpv6msgdev) goto err_icmpmsg; @@ -375,7 +375,7 @@ static struct inet6_dev *ipv6_add_dev(struct net_device *dev) if (dev->mtu < IPV6_MIN_MTU && dev != blackhole_netdev) return ERR_PTR(-EINVAL); - ndev = kzalloc(sizeof(struct inet6_dev), GFP_KERNEL); + ndev = kzalloc(sizeof(*ndev), GFP_KERNEL_ACCOUNT); if (!ndev) return ERR_PTR(err); @@ -797,6 +797,7 @@ static void dev_forward_change(struct inet6_dev *idev) { struct net_device *dev; struct inet6_ifaddr *ifa; + LIST_HEAD(tmp_addr_list); if (!idev) return; @@ -815,14 +816,24 @@ static void dev_forward_change(struct inet6_dev *idev) } } + read_lock_bh(&idev->lock); list_for_each_entry(ifa, &idev->addr_list, if_list) { if (ifa->flags&IFA_F_TENTATIVE) continue; + list_add_tail(&ifa->if_list_aux, &tmp_addr_list); + } + read_unlock_bh(&idev->lock); + + while (!list_empty(&tmp_addr_list)) { + ifa = list_first_entry(&tmp_addr_list, + struct inet6_ifaddr, if_list_aux); + list_del(&ifa->if_list_aux); if (idev->cnf.forwarding) addrconf_join_anycast(ifa); else addrconf_leave_anycast(ifa); } + inet6_netconf_notify_devconf(dev_net(dev), RTM_NEWNETCONF, NETCONFA_FORWARDING, dev->ifindex, &idev->cnf); @@ -3728,7 +3739,8 @@ static int addrconf_ifdown(struct net_device *dev, bool unregister) unsigned long event = unregister ? NETDEV_UNREGISTER : NETDEV_DOWN; struct net *net = dev_net(dev); struct inet6_dev *idev; - struct inet6_ifaddr *ifa, *tmp; + struct inet6_ifaddr *ifa; + LIST_HEAD(tmp_addr_list); bool keep_addr = false; bool was_ready; int state, i; @@ -3820,16 +3832,23 @@ restart: write_lock_bh(&idev->lock); } - list_for_each_entry_safe(ifa, tmp, &idev->addr_list, if_list) { + list_for_each_entry(ifa, &idev->addr_list, if_list) + list_add_tail(&ifa->if_list_aux, &tmp_addr_list); + write_unlock_bh(&idev->lock); + + while (!list_empty(&tmp_addr_list)) { struct fib6_info *rt = NULL; bool keep; + ifa = list_first_entry(&tmp_addr_list, + struct inet6_ifaddr, if_list_aux); + list_del(&ifa->if_list_aux); + addrconf_del_dad_work(ifa); keep = keep_addr && (ifa->flags & IFA_F_PERMANENT) && !addr_is_local(&ifa->addr); - write_unlock_bh(&idev->lock); spin_lock_bh(&ifa->lock); if (keep) { @@ -3860,15 +3879,14 @@ restart: addrconf_leave_solict(ifa->idev, &ifa->addr); } - write_lock_bh(&idev->lock); if (!keep) { + write_lock_bh(&idev->lock); list_del_rcu(&ifa->if_list); + write_unlock_bh(&idev->lock); in6_ifa_put(ifa); } } - write_unlock_bh(&idev->lock); - /* Step 5: Discard anycast and multicast list */ if (unregister) { ipv6_ac_destroy_dev(idev); @@ -4201,7 +4219,8 @@ static void addrconf_dad_completed(struct inet6_ifaddr *ifp, bool bump_id, send_rs = send_mld && ipv6_accept_ra(ifp->idev) && ifp->idev->cnf.rtr_solicits != 0 && - (dev->flags&IFF_LOOPBACK) == 0; + (dev->flags & IFF_LOOPBACK) == 0 && + (dev->type != ARPHRD_TUNNEL); read_unlock_bh(&ifp->idev->lock); /* While dad is in progress mld report's source address is in6_addrany. @@ -5569,6 +5588,7 @@ static inline void ipv6_store_devconf(struct ipv6_devconf *cnf, array[DEVCONF_IOAM6_ID] = cnf->ioam6_id; array[DEVCONF_IOAM6_ID_WIDE] = cnf->ioam6_id_wide; array[DEVCONF_NDISC_EVICT_NOCARRIER] = cnf->ndisc_evict_nocarrier; + array[DEVCONF_ACCEPT_UNSOLICITED_NA] = cnf->accept_unsolicited_na; } static inline size_t inet6_ifla6_size(void) @@ -7020,6 +7040,15 @@ static const struct ctl_table addrconf_sysctl[] = { .extra2 = (void *)SYSCTL_ONE, }, { + .procname = "accept_unsolicited_na", + .data = &ipv6_devconf.accept_unsolicited_na, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = proc_dointvec_minmax, + .extra1 = (void *)SYSCTL_ZERO, + .extra2 = (void *)SYSCTL_ONE, + }, + { /* sentinel */ } }; @@ -7031,7 +7060,7 @@ static int __addrconf_sysctl_register(struct net *net, char *dev_name, struct ctl_table *table; char path[sizeof("net/ipv6/conf/") + IFNAMSIZ]; - table = kmemdup(addrconf_sysctl, sizeof(addrconf_sysctl), GFP_KERNEL); + table = kmemdup(addrconf_sysctl, sizeof(addrconf_sysctl), GFP_KERNEL_ACCOUNT); if (!table) goto out; diff --git a/net/ipv6/af_inet6.c b/net/ipv6/af_inet6.c index 7d7b7523d126..70564ddccc46 100644 --- a/net/ipv6/af_inet6.c +++ b/net/ipv6/af_inet6.c @@ -318,7 +318,7 @@ static int __inet6_bind(struct sock *sk, struct sockaddr *uaddr, int addr_len, /* Binding to v4-mapped address on a v6-only socket * makes no sense */ - if (sk->sk_ipv6only) { + if (ipv6_only_sock(sk)) { err = -EINVAL; goto out; } @@ -654,7 +654,7 @@ int inet6_sendmsg(struct socket *sock, struct msghdr *msg, size_t size) } INDIRECT_CALLABLE_DECLARE(int udpv6_recvmsg(struct sock *, struct msghdr *, - size_t, int, int, int *)); + size_t, int, int *)); int inet6_recvmsg(struct socket *sock, struct msghdr *msg, size_t size, int flags) { @@ -669,8 +669,7 @@ int inet6_recvmsg(struct socket *sock, struct msghdr *msg, size_t size, /* IPV6_ADDRFORM can change sk->sk_prot under us. */ prot = READ_ONCE(sk->sk_prot); err = INDIRECT_CALL_2(prot->recvmsg, tcp_recvmsg, udpv6_recvmsg, - sk, msg, size, flags & MSG_DONTWAIT, - flags & ~MSG_DONTWAIT, &addr_len); + sk, msg, size, flags, &addr_len); if (err >= 0) msg->msg_namelen = addr_len; return err; diff --git a/net/ipv6/datagram.c b/net/ipv6/datagram.c index 206f66310a88..39b2327edc4e 100644 --- a/net/ipv6/datagram.c +++ b/net/ipv6/datagram.c @@ -145,7 +145,7 @@ int __ip6_datagram_connect(struct sock *sk, struct sockaddr *uaddr, int err; if (usin->sin6_family == AF_INET) { - if (__ipv6_only_sock(sk)) + if (ipv6_only_sock(sk)) return -EAFNOSUPPORT; err = __ip4_datagram_connect(sk, uaddr, addr_len); goto ipv4_connected; @@ -178,7 +178,7 @@ int __ip6_datagram_connect(struct sock *sk, struct sockaddr *uaddr, if (addr_type & IPV6_ADDR_MAPPED) { struct sockaddr_in sin; - if (__ipv6_only_sock(sk)) { + if (ipv6_only_sock(sk)) { err = -ENETUNREACH; goto out; } diff --git a/net/ipv6/exthdrs.c b/net/ipv6/exthdrs.c index 658d5eabaf7e..a8d961d3a477 100644 --- a/net/ipv6/exthdrs.c +++ b/net/ipv6/exthdrs.c @@ -90,12 +90,13 @@ static bool ip6_tlvopt_unknown(struct sk_buff *skb, int optoff, break; fallthrough; case 2: /* send ICMP PARM PROB regardless and drop packet */ - icmpv6_param_prob(skb, ICMPV6_UNK_OPTION, optoff); + icmpv6_param_prob_reason(skb, ICMPV6_UNK_OPTION, optoff, + SKB_DROP_REASON_UNHANDLED_PROTO); return false; } drop: - kfree_skb(skb); + kfree_skb_reason(skb, SKB_DROP_REASON_UNHANDLED_PROTO); return false; } @@ -218,7 +219,7 @@ static bool ip6_parse_tlv(bool hopbyhop, if (len == 0) return true; bad: - kfree_skb(skb); + kfree_skb_reason(skb, SKB_DROP_REASON_IP_INHDR); return false; } @@ -232,6 +233,7 @@ static bool ipv6_dest_hao(struct sk_buff *skb, int optoff) struct ipv6_destopt_hao *hao; struct inet6_skb_parm *opt = IP6CB(skb); struct ipv6hdr *ipv6h = ipv6_hdr(skb); + SKB_DR(reason); int ret; if (opt->dsthao) { @@ -246,19 +248,23 @@ static bool ipv6_dest_hao(struct sk_buff *skb, int optoff) if (hao->length != 16) { net_dbg_ratelimited("hao invalid option length = %d\n", hao->length); + SKB_DR_SET(reason, IP_INHDR); goto discard; } if (!(ipv6_addr_type(&hao->addr) & IPV6_ADDR_UNICAST)) { net_dbg_ratelimited("hao is not an unicast addr: %pI6\n", &hao->addr); + SKB_DR_SET(reason, INVALID_PROTO); goto discard; } ret = xfrm6_input_addr(skb, (xfrm_address_t *)&ipv6h->daddr, (xfrm_address_t *)&hao->addr, IPPROTO_DSTOPTS); - if (unlikely(ret < 0)) + if (unlikely(ret < 0)) { + SKB_DR_SET(reason, XFRM_POLICY); goto discard; + } if (skb_cloned(skb)) { if (pskb_expand_head(skb, 0, 0, GFP_ATOMIC)) @@ -281,7 +287,7 @@ static bool ipv6_dest_hao(struct sk_buff *skb, int optoff) return true; discard: - kfree_skb(skb); + kfree_skb_reason(skb, reason); return false; } #endif @@ -487,7 +493,6 @@ static int ipv6_rpl_srh_rcv(struct sk_buff *skb) struct net *net = dev_net(skb->dev); struct inet6_dev *idev; struct ipv6hdr *oldhdr; - struct in6_addr addr; unsigned char *buf; int accept_rpl_seg; int i, err; @@ -616,9 +621,7 @@ looped_back: return -1; } - addr = ipv6_hdr(skb)->daddr; - ipv6_hdr(skb)->daddr = ohdr->rpl_segaddr[i]; - ohdr->rpl_segaddr[i] = addr; + swap(ipv6_hdr(skb)->daddr, ohdr->rpl_segaddr[i]); ipv6_rpl_srh_compress(chdr, ohdr, &ipv6_hdr(skb)->daddr, n); @@ -934,7 +937,7 @@ static bool ipv6_hop_ra(struct sk_buff *skb, int optoff) } net_dbg_ratelimited("ipv6_hop_ra: wrong RA length %d\n", nh[optoff + 1]); - kfree_skb(skb); + kfree_skb_reason(skb, SKB_DROP_REASON_IP_INHDR); return false; } @@ -988,7 +991,7 @@ ignore: return true; drop: - kfree_skb(skb); + kfree_skb_reason(skb, SKB_DROP_REASON_IP_INHDR); return false; } @@ -997,31 +1000,30 @@ drop: static bool ipv6_hop_jumbo(struct sk_buff *skb, int optoff) { const unsigned char *nh = skb_network_header(skb); - struct inet6_dev *idev = __in6_dev_get_safely(skb->dev); - struct net *net = ipv6_skb_net(skb); + SKB_DR(reason); u32 pkt_len; if (nh[optoff + 1] != 4 || (optoff & 3) != 2) { net_dbg_ratelimited("ipv6_hop_jumbo: wrong jumbo opt length/alignment %d\n", nh[optoff+1]); - __IP6_INC_STATS(net, idev, IPSTATS_MIB_INHDRERRORS); + SKB_DR_SET(reason, IP_INHDR); goto drop; } pkt_len = ntohl(*(__be32 *)(nh + optoff + 2)); if (pkt_len <= IPV6_MAXPLEN) { - __IP6_INC_STATS(net, idev, IPSTATS_MIB_INHDRERRORS); - icmpv6_param_prob(skb, ICMPV6_HDR_FIELD, optoff+2); + icmpv6_param_prob_reason(skb, ICMPV6_HDR_FIELD, optoff + 2, + SKB_DROP_REASON_IP_INHDR); return false; } if (ipv6_hdr(skb)->payload_len) { - __IP6_INC_STATS(net, idev, IPSTATS_MIB_INHDRERRORS); - icmpv6_param_prob(skb, ICMPV6_HDR_FIELD, optoff); + icmpv6_param_prob_reason(skb, ICMPV6_HDR_FIELD, optoff, + SKB_DROP_REASON_IP_INHDR); return false; } if (pkt_len > skb->len - sizeof(struct ipv6hdr)) { - __IP6_INC_STATS(net, idev, IPSTATS_MIB_INTRUNCATEDPKTS); + SKB_DR_SET(reason, PKT_TOO_SMALL); goto drop; } @@ -1032,7 +1034,7 @@ static bool ipv6_hop_jumbo(struct sk_buff *skb, int optoff) return true; drop: - kfree_skb(skb); + kfree_skb_reason(skb, reason); return false; } @@ -1054,7 +1056,7 @@ static bool ipv6_hop_calipso(struct sk_buff *skb, int optoff) return true; drop: - kfree_skb(skb); + kfree_skb_reason(skb, SKB_DROP_REASON_IP_INHDR); return false; } diff --git a/net/ipv6/icmp.c b/net/ipv6/icmp.c index e6b978ea0e87..61770220774e 100644 --- a/net/ipv6/icmp.c +++ b/net/ipv6/icmp.c @@ -629,12 +629,13 @@ out_bh_enable: } EXPORT_SYMBOL(icmp6_send); -/* Slightly more convenient version of icmp6_send. +/* Slightly more convenient version of icmp6_send with drop reasons. */ -void icmpv6_param_prob(struct sk_buff *skb, u8 code, int pos) +void icmpv6_param_prob_reason(struct sk_buff *skb, u8 code, int pos, + enum skb_drop_reason reason) { icmp6_send(skb, ICMPV6_PARAMPROB, code, pos, NULL, IP6CB(skb)); - kfree_skb(skb); + kfree_skb_reason(skb, reason); } /* Generate icmpv6 with type/code ICMPV6_DEST_UNREACH/ICMPV6_ADDR_UNREACH @@ -864,21 +865,23 @@ out: static int icmpv6_rcv(struct sk_buff *skb) { + enum skb_drop_reason reason = SKB_DROP_REASON_NOT_SPECIFIED; struct net *net = dev_net(skb->dev); struct net_device *dev = icmp6_dev(skb); struct inet6_dev *idev = __in6_dev_get(dev); const struct in6_addr *saddr, *daddr; struct icmp6hdr *hdr; u8 type; - bool success = false; if (!xfrm6_policy_check(NULL, XFRM_POLICY_IN, skb)) { struct sec_path *sp = skb_sec_path(skb); int nh; if (!(sp && sp->xvec[sp->len - 1]->props.flags & - XFRM_STATE_ICMP)) + XFRM_STATE_ICMP)) { + reason = SKB_DROP_REASON_XFRM_POLICY; goto drop_no_count; + } if (!pskb_may_pull(skb, sizeof(*hdr) + sizeof(struct ipv6hdr))) goto drop_no_count; @@ -886,8 +889,11 @@ static int icmpv6_rcv(struct sk_buff *skb) nh = skb_network_offset(skb); skb_set_network_header(skb, sizeof(*hdr)); - if (!xfrm6_policy_check_reverse(NULL, XFRM_POLICY_IN, skb)) + if (!xfrm6_policy_check_reverse(NULL, XFRM_POLICY_IN, + skb)) { + reason = SKB_DROP_REASON_XFRM_POLICY; goto drop_no_count; + } skb_set_network_header(skb, nh); } @@ -924,11 +930,11 @@ static int icmpv6_rcv(struct sk_buff *skb) break; case ICMPV6_ECHO_REPLY: - success = ping_rcv(skb); + reason = ping_rcv(skb); break; case ICMPV6_EXT_ECHO_REPLY: - success = ping_rcv(skb); + reason = ping_rcv(skb); break; case ICMPV6_PKT_TOOBIG: @@ -994,19 +1000,20 @@ static int icmpv6_rcv(struct sk_buff *skb) /* until the v6 path can be better sorted assume failure and * preserve the status quo behaviour for the rest of the paths to here */ - if (success) - consume_skb(skb); + if (reason) + kfree_skb_reason(skb, reason); else - kfree_skb(skb); + consume_skb(skb); return 0; csum_error: + reason = SKB_DROP_REASON_ICMP_CSUM; __ICMP6_INC_STATS(dev_net(dev), idev, ICMP6_MIB_CSUMERRORS); discard_it: __ICMP6_INC_STATS(dev_net(dev), idev, ICMP6_MIB_INERRORS); drop_no_count: - kfree_skb(skb); + kfree_skb_reason(skb, reason); return 0; } diff --git a/net/ipv6/ip6_gre.c b/net/ipv6/ip6_gre.c index 5136959b3dc5..4e37f7c29900 100644 --- a/net/ipv6/ip6_gre.c +++ b/net/ipv6/ip6_gre.c @@ -382,11 +382,6 @@ static struct ip6_tnl *ip6gre_tunnel_locate(struct net *net, goto failed_free; ip6gre_tnl_link_config(nt, 1); - - /* Can use a lockless transmit, unless we generate output sequences */ - if (!(nt->parms.o_flags & TUNNEL_SEQ)) - dev->features |= NETIF_F_LLTX; - ip6gre_tunnel_link(ign, nt); return nt; @@ -1445,26 +1440,23 @@ static void ip6gre_tunnel_setup(struct net_device *dev) static void ip6gre_tnl_init_features(struct net_device *dev) { struct ip6_tnl *nt = netdev_priv(dev); + __be16 flags; - dev->features |= GRE6_FEATURES; + dev->features |= GRE6_FEATURES | NETIF_F_LLTX; dev->hw_features |= GRE6_FEATURES; - if (!(nt->parms.o_flags & TUNNEL_SEQ)) { - /* TCP offload with GRE SEQ is not supported, nor - * can we support 2 levels of outer headers requiring - * an update. - */ - if (!(nt->parms.o_flags & TUNNEL_CSUM) || - nt->encap.type == TUNNEL_ENCAP_NONE) { - dev->features |= NETIF_F_GSO_SOFTWARE; - dev->hw_features |= NETIF_F_GSO_SOFTWARE; - } + flags = nt->parms.o_flags; - /* Can use a lockless transmit, unless we generate - * output sequences - */ - dev->features |= NETIF_F_LLTX; - } + /* TCP offload with GRE SEQ is not supported, nor can we support 2 + * levels of outer headers requiring an update. + */ + if (flags & TUNNEL_SEQ) + return; + if (flags & TUNNEL_CSUM && nt->encap.type != TUNNEL_ENCAP_NONE) + return; + + dev->features |= NETIF_F_GSO_SOFTWARE; + dev->hw_features |= NETIF_F_GSO_SOFTWARE; } static int ip6gre_tunnel_init_common(struct net_device *dev) diff --git a/net/ipv6/ip6_input.c b/net/ipv6/ip6_input.c index 5b5ea35635f9..0322cc86b84e 100644 --- a/net/ipv6/ip6_input.c +++ b/net/ipv6/ip6_input.c @@ -145,12 +145,14 @@ static void ip6_list_rcv_finish(struct net *net, struct sock *sk, static struct sk_buff *ip6_rcv_core(struct sk_buff *skb, struct net_device *dev, struct net *net) { + enum skb_drop_reason reason; const struct ipv6hdr *hdr; u32 pkt_len; struct inet6_dev *idev; if (skb->pkt_type == PACKET_OTHERHOST) { - kfree_skb(skb); + dev_core_stats_rx_otherhost_dropped_inc(skb->dev); + kfree_skb_reason(skb, SKB_DROP_REASON_OTHERHOST); return NULL; } @@ -160,9 +162,12 @@ static struct sk_buff *ip6_rcv_core(struct sk_buff *skb, struct net_device *dev, __IP6_UPD_PO_STATS(net, idev, IPSTATS_MIB_IN, skb->len); + SKB_DR_SET(reason, NOT_SPECIFIED); if ((skb = skb_share_check(skb, GFP_ATOMIC)) == NULL || !idev || unlikely(idev->cnf.disable_ipv6)) { __IP6_INC_STATS(net, idev, IPSTATS_MIB_INDISCARDS); + if (idev && unlikely(idev->cnf.disable_ipv6)) + SKB_DR_SET(reason, IPV6DISABLED); goto drop; } @@ -186,8 +191,10 @@ static struct sk_buff *ip6_rcv_core(struct sk_buff *skb, struct net_device *dev, hdr = ipv6_hdr(skb); - if (hdr->version != 6) + if (hdr->version != 6) { + SKB_DR_SET(reason, UNHANDLED_PROTO); goto err; + } __IP6_ADD_STATS(net, idev, IPSTATS_MIB_NOECTPKTS + @@ -225,8 +232,10 @@ static struct sk_buff *ip6_rcv_core(struct sk_buff *skb, struct net_device *dev, if (!ipv6_addr_is_multicast(&hdr->daddr) && (skb->pkt_type == PACKET_BROADCAST || skb->pkt_type == PACKET_MULTICAST) && - idev->cnf.drop_unicast_in_l2_multicast) + idev->cnf.drop_unicast_in_l2_multicast) { + SKB_DR_SET(reason, UNICAST_IN_L2_MULTICAST); goto err; + } /* RFC4291 2.7 * Nodes must not originate a packet to a multicast address whose scope @@ -255,12 +264,11 @@ static struct sk_buff *ip6_rcv_core(struct sk_buff *skb, struct net_device *dev, if (pkt_len + sizeof(struct ipv6hdr) > skb->len) { __IP6_INC_STATS(net, idev, IPSTATS_MIB_INTRUNCATEDPKTS); + SKB_DR_SET(reason, PKT_TOO_SMALL); goto drop; } - if (pskb_trim_rcsum(skb, pkt_len + sizeof(struct ipv6hdr))) { - __IP6_INC_STATS(net, idev, IPSTATS_MIB_INHDRERRORS); - goto drop; - } + if (pskb_trim_rcsum(skb, pkt_len + sizeof(struct ipv6hdr))) + goto err; hdr = ipv6_hdr(skb); } @@ -281,9 +289,10 @@ static struct sk_buff *ip6_rcv_core(struct sk_buff *skb, struct net_device *dev, return skb; err: __IP6_INC_STATS(net, idev, IPSTATS_MIB_INHDRERRORS); + SKB_DR_OR(reason, IP_INHDR); drop: rcu_read_unlock(); - kfree_skb(skb); + kfree_skb_reason(skb, reason); return NULL; } @@ -353,6 +362,7 @@ void ip6_protocol_deliver_rcu(struct net *net, struct sk_buff *skb, int nexthdr, const struct inet6_protocol *ipprot; struct inet6_dev *idev; unsigned int nhoff; + SKB_DR(reason); bool raw; /* @@ -412,12 +422,16 @@ resubmit_final: if (ipv6_addr_is_multicast(&hdr->daddr) && !ipv6_chk_mcast_addr(dev, &hdr->daddr, &hdr->saddr) && - !ipv6_is_mld(skb, nexthdr, skb_network_header_len(skb))) + !ipv6_is_mld(skb, nexthdr, skb_network_header_len(skb))) { + SKB_DR_SET(reason, IP_INADDRERRORS); goto discard; + } } if (!(ipprot->flags & INET6_PROTO_NOPOLICY) && - !xfrm6_policy_check(NULL, XFRM_POLICY_IN, skb)) + !xfrm6_policy_check(NULL, XFRM_POLICY_IN, skb)) { + SKB_DR_SET(reason, XFRM_POLICY); goto discard; + } ret = INDIRECT_CALL_2(ipprot->handler, tcp_v6_rcv, udpv6_rcv, skb); @@ -443,8 +457,11 @@ resubmit_final: IPSTATS_MIB_INUNKNOWNPROTOS); icmpv6_send(skb, ICMPV6_PARAMPROB, ICMPV6_UNK_NEXTHDR, nhoff); + SKB_DR_SET(reason, IP_NOPROTO); + } else { + SKB_DR_SET(reason, XFRM_POLICY); } - kfree_skb(skb); + kfree_skb_reason(skb, reason); } else { __IP6_INC_STATS(net, idev, IPSTATS_MIB_INDELIVERS); consume_skb(skb); @@ -454,7 +471,7 @@ resubmit_final: discard: __IP6_INC_STATS(net, idev, IPSTATS_MIB_INDISCARDS); - kfree_skb(skb); + kfree_skb_reason(skb, reason); } static int ip6_input_finish(struct net *net, struct sock *sk, struct sk_buff *skb) diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c index fa63ef2bd99c..afa5bd4ad167 100644 --- a/net/ipv6/ip6_output.c +++ b/net/ipv6/ip6_output.c @@ -119,19 +119,21 @@ static int ip6_finish_output2(struct net *net, struct sock *sk, struct sk_buff * rcu_read_lock_bh(); nexthop = rt6_nexthop((struct rt6_info *)dst, daddr); neigh = __ipv6_neigh_lookup_noref(dev, nexthop); - if (unlikely(!neigh)) - neigh = __neigh_create(&nd_tbl, nexthop, dev, false); - if (!IS_ERR(neigh)) { - sock_confirm_neigh(skb, neigh); - ret = neigh_output(neigh, skb, false); - rcu_read_unlock_bh(); - return ret; + + if (unlikely(IS_ERR_OR_NULL(neigh))) { + if (unlikely(!neigh)) + neigh = __neigh_create(&nd_tbl, nexthop, dev, false); + if (IS_ERR(neigh)) { + rcu_read_unlock_bh(); + IP6_INC_STATS(net, idev, IPSTATS_MIB_OUTNOROUTES); + kfree_skb_reason(skb, SKB_DROP_REASON_NEIGH_CREATEFAIL); + return -EINVAL; + } } + sock_confirm_neigh(skb, neigh); + ret = neigh_output(neigh, skb, false); rcu_read_unlock_bh(); - - IP6_INC_STATS(net, idev, IPSTATS_MIB_OUTNOROUTES); - kfree_skb_reason(skb, SKB_DROP_REASON_NEIGH_CREATEFAIL); - return -EINVAL; + return ret; } static int @@ -198,7 +200,6 @@ static int ip6_finish_output(struct net *net, struct sock *sk, struct sk_buff *s ret = BPF_CGROUP_RUN_PROG_INET_EGRESS(sk, skb); switch (ret) { case NET_XMIT_SUCCESS: - return __ip6_finish_output(net, sk, skb); case NET_XMIT_CN: return __ip6_finish_output(net, sk, skb) ? : ret; default: @@ -469,6 +470,7 @@ int ip6_forward(struct sk_buff *skb) struct inet6_skb_parm *opt = IP6CB(skb); struct net *net = dev_net(dst->dev); struct inet6_dev *idev; + SKB_DR(reason); u32 mtu; idev = __in6_dev_get_safely(dev_get_by_index_rcu(net, IP6CB(skb)->iif)); @@ -518,7 +520,7 @@ int ip6_forward(struct sk_buff *skb) icmpv6_send(skb, ICMPV6_TIME_EXCEED, ICMPV6_EXC_HOPLIMIT, 0); __IP6_INC_STATS(net, idev, IPSTATS_MIB_INHDRERRORS); - kfree_skb(skb); + kfree_skb_reason(skb, SKB_DROP_REASON_IP_INHDR); return -ETIMEDOUT; } @@ -537,6 +539,7 @@ int ip6_forward(struct sk_buff *skb) if (!xfrm6_route_forward(skb)) { __IP6_INC_STATS(net, idev, IPSTATS_MIB_INDISCARDS); + SKB_DR_SET(reason, XFRM_POLICY); goto drop; } dst = skb_dst(skb); @@ -596,7 +599,7 @@ int ip6_forward(struct sk_buff *skb) __IP6_INC_STATS(net, idev, IPSTATS_MIB_INTOOBIGERRORS); __IP6_INC_STATS(net, ip6_dst_idev(dst), IPSTATS_MIB_FRAGFAILS); - kfree_skb(skb); + kfree_skb_reason(skb, SKB_DROP_REASON_PKT_TOO_BIG); return -EMSGSIZE; } @@ -618,8 +621,9 @@ int ip6_forward(struct sk_buff *skb) error: __IP6_INC_STATS(net, idev, IPSTATS_MIB_INADDRERRORS); + SKB_DR_SET(reason, IP_INADDRERRORS); drop: - kfree_skb(skb); + kfree_skb_reason(skb, reason); return -EINVAL; } diff --git a/net/ipv6/ip6_tunnel.c b/net/ipv6/ip6_tunnel.c index 53f632a560ec..19325b7600bb 100644 --- a/net/ipv6/ip6_tunnel.c +++ b/net/ipv6/ip6_tunnel.c @@ -257,8 +257,6 @@ static int ip6_tnl_create2(struct net_device *dev) struct ip6_tnl_net *ip6n = net_generic(net, ip6_tnl_net_id); int err; - t = netdev_priv(dev); - dev->rtnl_link_ops = &ip6_link_ops; err = register_netdevice(dev); if (err < 0) diff --git a/net/ipv6/ndisc.c b/net/ipv6/ndisc.c index fcb288b0ae13..254addad0dd3 100644 --- a/net/ipv6/ndisc.c +++ b/net/ipv6/ndisc.c @@ -979,6 +979,7 @@ static void ndisc_recv_na(struct sk_buff *skb) struct inet6_dev *idev = __in6_dev_get(dev); struct inet6_ifaddr *ifp; struct neighbour *neigh; + bool create_neigh; if (skb->len < sizeof(struct nd_msg)) { ND_PRINTK(2, warn, "NA: packet too short\n"); @@ -999,6 +1000,7 @@ static void ndisc_recv_na(struct sk_buff *skb) /* For some 802.11 wireless deployments (and possibly other networks), * there will be a NA proxy and unsolicitd packets are attacks * and thus should not be accepted. + * drop_unsolicited_na takes precedence over accept_unsolicited_na */ if (!msg->icmph.icmp6_solicited && idev && idev->cnf.drop_unsolicited_na) @@ -1039,7 +1041,23 @@ static void ndisc_recv_na(struct sk_buff *skb) in6_ifa_put(ifp); return; } - neigh = neigh_lookup(&nd_tbl, &msg->target, dev); + /* RFC 9131 updates original Neighbour Discovery RFC 4861. + * An unsolicited NA can now create a neighbour cache entry + * on routers if it has Target LL Address option. + * + * drop accept fwding behaviour + * ---- ------ ------ ---------------------------------------------- + * 1 X X Drop NA packet and don't pass up the stack + * 0 0 X Pass NA packet up the stack, don't update NC + * 0 1 0 Pass NA packet up the stack, don't update NC + * 0 1 1 Pass NA packet up the stack, and add a STALE + * NC entry + * Note that we don't do a (daddr == all-routers-mcast) check. + */ + create_neigh = !msg->icmph.icmp6_solicited && lladdr && + idev && idev->cnf.forwarding && + idev->cnf.accept_unsolicited_na; + neigh = __neigh_lookup(&nd_tbl, &msg->target, dev, create_neigh); if (neigh) { u8 old_flags = neigh->flags; diff --git a/net/ipv6/netfilter/nft_fib_ipv6.c b/net/ipv6/netfilter/nft_fib_ipv6.c index b3f163b40c2b..8970d0b4faeb 100644 --- a/net/ipv6/netfilter/nft_fib_ipv6.c +++ b/net/ipv6/netfilter/nft_fib_ipv6.c @@ -30,6 +30,10 @@ static int nft_fib6_flowi_init(struct flowi6 *fl6, const struct nft_fib *priv, fl6->daddr = iph->daddr; fl6->saddr = iph->saddr; } else { + if (nft_hook(pkt) == NF_INET_FORWARD && + priv->flags & NFTA_FIB_F_IIF) + fl6->flowi6_iif = nft_out(pkt)->ifindex; + fl6->daddr = iph->saddr; fl6->saddr = iph->daddr; } diff --git a/net/ipv6/raw.c b/net/ipv6/raw.c index c51d5ce3711c..3b7cbd522b54 100644 --- a/net/ipv6/raw.c +++ b/net/ipv6/raw.c @@ -460,7 +460,7 @@ int rawv6_rcv(struct sock *sk, struct sk_buff *skb) */ static int rawv6_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, - int noblock, int flags, int *addr_len) + int flags, int *addr_len) { struct ipv6_pinfo *np = inet6_sk(sk); DECLARE_SOCKADDR(struct sockaddr_in6 *, sin6, msg->msg_name); @@ -477,7 +477,7 @@ static int rawv6_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, if (np->rxpmtu && np->rxopt.bits.rxpmtu) return ipv6_recv_rxpmtu(sk, msg, len, addr_len); - skb = skb_recv_datagram(sk, flags, noblock, &err); + skb = skb_recv_datagram(sk, flags, &err); if (!skb) goto out; @@ -512,7 +512,7 @@ static int rawv6_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, *addr_len = sizeof(*sin6); } - sock_recv_ts_and_drops(msg, sk, skb); + sock_recv_cmsgs(msg, sk, skb); if (np->rxopt.all) ip6_datagram_recv_ctl(sk, msg, skb); diff --git a/net/ipv6/route.c b/net/ipv6/route.c index c4b6ce017d5e..d25dc83bac62 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -4483,6 +4483,7 @@ static int ip6_pkt_drop(struct sk_buff *skb, u8 code, int ipstats_mib_noroutes) struct dst_entry *dst = skb_dst(skb); struct net *net = dev_net(dst->dev); struct inet6_dev *idev; + SKB_DR(reason); int type; if (netif_is_l3_master(skb->dev) || @@ -4495,11 +4496,14 @@ static int ip6_pkt_drop(struct sk_buff *skb, u8 code, int ipstats_mib_noroutes) case IPSTATS_MIB_INNOROUTES: type = ipv6_addr_type(&ipv6_hdr(skb)->daddr); if (type == IPV6_ADDR_ANY) { + SKB_DR_SET(reason, IP_INADDRERRORS); IP6_INC_STATS(net, idev, IPSTATS_MIB_INADDRERRORS); break; } + SKB_DR_SET(reason, IP_INNOROUTES); fallthrough; case IPSTATS_MIB_OUTNOROUTES: + SKB_DR_OR(reason, IP_OUTNOROUTES); IP6_INC_STATS(net, idev, ipstats_mib_noroutes); break; } @@ -4509,7 +4513,7 @@ static int ip6_pkt_drop(struct sk_buff *skb, u8 code, int ipstats_mib_noroutes) skb_dst_drop(skb); icmpv6_send(skb, ICMPV6_DEST_UNREACH, code, 0); - kfree_skb(skb); + kfree_skb_reason(skb, reason); return 0; } diff --git a/net/ipv6/sysctl_net_ipv6.c b/net/ipv6/sysctl_net_ipv6.c index d53dd142bf87..94a0a294c6a1 100644 --- a/net/ipv6/sysctl_net_ipv6.c +++ b/net/ipv6/sysctl_net_ipv6.c @@ -23,8 +23,6 @@ #endif #include <linux/ioam6.h> -static int two = 2; -static int three = 3; static int flowlabel_reflect_max = 0x7; static int auto_flowlabels_max = IP6_AUTO_FLOW_LABEL_MAX; static u32 rt6_multipath_hash_fields_all_mask = @@ -172,7 +170,7 @@ static struct ctl_table ipv6_table_template[] = { .mode = 0644, .proc_handler = proc_rt6_multipath_hash_policy, .extra1 = SYSCTL_ZERO, - .extra2 = &three, + .extra2 = SYSCTL_THREE, }, { .procname = "fib_multipath_hash_fields", @@ -197,7 +195,7 @@ static struct ctl_table ipv6_table_template[] = { .mode = 0644, .proc_handler = proc_dou8vec_minmax, .extra1 = SYSCTL_ZERO, - .extra2 = &two, + .extra2 = SYSCTL_TWO, }, { .procname = "ioam6_id", diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c index 13678d3908fa..60bdec257ba7 100644 --- a/net/ipv6/tcp_ipv6.c +++ b/net/ipv6/tcp_ipv6.c @@ -230,7 +230,7 @@ static int tcp_v6_connect(struct sock *sk, struct sockaddr *uaddr, u32 exthdrlen = icsk->icsk_ext_hdr_len; struct sockaddr_in sin; - if (__ipv6_only_sock(sk)) + if (ipv6_only_sock(sk)) return -ENETUNREACH; sin.sin_family = AF_INET; @@ -1728,7 +1728,6 @@ process: sk_incoming_cpu_update(sk); - sk_defer_free_flush(sk); bh_lock_sock_nested(sk); tcp_segs_in(tcp_sk(sk), skb); ret = 0; @@ -2044,7 +2043,7 @@ static void get_tcp6_sock(struct seq_file *seq, struct sock *sp, int i) jiffies_to_clock_t(icsk->icsk_rto), jiffies_to_clock_t(icsk->icsk_ack.ato), (icsk->icsk_ack.quick << 1) | inet_csk_in_pingpong_mode(sp), - tp->snd_cwnd, + tcp_snd_cwnd(tp), state == TCP_LISTEN ? fastopenq->max_qlen : (tcp_in_initial_slowstart(tp) ? -1 : tp->snd_ssthresh) diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c index 7f0fa9bd9ffe..3fc97d4621ac 100644 --- a/net/ipv6/udp.c +++ b/net/ipv6/udp.c @@ -322,7 +322,7 @@ static int udp6_skb_len(struct sk_buff *skb) */ int udpv6_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, - int noblock, int flags, int *addr_len) + int flags, int *addr_len) { struct ipv6_pinfo *np = inet6_sk(sk); struct inet_sock *inet = inet_sk(sk); @@ -342,7 +342,7 @@ int udpv6_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, try_again: off = sk_peek_offset(sk, flags); - skb = __skb_recv_udp(sk, flags, noblock, &off, &err); + skb = __skb_recv_udp(sk, flags, &off, &err); if (!skb) return err; @@ -391,7 +391,7 @@ try_again: if (!peeking) SNMP_INC_STATS(mib, UDP_MIB_INDATAGRAMS); - sock_recv_ts_and_drops(msg, sk, skb); + sock_recv_cmsgs(msg, sk, skb); /* Copy the address. */ if (msg->msg_name) { @@ -1123,7 +1123,7 @@ static int udpv6_pre_connect(struct sock *sk, struct sockaddr *uaddr, * bytes that are out of the bound specified by user in addr_len. */ if (uaddr->sa_family == AF_INET) { - if (__ipv6_only_sock(sk)) + if (ipv6_only_sock(sk)) return -EAFNOSUPPORT; return udp_pre_connect(sk, uaddr, addr_len); } @@ -1359,7 +1359,7 @@ int udpv6_sendmsg(struct sock *sk, struct msghdr *msg, size_t len) msg->msg_name = &sin; msg->msg_namelen = sizeof(sin); do_udp_sendmsg: - if (__ipv6_only_sock(sk)) + if (ipv6_only_sock(sk)) return -ENETUNREACH; return udp_sendmsg(sk, msg, len); } diff --git a/net/ipv6/udp_impl.h b/net/ipv6/udp_impl.h index b2fcc46c1630..4251e49d32a0 100644 --- a/net/ipv6/udp_impl.h +++ b/net/ipv6/udp_impl.h @@ -20,8 +20,8 @@ int udpv6_getsockopt(struct sock *sk, int level, int optname, int udpv6_setsockopt(struct sock *sk, int level, int optname, sockptr_t optval, unsigned int optlen); int udpv6_sendmsg(struct sock *sk, struct msghdr *msg, size_t len); -int udpv6_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, int noblock, - int flags, int *addr_len); +int udpv6_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, int flags, + int *addr_len); void udpv6_destroy_sock(struct sock *sk); #ifdef CONFIG_PROC_FS diff --git a/net/iucv/af_iucv.c b/net/iucv/af_iucv.c index a1760add5bf1..a0385ddbffcf 100644 --- a/net/iucv/af_iucv.c +++ b/net/iucv/af_iucv.c @@ -1223,7 +1223,6 @@ static void iucv_process_message_q(struct sock *sk) static int iucv_sock_recvmsg(struct socket *sock, struct msghdr *msg, size_t len, int flags) { - int noblock = flags & MSG_DONTWAIT; struct sock *sk = sock->sk; struct iucv_sock *iucv = iucv_sk(sk); unsigned int copied, rlen; @@ -1242,7 +1241,7 @@ static int iucv_sock_recvmsg(struct socket *sock, struct msghdr *msg, /* receive/dequeue next skb: * the function understands MSG_PEEK and, thus, does not dequeue skb */ - skb = skb_recv_datagram(sk, flags, noblock, &err); + skb = skb_recv_datagram(sk, flags, &err); if (!skb) { if (sk->sk_shutdown & RCV_SHUTDOWN) return 0; diff --git a/net/key/af_key.c b/net/key/af_key.c index fd51db3be91c..175a162eec58 100644 --- a/net/key/af_key.c +++ b/net/key/af_key.c @@ -3696,7 +3696,7 @@ static int pfkey_recvmsg(struct socket *sock, struct msghdr *msg, size_t len, if (flags & ~(MSG_PEEK|MSG_DONTWAIT|MSG_TRUNC|MSG_CMSG_COMPAT)) goto out; - skb = skb_recv_datagram(sk, flags, flags & MSG_DONTWAIT, &err); + skb = skb_recv_datagram(sk, flags, &err); if (skb == NULL) goto out; @@ -3711,7 +3711,7 @@ static int pfkey_recvmsg(struct socket *sock, struct msghdr *msg, size_t len, if (err) goto out_free; - sock_recv_ts_and_drops(msg, sk, skb); + sock_recv_cmsgs(msg, sk, skb); err = (flags & MSG_TRUNC) ? skb->len : copied; diff --git a/net/l2tp/l2tp_ip.c b/net/l2tp/l2tp_ip.c index b3edafa5fba4..6af09e188e52 100644 --- a/net/l2tp/l2tp_ip.c +++ b/net/l2tp/l2tp_ip.c @@ -515,7 +515,7 @@ no_route: } static int l2tp_ip_recvmsg(struct sock *sk, struct msghdr *msg, - size_t len, int noblock, int flags, int *addr_len) + size_t len, int flags, int *addr_len) { struct inet_sock *inet = inet_sk(sk); size_t copied = 0; @@ -526,7 +526,7 @@ static int l2tp_ip_recvmsg(struct sock *sk, struct msghdr *msg, if (flags & MSG_OOB) goto out; - skb = skb_recv_datagram(sk, flags, noblock, &err); + skb = skb_recv_datagram(sk, flags, &err); if (!skb) goto out; diff --git a/net/l2tp/l2tp_ip6.c b/net/l2tp/l2tp_ip6.c index 96f975777438..217c7192691e 100644 --- a/net/l2tp/l2tp_ip6.c +++ b/net/l2tp/l2tp_ip6.c @@ -657,7 +657,7 @@ do_confirm: } static int l2tp_ip6_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, - int noblock, int flags, int *addr_len) + int flags, int *addr_len) { struct ipv6_pinfo *np = inet6_sk(sk); DECLARE_SOCKADDR(struct sockaddr_l2tpip6 *, lsa, msg->msg_name); @@ -671,7 +671,7 @@ static int l2tp_ip6_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, if (flags & MSG_ERRQUEUE) return ipv6_recv_error(sk, msg, len, addr_len); - skb = skb_recv_datagram(sk, flags, noblock, &err); + skb = skb_recv_datagram(sk, flags, &err); if (!skb) goto out; diff --git a/net/l2tp/l2tp_ppp.c b/net/l2tp/l2tp_ppp.c index bf35710127dd..8be1fdc68a0b 100644 --- a/net/l2tp/l2tp_ppp.c +++ b/net/l2tp/l2tp_ppp.c @@ -191,8 +191,7 @@ static int pppol2tp_recvmsg(struct socket *sock, struct msghdr *msg, goto end; err = 0; - skb = skb_recv_datagram(sk, flags & ~MSG_DONTWAIT, - flags & MSG_DONTWAIT, &err); + skb = skb_recv_datagram(sk, flags, &err); if (!skb) goto end; diff --git a/net/mac80211/agg-rx.c b/net/mac80211/agg-rx.c index 218cdc554d71..bfab39320004 100644 --- a/net/mac80211/agg-rx.c +++ b/net/mac80211/agg-rx.c @@ -263,7 +263,7 @@ static void ieee80211_send_addba_resp(struct sta_info *sta, u8 *da, u16 tid, mgmt->u.action.u.addba_resp.timeout = cpu_to_le16(timeout); mgmt->u.action.u.addba_resp.status = cpu_to_le16(status); - if (sta->sta.he_cap.has_he && addbaext) + if (sta->sta.deflink.he_cap.has_he && addbaext) ieee80211_add_addbaext(sdata, skb, addbaext, buf_size); ieee80211_tx_skb(sdata, skb); @@ -296,7 +296,7 @@ void ___ieee80211_start_rx_ba_session(struct sta_info *sta, goto end; } - if (!sta->sta.ht_cap.ht_supported && + if (!sta->sta.deflink.ht_cap.ht_supported && sta->sdata->vif.bss_conf.chandef.chan->band != NL80211_BAND_6GHZ) { ht_dbg(sta->sdata, "STA %pM erroneously requests BA session on tid %d w/o QoS\n", @@ -312,9 +312,9 @@ void ___ieee80211_start_rx_ba_session(struct sta_info *sta, goto end; } - if (sta->sta.eht_cap.has_eht) + if (sta->sta.deflink.eht_cap.has_eht) max_buf_size = IEEE80211_MAX_AMPDU_BUF_EHT; - else if (sta->sta.he_cap.has_he) + else if (sta->sta.deflink.he_cap.has_he) max_buf_size = IEEE80211_MAX_AMPDU_BUF_HE; else max_buf_size = IEEE80211_MAX_AMPDU_BUF_HT; @@ -324,7 +324,7 @@ void ___ieee80211_start_rx_ba_session(struct sta_info *sta, * and if buffer size does not exceeds max value */ /* XXX: check own ht delayed BA capability?? */ if (((ba_policy != 1) && - (!(sta->sta.ht_cap.cap & IEEE80211_HT_CAP_DELAY_BA))) || + (!(sta->sta.deflink.ht_cap.cap & IEEE80211_HT_CAP_DELAY_BA))) || (buf_size > max_buf_size)) { status = WLAN_STATUS_INVALID_QOS_PARAM; ht_dbg_ratelimited(sta->sdata, @@ -507,7 +507,7 @@ void ieee80211_process_addba_request(struct ieee80211_local *local, goto free; } - if (sta->sta.eht_cap.has_eht && elems && elems->addba_ext_ie) { + if (sta->sta.deflink.eht_cap.has_eht && elems && elems->addba_ext_ie) { u8 buf_size_1k = u8_get_bits(elems->addba_ext_ie->data, IEEE80211_ADDBA_EXT_BUF_SIZE_MASK); diff --git a/net/mac80211/agg-tx.c b/net/mac80211/agg-tx.c index 1deb3d874a4b..91878ed5ec46 100644 --- a/net/mac80211/agg-tx.c +++ b/net/mac80211/agg-tx.c @@ -467,7 +467,7 @@ static void ieee80211_send_addba_with_timeout(struct sta_info *sta, sta->ampdu_mlme.addba_req_num[tid]++; spin_unlock_bh(&sta->lock); - if (sta->sta.he_cap.has_he) { + if (sta->sta.deflink.he_cap.has_he) { buf_size = local->hw.max_tx_aggregation_subframes; } else { /* @@ -594,7 +594,7 @@ int ieee80211_start_tx_ba_session(struct ieee80211_sta *pubsta, u16 tid, "Requested to start BA session on reserved tid=%d", tid)) return -EINVAL; - if (!pubsta->ht_cap.ht_supported && + if (!pubsta->deflink.ht_cap.ht_supported && sta->sdata->vif.bss_conf.chandef.chan->band != NL80211_BAND_6GHZ) return -EINVAL; @@ -647,7 +647,7 @@ int ieee80211_start_tx_ba_session(struct ieee80211_sta *pubsta, u16 tid, * is set when we receive a bss info from a probe response or a beacon. */ if (sta->sdata->vif.type == NL80211_IFTYPE_ADHOC && - !sta->sta.ht_cap.ht_supported) { + !sta->sta.deflink.ht_cap.ht_supported) { ht_dbg(sdata, "BA request denied - IBSS STA %pM does not advertise HT support\n", pubsta->addr); diff --git a/net/mac80211/airtime.c b/net/mac80211/airtime.c index 2619e12c8bda..4bab1683652d 100644 --- a/net/mac80211/airtime.c +++ b/net/mac80211/airtime.c @@ -647,8 +647,8 @@ u32 ieee80211_calc_expected_tx_airtime(struct ieee80211_hw *hw, struct sta_info *sta = container_of(pubsta, struct sta_info, sta); struct ieee80211_rx_status stat; - struct ieee80211_tx_rate *tx_rate = &sta->tx_stats.last_rate; - struct rate_info *ri = &sta->tx_stats.last_rate_info; + struct ieee80211_tx_rate *tx_rate = &sta->deflink.tx_stats.last_rate; + struct rate_info *ri = &sta->deflink.tx_stats.last_rate_info; u32 duration, overhead; u8 agg_shift; diff --git a/net/mac80211/cfg.c b/net/mac80211/cfg.c index ba752539d1d9..f1d211e61e49 100644 --- a/net/mac80211/cfg.c +++ b/net/mac80211/cfg.c @@ -570,7 +570,8 @@ static int ieee80211_del_key(struct wiphy *wiphy, struct net_device *dev, if (pairwise) key = key_mtx_dereference(local, sta->ptk[key_idx]); else - key = key_mtx_dereference(local, sta->gtk[key_idx]); + key = key_mtx_dereference(local, + sta->deflink.gtk[key_idx]); } else key = key_mtx_dereference(local, sdata->keys[key_idx]); @@ -620,7 +621,7 @@ static int ieee80211_get_key(struct wiphy *wiphy, struct net_device *dev, else if (!pairwise && key_idx < NUM_DEFAULT_KEYS + NUM_DEFAULT_MGMT_KEYS + NUM_DEFAULT_BEACON_KEYS) - key = rcu_dereference(sta->gtk[key_idx]); + key = rcu_dereference(sta->deflink.gtk[key_idx]); } else key = rcu_dereference(sdata->keys[key_idx]); @@ -1728,9 +1729,9 @@ static int sta_apply_parameters(struct ieee80211_local *local, sta->listen_interval = params->listen_interval; if (params->sta_modify_mask & STATION_PARAM_APPLY_STA_TXPOWER) { - sta->sta.txpwr.type = params->txpwr.type; + sta->sta.deflink.txpwr.type = params->txpwr.type; if (params->txpwr.type == NL80211_TX_POWER_LIMITED) - sta->sta.txpwr.power = params->txpwr.power; + sta->sta.deflink.txpwr.power = params->txpwr.power; ret = drv_sta_set_txpwr(local, sdata, sta); if (ret) return ret; @@ -1740,7 +1741,7 @@ static int sta_apply_parameters(struct ieee80211_local *local, ieee80211_parse_bitrates(&sdata->vif.bss_conf.chandef, sband, params->supported_rates, params->supported_rates_len, - &sta->sta.supp_rates[sband->band]); + &sta->sta.deflink.supp_rates[sband->band]); } if (params->ht_capa) @@ -3306,13 +3307,14 @@ static int ieee80211_set_after_csa_beacon(struct ieee80211_sub_if_data *sdata, switch (sdata->vif.type) { case NL80211_IFTYPE_AP: + if (!sdata->u.ap.next_beacon) + return -EINVAL; + err = ieee80211_assign_beacon(sdata, sdata->u.ap.next_beacon, NULL, NULL); - if (sdata->u.ap.next_beacon) { - kfree(sdata->u.ap.next_beacon->mbssid_ies); - kfree(sdata->u.ap.next_beacon); - sdata->u.ap.next_beacon = NULL; - } + kfree(sdata->u.ap.next_beacon->mbssid_ies); + kfree(sdata->u.ap.next_beacon); + sdata->u.ap.next_beacon = NULL; if (err < 0) return err; @@ -4314,13 +4316,14 @@ ieee80211_set_after_color_change_beacon(struct ieee80211_sub_if_data *sdata, case NL80211_IFTYPE_AP: { int ret; + if (!sdata->u.ap.next_beacon) + return -EINVAL; + ret = ieee80211_assign_beacon(sdata, sdata->u.ap.next_beacon, NULL, NULL); - if (sdata->u.ap.next_beacon) { - kfree(sdata->u.ap.next_beacon->mbssid_ies); - kfree(sdata->u.ap.next_beacon); - sdata->u.ap.next_beacon = NULL; - } + kfree(sdata->u.ap.next_beacon->mbssid_ies); + kfree(sdata->u.ap.next_beacon); + sdata->u.ap.next_beacon = NULL; if (ret < 0) return ret; diff --git a/net/mac80211/chan.c b/net/mac80211/chan.c index e26d42de14ec..e3452445b363 100644 --- a/net/mac80211/chan.c +++ b/net/mac80211/chan.c @@ -199,7 +199,7 @@ static enum nl80211_chan_width ieee80211_get_sta_bw(struct sta_info *sta) switch (width) { case IEEE80211_STA_RX_BW_20: - if (sta->sta.ht_cap.ht_supported) + if (sta->sta.deflink.ht_cap.ht_supported) return NL80211_CHAN_WIDTH_20; else return NL80211_CHAN_WIDTH_20_NOHT; @@ -375,15 +375,15 @@ static void ieee80211_chan_bw_change(struct ieee80211_local *local, new_sta_bw = ieee80211_sta_cur_vht_bw(sta); /* nothing change */ - if (new_sta_bw == sta->sta.bandwidth) + if (new_sta_bw == sta->sta.deflink.bandwidth) continue; /* vif changed to narrow BW and narrow BW for station wasn't * requested or vise versa */ - if ((new_sta_bw < sta->sta.bandwidth) == !narrowed) + if ((new_sta_bw < sta->sta.deflink.bandwidth) == !narrowed) continue; - sta->sta.bandwidth = new_sta_bw; + sta->sta.deflink.bandwidth = new_sta_bw; rate_control_rate_update(local, sband, sta, IEEE80211_RC_BW_CHANGED); } diff --git a/net/mac80211/debugfs.c b/net/mac80211/debugfs.c index f4c9a92f50f9..1fe43b264d75 100644 --- a/net/mac80211/debugfs.c +++ b/net/mac80211/debugfs.c @@ -504,6 +504,7 @@ static const char *hw_flag_names[] = { FLAG(SUPPORTS_TX_ENCAP_OFFLOAD), FLAG(SUPPORTS_RX_DECAP_OFFLOAD), FLAG(SUPPORTS_CONC_MON_RX_DECAP), + FLAG(DETECTS_COLOR_COLLISION), #undef FLAG }; diff --git a/net/mac80211/debugfs_sta.c b/net/mac80211/debugfs_sta.c index 88d9cc945a21..182094be9001 100644 --- a/net/mac80211/debugfs_sta.c +++ b/net/mac80211/debugfs_sta.c @@ -447,7 +447,7 @@ static ssize_t sta_ht_capa_read(struct file *file, char __user *userbuf, int i; ssize_t bufsz = 512; struct sta_info *sta = file->private_data; - struct ieee80211_sta_ht_cap *htc = &sta->sta.ht_cap; + struct ieee80211_sta_ht_cap *htc = &sta->sta.deflink.ht_cap; ssize_t ret; buf = kzalloc(bufsz, GFP_KERNEL); @@ -531,7 +531,7 @@ static ssize_t sta_vht_capa_read(struct file *file, char __user *userbuf, { char *buf, *p; struct sta_info *sta = file->private_data; - struct ieee80211_sta_vht_cap *vhtc = &sta->sta.vht_cap; + struct ieee80211_sta_vht_cap *vhtc = &sta->sta.deflink.vht_cap; ssize_t ret; ssize_t bufsz = 512; @@ -646,7 +646,7 @@ static ssize_t sta_he_capa_read(struct file *file, char __user *userbuf, char *buf, *p; size_t buf_sz = PAGE_SIZE; struct sta_info *sta = file->private_data; - struct ieee80211_sta_he_cap *hec = &sta->sta.he_cap; + struct ieee80211_sta_he_cap *hec = &sta->sta.deflink.he_cap; struct ieee80211_he_mcs_nss_supp *nss = &hec->he_mcs_nss_supp; u8 ppe_size; u8 *cap; @@ -1052,9 +1052,9 @@ void ieee80211_sta_debugfs_add(struct sta_info *sta) DEBUGFS_ADD(vht_capa); DEBUGFS_ADD(he_capa); - DEBUGFS_ADD_COUNTER(rx_duplicates, rx_stats.num_duplicates); - DEBUGFS_ADD_COUNTER(rx_fragments, rx_stats.fragments); - DEBUGFS_ADD_COUNTER(tx_filtered, status_stats.filtered); + DEBUGFS_ADD_COUNTER(rx_duplicates, deflink.rx_stats.num_duplicates); + DEBUGFS_ADD_COUNTER(rx_fragments, deflink.rx_stats.fragments); + DEBUGFS_ADD_COUNTER(tx_filtered, deflink.status_stats.filtered); if (local->ops->wake_tx_queue) { DEBUGFS_ADD(aqm); diff --git a/net/mac80211/eht.c b/net/mac80211/eht.c index 364ad0ef7692..96c9486bf2fe 100644 --- a/net/mac80211/eht.c +++ b/net/mac80211/eht.c @@ -14,7 +14,7 @@ ieee80211_eht_cap_ie_to_sta_eht_cap(struct ieee80211_sub_if_data *sdata, const struct ieee80211_eht_cap_elem *eht_cap_ie_elem, u8 eht_cap_len, struct sta_info *sta) { - struct ieee80211_sta_eht_cap *eht_cap = &sta->sta.eht_cap; + struct ieee80211_sta_eht_cap *eht_cap = &sta->sta.deflink.eht_cap; struct ieee80211_he_cap_elem *he_cap_ie_elem = (void *)he_cap_ie; u8 eht_ppe_size = 0; u8 mcs_nss_size; @@ -71,6 +71,6 @@ ieee80211_eht_cap_ie_to_sta_eht_cap(struct ieee80211_sub_if_data *sdata, eht_cap->has_eht = true; - sta->cur_max_bandwidth = ieee80211_sta_cap_rx_bw(sta); - sta->sta.bandwidth = ieee80211_sta_cur_vht_bw(sta); + sta->deflink.cur_max_bandwidth = ieee80211_sta_cap_rx_bw(sta); + sta->sta.deflink.bandwidth = ieee80211_sta_cur_vht_bw(sta); } diff --git a/net/mac80211/ethtool.c b/net/mac80211/ethtool.c index b2253df54413..31cd3c1ac07f 100644 --- a/net/mac80211/ethtool.c +++ b/net/mac80211/ethtool.c @@ -114,7 +114,7 @@ static void ieee80211_get_stats(struct net_device *dev, sta_set_sinfo(sta, &sinfo, false); i = 0; - ADD_STA_STATS(sta); + ADD_STA_STATS(sta->link[0]); data[i++] = sta->sta_state; @@ -140,7 +140,7 @@ static void ieee80211_get_stats(struct net_device *dev, memset(&sinfo, 0, sizeof(sinfo)); sta_set_sinfo(sta, &sinfo, false); i = 0; - ADD_STA_STATS(sta); + ADD_STA_STATS(sta->link[0]); } } diff --git a/net/mac80211/he.c b/net/mac80211/he.c index c05af7018f79..1a61f7552edd 100644 --- a/net/mac80211/he.c +++ b/net/mac80211/he.c @@ -49,7 +49,7 @@ ieee80211_update_from_he_6ghz_capa(const struct ieee80211_he_6ghz_capa *he_6ghz_ break; } - sta->sta.he_6ghz_capa = *he_6ghz_capa; + sta->sta.deflink.he_6ghz_capa = *he_6ghz_capa; } static void ieee80211_he_mcs_disable(__le16 *he_mcs) @@ -110,7 +110,7 @@ ieee80211_he_cap_ie_to_sta_he_cap(struct ieee80211_sub_if_data *sdata, const struct ieee80211_he_6ghz_capa *he_6ghz_capa, struct sta_info *sta) { - struct ieee80211_sta_he_cap *he_cap = &sta->sta.he_cap; + struct ieee80211_sta_he_cap *he_cap = &sta->sta.deflink.he_cap; struct ieee80211_sta_he_cap own_he_cap; struct ieee80211_he_cap_elem *he_cap_ie_elem = (void *)he_cap_ie; u8 he_ppe_size; @@ -153,8 +153,8 @@ ieee80211_he_cap_ie_to_sta_he_cap(struct ieee80211_sub_if_data *sdata, he_cap->has_he = true; - sta->cur_max_bandwidth = ieee80211_sta_cap_rx_bw(sta); - sta->sta.bandwidth = ieee80211_sta_cur_vht_bw(sta); + sta->deflink.cur_max_bandwidth = ieee80211_sta_cap_rx_bw(sta); + sta->sta.deflink.bandwidth = ieee80211_sta_cur_vht_bw(sta); if (sband->band == NL80211_BAND_6GHZ && he_6ghz_capa) ieee80211_update_from_he_6ghz_capa(he_6ghz_capa, sta); diff --git a/net/mac80211/ht.c b/net/mac80211/ht.c index 2eb7641f5556..171bd16b13f3 100644 --- a/net/mac80211/ht.c +++ b/net/mac80211/ht.c @@ -243,9 +243,9 @@ bool ieee80211_ht_cap_ie_to_sta_ht_cap(struct ieee80211_sub_if_data *sdata, sta->sta.max_amsdu_len = IEEE80211_MAX_MPDU_LEN_HT_3839; apply: - changed = memcmp(&sta->sta.ht_cap, &ht_cap, sizeof(ht_cap)); + changed = memcmp(&sta->sta.deflink.ht_cap, &ht_cap, sizeof(ht_cap)); - memcpy(&sta->sta.ht_cap, &ht_cap, sizeof(ht_cap)); + memcpy(&sta->sta.deflink.ht_cap, &ht_cap, sizeof(ht_cap)); switch (sdata->vif.bss_conf.chandef.width) { default: @@ -264,9 +264,9 @@ bool ieee80211_ht_cap_ie_to_sta_ht_cap(struct ieee80211_sub_if_data *sdata, break; } - sta->sta.bandwidth = bw; + sta->sta.deflink.bandwidth = bw; - sta->cur_max_bandwidth = + sta->deflink.cur_max_bandwidth = ht_cap.cap & IEEE80211_HT_CAP_SUP_WIDTH_20_40 ? IEEE80211_STA_RX_BW_40 : IEEE80211_STA_RX_BW_20; diff --git a/net/mac80211/ibss.c b/net/mac80211/ibss.c index 0416c4d22292..14c04fd48b7a 100644 --- a/net/mac80211/ibss.c +++ b/net/mac80211/ibss.c @@ -637,7 +637,7 @@ ieee80211_ibss_add_sta(struct ieee80211_sub_if_data *sdata, const u8 *bssid, /* make sure mandatory rates are always added */ sband = local->hw.wiphy->bands[band]; - sta->sta.supp_rates[band] = supp_rates | + sta->sta.deflink.supp_rates[band] = supp_rates | ieee80211_mandatory_rates(sband, scan_width); return ieee80211_ibss_finish_sta(sta); @@ -1005,7 +1005,7 @@ static void ieee80211_update_sta_info(struct ieee80211_sub_if_data *sdata, if (sta) { u32 prev_rates; - prev_rates = sta->sta.supp_rates[band]; + prev_rates = sta->sta.deflink.supp_rates[band]; /* make sure mandatory rates are always added */ scan_width = NL80211_BSS_CHAN_WIDTH_20; if (rx_status->bw == RATE_INFO_BW_5) @@ -1013,13 +1013,13 @@ static void ieee80211_update_sta_info(struct ieee80211_sub_if_data *sdata, else if (rx_status->bw == RATE_INFO_BW_10) scan_width = NL80211_BSS_CHAN_WIDTH_10; - sta->sta.supp_rates[band] = supp_rates | + sta->sta.deflink.supp_rates[band] = supp_rates | ieee80211_mandatory_rates(sband, scan_width); - if (sta->sta.supp_rates[band] != prev_rates) { + if (sta->sta.deflink.supp_rates[band] != prev_rates) { ibss_dbg(sdata, "updated supp_rates set for %pM based on beacon/probe_resp (0x%x -> 0x%x)\n", sta->sta.addr, prev_rates, - sta->sta.supp_rates[band]); + sta->sta.deflink.supp_rates[band]); rates_updated = true; } } else { @@ -1043,7 +1043,7 @@ static void ieee80211_update_sta_info(struct ieee80211_sub_if_data *sdata, /* we both use HT */ struct ieee80211_ht_cap htcap_ie; struct cfg80211_chan_def chandef; - enum ieee80211_sta_rx_bandwidth bw = sta->sta.bandwidth; + enum ieee80211_sta_rx_bandwidth bw = sta->sta.deflink.bandwidth; cfg80211_chandef_create(&chandef, channel, NL80211_CHAN_NO_HT); ieee80211_chandef_ht_oper(elems->ht_operation, &chandef); @@ -1058,7 +1058,7 @@ static void ieee80211_update_sta_info(struct ieee80211_sub_if_data *sdata, sdata->u.ibss.chandef.width != NL80211_CHAN_WIDTH_40) { /* we both use VHT */ struct ieee80211_vht_cap cap_ie; - struct ieee80211_sta_vht_cap cap = sta->sta.vht_cap; + struct ieee80211_sta_vht_cap cap = sta->sta.deflink.vht_cap; u32 vht_cap_info = le32_to_cpu(elems->vht_cap_elem->vht_cap_info); @@ -1069,11 +1069,11 @@ static void ieee80211_update_sta_info(struct ieee80211_sub_if_data *sdata, memcpy(&cap_ie, elems->vht_cap_elem, sizeof(cap_ie)); ieee80211_vht_cap_ie_to_sta_vht_cap(sdata, sband, &cap_ie, sta); - if (memcmp(&cap, &sta->sta.vht_cap, sizeof(cap))) + if (memcmp(&cap, &sta->sta.deflink.vht_cap, sizeof(cap))) rates_updated |= true; } - if (bw != sta->sta.bandwidth) + if (bw != sta->sta.deflink.bandwidth) rates_updated |= true; if (!cfg80211_chandef_compatible(&sdata->u.ibss.chandef, @@ -1083,12 +1083,12 @@ static void ieee80211_update_sta_info(struct ieee80211_sub_if_data *sdata, if (sta && rates_updated) { u32 changed = IEEE80211_RC_SUPP_RATES_CHANGED; - u8 rx_nss = sta->sta.rx_nss; + u8 rx_nss = sta->sta.deflink.rx_nss; /* Force rx_nss recalculation */ - sta->sta.rx_nss = 0; + sta->sta.deflink.rx_nss = 0; rate_control_rate_init(sta); - if (sta->sta.rx_nss != rx_nss) + if (sta->sta.deflink.rx_nss != rx_nss) changed |= IEEE80211_RC_NSS_CHANGED; drv_sta_rc_update(local, sdata, &sta->sta, changed); @@ -1235,7 +1235,7 @@ void ieee80211_ibss_rx_no_sta(struct ieee80211_sub_if_data *sdata, /* make sure mandatory rates are always added */ sband = local->hw.wiphy->bands[band]; - sta->sta.supp_rates[band] = supp_rates | + sta->sta.deflink.supp_rates[band] = supp_rates | ieee80211_mandatory_rates(sband, scan_width); spin_lock(&ifibss->incomplete_lock); diff --git a/net/mac80211/key.c b/net/mac80211/key.c index f695fc80088b..0fcf8aebedc4 100644 --- a/net/mac80211/key.c +++ b/net/mac80211/key.c @@ -476,7 +476,7 @@ static int ieee80211_key_replace(struct ieee80211_sub_if_data *sdata, !(new->conf.flags & IEEE80211_KEY_FLAG_NO_AUTO_TX)) _ieee80211_set_tx_key(new, true); } else { - rcu_assign_pointer(sta->gtk[idx], new); + rcu_assign_pointer(sta->deflink.gtk[idx], new); } /* Only needed for transition from no key -> key. * Still triggers unnecessary when using Extended Key ID @@ -826,7 +826,8 @@ int ieee80211_key_link(struct ieee80211_key *key, (old_key && old_key->conf.cipher != key->conf.cipher)) goto out; } else if (sta) { - old_key = key_mtx_dereference(sdata->local, sta->gtk[idx]); + old_key = key_mtx_dereference(sdata->local, + sta->deflink.gtk[idx]); } else { old_key = key_mtx_dereference(sdata->local, sdata->keys[idx]); } @@ -1076,8 +1077,8 @@ void ieee80211_free_sta_keys(struct ieee80211_local *local, int i; mutex_lock(&local->key_mtx); - for (i = 0; i < ARRAY_SIZE(sta->gtk); i++) { - key = key_mtx_dereference(local, sta->gtk[i]); + for (i = 0; i < ARRAY_SIZE(sta->deflink.gtk); i++) { + key = key_mtx_dereference(local, sta->deflink.gtk[i]); if (!key) continue; ieee80211_key_replace(key->sdata, key->sta, diff --git a/net/mac80211/mesh_hwmp.c b/net/mac80211/mesh_hwmp.c index 44a6fdb6efbd..58ebdcd69d05 100644 --- a/net/mac80211/mesh_hwmp.c +++ b/net/mac80211/mesh_hwmp.c @@ -310,7 +310,7 @@ void ieee80211s_update_metric(struct ieee80211_local *local, LINK_FAIL_THRESH) mesh_plink_broken(sta); - sta_set_rate_info_tx(sta, &sta->tx_stats.last_rate, &rinfo); + sta_set_rate_info_tx(sta, &sta->deflink.tx_stats.last_rate, &rinfo); ewma_mesh_tx_rate_avg_add(&sta->mesh->tx_rate_avg, cfg80211_calculate_bitrate(&rinfo)); } diff --git a/net/mac80211/mesh_plink.c b/net/mac80211/mesh_plink.c index a829470dd59e..42ba7424589e 100644 --- a/net/mac80211/mesh_plink.c +++ b/net/mac80211/mesh_plink.c @@ -61,8 +61,8 @@ static bool rssi_threshold_check(struct ieee80211_sub_if_data *sdata, s32 rssi_threshold = sdata->u.mesh.mshcfg.rssi_threshold; return rssi_threshold == 0 || (sta && - (s8)-ewma_signal_read(&sta->rx_stats_avg.signal) > - rssi_threshold); + (s8)-ewma_signal_read(&sta->deflink.rx_stats_avg.signal) > + rssi_threshold); } /** @@ -125,7 +125,7 @@ static u32 mesh_set_short_slot_time(struct ieee80211_sub_if_data *sdata) continue; short_slot = false; - if (erp_rates & sta->sta.supp_rates[sband->band]) + if (erp_rates & sta->sta.deflink.supp_rates[sband->band]) short_slot = true; else break; @@ -175,10 +175,10 @@ static u32 mesh_set_ht_prot_mode(struct ieee80211_sub_if_data *sdata) sta->mesh->plink_state != NL80211_PLINK_ESTAB) continue; - if (sta->sta.bandwidth > IEEE80211_STA_RX_BW_20) + if (sta->sta.deflink.bandwidth > IEEE80211_STA_RX_BW_20) continue; - if (!sta->sta.ht_cap.ht_supported) { + if (!sta->sta.deflink.ht_cap.ht_supported) { mpl_dbg(sdata, "nonHT sta (%pM) is present\n", sta->sta.addr); non_ht_sta = true; @@ -415,7 +415,7 @@ static void mesh_sta_info_init(struct ieee80211_sub_if_data *sdata, struct ieee80211_local *local = sdata->local; struct ieee80211_supported_band *sband; u32 rates, basic_rates = 0, changed = 0; - enum ieee80211_sta_rx_bandwidth bw = sta->sta.bandwidth; + enum ieee80211_sta_rx_bandwidth bw = sta->sta.deflink.bandwidth; sband = ieee80211_get_sband(sdata); if (!sband) @@ -425,7 +425,7 @@ static void mesh_sta_info_init(struct ieee80211_sub_if_data *sdata, &basic_rates); spin_lock_bh(&sta->mesh->plink_lock); - sta->rx_stats.last_rx = jiffies; + sta->deflink.rx_stats.last_rx = jiffies; /* rates and capabilities don't change during peering */ if (sta->mesh->plink_state == NL80211_PLINK_ESTAB && @@ -433,9 +433,9 @@ static void mesh_sta_info_init(struct ieee80211_sub_if_data *sdata, goto out; sta->mesh->processed_beacon = true; - if (sta->sta.supp_rates[sband->band] != rates) + if (sta->sta.deflink.supp_rates[sband->band] != rates) changed |= IEEE80211_RC_SUPP_RATES_CHANGED; - sta->sta.supp_rates[sband->band] = rates; + sta->sta.deflink.supp_rates[sband->band] = rates; if (ieee80211_ht_cap_ie_to_sta_ht_cap(sdata, sband, elems->ht_cap_elem, sta)) @@ -449,16 +449,16 @@ static void mesh_sta_info_init(struct ieee80211_sub_if_data *sdata, elems->he_6ghz_capa, sta); - if (bw != sta->sta.bandwidth) + if (bw != sta->sta.deflink.bandwidth) changed |= IEEE80211_RC_BW_CHANGED; /* HT peer is operating 20MHz-only */ if (elems->ht_operation && !(elems->ht_operation->ht_param & IEEE80211_HT_PARAM_CHAN_WIDTH_ANY)) { - if (sta->sta.bandwidth != IEEE80211_STA_RX_BW_20) + if (sta->sta.deflink.bandwidth != IEEE80211_STA_RX_BW_20) changed |= IEEE80211_RC_BW_CHANGED; - sta->sta.bandwidth = IEEE80211_STA_RX_BW_20; + sta->sta.deflink.bandwidth = IEEE80211_STA_RX_BW_20; } if (!test_sta_flag(sta, WLAN_STA_RATE_CONTROL)) diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c index 1b30c724ca8d..b857915881e0 100644 --- a/net/mac80211/mlme.c +++ b/net/mac80211/mlme.c @@ -3342,7 +3342,7 @@ static bool ieee80211_twt_req_supported(const struct sta_info *sta, if (!(elems->ext_capab[9] & WLAN_EXT_CAPA10_TWT_RESPONDER_SUPPORT)) return false; - return sta->sta.he_cap.he_cap_elem.mac_cap_info[0] & + return sta->sta.deflink.he_cap.he_cap_elem.mac_cap_info[0] & IEEE80211_HE_MAC_CAP0_TWT_RES; } @@ -3369,7 +3369,7 @@ static bool ieee80211_twt_bcast_support(struct ieee80211_sub_if_data *sdata, ieee80211_vif_type_p2p(&sdata->vif)); return bss_conf->he_support && - (sta->sta.he_cap.he_cap_elem.mac_cap_info[2] & + (sta->sta.deflink.he_cap.he_cap_elem.mac_cap_info[2] & IEEE80211_HE_MAC_CAP2_BCAST_TWT) && own_he_cap && (own_he_cap->he_cap_elem.mac_cap_info[2] & @@ -3587,7 +3587,7 @@ static bool ieee80211_assoc_success(struct ieee80211_sub_if_data *sdata, elems->he_6ghz_capa, sta); - bss_conf->he_support = sta->sta.he_cap.has_he; + bss_conf->he_support = sta->sta.deflink.he_cap.has_he; if (elems->rsnx && elems->rsnx_len && (elems->rsnx[0] & WLAN_RSNX_CAPA_PROTECTED_TWT) && wiphy_ext_feature_isset(local->hw.wiphy, @@ -3607,7 +3607,7 @@ static bool ieee80211_assoc_success(struct ieee80211_sub_if_data *sdata, elems->eht_cap_len, sta); - bss_conf->eht_support = sta->sta.eht_cap.has_eht; + bss_conf->eht_support = sta->sta.deflink.eht_cap.has_eht; } else { bss_conf->eht_support = false; } @@ -3678,7 +3678,7 @@ static bool ieee80211_assoc_success(struct ieee80211_sub_if_data *sdata, nss = *elems->opmode_notif & IEEE80211_OPMODE_NOTIF_RX_NSS_MASK; nss >>= IEEE80211_OPMODE_NOTIF_RX_NSS_SHIFT; nss += 1; - sta->sta.rx_nss = nss; + sta->sta.deflink.rx_nss = nss; } rate_control_rate_init(sta); @@ -4836,9 +4836,9 @@ static void ieee80211_sta_conn_mon_timer(struct timer_list *t) if (!sta) return; - timeout = sta->status_stats.last_ack; - if (time_before(sta->status_stats.last_ack, sta->rx_stats.last_rx)) - timeout = sta->rx_stats.last_rx; + timeout = sta->deflink.status_stats.last_ack; + if (time_before(sta->deflink.status_stats.last_ack, sta->deflink.rx_stats.last_rx)) + timeout = sta->deflink.rx_stats.last_rx; timeout += IEEE80211_CONNECTION_IDLE_TIME; /* If timeout is after now, then update timer to fire at @@ -5638,7 +5638,7 @@ static int ieee80211_prep_connection(struct ieee80211_sub_if_data *sdata, } if (rates) - new_sta->sta.supp_rates[cbss->channel->band] = rates; + new_sta->sta.deflink.supp_rates[cbss->channel->band] = rates; else sdata_info(sdata, "No rates found, keeping mandatory only\n"); diff --git a/net/mac80211/ocb.c b/net/mac80211/ocb.c index 7c1a735b9eee..f97cb4c453d3 100644 --- a/net/mac80211/ocb.c +++ b/net/mac80211/ocb.c @@ -74,7 +74,7 @@ void ieee80211_ocb_rx_no_sta(struct ieee80211_sub_if_data *sdata, /* Add only mandatory rates for now */ sband = local->hw.wiphy->bands[band]; - sta->sta.supp_rates[band] = + sta->sta.deflink.supp_rates[band] = ieee80211_mandatory_rates(sband, scan_width); spin_lock(&ifocb->incomplete_lock); diff --git a/net/mac80211/rate.c b/net/mac80211/rate.c index 8c6416129d5b..ae9700e0a1a5 100644 --- a/net/mac80211/rate.c +++ b/net/mac80211/rate.c @@ -371,7 +371,7 @@ static void __rate_control_send_low(struct ieee80211_hw *hw, WARN_ONCE(i == sband->n_bitrates, "no supported rates for sta %pM (0x%x, band %d) in rate_mask 0x%x with flags 0x%x\n", sta ? sta->addr : NULL, - sta ? sta->supp_rates[sband->band] : -1, + sta ? sta->deflink.supp_rates[sband->band] : -1, sband->band, rate_mask, rate_flags); @@ -781,11 +781,11 @@ static bool rate_control_cap_mask(struct ieee80211_sub_if_data *sdata, u16 sta_vht_mask[NL80211_VHT_NSS_MAX]; /* Filter out rates that the STA does not support */ - *mask &= sta->supp_rates[sband->band]; + *mask &= sta->deflink.supp_rates[sband->band]; for (i = 0; i < IEEE80211_HT_MCS_MASK_LEN; i++) - mcs_mask[i] &= sta->ht_cap.mcs.rx_mask[i]; + mcs_mask[i] &= sta->deflink.ht_cap.mcs.rx_mask[i]; - sta_vht_cap = sta->vht_cap.vht_mcs.rx_mcs_map; + sta_vht_cap = sta->deflink.vht_cap.vht_mcs.rx_mcs_map; ieee80211_get_vht_mask_from_cap(sta_vht_cap, sta_vht_mask); for (i = 0; i < NL80211_VHT_NSS_MAX; i++) vht_mask[i] &= sta_vht_mask[i]; diff --git a/net/mac80211/rc80211_minstrel_ht.c b/net/mac80211/rc80211_minstrel_ht.c index 9c6ace858107..7b1f5c045e06 100644 --- a/net/mac80211/rc80211_minstrel_ht.c +++ b/net/mac80211/rc80211_minstrel_ht.c @@ -362,6 +362,9 @@ minstrel_ht_get_stats(struct minstrel_priv *mp, struct minstrel_ht_sta *mi, group = MINSTREL_CCK_GROUP; for (idx = 0; idx < ARRAY_SIZE(mp->cck_rates); idx++) { + if (!(mi->supported[group] & BIT(idx))) + continue; + if (rate->idx != mp->cck_rates[idx]) continue; @@ -603,7 +606,7 @@ minstrel_ht_prob_rate_reduce_streams(struct minstrel_ht_sta *mi) int tmp_max_streams, group, tmp_idx, tmp_prob; int tmp_tp = 0; - if (!mi->sta->ht_cap.ht_supported) + if (!mi->sta->deflink.ht_cap.ht_supported) return; group = MI_RATE_GROUP(mi->max_tp_rate[0]); @@ -993,7 +996,7 @@ minstrel_ht_update_stats(struct minstrel_priv *mp, struct minstrel_ht_sta *mi) u16 tmp_mcs_tp_rate[MAX_THR_RATES], tmp_group_tp_rate[MAX_THR_RATES]; u16 tmp_legacy_tp_rate[MAX_THR_RATES], tmp_max_prob_rate; u16 index; - bool ht_supported = mi->sta->ht_cap.ht_supported; + bool ht_supported = mi->sta->deflink.ht_cap.ht_supported; if (mi->ampdu_packets > 0) { if (!ieee80211_hw_check(mp->hw, TX_STATUS_NO_AMPDU_LEN)) @@ -1416,7 +1419,7 @@ minstrel_ht_get_max_amsdu_len(struct minstrel_ht_sta *mi) * the limit here to avoid the complexity of having to de-aggregate * packets in the queue. */ - if (!mi->sta->vht_cap.vht_supported) + if (!mi->sta->deflink.vht_cap.vht_supported) return IEEE80211_MAX_MPDU_LEN_HT_BA; /* unlimited */ @@ -1533,7 +1536,7 @@ minstrel_ht_update_cck(struct minstrel_priv *mp, struct minstrel_ht_sta *mi, if (sband->band != NL80211_BAND_2GHZ) return; - if (sta->ht_cap.ht_supported && + if (sta->deflink.ht_cap.ht_supported && !ieee80211_hw_check(mp->hw, SUPPORTS_HT_CCK_RATES)) return; @@ -1556,7 +1559,7 @@ minstrel_ht_update_ofdm(struct minstrel_priv *mp, struct minstrel_ht_sta *mi, const u8 *rates; int i; - if (sta->ht_cap.ht_supported) + if (sta->deflink.ht_cap.ht_supported) return; rates = mp->ofdm_rates[sband->band]; @@ -1576,9 +1579,9 @@ minstrel_ht_update_caps(void *priv, struct ieee80211_supported_band *sband, { struct minstrel_priv *mp = priv; struct minstrel_ht_sta *mi = priv_sta; - struct ieee80211_mcs_info *mcs = &sta->ht_cap.mcs; - u16 ht_cap = sta->ht_cap.cap; - struct ieee80211_sta_vht_cap *vht_cap = &sta->vht_cap; + struct ieee80211_mcs_info *mcs = &sta->deflink.ht_cap.mcs; + u16 ht_cap = sta->deflink.ht_cap.cap; + struct ieee80211_sta_vht_cap *vht_cap = &sta->deflink.vht_cap; const struct ieee80211_rate *ctl_rate; bool ldpc, erp; int use_vht; @@ -1650,7 +1653,7 @@ minstrel_ht_update_caps(void *priv, struct ieee80211_supported_band *sband, } if (gflags & IEEE80211_TX_RC_40_MHZ_WIDTH && - sta->bandwidth < IEEE80211_STA_RX_BW_40) + sta->deflink.bandwidth < IEEE80211_STA_RX_BW_40) continue; nss = minstrel_mcs_groups[i].streams; @@ -1677,7 +1680,7 @@ minstrel_ht_update_caps(void *priv, struct ieee80211_supported_band *sband, continue; if (gflags & IEEE80211_TX_RC_80_MHZ_WIDTH) { - if (sta->bandwidth < IEEE80211_STA_RX_BW_80 || + if (sta->deflink.bandwidth < IEEE80211_STA_RX_BW_80 || ((gflags & IEEE80211_TX_RC_SHORT_GI) && !(vht_cap->cap & IEEE80211_VHT_CAP_SHORT_GI_80))) { continue; diff --git a/net/mac80211/rx.c b/net/mac80211/rx.c index beb6b92eb780..959a36fd658b 100644 --- a/net/mac80211/rx.c +++ b/net/mac80211/rx.c @@ -221,7 +221,7 @@ static void __ieee80211_queue_skb_to_iface(struct ieee80211_sub_if_data *sdata, skb_queue_tail(&sdata->skb_queue, skb); ieee80211_queue_work(&sdata->local->hw, &sdata->work); if (sta) - sta->rx_stats.packets++; + sta->deflink.rx_stats.packets++; } static void ieee80211_queue_skb_to_iface(struct ieee80211_sub_if_data *sdata, @@ -1465,7 +1465,7 @@ ieee80211_rx_h_check_dup(struct ieee80211_rx_data *rx) if (unlikely(ieee80211_has_retry(hdr->frame_control) && rx->sta->last_seq_ctrl[rx->seqno_idx] == hdr->seq_ctrl)) { I802_DEBUG_INC(rx->local->dot11FrameDuplicateCount); - rx->sta->rx_stats.num_duplicates++; + rx->sta->deflink.rx_stats.num_duplicates++; return RX_DROP_UNUSABLE; } else if (!(status->flag & RX_FLAG_AMSDU_MORE)) { rx->sta->last_seq_ctrl[rx->seqno_idx] = hdr->seq_ctrl; @@ -1761,46 +1761,47 @@ ieee80211_rx_h_sta_process(struct ieee80211_rx_data *rx) NL80211_IFTYPE_ADHOC); if (ether_addr_equal(bssid, rx->sdata->u.ibss.bssid) && test_sta_flag(sta, WLAN_STA_AUTHORIZED)) { - sta->rx_stats.last_rx = jiffies; + sta->deflink.rx_stats.last_rx = jiffies; if (ieee80211_is_data(hdr->frame_control) && !is_multicast_ether_addr(hdr->addr1)) - sta->rx_stats.last_rate = + sta->deflink.rx_stats.last_rate = sta_stats_encode_rate(status); } } else if (rx->sdata->vif.type == NL80211_IFTYPE_OCB) { - sta->rx_stats.last_rx = jiffies; + sta->deflink.rx_stats.last_rx = jiffies; } else if (!ieee80211_is_s1g_beacon(hdr->frame_control) && !is_multicast_ether_addr(hdr->addr1)) { /* * Mesh beacons will update last_rx when if they are found to * match the current local configuration when processed. */ - sta->rx_stats.last_rx = jiffies; + sta->deflink.rx_stats.last_rx = jiffies; if (ieee80211_is_data(hdr->frame_control)) - sta->rx_stats.last_rate = sta_stats_encode_rate(status); + sta->deflink.rx_stats.last_rate = sta_stats_encode_rate(status); } - sta->rx_stats.fragments++; + sta->deflink.rx_stats.fragments++; - u64_stats_update_begin(&rx->sta->rx_stats.syncp); - sta->rx_stats.bytes += rx->skb->len; - u64_stats_update_end(&rx->sta->rx_stats.syncp); + u64_stats_update_begin(&rx->sta->deflink.rx_stats.syncp); + sta->deflink.rx_stats.bytes += rx->skb->len; + u64_stats_update_end(&rx->sta->deflink.rx_stats.syncp); if (!(status->flag & RX_FLAG_NO_SIGNAL_VAL)) { - sta->rx_stats.last_signal = status->signal; - ewma_signal_add(&sta->rx_stats_avg.signal, -status->signal); + sta->deflink.rx_stats.last_signal = status->signal; + ewma_signal_add(&sta->deflink.rx_stats_avg.signal, + -status->signal); } if (status->chains) { - sta->rx_stats.chains = status->chains; + sta->deflink.rx_stats.chains = status->chains; for (i = 0; i < ARRAY_SIZE(status->chain_signal); i++) { int signal = status->chain_signal[i]; if (!(status->chains & BIT(i))) continue; - sta->rx_stats.chain_signal_last[i] = signal; - ewma_signal_add(&sta->rx_stats_avg.chain_signal[i], + sta->deflink.rx_stats.chain_signal_last[i] = signal; + ewma_signal_add(&sta->deflink.rx_stats_avg.chain_signal[i], -signal); } } @@ -1861,7 +1862,7 @@ ieee80211_rx_h_sta_process(struct ieee80211_rx_data *rx) * Update counter and free packet here to avoid * counting this as a dropped packed. */ - sta->rx_stats.packets++; + sta->deflink.rx_stats.packets++; dev_kfree_skb(rx->skb); return RX_QUEUED; } @@ -1893,11 +1894,11 @@ ieee80211_rx_get_bigtk(struct ieee80211_rx_data *rx, int idx) } if (rx->sta) - key = rcu_dereference(rx->sta->gtk[idx]); + key = rcu_dereference(rx->sta->deflink.gtk[idx]); if (!key) key = rcu_dereference(sdata->keys[idx]); if (!key && rx->sta) - key = rcu_dereference(rx->sta->gtk[idx2]); + key = rcu_dereference(rx->sta->deflink.gtk[idx2]); if (!key) key = rcu_dereference(sdata->keys[idx2]); @@ -2012,7 +2013,7 @@ ieee80211_rx_h_decrypt(struct ieee80211_rx_data *rx) test_sta_flag(rx->sta, WLAN_STA_MFP)) return RX_DROP_MONITOR; - rx->key = rcu_dereference(rx->sta->gtk[mmie_keyidx]); + rx->key = rcu_dereference(rx->sta->deflink.gtk[mmie_keyidx]); } if (!rx->key) rx->key = rcu_dereference(rx->sdata->keys[mmie_keyidx]); @@ -2035,7 +2036,7 @@ ieee80211_rx_h_decrypt(struct ieee80211_rx_data *rx) } else { if (rx->sta) { for (i = 0; i < NUM_DEFAULT_KEYS; i++) { - key = rcu_dereference(rx->sta->gtk[i]); + key = rcu_dereference(rx->sta->deflink.gtk[i]); if (key) break; } @@ -2072,7 +2073,7 @@ ieee80211_rx_h_decrypt(struct ieee80211_rx_data *rx) /* check per-station GTK first, if multicast packet */ if (is_multicast_ether_addr(hdr->addr1) && rx->sta) - rx->key = rcu_dereference(rx->sta->gtk[keyidx]); + rx->key = rcu_dereference(rx->sta->deflink.gtk[keyidx]); /* if not found, try default key */ if (!rx->key) { @@ -2398,7 +2399,7 @@ ieee80211_rx_h_defragment(struct ieee80211_rx_data *rx) out: ieee80211_led_rx(rx->local); if (rx->sta) - rx->sta->rx_stats.packets++; + rx->sta->deflink.rx_stats.packets++; return RX_CONTINUE; } @@ -2645,9 +2646,9 @@ ieee80211_deliver_skb(struct ieee80211_rx_data *rx) * for non-QoS-data frames. Here we know it's a data * frame, so count MSDUs. */ - u64_stats_update_begin(&rx->sta->rx_stats.syncp); - rx->sta->rx_stats.msdu[rx->seqno_idx]++; - u64_stats_update_end(&rx->sta->rx_stats.syncp); + u64_stats_update_begin(&rx->sta->deflink.rx_stats.syncp); + rx->sta->deflink.rx_stats.msdu[rx->seqno_idx]++; + u64_stats_update_end(&rx->sta->deflink.rx_stats.syncp); } if ((sdata->vif.type == NL80211_IFTYPE_AP || @@ -3178,6 +3179,49 @@ static void ieee80211_process_sa_query_req(struct ieee80211_sub_if_data *sdata, ieee80211_tx_skb(sdata, skb); } +static void +ieee80211_rx_check_bss_color_collision(struct ieee80211_rx_data *rx) +{ + struct ieee80211_mgmt *mgmt = (void *)rx->skb->data; + const struct element *ie; + size_t baselen; + + if (!wiphy_ext_feature_isset(rx->local->hw.wiphy, + NL80211_EXT_FEATURE_BSS_COLOR)) + return; + + if (ieee80211_hw_check(&rx->local->hw, DETECTS_COLOR_COLLISION)) + return; + + if (rx->sdata->vif.csa_active) + return; + + baselen = mgmt->u.beacon.variable - rx->skb->data; + if (baselen > rx->skb->len) + return; + + ie = cfg80211_find_ext_elem(WLAN_EID_EXT_HE_OPERATION, + mgmt->u.beacon.variable, + rx->skb->len - baselen); + if (ie && ie->datalen >= sizeof(struct ieee80211_he_operation) && + ie->datalen >= ieee80211_he_oper_size(ie->data + 1)) { + struct ieee80211_bss_conf *bss_conf = &rx->sdata->vif.bss_conf; + const struct ieee80211_he_operation *he_oper; + u8 color; + + he_oper = (void *)(ie->data + 1); + if (le32_get_bits(he_oper->he_oper_params, + IEEE80211_HE_OPERATION_BSS_COLOR_DISABLED)) + return; + + color = le32_get_bits(he_oper->he_oper_params, + IEEE80211_HE_OPERATION_BSS_COLOR_MASK); + if (color == bss_conf->he_bss_color.color) + ieeee80211_obss_color_collision_notify(&rx->sdata->vif, + BIT_ULL(color)); + } +} + static ieee80211_rx_result debug_noinline ieee80211_rx_h_mgmt_check(struct ieee80211_rx_data *rx) { @@ -3203,6 +3247,9 @@ ieee80211_rx_h_mgmt_check(struct ieee80211_rx_data *rx) !(rx->flags & IEEE80211_RX_BEACON_REPORTED)) { int sig = 0; + /* sw bss color collision detection */ + ieee80211_rx_check_bss_color_collision(rx); + if (ieee80211_hw_check(&rx->local->hw, SIGNAL_DBM) && !(status->flag & RX_FLAG_NO_SIGNAL_VAL)) sig = status->signal; @@ -3296,7 +3343,7 @@ ieee80211_rx_h_action(struct ieee80211_rx_data *rx) switch (mgmt->u.action.category) { case WLAN_CATEGORY_HT: /* reject HT action frames from stations not supporting HT */ - if (!rx->sta->sta.ht_cap.ht_supported) + if (!rx->sta->sta.deflink.ht_cap.ht_supported) goto invalid; if (sdata->vif.type != NL80211_IFTYPE_STATION && @@ -3360,7 +3407,7 @@ ieee80211_rx_h_action(struct ieee80211_rx_data *rx) struct sta_opmode_info sta_opmode = {}; /* If it doesn't support 40 MHz it can't change ... */ - if (!(rx->sta->sta.ht_cap.cap & + if (!(rx->sta->sta.deflink.ht_cap.cap & IEEE80211_HT_CAP_SUP_WIDTH_20_40)) goto handled; @@ -3370,13 +3417,13 @@ ieee80211_rx_h_action(struct ieee80211_rx_data *rx) max_bw = ieee80211_sta_cap_rx_bw(rx->sta); /* set cur_max_bandwidth and recalc sta bw */ - rx->sta->cur_max_bandwidth = max_bw; + rx->sta->deflink.cur_max_bandwidth = max_bw; new_bw = ieee80211_sta_cur_vht_bw(rx->sta); - if (rx->sta->sta.bandwidth == new_bw) + if (rx->sta->sta.deflink.bandwidth == new_bw) goto handled; - rx->sta->sta.bandwidth = new_bw; + rx->sta->sta.deflink.bandwidth = new_bw; sband = rx->local->hw.wiphy->bands[status->band]; sta_opmode.bw = ieee80211_sta_rx_bw_to_chan_width(rx->sta); @@ -3573,7 +3620,7 @@ ieee80211_rx_h_action(struct ieee80211_rx_data *rx) handled: if (rx->sta) - rx->sta->rx_stats.packets++; + rx->sta->deflink.rx_stats.packets++; dev_kfree_skb(rx->skb); return RX_QUEUED; @@ -3607,7 +3654,7 @@ ieee80211_rx_h_userspace_mgmt(struct ieee80211_rx_data *rx) ieee80211_rx_status_to_khz(status), sig, rx->skb->data, rx->skb->len, 0)) { if (rx->sta) - rx->sta->rx_stats.packets++; + rx->sta->deflink.rx_stats.packets++; dev_kfree_skb(rx->skb); return RX_QUEUED; } @@ -3645,7 +3692,7 @@ ieee80211_rx_h_action_post_userspace(struct ieee80211_rx_data *rx) handled: if (rx->sta) - rx->sta->rx_stats.packets++; + rx->sta->deflink.rx_stats.packets++; dev_kfree_skb(rx->skb); return RX_QUEUED; } @@ -3865,7 +3912,7 @@ static void ieee80211_rx_handlers_result(struct ieee80211_rx_data *rx, case RX_DROP_MONITOR: I802_DEBUG_INC(rx->sdata->local->rx_handlers_drop); if (rx->sta) - rx->sta->rx_stats.dropped++; + rx->sta->deflink.rx_stats.dropped++; fallthrough; case RX_CONTINUE: { struct ieee80211_rate *rate = NULL; @@ -3884,7 +3931,7 @@ static void ieee80211_rx_handlers_result(struct ieee80211_rx_data *rx, case RX_DROP_UNUSABLE: I802_DEBUG_INC(rx->sdata->local->rx_handlers_drop); if (rx->sta) - rx->sta->rx_stats.dropped++; + rx->sta->deflink.rx_stats.dropped++; dev_kfree_skb(rx->skb); break; case RX_QUEUED: @@ -4436,15 +4483,15 @@ static void ieee80211_rx_8023(struct ieee80211_rx_data *rx, void *sa = skb->data + ETH_ALEN; void *da = skb->data; - stats = &sta->rx_stats; + stats = &sta->deflink.rx_stats; if (fast_rx->uses_rss) - stats = this_cpu_ptr(sta->pcpu_rx_stats); + stats = this_cpu_ptr(sta->deflink.pcpu_rx_stats); /* statistics part of ieee80211_rx_h_sta_process() */ if (!(status->flag & RX_FLAG_NO_SIGNAL_VAL)) { stats->last_signal = status->signal; if (!fast_rx->uses_rss) - ewma_signal_add(&sta->rx_stats_avg.signal, + ewma_signal_add(&sta->deflink.rx_stats_avg.signal, -status->signal); } @@ -4460,7 +4507,7 @@ static void ieee80211_rx_8023(struct ieee80211_rx_data *rx, stats->chain_signal_last[i] = signal; if (!fast_rx->uses_rss) - ewma_signal_add(&sta->rx_stats_avg.chain_signal[i], + ewma_signal_add(&sta->deflink.rx_stats_avg.chain_signal[i], -signal); } } @@ -4536,7 +4583,7 @@ static bool ieee80211_invoke_fast_rx(struct ieee80211_rx_data *rx, u8 da[ETH_ALEN]; u8 sa[ETH_ALEN]; } addrs __aligned(2); - struct ieee80211_sta_rx_stats *stats = &sta->rx_stats; + struct ieee80211_sta_rx_stats *stats = &sta->deflink.rx_stats; /* for parallel-rx, we need to have DUP_VALIDATED, otherwise we write * to a common data structure; drivers can implement that per queue @@ -4638,7 +4685,7 @@ static bool ieee80211_invoke_fast_rx(struct ieee80211_rx_data *rx, drop: dev_kfree_skb(skb); if (fast_rx->uses_rss) - stats = this_cpu_ptr(sta->pcpu_rx_stats); + stats = this_cpu_ptr(sta->deflink.pcpu_rx_stats); stats->dropped++; return true; diff --git a/net/mac80211/s1g.c b/net/mac80211/s1g.c index 4141bc80cdfd..8ca7d45d6daa 100644 --- a/net/mac80211/s1g.c +++ b/net/mac80211/s1g.c @@ -11,8 +11,8 @@ void ieee80211_s1g_sta_rate_init(struct sta_info *sta) { /* avoid indicating legacy bitrates for S1G STAs */ - sta->tx_stats.last_rate.flags |= IEEE80211_TX_RC_S1G_MCS; - sta->rx_stats.last_rate = + sta->deflink.tx_stats.last_rate.flags |= IEEE80211_TX_RC_S1G_MCS; + sta->deflink.rx_stats.last_rate = STA_STATS_FIELD(TYPE, STA_STATS_RATE_TYPE_S1G); } diff --git a/net/mac80211/sta_info.c b/net/mac80211/sta_info.c index 91fbb1ee5c38..e04a0905e941 100644 --- a/net/mac80211/sta_info.c +++ b/net/mac80211/sta_info.c @@ -287,7 +287,7 @@ void sta_info_free(struct ieee80211_local *local, struct sta_info *sta) #ifdef CONFIG_MAC80211_MESH kfree(sta->mesh); #endif - free_percpu(sta->pcpu_rx_stats); + free_percpu(sta->deflink.pcpu_rx_stats); kfree(sta); } @@ -346,9 +346,9 @@ struct sta_info *sta_info_alloc(struct ieee80211_sub_if_data *sdata, return NULL; if (ieee80211_hw_check(hw, USES_RSS)) { - sta->pcpu_rx_stats = + sta->deflink.pcpu_rx_stats = alloc_percpu_gfp(struct ieee80211_sta_rx_stats, gfp); - if (!sta->pcpu_rx_stats) + if (!sta->deflink.pcpu_rx_stats) goto free; } @@ -376,6 +376,14 @@ struct sta_info *sta_info_alloc(struct ieee80211_sub_if_data *sdata, sta->sta.max_rx_aggregation_subframes = local->hw.max_rx_aggregation_subframes; + /* TODO link specific alloc and assignments for MLO Link STA */ + + /* For non MLO STA, link info can be accessed either via deflink + * or link[0] + */ + sta->link[0] = &sta->deflink; + sta->sta.link[0] = &sta->sta.deflink; + /* Extended Key ID needs to install keys for keyid 0 and 1 Rx-only. * The Tx path starts to use a key as soon as the key slot ptk_idx * references to is not NULL. To not use the initial Rx-only key @@ -387,9 +395,9 @@ struct sta_info *sta_info_alloc(struct ieee80211_sub_if_data *sdata, sta->local = local; sta->sdata = sdata; - sta->rx_stats.last_rx = jiffies; + sta->deflink.rx_stats.last_rx = jiffies; - u64_stats_init(&sta->rx_stats.syncp); + u64_stats_init(&sta->deflink.rx_stats.syncp); ieee80211_init_frag_cache(&sta->frags); @@ -399,10 +407,10 @@ struct sta_info *sta_info_alloc(struct ieee80211_sub_if_data *sdata, sta->reserved_tid = IEEE80211_TID_UNRESERVED; sta->last_connected = ktime_get_seconds(); - ewma_signal_init(&sta->rx_stats_avg.signal); - ewma_avg_signal_init(&sta->status_stats.avg_ack_signal); - for (i = 0; i < ARRAY_SIZE(sta->rx_stats_avg.chain_signal); i++) - ewma_signal_init(&sta->rx_stats_avg.chain_signal[i]); + ewma_signal_init(&sta->deflink.rx_stats_avg.signal); + ewma_avg_signal_init(&sta->deflink.status_stats.avg_ack_signal); + for (i = 0; i < ARRAY_SIZE(sta->deflink.rx_stats_avg.chain_signal); i++) + ewma_signal_init(&sta->deflink.rx_stats_avg.chain_signal[i]); if (local->ops->wake_tx_queue) { void *txq_data; @@ -472,7 +480,7 @@ struct sta_info *sta_info_alloc(struct ieee80211_sub_if_data *sdata, if (!(rate->flags & mandatory)) continue; - sta->sta.supp_rates[i] |= BIT(r); + sta->sta.deflink.supp_rates[i] |= BIT(r); } } @@ -524,7 +532,7 @@ free_txq: if (sta->sta.txq[0]) kfree(to_txq_info(sta->sta.txq[0])); free: - free_percpu(sta->pcpu_rx_stats); + free_percpu(sta->deflink.pcpu_rx_stats); #ifdef CONFIG_MAC80211_MESH kfree(sta->mesh); #endif @@ -2087,16 +2095,16 @@ int sta_info_move_state(struct sta_info *sta, u8 sta_info_tx_streams(struct sta_info *sta) { - struct ieee80211_sta_ht_cap *ht_cap = &sta->sta.ht_cap; + struct ieee80211_sta_ht_cap *ht_cap = &sta->sta.deflink.ht_cap; u8 rx_streams; - if (!sta->sta.ht_cap.ht_supported) + if (!sta->sta.deflink.ht_cap.ht_supported) return 1; - if (sta->sta.vht_cap.vht_supported) { + if (sta->sta.deflink.vht_cap.vht_supported) { int i; u16 tx_mcs_map = - le16_to_cpu(sta->sta.vht_cap.vht_mcs.tx_mcs_map); + le16_to_cpu(sta->sta.deflink.vht_cap.vht_mcs.tx_mcs_map); for (i = 7; i >= 0; i--) if ((tx_mcs_map & (0x3 << (i * 2))) != @@ -2123,16 +2131,16 @@ u8 sta_info_tx_streams(struct sta_info *sta) static struct ieee80211_sta_rx_stats * sta_get_last_rx_stats(struct sta_info *sta) { - struct ieee80211_sta_rx_stats *stats = &sta->rx_stats; + struct ieee80211_sta_rx_stats *stats = &sta->deflink.rx_stats; int cpu; - if (!sta->pcpu_rx_stats) + if (!sta->deflink.pcpu_rx_stats) return stats; for_each_possible_cpu(cpu) { struct ieee80211_sta_rx_stats *cpustats; - cpustats = per_cpu_ptr(sta->pcpu_rx_stats, cpu); + cpustats = per_cpu_ptr(sta->deflink.pcpu_rx_stats, cpu); if (time_after(cpustats->last_rx, stats->last_rx)) stats = cpustats; @@ -2226,13 +2234,15 @@ static void sta_set_tidstats(struct sta_info *sta, int cpu; if (!(tidstats->filled & BIT(NL80211_TID_STATS_RX_MSDU))) { - tidstats->rx_msdu += sta_get_tidstats_msdu(&sta->rx_stats, tid); + tidstats->rx_msdu += sta_get_tidstats_msdu(&sta->deflink.rx_stats, + tid); - if (sta->pcpu_rx_stats) { + if (sta->deflink.pcpu_rx_stats) { for_each_possible_cpu(cpu) { struct ieee80211_sta_rx_stats *cpurxs; - cpurxs = per_cpu_ptr(sta->pcpu_rx_stats, cpu); + cpurxs = per_cpu_ptr(sta->deflink.pcpu_rx_stats, + cpu); tidstats->rx_msdu += sta_get_tidstats_msdu(cpurxs, tid); } @@ -2243,19 +2253,19 @@ static void sta_set_tidstats(struct sta_info *sta, if (!(tidstats->filled & BIT(NL80211_TID_STATS_TX_MSDU))) { tidstats->filled |= BIT(NL80211_TID_STATS_TX_MSDU); - tidstats->tx_msdu = sta->tx_stats.msdu[tid]; + tidstats->tx_msdu = sta->deflink.tx_stats.msdu[tid]; } if (!(tidstats->filled & BIT(NL80211_TID_STATS_TX_MSDU_RETRIES)) && ieee80211_hw_check(&local->hw, REPORTS_TX_ACK_STATUS)) { tidstats->filled |= BIT(NL80211_TID_STATS_TX_MSDU_RETRIES); - tidstats->tx_msdu_retries = sta->status_stats.msdu_retries[tid]; + tidstats->tx_msdu_retries = sta->deflink.status_stats.msdu_retries[tid]; } if (!(tidstats->filled & BIT(NL80211_TID_STATS_TX_MSDU_FAILED)) && ieee80211_hw_check(&local->hw, REPORTS_TX_ACK_STATUS)) { tidstats->filled |= BIT(NL80211_TID_STATS_TX_MSDU_FAILED); - tidstats->tx_msdu_failed = sta->status_stats.msdu_failed[tid]; + tidstats->tx_msdu_failed = sta->deflink.status_stats.msdu_failed[tid]; } if (local->ops->wake_tx_queue && tid < IEEE80211_NUM_TIDS) { @@ -2326,26 +2336,27 @@ void sta_set_sinfo(struct sta_info *sta, struct station_info *sinfo, BIT_ULL(NL80211_STA_INFO_TX_BYTES)))) { sinfo->tx_bytes = 0; for (ac = 0; ac < IEEE80211_NUM_ACS; ac++) - sinfo->tx_bytes += sta->tx_stats.bytes[ac]; + sinfo->tx_bytes += sta->deflink.tx_stats.bytes[ac]; sinfo->filled |= BIT_ULL(NL80211_STA_INFO_TX_BYTES64); } if (!(sinfo->filled & BIT_ULL(NL80211_STA_INFO_TX_PACKETS))) { sinfo->tx_packets = 0; for (ac = 0; ac < IEEE80211_NUM_ACS; ac++) - sinfo->tx_packets += sta->tx_stats.packets[ac]; + sinfo->tx_packets += sta->deflink.tx_stats.packets[ac]; sinfo->filled |= BIT_ULL(NL80211_STA_INFO_TX_PACKETS); } if (!(sinfo->filled & (BIT_ULL(NL80211_STA_INFO_RX_BYTES64) | BIT_ULL(NL80211_STA_INFO_RX_BYTES)))) { - sinfo->rx_bytes += sta_get_stats_bytes(&sta->rx_stats); + sinfo->rx_bytes += sta_get_stats_bytes(&sta->deflink.rx_stats); - if (sta->pcpu_rx_stats) { + if (sta->deflink.pcpu_rx_stats) { for_each_possible_cpu(cpu) { struct ieee80211_sta_rx_stats *cpurxs; - cpurxs = per_cpu_ptr(sta->pcpu_rx_stats, cpu); + cpurxs = per_cpu_ptr(sta->deflink.pcpu_rx_stats, + cpu); sinfo->rx_bytes += sta_get_stats_bytes(cpurxs); } } @@ -2354,12 +2365,13 @@ void sta_set_sinfo(struct sta_info *sta, struct station_info *sinfo, } if (!(sinfo->filled & BIT_ULL(NL80211_STA_INFO_RX_PACKETS))) { - sinfo->rx_packets = sta->rx_stats.packets; - if (sta->pcpu_rx_stats) { + sinfo->rx_packets = sta->deflink.rx_stats.packets; + if (sta->deflink.pcpu_rx_stats) { for_each_possible_cpu(cpu) { struct ieee80211_sta_rx_stats *cpurxs; - cpurxs = per_cpu_ptr(sta->pcpu_rx_stats, cpu); + cpurxs = per_cpu_ptr(sta->deflink.pcpu_rx_stats, + cpu); sinfo->rx_packets += cpurxs->packets; } } @@ -2367,12 +2379,12 @@ void sta_set_sinfo(struct sta_info *sta, struct station_info *sinfo, } if (!(sinfo->filled & BIT_ULL(NL80211_STA_INFO_TX_RETRIES))) { - sinfo->tx_retries = sta->status_stats.retry_count; + sinfo->tx_retries = sta->deflink.status_stats.retry_count; sinfo->filled |= BIT_ULL(NL80211_STA_INFO_TX_RETRIES); } if (!(sinfo->filled & BIT_ULL(NL80211_STA_INFO_TX_FAILED))) { - sinfo->tx_failed = sta->status_stats.retry_failed; + sinfo->tx_failed = sta->deflink.status_stats.retry_failed; sinfo->filled |= BIT_ULL(NL80211_STA_INFO_TX_FAILED); } @@ -2393,12 +2405,12 @@ void sta_set_sinfo(struct sta_info *sta, struct station_info *sinfo, sinfo->filled |= BIT_ULL(NL80211_STA_INFO_AIRTIME_WEIGHT); } - sinfo->rx_dropped_misc = sta->rx_stats.dropped; - if (sta->pcpu_rx_stats) { + sinfo->rx_dropped_misc = sta->deflink.rx_stats.dropped; + if (sta->deflink.pcpu_rx_stats) { for_each_possible_cpu(cpu) { struct ieee80211_sta_rx_stats *cpurxs; - cpurxs = per_cpu_ptr(sta->pcpu_rx_stats, cpu); + cpurxs = per_cpu_ptr(sta->deflink.pcpu_rx_stats, cpu); sinfo->rx_dropped_misc += cpurxs->dropped; } } @@ -2417,10 +2429,10 @@ void sta_set_sinfo(struct sta_info *sta, struct station_info *sinfo, sinfo->filled |= BIT_ULL(NL80211_STA_INFO_SIGNAL); } - if (!sta->pcpu_rx_stats && + if (!sta->deflink.pcpu_rx_stats && !(sinfo->filled & BIT_ULL(NL80211_STA_INFO_SIGNAL_AVG))) { sinfo->signal_avg = - -ewma_signal_read(&sta->rx_stats_avg.signal); + -ewma_signal_read(&sta->deflink.rx_stats_avg.signal); sinfo->filled |= BIT_ULL(NL80211_STA_INFO_SIGNAL_AVG); } } @@ -2433,7 +2445,7 @@ void sta_set_sinfo(struct sta_info *sta, struct station_info *sinfo, !(sinfo->filled & (BIT_ULL(NL80211_STA_INFO_CHAIN_SIGNAL) | BIT_ULL(NL80211_STA_INFO_CHAIN_SIGNAL_AVG)))) { sinfo->filled |= BIT_ULL(NL80211_STA_INFO_CHAIN_SIGNAL); - if (!sta->pcpu_rx_stats) + if (!sta->deflink.pcpu_rx_stats) sinfo->filled |= BIT_ULL(NL80211_STA_INFO_CHAIN_SIGNAL_AVG); sinfo->chains = last_rxstats->chains; @@ -2442,12 +2454,12 @@ void sta_set_sinfo(struct sta_info *sta, struct station_info *sinfo, sinfo->chain_signal[i] = last_rxstats->chain_signal_last[i]; sinfo->chain_signal_avg[i] = - -ewma_signal_read(&sta->rx_stats_avg.chain_signal[i]); + -ewma_signal_read(&sta->deflink.rx_stats_avg.chain_signal[i]); } } if (!(sinfo->filled & BIT_ULL(NL80211_STA_INFO_TX_BITRATE))) { - sta_set_rate_info_tx(sta, &sta->tx_stats.last_rate, + sta_set_rate_info_tx(sta, &sta->deflink.tx_stats.last_rate, &sinfo->txrate); sinfo->filled |= BIT_ULL(NL80211_STA_INFO_TX_BITRATE); } @@ -2529,16 +2541,16 @@ void sta_set_sinfo(struct sta_info *sta, struct station_info *sinfo, } if (!(sinfo->filled & BIT_ULL(NL80211_STA_INFO_ACK_SIGNAL)) && - sta->status_stats.ack_signal_filled) { - sinfo->ack_signal = sta->status_stats.last_ack_signal; + sta->deflink.status_stats.ack_signal_filled) { + sinfo->ack_signal = sta->deflink.status_stats.last_ack_signal; sinfo->filled |= BIT_ULL(NL80211_STA_INFO_ACK_SIGNAL); } if (!(sinfo->filled & BIT_ULL(NL80211_STA_INFO_ACK_SIGNAL_AVG)) && - sta->status_stats.ack_signal_filled) { + sta->deflink.status_stats.ack_signal_filled) { sinfo->avg_ack_signal = -(s8)ewma_avg_signal_read( - &sta->status_stats.avg_ack_signal); + &sta->deflink.status_stats.avg_ack_signal); sinfo->filled |= BIT_ULL(NL80211_STA_INFO_ACK_SIGNAL_AVG); } @@ -2573,10 +2585,10 @@ unsigned long ieee80211_sta_last_active(struct sta_info *sta) { struct ieee80211_sta_rx_stats *stats = sta_get_last_rx_stats(sta); - if (!sta->status_stats.last_ack || - time_after(stats->last_rx, sta->status_stats.last_ack)) + if (!sta->deflink.status_stats.last_ack || + time_after(stats->last_rx, sta->deflink.status_stats.last_ack)) return stats->last_rx; - return sta->status_stats.last_ack; + return sta->deflink.status_stats.last_ack; } static void sta_update_codel_params(struct sta_info *sta, u32 thr) diff --git a/net/mac80211/sta_info.h b/net/mac80211/sta_info.h index 379fd367197f..35c390bedfba 100644 --- a/net/mac80211/sta_info.h +++ b/net/mac80211/sta_info.h @@ -484,6 +484,86 @@ struct ieee80211_fragment_cache { #define STA_SLOW_THRESHOLD 6000 /* 6 Mbps */ /** + * struct link_sta_info - Link STA information + * All link specific sta info are stored here for reference. This can be + * a single entry for non-MLD STA or multiple entries for MLD STA + * @addr: Link MAC address - Can be same as MLD STA mac address and is always + * same for non-MLD STA. This is used as key for searching link STA + * @link_id: Link ID uniquely identifying the link STA. This is 0 for non-MLD + * and set to the corresponding vif LinkId for MLD STA + * @sta: Points to the STA info + * @gtk: group keys negotiated with this station, if any + * @tx_stats: TX statistics + * @tx_stats.packets: # of packets transmitted + * @tx_stats.bytes: # of bytes in all packets transmitted + * @tx_stats.last_rate: last TX rate + * @tx_stats.msdu: # of transmitted MSDUs per TID + * @rx_stats: RX statistics + * @rx_stats_avg: averaged RX statistics + * @rx_stats_avg.signal: averaged signal + * @rx_stats_avg.chain_signal: averaged per-chain signal + * @pcpu_rx_stats: per-CPU RX statistics, assigned only if the driver needs + * this (by advertising the USES_RSS hw flag) + * @status_stats: TX status statistics + * @status_stats.filtered: # of filtered frames + * @status_stats.retry_failed: # of frames that failed after retry + * @status_stats.retry_count: # of retries attempted + * @status_stats.lost_packets: # of lost packets + * @status_stats.last_pkt_time: timestamp of last ACKed packet + * @status_stats.msdu_retries: # of MSDU retries + * @status_stats.msdu_failed: # of failed MSDUs + * @status_stats.last_ack: last ack timestamp (jiffies) + * @status_stats.last_ack_signal: last ACK signal + * @status_stats.ack_signal_filled: last ACK signal validity + * @status_stats.avg_ack_signal: average ACK signal + * TODO Move other link params from sta_info as required for MLD operation + */ +struct link_sta_info { + u8 addr[ETH_ALEN]; + u8 link_id; + + /* TODO rhash head/node for finding link_sta based on addr */ + + struct sta_info *sta; + struct ieee80211_key __rcu *gtk[NUM_DEFAULT_KEYS + + NUM_DEFAULT_MGMT_KEYS + + NUM_DEFAULT_BEACON_KEYS]; + struct ieee80211_sta_rx_stats __percpu *pcpu_rx_stats; + + /* Updated from RX path only, no locking requirements */ + struct ieee80211_sta_rx_stats rx_stats; + struct { + struct ewma_signal signal; + struct ewma_signal chain_signal[IEEE80211_MAX_CHAINS]; + } rx_stats_avg; + + /* Updated from TX status path only, no locking requirements */ + struct { + unsigned long filtered; + unsigned long retry_failed, retry_count; + unsigned int lost_packets; + unsigned long last_pkt_time; + u64 msdu_retries[IEEE80211_NUM_TIDS + 1]; + u64 msdu_failed[IEEE80211_NUM_TIDS + 1]; + unsigned long last_ack; + s8 last_ack_signal; + bool ack_signal_filled; + struct ewma_avg_signal avg_ack_signal; + } status_stats; + + /* Updated from TX path only, no locking requirements */ + struct { + u64 packets[IEEE80211_NUM_ACS]; + u64 bytes[IEEE80211_NUM_ACS]; + struct ieee80211_tx_rate last_rate; + struct rate_info last_rate_info; + u64 msdu[IEEE80211_NUM_TIDS + 1]; + } tx_stats; + + enum ieee80211_sta_rx_bandwidth cur_max_bandwidth; +}; + +/** * struct sta_info - STA information * * This structure collects information about a station that @@ -498,7 +578,6 @@ struct ieee80211_fragment_cache { * @sdata: virtual interface this station belongs to * @ptk: peer keys negotiated with this station, if any * @ptk_idx: last installed peer key index - * @gtk: group keys negotiated with this station, if any * @rate_ctrl: rate control algorithm reference * @rate_ctrl_lock: spinlock used to protect rate control data * (data inside the algorithm, so serializes calls there) @@ -544,30 +623,19 @@ struct ieee80211_fragment_cache { * @fast_rx: RX fastpath information * @tdls_chandef: a TDLS peer can have a wider chandef that is compatible to * the BSS one. - * @tx_stats: TX statistics - * @tx_stats.packets: # of packets transmitted - * @tx_stats.bytes: # of bytes in all packets transmitted - * @tx_stats.last_rate: last TX rate - * @tx_stats.msdu: # of transmitted MSDUs per TID - * @rx_stats: RX statistics - * @rx_stats_avg: averaged RX statistics - * @rx_stats_avg.signal: averaged signal - * @rx_stats_avg.chain_signal: averaged per-chain signal - * @pcpu_rx_stats: per-CPU RX statistics, assigned only if the driver needs - * this (by advertising the USES_RSS hw flag) - * @status_stats: TX status statistics - * @status_stats.filtered: # of filtered frames - * @status_stats.retry_failed: # of frames that failed after retry - * @status_stats.retry_count: # of retries attempted - * @status_stats.lost_packets: # of lost packets - * @status_stats.last_pkt_time: timestamp of last ACKed packet - * @status_stats.msdu_retries: # of MSDU retries - * @status_stats.msdu_failed: # of failed MSDUs - * @status_stats.last_ack: last ack timestamp (jiffies) - * @status_stats.last_ack_signal: last ACK signal - * @status_stats.ack_signal_filled: last ACK signal validity - * @status_stats.avg_ack_signal: average ACK signal * @frags: fragment cache + * @multi_link_sta: Identifies if this sta is a MLD STA or regular STA + * @deflink: This is the default link STA information, for non MLO STA all link + * specific STA information is accessed through @deflink or through + * link[0] which points to address of @deflink. For MLO Link STA + * the first added link STA will point to deflink. + * @link: reference to Link Sta entries. For Non MLO STA, except 1st link, + * i.e link[0] all links would be assigned to NULL by default and + * would access link information via @deflink or link[0]. For MLO + * STA, first link STA being added will point its link pointer to + * @deflink address and remaining would be allocated and the address + * would be assigned to link[link_id] where link_id is the id assigned + * by the AP. */ struct sta_info { /* General information, mostly static */ @@ -577,9 +645,6 @@ struct sta_info { u8 addr[ETH_ALEN]; struct ieee80211_local *local; struct ieee80211_sub_if_data *sdata; - struct ieee80211_key __rcu *gtk[NUM_DEFAULT_KEYS + - NUM_DEFAULT_MGMT_KEYS + - NUM_DEFAULT_BEACON_KEYS]; struct ieee80211_key __rcu *ptk[NUM_DEFAULT_KEYS]; u8 ptk_idx; struct rate_control_ref *rate_ctrl; @@ -589,7 +654,6 @@ struct sta_info { struct ieee80211_fast_tx __rcu *fast_tx; struct ieee80211_fast_rx __rcu *fast_rx; - struct ieee80211_sta_rx_stats __percpu *pcpu_rx_stats; #ifdef CONFIG_MAC80211_MESH struct mesh_sta *mesh; @@ -619,38 +683,9 @@ struct sta_info { u64 assoc_at; long last_connected; - /* Updated from RX path only, no locking requirements */ - struct ieee80211_sta_rx_stats rx_stats; - struct { - struct ewma_signal signal; - struct ewma_signal chain_signal[IEEE80211_MAX_CHAINS]; - } rx_stats_avg; - /* Plus 1 for non-QoS frames */ __le16 last_seq_ctrl[IEEE80211_NUM_TIDS + 1]; - /* Updated from TX status path only, no locking requirements */ - struct { - unsigned long filtered; - unsigned long retry_failed, retry_count; - unsigned int lost_packets; - unsigned long last_pkt_time; - u64 msdu_retries[IEEE80211_NUM_TIDS + 1]; - u64 msdu_failed[IEEE80211_NUM_TIDS + 1]; - unsigned long last_ack; - s8 last_ack_signal; - bool ack_signal_filled; - struct ewma_avg_signal avg_ack_signal; - } status_stats; - - /* Updated from TX path only, no locking requirements */ - struct { - u64 packets[IEEE80211_NUM_ACS]; - u64 bytes[IEEE80211_NUM_ACS]; - struct ieee80211_tx_rate last_rate; - struct rate_info last_rate_info; - u64 msdu[IEEE80211_NUM_TIDS + 1]; - } tx_stats; u16 tid_seq[IEEE80211_QOS_CTL_TID_MASK + 1]; struct airtime_info airtime[IEEE80211_NUM_ACS]; @@ -664,8 +699,6 @@ struct sta_info { struct dentry *debugfs_dir; #endif - enum ieee80211_sta_rx_bandwidth cur_max_bandwidth; - enum ieee80211_smps_mode known_smps_mode; const struct ieee80211_cipher_scheme *cipher_scheme; @@ -677,6 +710,10 @@ struct sta_info { struct ieee80211_fragment_cache frags; + bool multi_link_sta; + struct link_sta_info deflink; + struct link_sta_info *link[MAX_STA_LINKS]; + /* keep last! */ struct ieee80211_sta sta; }; diff --git a/net/mac80211/status.c b/net/mac80211/status.c index e81e8a5bb774..c563fa718d84 100644 --- a/net/mac80211/status.c +++ b/net/mac80211/status.c @@ -72,7 +72,7 @@ static void ieee80211_handle_filtered_frame(struct ieee80211_local *local, info->flags |= IEEE80211_TX_INTFL_RETRANSMISSION; info->flags &= ~IEEE80211_TX_TEMPORARY_FLAGS; - sta->status_stats.filtered++; + sta->deflink.status_stats.filtered++; /* * Clear more-data bit on filtered frames, it might be set @@ -776,7 +776,7 @@ static void ieee80211_lost_packet(struct sta_info *sta, !(info->flags & IEEE80211_TX_STAT_AMPDU)) return; - sta->status_stats.lost_packets++; + sta->deflink.status_stats.lost_packets++; if (sta->sta.tdls) { pkt_time = STA_LOST_TDLS_PKT_TIME; pkt_thr = STA_LOST_PKT_THRESHOLD; @@ -789,13 +789,14 @@ static void ieee80211_lost_packet(struct sta_info *sta, * mechanism. * For non-TDLS, use STA_LOST_PKT_THRESHOLD and STA_LOST_PKT_TIME */ - if (sta->status_stats.lost_packets < pkt_thr || - !time_after(jiffies, sta->status_stats.last_pkt_time + pkt_time)) + if (sta->deflink.status_stats.lost_packets < pkt_thr || + !time_after(jiffies, sta->deflink.status_stats.last_pkt_time + pkt_time)) return; cfg80211_cqm_pktloss_notify(sta->sdata->dev, sta->sta.addr, - sta->status_stats.lost_packets, GFP_ATOMIC); - sta->status_stats.lost_packets = 0; + sta->deflink.status_stats.lost_packets, + GFP_ATOMIC); + sta->deflink.status_stats.lost_packets = 0; } static int ieee80211_tx_get_rates(struct ieee80211_hw *hw, @@ -930,7 +931,7 @@ static void __ieee80211_tx_status(struct ieee80211_hw *hw, if (ieee80211_hw_check(&local->hw, HAS_RATE_CONTROL) && (ieee80211_is_data(hdr->frame_control)) && (rates_idx != -1)) - sta->tx_stats.last_rate = + sta->deflink.tx_stats.last_rate = info->status.rates[rates_idx]; if ((info->flags & IEEE80211_TX_STAT_AMPDU_NO_BACK) && @@ -976,9 +977,9 @@ static void __ieee80211_tx_status(struct ieee80211_hw *hw, return; } else if (ieee80211_is_data_present(fc)) { if (!acked && !noack_success) - sta->status_stats.msdu_failed[tid]++; + sta->deflink.status_stats.msdu_failed[tid]++; - sta->status_stats.msdu_retries[tid] += + sta->deflink.status_stats.msdu_retries[tid] += retry_count; } @@ -1111,7 +1112,7 @@ void ieee80211_tx_status_ext(struct ieee80211_hw *hw, sta = container_of(pubsta, struct sta_info, sta); if (status->rate) - sta->tx_stats.last_rate_info = *status->rate; + sta->deflink.tx_stats.last_rate_info = *status->rate; } if (skb && (tx_time_est = @@ -1142,8 +1143,8 @@ void ieee80211_tx_status_ext(struct ieee80211_hw *hw, struct ieee80211_sub_if_data *sdata = sta->sdata; if (!acked && !noack_success) - sta->status_stats.retry_failed++; - sta->status_stats.retry_count += retry_count; + sta->deflink.status_stats.retry_failed++; + sta->deflink.status_stats.retry_count += retry_count; if (ieee80211_hw_check(&local->hw, REPORTS_TX_ACK_STATUS)) { if (sdata->vif.type == NL80211_IFTYPE_STATION && @@ -1152,13 +1153,13 @@ void ieee80211_tx_status_ext(struct ieee80211_hw *hw, acked, info->status.tx_time); if (acked) { - sta->status_stats.last_ack = jiffies; + sta->deflink.status_stats.last_ack = jiffies; - if (sta->status_stats.lost_packets) - sta->status_stats.lost_packets = 0; + if (sta->deflink.status_stats.lost_packets) + sta->deflink.status_stats.lost_packets = 0; /* Track when last packet was ACKed */ - sta->status_stats.last_pkt_time = jiffies; + sta->deflink.status_stats.last_pkt_time = jiffies; /* Reset connection monitor */ if (sdata->vif.type == NL80211_IFTYPE_STATION && @@ -1166,10 +1167,10 @@ void ieee80211_tx_status_ext(struct ieee80211_hw *hw, sdata->u.mgd.probe_send_count = 0; if (ack_signal_valid) { - sta->status_stats.last_ack_signal = + sta->deflink.status_stats.last_ack_signal = (s8)info->status.ack_signal; - sta->status_stats.ack_signal_filled = true; - ewma_avg_signal_add(&sta->status_stats.avg_ack_signal, + sta->deflink.status_stats.ack_signal_filled = true; + ewma_avg_signal_add(&sta->deflink.status_stats.avg_ack_signal, -info->status.ack_signal); } } else if (test_sta_flag(sta, WLAN_STA_PS_STA)) { @@ -1235,7 +1236,7 @@ void ieee80211_tx_rate_update(struct ieee80211_hw *hw, rate_control_tx_status(local, sband, &status); if (ieee80211_hw_check(&local->hw, HAS_RATE_CONTROL)) - sta->tx_stats.last_rate = info->status.rates[0]; + sta->deflink.tx_stats.last_rate = info->status.rates[0]; } EXPORT_SYMBOL(ieee80211_tx_rate_update); diff --git a/net/mac80211/tdls.c b/net/mac80211/tdls.c index 137be9ec94af..4e2d22e47429 100644 --- a/net/mac80211/tdls.c +++ b/net/mac80211/tdls.c @@ -459,9 +459,9 @@ ieee80211_tdls_add_setup_start_ies(struct ieee80211_sub_if_data *sdata, pos = skb_put(skb, sizeof(struct ieee80211_ht_cap) + 2); ieee80211_ie_build_ht_cap(pos, &ht_cap, ht_cap.cap); } else if (action_code == WLAN_TDLS_SETUP_RESPONSE && - ht_cap.ht_supported && sta->sta.ht_cap.ht_supported) { + ht_cap.ht_supported && sta->sta.deflink.ht_cap.ht_supported) { /* the peer caps are already intersected with our own */ - memcpy(&ht_cap, &sta->sta.ht_cap, sizeof(ht_cap)); + memcpy(&ht_cap, &sta->sta.deflink.ht_cap, sizeof(ht_cap)); pos = skb_put(skb, sizeof(struct ieee80211_ht_cap) + 2); ieee80211_ie_build_ht_cap(pos, &ht_cap, ht_cap.cap); @@ -510,9 +510,9 @@ ieee80211_tdls_add_setup_start_ies(struct ieee80211_sub_if_data *sdata, pos = skb_put(skb, sizeof(struct ieee80211_vht_cap) + 2); ieee80211_ie_build_vht_cap(pos, &vht_cap, vht_cap.cap); } else if (action_code == WLAN_TDLS_SETUP_RESPONSE && - vht_cap.vht_supported && sta->sta.vht_cap.vht_supported) { + vht_cap.vht_supported && sta->sta.deflink.vht_cap.vht_supported) { /* the peer caps are already intersected with our own */ - memcpy(&vht_cap, &sta->sta.vht_cap, sizeof(vht_cap)); + memcpy(&vht_cap, &sta->sta.deflink.vht_cap, sizeof(vht_cap)); /* the AID is present only when VHT is implemented */ ieee80211_tdls_add_aid(sdata, skb); @@ -603,13 +603,13 @@ ieee80211_tdls_add_setup_cfm_ies(struct ieee80211_sub_if_data *sdata, * if HT support is only added in TDLS, we need an HT-operation IE. * add the IE as required by IEEE802.11-2012 9.23.3.2. */ - if (!ap_sta->sta.ht_cap.ht_supported && sta->sta.ht_cap.ht_supported) { + if (!ap_sta->sta.deflink.ht_cap.ht_supported && sta->sta.deflink.ht_cap.ht_supported) { u16 prot = IEEE80211_HT_OP_MODE_PROTECTION_NONHT_MIXED | IEEE80211_HT_OP_MODE_NON_GF_STA_PRSNT | IEEE80211_HT_OP_MODE_NON_HT_STA_PRSNT; pos = skb_put(skb, 2 + sizeof(struct ieee80211_ht_operation)); - ieee80211_ie_build_ht_oper(pos, &sta->sta.ht_cap, + ieee80211_ie_build_ht_oper(pos, &sta->sta.deflink.ht_cap, &sdata->vif.bss_conf.chandef, prot, true); } @@ -618,7 +618,7 @@ ieee80211_tdls_add_setup_cfm_ies(struct ieee80211_sub_if_data *sdata, /* only include VHT-operation if not on the 2.4GHz band */ if (sband->band != NL80211_BAND_2GHZ && - sta->sta.vht_cap.vht_supported) { + sta->sta.deflink.vht_cap.vht_supported) { /* * if both peers support WIDER_BW, we can expand the chandef to * a wider compatible one, up to 80MHz @@ -627,7 +627,7 @@ ieee80211_tdls_add_setup_cfm_ies(struct ieee80211_sub_if_data *sdata, ieee80211_tdls_chandef_vht_upgrade(sdata, sta); pos = skb_put(skb, 2 + sizeof(struct ieee80211_vht_operation)); - ieee80211_ie_build_vht_oper(pos, &sta->sta.vht_cap, + ieee80211_ie_build_vht_oper(pos, &sta->sta.deflink.vht_cap, &sta->tdls_chandef); } @@ -1269,8 +1269,8 @@ static void iee80211_tdls_recalc_chanctx(struct ieee80211_sub_if_data *sdata, bw = ieee80211_chan_width_to_rx_bw(conf->def.width); bw = min(bw, ieee80211_sta_cap_rx_bw(sta)); - if (bw != sta->sta.bandwidth) { - sta->sta.bandwidth = bw; + if (bw != sta->sta.deflink.bandwidth) { + sta->sta.deflink.bandwidth = bw; rate_control_rate_update(local, sband, sta, IEEE80211_RC_BW_CHANGED); /* @@ -1296,7 +1296,7 @@ static int iee80211_tdls_have_ht_peers(struct ieee80211_sub_if_data *sdata) if (!sta->sta.tdls || sta->sdata != sdata || !sta->uploaded || !test_sta_flag(sta, WLAN_STA_AUTHORIZED) || !test_sta_flag(sta, WLAN_STA_TDLS_PEER_AUTH) || - !sta->sta.ht_cap.ht_supported) + !sta->sta.deflink.ht_cap.ht_supported) continue; result = true; break; @@ -1321,7 +1321,7 @@ iee80211_tdls_recalc_ht_protection(struct ieee80211_sub_if_data *sdata, if (!(ifmgd->flags & IEEE80211_STA_DISABLE_HT)) return; - tdls_ht = (sta && sta->sta.ht_cap.ht_supported) || + tdls_ht = (sta && sta->sta.deflink.ht_cap.ht_supported) || iee80211_tdls_have_ht_peers(sdata); opmode = sdata->vif.bss_conf.ht_operation_mode; @@ -1900,7 +1900,7 @@ ieee80211_process_tdls_channel_switch_req(struct ieee80211_sub_if_data *sdata, } /* peer should have known better */ - if (!sta->sta.ht_cap.ht_supported && elems->sec_chan_offs && + if (!sta->sta.deflink.ht_cap.ht_supported && elems->sec_chan_offs && elems->sec_chan_offs->sec_chan_offs) { tdls_dbg(sdata, "TDLS chan switch - wide chan unsupported\n"); ret = -ENOTSUPP; diff --git a/net/mac80211/trace.h b/net/mac80211/trace.h index d91498f77796..743adfbb9b15 100644 --- a/net/mac80211/trace.h +++ b/net/mac80211/trace.h @@ -860,8 +860,8 @@ TRACE_EVENT(drv_sta_set_txpwr, LOCAL_ASSIGN; VIF_ASSIGN; STA_ASSIGN; - __entry->txpwr = sta->txpwr.power; - __entry->type = sta->txpwr.type; + __entry->txpwr = sta->deflink.txpwr.power; + __entry->type = sta->deflink.txpwr.type; ), TP_printk( diff --git a/net/mac80211/tx.c b/net/mac80211/tx.c index b6b20f38de0e..13253eb39d09 100644 --- a/net/mac80211/tx.c +++ b/net/mac80211/tx.c @@ -768,9 +768,9 @@ ieee80211_tx_h_rate_ctrl(struct ieee80211_tx_data *tx) if (txrc.reported_rate.idx < 0) { txrc.reported_rate = tx->rate; if (tx->sta && ieee80211_is_tx_data(tx->skb)) - tx->sta->tx_stats.last_rate = txrc.reported_rate; + tx->sta->deflink.tx_stats.last_rate = txrc.reported_rate; } else if (tx->sta) - tx->sta->tx_stats.last_rate = txrc.reported_rate; + tx->sta->deflink.tx_stats.last_rate = txrc.reported_rate; if (ratetbl) return TX_CONTINUE; @@ -837,7 +837,7 @@ ieee80211_tx_h_sequence(struct ieee80211_tx_data *tx) hdr->seq_ctrl = cpu_to_le16(tx->sdata->sequence_number); tx->sdata->sequence_number += 0x10; if (tx->sta) - tx->sta->tx_stats.msdu[IEEE80211_NUM_TIDS]++; + tx->sta->deflink.tx_stats.msdu[IEEE80211_NUM_TIDS]++; return TX_CONTINUE; } @@ -851,7 +851,7 @@ ieee80211_tx_h_sequence(struct ieee80211_tx_data *tx) /* include per-STA, per-TID sequence counter */ tid = ieee80211_get_tid(hdr); - tx->sta->tx_stats.msdu[tid]++; + tx->sta->deflink.tx_stats.msdu[tid]++; hdr->seq_ctrl = ieee80211_tx_next_seq(tx->sta, tid); @@ -1004,10 +1004,10 @@ ieee80211_tx_h_stats(struct ieee80211_tx_data *tx) skb_queue_walk(&tx->skbs, skb) { ac = skb_get_queue_mapping(skb); - tx->sta->tx_stats.bytes[ac] += skb->len; + tx->sta->deflink.tx_stats.bytes[ac] += skb->len; } if (ac >= 0) - tx->sta->tx_stats.packets[ac]++; + tx->sta->deflink.tx_stats.packets[ac]++; return TX_CONTINUE; } @@ -1159,7 +1159,7 @@ ieee80211_aggr_check(struct ieee80211_sub_if_data *sdata, if (!ref || !(ref->ops->capa & RATE_CTRL_CAPA_AMPDU_TRIGGER)) return; - if (!sta || !sta->sta.ht_cap.ht_supported || + if (!sta || !sta->sta.deflink.ht_cap.ht_supported || !sta->sta.wme || skb_get_queue_mapping(skb) == IEEE80211_AC_VO || skb->protocol == sdata->control_port_protocol) return; @@ -3462,18 +3462,18 @@ ieee80211_xmit_fast_finish(struct ieee80211_sub_if_data *sdata, } if (skb_shinfo(skb)->gso_size) - sta->tx_stats.msdu[tid] += + sta->deflink.tx_stats.msdu[tid] += DIV_ROUND_UP(skb->len, skb_shinfo(skb)->gso_size); else - sta->tx_stats.msdu[tid]++; + sta->deflink.tx_stats.msdu[tid]++; info->hw_queue = sdata->vif.hw_queue[skb_get_queue_mapping(skb)]; /* statistics normally done by ieee80211_tx_h_stats (but that * has to consider fragmentation, so is more complex) */ - sta->tx_stats.bytes[skb_get_queue_mapping(skb)] += skb->len; - sta->tx_stats.packets[skb_get_queue_mapping(skb)]++; + sta->deflink.tx_stats.bytes[skb_get_queue_mapping(skb)] += skb->len; + sta->deflink.tx_stats.packets[skb_get_queue_mapping(skb)]++; if (pn_offs) { u64 pn; @@ -4481,8 +4481,8 @@ static void ieee80211_8023_xmit(struct ieee80211_sub_if_data *sdata, dev_sw_netstats_tx_add(dev, 1, skb->len); - sta->tx_stats.bytes[skb_get_queue_mapping(skb)] += skb->len; - sta->tx_stats.packets[skb_get_queue_mapping(skb)]++; + sta->deflink.tx_stats.bytes[skb_get_queue_mapping(skb)] += skb->len; + sta->deflink.tx_stats.packets[skb_get_queue_mapping(skb)]++; if (sdata->vif.type == NL80211_IFTYPE_AP_VLAN) sdata = container_of(sdata->bss, diff --git a/net/mac80211/vht.c b/net/mac80211/vht.c index 8f16aa9c725d..ff26e0c4787b 100644 --- a/net/mac80211/vht.c +++ b/net/mac80211/vht.c @@ -118,14 +118,14 @@ ieee80211_vht_cap_ie_to_sta_vht_cap(struct ieee80211_sub_if_data *sdata, const struct ieee80211_vht_cap *vht_cap_ie, struct sta_info *sta) { - struct ieee80211_sta_vht_cap *vht_cap = &sta->sta.vht_cap; + struct ieee80211_sta_vht_cap *vht_cap = &sta->sta.deflink.vht_cap; struct ieee80211_sta_vht_cap own_cap; u32 cap_info, i; bool have_80mhz; memset(vht_cap, 0, sizeof(*vht_cap)); - if (!sta->sta.ht_cap.ht_supported) + if (!sta->sta.deflink.ht_cap.ht_supported) return; if (!vht_cap_ie || !sband->vht_cap.vht_supported) @@ -295,10 +295,10 @@ ieee80211_vht_cap_ie_to_sta_vht_cap(struct ieee80211_sub_if_data *sdata, switch (vht_cap->cap & IEEE80211_VHT_CAP_SUPP_CHAN_WIDTH_MASK) { case IEEE80211_VHT_CAP_SUPP_CHAN_WIDTH_160MHZ: case IEEE80211_VHT_CAP_SUPP_CHAN_WIDTH_160_80PLUS80MHZ: - sta->cur_max_bandwidth = IEEE80211_STA_RX_BW_160; + sta->deflink.cur_max_bandwidth = IEEE80211_STA_RX_BW_160; break; default: - sta->cur_max_bandwidth = IEEE80211_STA_RX_BW_80; + sta->deflink.cur_max_bandwidth = IEEE80211_STA_RX_BW_80; if (!(vht_cap->vht_mcs.tx_highest & cpu_to_le16(IEEE80211_VHT_EXT_NSS_BW_CAPABLE))) @@ -310,10 +310,10 @@ ieee80211_vht_cap_ie_to_sta_vht_cap(struct ieee80211_sub_if_data *sdata, * above) between 160 and 80+80 yet. */ if (cap_info & IEEE80211_VHT_CAP_EXT_NSS_BW_MASK) - sta->cur_max_bandwidth = IEEE80211_STA_RX_BW_160; + sta->deflink.cur_max_bandwidth = IEEE80211_STA_RX_BW_160; } - sta->sta.bandwidth = ieee80211_sta_cur_vht_bw(sta); + sta->sta.deflink.bandwidth = ieee80211_sta_cur_vht_bw(sta); switch (vht_cap->cap & IEEE80211_VHT_CAP_MAX_MPDU_MASK) { case IEEE80211_VHT_CAP_MAX_MPDU_LENGTH_11454: @@ -332,9 +332,9 @@ ieee80211_vht_cap_ie_to_sta_vht_cap(struct ieee80211_sub_if_data *sdata, /* FIXME: move this to some better location - parses HE/EHT now */ enum ieee80211_sta_rx_bandwidth ieee80211_sta_cap_rx_bw(struct sta_info *sta) { - struct ieee80211_sta_vht_cap *vht_cap = &sta->sta.vht_cap; - struct ieee80211_sta_he_cap *he_cap = &sta->sta.he_cap; - struct ieee80211_sta_eht_cap *eht_cap = &sta->sta.eht_cap; + struct ieee80211_sta_vht_cap *vht_cap = &sta->sta.deflink.vht_cap; + struct ieee80211_sta_he_cap *he_cap = &sta->sta.deflink.he_cap; + struct ieee80211_sta_eht_cap *eht_cap = &sta->sta.deflink.eht_cap; u32 cap_width; if (he_cap->has_he) { @@ -369,7 +369,7 @@ enum ieee80211_sta_rx_bandwidth ieee80211_sta_cap_rx_bw(struct sta_info *sta) } if (!vht_cap->vht_supported) - return sta->sta.ht_cap.cap & IEEE80211_HT_CAP_SUP_WIDTH_20_40 ? + return sta->sta.deflink.ht_cap.cap & IEEE80211_HT_CAP_SUP_WIDTH_20_40 ? IEEE80211_STA_RX_BW_40 : IEEE80211_STA_RX_BW_20; @@ -392,14 +392,14 @@ enum ieee80211_sta_rx_bandwidth ieee80211_sta_cap_rx_bw(struct sta_info *sta) enum nl80211_chan_width ieee80211_sta_cap_chan_bw(struct sta_info *sta) { - struct ieee80211_sta_vht_cap *vht_cap = &sta->sta.vht_cap; + struct ieee80211_sta_vht_cap *vht_cap = &sta->sta.deflink.vht_cap; u32 cap_width; if (!vht_cap->vht_supported) { - if (!sta->sta.ht_cap.ht_supported) + if (!sta->sta.deflink.ht_cap.ht_supported) return NL80211_CHAN_WIDTH_20_NOHT; - return sta->sta.ht_cap.cap & IEEE80211_HT_CAP_SUP_WIDTH_20_40 ? + return sta->sta.deflink.ht_cap.cap & IEEE80211_HT_CAP_SUP_WIDTH_20_40 ? NL80211_CHAN_WIDTH_40 : NL80211_CHAN_WIDTH_20; } @@ -416,13 +416,13 @@ enum nl80211_chan_width ieee80211_sta_cap_chan_bw(struct sta_info *sta) enum nl80211_chan_width ieee80211_sta_rx_bw_to_chan_width(struct sta_info *sta) { - enum ieee80211_sta_rx_bandwidth cur_bw = sta->sta.bandwidth; - struct ieee80211_sta_vht_cap *vht_cap = &sta->sta.vht_cap; + enum ieee80211_sta_rx_bandwidth cur_bw = sta->sta.deflink.bandwidth; + struct ieee80211_sta_vht_cap *vht_cap = &sta->sta.deflink.vht_cap; u32 cap_width; switch (cur_bw) { case IEEE80211_STA_RX_BW_20: - if (!sta->sta.ht_cap.ht_supported) + if (!sta->sta.deflink.ht_cap.ht_supported) return NL80211_CHAN_WIDTH_20_NOHT; else return NL80211_CHAN_WIDTH_20; @@ -473,7 +473,7 @@ enum ieee80211_sta_rx_bandwidth ieee80211_sta_cur_vht_bw(struct sta_info *sta) enum nl80211_chan_width bss_width = sdata->vif.bss_conf.chandef.width; bw = ieee80211_sta_cap_rx_bw(sta); - bw = min(bw, sta->cur_max_bandwidth); + bw = min(bw, sta->deflink.cur_max_bandwidth); /* Don't consider AP's bandwidth for TDLS peers, section 11.23.1 of * IEEE80211-2016 specification makes higher bandwidth operation @@ -501,12 +501,12 @@ void ieee80211_sta_set_rx_nss(struct sta_info *sta) bool support_160; /* if we received a notification already don't overwrite it */ - if (sta->sta.rx_nss) + if (sta->sta.deflink.rx_nss) return; - if (sta->sta.eht_cap.has_eht) { + if (sta->sta.deflink.eht_cap.has_eht) { int i; - const u8 *rx_nss_mcs = (void *)&sta->sta.eht_cap.eht_mcs_nss_supp; + const u8 *rx_nss_mcs = (void *)&sta->sta.deflink.eht_cap.eht_mcs_nss_supp; /* get the max nss for EHT over all possible bandwidths and mcs */ for (i = 0; i < sizeof(struct ieee80211_eht_mcs_nss_supp); i++) @@ -515,10 +515,10 @@ void ieee80211_sta_set_rx_nss(struct sta_info *sta) IEEE80211_EHT_MCS_NSS_RX)); } - if (sta->sta.he_cap.has_he) { + if (sta->sta.deflink.he_cap.has_he) { int i; u8 rx_mcs_80 = 0, rx_mcs_160 = 0; - const struct ieee80211_sta_he_cap *he_cap = &sta->sta.he_cap; + const struct ieee80211_sta_he_cap *he_cap = &sta->sta.deflink.he_cap; u16 mcs_160_map = le16_to_cpu(he_cap->he_mcs_nss_supp.rx_mcs_160); u16 mcs_80_map = le16_to_cpu(he_cap->he_mcs_nss_supp.rx_mcs_80); @@ -549,23 +549,23 @@ void ieee80211_sta_set_rx_nss(struct sta_info *sta) he_rx_nss = rx_mcs_80; } - if (sta->sta.ht_cap.ht_supported) { - if (sta->sta.ht_cap.mcs.rx_mask[0]) + if (sta->sta.deflink.ht_cap.ht_supported) { + if (sta->sta.deflink.ht_cap.mcs.rx_mask[0]) ht_rx_nss++; - if (sta->sta.ht_cap.mcs.rx_mask[1]) + if (sta->sta.deflink.ht_cap.mcs.rx_mask[1]) ht_rx_nss++; - if (sta->sta.ht_cap.mcs.rx_mask[2]) + if (sta->sta.deflink.ht_cap.mcs.rx_mask[2]) ht_rx_nss++; - if (sta->sta.ht_cap.mcs.rx_mask[3]) + if (sta->sta.deflink.ht_cap.mcs.rx_mask[3]) ht_rx_nss++; /* FIXME: consider rx_highest? */ } - if (sta->sta.vht_cap.vht_supported) { + if (sta->sta.deflink.vht_cap.vht_supported) { int i; u16 rx_mcs_map; - rx_mcs_map = le16_to_cpu(sta->sta.vht_cap.vht_mcs.rx_mcs_map); + rx_mcs_map = le16_to_cpu(sta->sta.deflink.vht_cap.vht_mcs.rx_mcs_map); for (i = 7; i >= 0; i--) { u8 mcs = (rx_mcs_map >> (2 * i)) & 3; @@ -581,7 +581,7 @@ void ieee80211_sta_set_rx_nss(struct sta_info *sta) rx_nss = max(vht_rx_nss, ht_rx_nss); rx_nss = max(he_rx_nss, rx_nss); rx_nss = max(eht_rx_nss, rx_nss); - sta->sta.rx_nss = max_t(u8, 1, rx_nss); + sta->sta.deflink.rx_nss = max_t(u8, 1, rx_nss); } u32 __ieee80211_vht_handle_opmode(struct ieee80211_sub_if_data *sdata, @@ -601,8 +601,8 @@ u32 __ieee80211_vht_handle_opmode(struct ieee80211_sub_if_data *sdata, nss >>= IEEE80211_OPMODE_NOTIF_RX_NSS_SHIFT; nss += 1; - if (sta->sta.rx_nss != nss) { - sta->sta.rx_nss = nss; + if (sta->sta.deflink.rx_nss != nss) { + sta->sta.deflink.rx_nss = nss; sta_opmode.rx_nss = nss; changed |= IEEE80211_RC_NSS_CHANGED; sta_opmode.changed |= STA_OPMODE_N_SS_CHANGED; @@ -611,27 +611,27 @@ u32 __ieee80211_vht_handle_opmode(struct ieee80211_sub_if_data *sdata, switch (opmode & IEEE80211_OPMODE_NOTIF_CHANWIDTH_MASK) { case IEEE80211_OPMODE_NOTIF_CHANWIDTH_20MHZ: /* ignore IEEE80211_OPMODE_NOTIF_BW_160_80P80 must not be set */ - sta->cur_max_bandwidth = IEEE80211_STA_RX_BW_20; + sta->deflink.cur_max_bandwidth = IEEE80211_STA_RX_BW_20; break; case IEEE80211_OPMODE_NOTIF_CHANWIDTH_40MHZ: /* ignore IEEE80211_OPMODE_NOTIF_BW_160_80P80 must not be set */ - sta->cur_max_bandwidth = IEEE80211_STA_RX_BW_40; + sta->deflink.cur_max_bandwidth = IEEE80211_STA_RX_BW_40; break; case IEEE80211_OPMODE_NOTIF_CHANWIDTH_80MHZ: if (opmode & IEEE80211_OPMODE_NOTIF_BW_160_80P80) - sta->cur_max_bandwidth = IEEE80211_STA_RX_BW_160; + sta->deflink.cur_max_bandwidth = IEEE80211_STA_RX_BW_160; else - sta->cur_max_bandwidth = IEEE80211_STA_RX_BW_80; + sta->deflink.cur_max_bandwidth = IEEE80211_STA_RX_BW_80; break; case IEEE80211_OPMODE_NOTIF_CHANWIDTH_160MHZ: /* legacy only, no longer used by newer spec */ - sta->cur_max_bandwidth = IEEE80211_STA_RX_BW_160; + sta->deflink.cur_max_bandwidth = IEEE80211_STA_RX_BW_160; break; } new_bw = ieee80211_sta_cur_vht_bw(sta); - if (new_bw != sta->sta.bandwidth) { - sta->sta.bandwidth = new_bw; + if (new_bw != sta->sta.deflink.bandwidth) { + sta->sta.deflink.bandwidth = new_bw; sta_opmode.bw = ieee80211_sta_rx_bw_to_chan_width(sta); changed |= IEEE80211_RC_BW_CHANGED; sta_opmode.changed |= STA_OPMODE_MAX_BW_CHANGED; diff --git a/net/mac802154/cfg.c b/net/mac802154/cfg.c index fbeebe3bc31d..1e4a9f74ed43 100644 --- a/net/mac802154/cfg.c +++ b/net/mac802154/cfg.c @@ -118,6 +118,7 @@ ieee802154_set_channel(struct wpan_phy *wpan_phy, u8 page, u8 channel) if (!ret) { wpan_phy->current_page = page; wpan_phy->current_channel = channel; + ieee802154_configure_durations(wpan_phy); } return ret; diff --git a/net/mac802154/ieee802154_i.h b/net/mac802154/ieee802154_i.h index 702560acc8ce..1381e6a5e180 100644 --- a/net/mac802154/ieee802154_i.h +++ b/net/mac802154/ieee802154_i.h @@ -56,6 +56,8 @@ struct ieee802154_local { struct sk_buff *tx_skb; struct work_struct tx_work; + /* A negative Linux error code or a null/positive MLME error status */ + int tx_result; }; enum { diff --git a/net/mac802154/main.c b/net/mac802154/main.c index 520cedc594e1..bd7bdb1219dd 100644 --- a/net/mac802154/main.c +++ b/net/mac802154/main.c @@ -113,6 +113,50 @@ ieee802154_alloc_hw(size_t priv_data_len, const struct ieee802154_ops *ops) } EXPORT_SYMBOL(ieee802154_alloc_hw); +void ieee802154_configure_durations(struct wpan_phy *phy) +{ + u32 duration = 0; + + switch (phy->current_page) { + case 0: + if (BIT(phy->current_channel) & 0x1) + /* 868 MHz BPSK 802.15.4-2003: 20 ksym/s */ + duration = 50 * NSEC_PER_USEC; + else if (BIT(phy->current_channel) & 0x7FE) + /* 915 MHz BPSK 802.15.4-2003: 40 ksym/s */ + duration = 25 * NSEC_PER_USEC; + else if (BIT(phy->current_channel) & 0x7FFF800) + /* 2400 MHz O-QPSK 802.15.4-2006: 62.5 ksym/s */ + duration = 16 * NSEC_PER_USEC; + break; + case 2: + if (BIT(phy->current_channel) & 0x1) + /* 868 MHz O-QPSK 802.15.4-2006: 25 ksym/s */ + duration = 40 * NSEC_PER_USEC; + else if (BIT(phy->current_channel) & 0x7FE) + /* 915 MHz O-QPSK 802.15.4-2006: 62.5 ksym/s */ + duration = 16 * NSEC_PER_USEC; + break; + case 3: + if (BIT(phy->current_channel) & 0x3FFF) + /* 2.4 GHz CSS 802.15.4a-2007: 1/6 Msym/s */ + duration = 6 * NSEC_PER_USEC; + break; + default: + break; + } + + if (!duration) { + pr_debug("Unknown PHY symbol duration\n"); + return; + } + + phy->symbol_duration = duration; + phy->lifs_period = (IEEE802154_LIFS_PERIOD * phy->symbol_duration) / NSEC_PER_SEC; + phy->sifs_period = (IEEE802154_SIFS_PERIOD * phy->symbol_duration) / NSEC_PER_SEC; +} +EXPORT_SYMBOL(ieee802154_configure_durations); + void ieee802154_free_hw(struct ieee802154_hw *hw) { struct ieee802154_local *local = hw_to_local(hw); @@ -131,10 +175,10 @@ static void ieee802154_setup_wpan_phy_pib(struct wpan_phy *wpan_phy) * Should be done when all drivers sets this value. */ - wpan_phy->lifs_period = IEEE802154_LIFS_PERIOD * - wpan_phy->symbol_duration; - wpan_phy->sifs_period = IEEE802154_SIFS_PERIOD * - wpan_phy->symbol_duration; + wpan_phy->lifs_period = + (IEEE802154_LIFS_PERIOD * wpan_phy->symbol_duration) / 1000; + wpan_phy->sifs_period = + (IEEE802154_SIFS_PERIOD * wpan_phy->symbol_duration) / 1000; } int ieee802154_register_hw(struct ieee802154_hw *hw) @@ -157,6 +201,8 @@ int ieee802154_register_hw(struct ieee802154_hw *hw) ieee802154_setup_wpan_phy_pib(local->phy); + ieee802154_configure_durations(local->phy); + if (!(hw->flags & IEEE802154_HW_CSMA_PARAMS)) { local->phy->supported.min_csma_backoffs = 4; local->phy->supported.max_csma_backoffs = 4; diff --git a/net/mac802154/util.c b/net/mac802154/util.c index f2078238718b..9f024d85563b 100644 --- a/net/mac802154/util.c +++ b/net/mac802154/util.c @@ -58,8 +58,11 @@ enum hrtimer_restart ieee802154_xmit_ifs_timer(struct hrtimer *timer) void ieee802154_xmit_complete(struct ieee802154_hw *hw, struct sk_buff *skb, bool ifs_handling) { + struct ieee802154_local *local = hw_to_local(hw); + + local->tx_result = IEEE802154_SUCCESS; + if (ifs_handling) { - struct ieee802154_local *local = hw_to_local(hw); u8 max_sifs_size; /* If transceiver sets CRC on his own we need to use lifs @@ -88,6 +91,23 @@ void ieee802154_xmit_complete(struct ieee802154_hw *hw, struct sk_buff *skb, } EXPORT_SYMBOL(ieee802154_xmit_complete); +void ieee802154_xmit_error(struct ieee802154_hw *hw, struct sk_buff *skb, + int reason) +{ + struct ieee802154_local *local = hw_to_local(hw); + + local->tx_result = reason; + ieee802154_wake_queue(hw); + dev_kfree_skb_any(skb); +} +EXPORT_SYMBOL(ieee802154_xmit_error); + +void ieee802154_xmit_hw_error(struct ieee802154_hw *hw, struct sk_buff *skb) +{ + ieee802154_xmit_error(hw, skb, IEEE802154_SYSTEM_ERROR); +} +EXPORT_SYMBOL(ieee802154_xmit_hw_error); + void ieee802154_stop_device(struct ieee802154_local *local) { flush_workqueue(local->workqueue); diff --git a/net/mctp/af_mctp.c b/net/mctp/af_mctp.c index e22b0cbb2f35..c2fc2a7b2528 100644 --- a/net/mctp/af_mctp.c +++ b/net/mctp/af_mctp.c @@ -216,7 +216,7 @@ static int mctp_recvmsg(struct socket *sock, struct msghdr *msg, size_t len, if (flags & ~(MSG_DONTWAIT | MSG_TRUNC | MSG_PEEK)) return -EOPNOTSUPP; - skb = skb_recv_datagram(sk, flags, flags & MSG_DONTWAIT, &rc); + skb = skb_recv_datagram(sk, flags, &rc); if (!skb) return rc; @@ -238,7 +238,7 @@ static int mctp_recvmsg(struct socket *sock, struct msghdr *msg, size_t len, if (rc < 0) goto out_free; - sock_recv_ts_and_drops(msg, sk, skb); + sock_recv_cmsgs(msg, sk, skb); if (addr) { struct mctp_skb_cb *cb = mctp_cb(skb); diff --git a/net/mctp/test/route-test.c b/net/mctp/test/route-test.c index 61205cf40074..24df29e135ed 100644 --- a/net/mctp/test/route-test.c +++ b/net/mctp/test/route-test.c @@ -352,7 +352,7 @@ static void mctp_test_route_input_sk(struct kunit *test) if (params->deliver) { KUNIT_EXPECT_EQ(test, rc, 0); - skb2 = skb_recv_datagram(sock->sk, 0, 1, &rc); + skb2 = skb_recv_datagram(sock->sk, MSG_DONTWAIT, &rc); KUNIT_EXPECT_NOT_ERR_OR_NULL(test, skb2); KUNIT_EXPECT_EQ(test, skb->len, 1); @@ -360,7 +360,7 @@ static void mctp_test_route_input_sk(struct kunit *test) } else { KUNIT_EXPECT_NE(test, rc, 0); - skb2 = skb_recv_datagram(sock->sk, 0, 1, &rc); + skb2 = skb_recv_datagram(sock->sk, MSG_DONTWAIT, &rc); KUNIT_EXPECT_PTR_EQ(test, skb2, NULL); } @@ -423,7 +423,7 @@ static void mctp_test_route_input_sk_reasm(struct kunit *test) rc = mctp_route_input(&rt->rt, skb); } - skb2 = skb_recv_datagram(sock->sk, 0, 1, &rc); + skb2 = skb_recv_datagram(sock->sk, MSG_DONTWAIT, &rc); if (params->rx_len) { KUNIT_EXPECT_NOT_ERR_OR_NULL(test, skb2); @@ -582,7 +582,7 @@ static void mctp_test_route_input_sk_keys(struct kunit *test) rc = mctp_route_input(&rt->rt, skb); /* (potentially) receive message */ - skb2 = skb_recv_datagram(sock->sk, 0, 1, &rc); + skb2 = skb_recv_datagram(sock->sk, MSG_DONTWAIT, &rc); if (params->deliver) KUNIT_EXPECT_NOT_ERR_OR_NULL(test, skb2); diff --git a/net/mpls/af_mpls.c b/net/mpls/af_mpls.c index d6fdc5782d33..35b5f806fdda 100644 --- a/net/mpls/af_mpls.c +++ b/net/mpls/af_mpls.c @@ -1527,10 +1527,9 @@ static int mpls_ifdown(struct net_device *dev, int event) rt->rt_nh_size; struct mpls_route *orig = rt; - rt = kmalloc(size, GFP_KERNEL); + rt = kmemdup(orig, size, GFP_KERNEL); if (!rt) return -ENOMEM; - memcpy(rt, orig, size); } } diff --git a/net/mptcp/Makefile b/net/mptcp/Makefile index e54daceac58b..cb7f53f6ab22 100644 --- a/net/mptcp/Makefile +++ b/net/mptcp/Makefile @@ -2,7 +2,7 @@ obj-$(CONFIG_MPTCP) += mptcp.o mptcp-y := protocol.o subflow.o options.o token.o crypto.o ctrl.o pm.o diag.o \ - mib.o pm_netlink.o sockopt.o + mib.o pm_netlink.o sockopt.o pm_userspace.o obj-$(CONFIG_SYN_COOKIES) += syncookies.o obj-$(CONFIG_INET_MPTCP_DIAG) += mptcp_diag.o diff --git a/net/mptcp/ctrl.c b/net/mptcp/ctrl.c index 8b235468c88f..ae20b7d92e28 100644 --- a/net/mptcp/ctrl.c +++ b/net/mptcp/ctrl.c @@ -16,6 +16,11 @@ #define MPTCP_SYSCTL_PATH "net/mptcp" static int mptcp_pernet_id; + +#ifdef CONFIG_SYSCTL +static int mptcp_pm_type_max = __MPTCP_PM_TYPE_MAX; +#endif + struct mptcp_pernet { #ifdef CONFIG_SYSCTL struct ctl_table_header *ctl_table_hdr; @@ -26,6 +31,7 @@ struct mptcp_pernet { u8 mptcp_enabled; u8 checksum_enabled; u8 allow_join_initial_addr_port; + u8 pm_type; }; static struct mptcp_pernet *mptcp_get_pernet(const struct net *net) @@ -58,6 +64,11 @@ unsigned int mptcp_stale_loss_cnt(const struct net *net) return mptcp_get_pernet(net)->stale_loss_cnt; } +int mptcp_get_pm_type(const struct net *net) +{ + return mptcp_get_pernet(net)->pm_type; +} + static void mptcp_pernet_set_defaults(struct mptcp_pernet *pernet) { pernet->mptcp_enabled = 1; @@ -65,6 +76,7 @@ static void mptcp_pernet_set_defaults(struct mptcp_pernet *pernet) pernet->checksum_enabled = 0; pernet->allow_join_initial_addr_port = 1; pernet->stale_loss_cnt = 4; + pernet->pm_type = MPTCP_PM_TYPE_KERNEL; } #ifdef CONFIG_SYSCTL @@ -108,6 +120,14 @@ static struct ctl_table mptcp_sysctl_table[] = { .mode = 0644, .proc_handler = proc_douintvec_minmax, }, + { + .procname = "pm_type", + .maxlen = sizeof(u8), + .mode = 0644, + .proc_handler = proc_dou8vec_minmax, + .extra1 = SYSCTL_ZERO, + .extra2 = &mptcp_pm_type_max + }, {} }; @@ -128,6 +148,7 @@ static int mptcp_pernet_new_table(struct net *net, struct mptcp_pernet *pernet) table[2].data = &pernet->checksum_enabled; table[3].data = &pernet->allow_join_initial_addr_port; table[4].data = &pernet->stale_loss_cnt; + table[5].data = &pernet->pm_type; hdr = register_net_sysctl(net, MPTCP_SYSCTL_PATH, table); if (!hdr) diff --git a/net/mptcp/mib.c b/net/mptcp/mib.c index e55d3dfbee0c..d93a8c9996fd 100644 --- a/net/mptcp/mib.c +++ b/net/mptcp/mib.c @@ -24,6 +24,7 @@ static const struct snmp_mib mptcp_snmp_list[] = { SNMP_MIB_ITEM("MPJoinAckRx", MPTCP_MIB_JOINACKRX), SNMP_MIB_ITEM("MPJoinAckHMacFailure", MPTCP_MIB_JOINACKMAC), SNMP_MIB_ITEM("DSSNotMatching", MPTCP_MIB_DSSNOMATCH), + SNMP_MIB_ITEM("InfiniteMapTx", MPTCP_MIB_INFINITEMAPTX), SNMP_MIB_ITEM("InfiniteMapRx", MPTCP_MIB_INFINITEMAPRX), SNMP_MIB_ITEM("DSSNoMatchTCP", MPTCP_MIB_DSSTCPMISMATCH), SNMP_MIB_ITEM("DataCsumErr", MPTCP_MIB_DATACSUMERR), diff --git a/net/mptcp/mib.h b/net/mptcp/mib.h index 00576179a619..529d07af9e14 100644 --- a/net/mptcp/mib.h +++ b/net/mptcp/mib.h @@ -17,6 +17,7 @@ enum linux_mptcp_mib_field { MPTCP_MIB_JOINACKRX, /* Received an ACK + MP_JOIN */ MPTCP_MIB_JOINACKMAC, /* HMAC was wrong on ACK + MP_JOIN */ MPTCP_MIB_DSSNOMATCH, /* Received a new mapping that did not match the previous one */ + MPTCP_MIB_INFINITEMAPTX, /* Sent an infinite mapping */ MPTCP_MIB_INFINITEMAPRX, /* Received an infinite mapping */ MPTCP_MIB_DSSTCPMISMATCH, /* DSS-mapping did not map with TCP's sequence numbers */ MPTCP_MIB_DATACSUMERR, /* The data checksum fail */ diff --git a/net/mptcp/mptcp_diag.c b/net/mptcp/mptcp_diag.c index f44125dd6697..dbb6d876a203 100644 --- a/net/mptcp/mptcp_diag.c +++ b/net/mptcp/mptcp_diag.c @@ -66,20 +66,103 @@ out_nosk: return err; } +struct mptcp_diag_ctx { + long s_slot; + long s_num; + unsigned int l_slot; + unsigned int l_num; +}; + +static void mptcp_diag_dump_listeners(struct sk_buff *skb, struct netlink_callback *cb, + const struct inet_diag_req_v2 *r, + bool net_admin) +{ + struct inet_diag_dump_data *cb_data = cb->data; + struct mptcp_diag_ctx *diag_ctx = (void *)cb->ctx; + struct nlattr *bc = cb_data->inet_diag_nla_bc; + struct net *net = sock_net(skb->sk); + int i; + + for (i = diag_ctx->l_slot; i < INET_LHTABLE_SIZE; i++) { + struct inet_listen_hashbucket *ilb; + struct hlist_nulls_node *node; + struct sock *sk; + int num = 0; + + ilb = &tcp_hashinfo.listening_hash[i]; + + rcu_read_lock(); + spin_lock(&ilb->lock); + sk_nulls_for_each(sk, node, &ilb->nulls_head) { + const struct mptcp_subflow_context *ctx = mptcp_subflow_ctx(sk); + struct inet_sock *inet = inet_sk(sk); + int ret; + + if (num < diag_ctx->l_num) + goto next_listen; + + if (!ctx || strcmp(inet_csk(sk)->icsk_ulp_ops->name, "mptcp")) + goto next_listen; + + sk = ctx->conn; + if (!sk || !net_eq(sock_net(sk), net)) + goto next_listen; + + if (r->sdiag_family != AF_UNSPEC && + sk->sk_family != r->sdiag_family) + goto next_listen; + + if (r->id.idiag_sport != inet->inet_sport && + r->id.idiag_sport) + goto next_listen; + + if (!refcount_inc_not_zero(&sk->sk_refcnt)) + goto next_listen; + + ret = sk_diag_dump(sk, skb, cb, r, bc, net_admin); + + sock_put(sk); + + if (ret < 0) { + spin_unlock(&ilb->lock); + rcu_read_unlock(); + diag_ctx->l_slot = i; + diag_ctx->l_num = num; + return; + } + diag_ctx->l_num = num + 1; + num = 0; +next_listen: + ++num; + } + spin_unlock(&ilb->lock); + rcu_read_unlock(); + + cond_resched(); + diag_ctx->l_num = 0; + } + + diag_ctx->l_num = 0; + diag_ctx->l_slot = i; +} + static void mptcp_diag_dump(struct sk_buff *skb, struct netlink_callback *cb, const struct inet_diag_req_v2 *r) { bool net_admin = netlink_net_capable(cb->skb, CAP_NET_ADMIN); + struct mptcp_diag_ctx *diag_ctx = (void *)cb->ctx; struct net *net = sock_net(skb->sk); struct inet_diag_dump_data *cb_data; struct mptcp_sock *msk; struct nlattr *bc; + BUILD_BUG_ON(sizeof(cb->ctx) < sizeof(*diag_ctx)); + cb_data = cb->data; bc = cb_data->inet_diag_nla_bc; - while ((msk = mptcp_token_iter_next(net, &cb->args[0], &cb->args[1])) != - NULL) { + while ((msk = mptcp_token_iter_next(net, &diag_ctx->s_slot, + &diag_ctx->s_num)) != NULL) { struct inet_sock *inet = (struct inet_sock *)msk; struct sock *sk = (struct sock *)msk; int ret = 0; @@ -101,11 +184,14 @@ next: sock_put(sk); if (ret < 0) { /* will retry on the same position */ - cb->args[1]--; + diag_ctx->s_num--; break; } cond_resched(); } + + if ((r->idiag_states & TCPF_LISTEN) && r->id.idiag_dport == 0) + mptcp_diag_dump_listeners(skb, cb, r, net_admin); } static void mptcp_diag_get_info(struct sock *sk, struct inet_diag_msg *r, @@ -116,6 +202,19 @@ static void mptcp_diag_get_info(struct sock *sk, struct inet_diag_msg *r, r->idiag_rqueue = sk_rmem_alloc_get(sk); r->idiag_wqueue = sk_wmem_alloc_get(sk); + + if (inet_sk_state_load(sk) == TCP_LISTEN) { + struct sock *lsk = READ_ONCE(msk->first); + + if (lsk) { + /* override with settings from tcp listener, + * so Send-Q will show accept queue. + */ + r->idiag_rqueue = READ_ONCE(lsk->sk_ack_backlog); + r->idiag_wqueue = READ_ONCE(lsk->sk_max_ack_backlog); + } + } + if (!info) return; diff --git a/net/mptcp/options.c b/net/mptcp/options.c index 325383646f5c..e05d9458a025 100644 --- a/net/mptcp/options.c +++ b/net/mptcp/options.c @@ -825,7 +825,7 @@ bool mptcp_established_options(struct sock *sk, struct sk_buff *skb, opts->suboptions = 0; - if (unlikely(__mptcp_check_fallback(msk))) + if (unlikely(__mptcp_check_fallback(msk) && !mptcp_check_infinite_map(skb))) return false; if (unlikely(skb && TCP_SKB_CB(skb)->tcp_flags & TCPHDR_RST)) { @@ -931,7 +931,7 @@ static bool check_fully_established(struct mptcp_sock *msk, struct sock *ssk, if (TCP_SKB_CB(skb)->seq == subflow->ssn_offset + 1 && TCP_SKB_CB(skb)->end_seq == TCP_SKB_CB(skb)->seq && subflow->mp_join && (mp_opt->suboptions & OPTIONS_MPTCP_MPJ) && - READ_ONCE(msk->pm.server_side)) + !subflow->request_join) tcp_send_ack(ssk); goto fully_established; } @@ -1133,7 +1133,7 @@ bool mptcp_incoming_options(struct sock *sk, struct sk_buff *skb) if ((mp_opt.suboptions & OPTION_MPTCP_ADD_ADDR) && add_addr_hmac_valid(msk, &mp_opt)) { if (!mp_opt.echo) { - mptcp_pm_add_addr_received(msk, &mp_opt.addr); + mptcp_pm_add_addr_received(sk, &mp_opt.addr); MPTCP_INC_STATS(sock_net(sk), MPTCP_MIB_ADDADDR); } else { mptcp_pm_add_addr_echoed(msk, &mp_opt.addr); @@ -1340,8 +1340,12 @@ void mptcp_write_options(__be32 *ptr, const struct tcp_sock *tp, put_unaligned_be32(mpext->subflow_seq, ptr); ptr += 1; if (opts->csum_reqd) { + /* data_len == 0 is reserved for the infinite mapping, + * the checksum will also be set to 0. + */ put_unaligned_be32(mpext->data_len << 16 | - mptcp_make_csum(mpext), ptr); + (mpext->data_len ? mptcp_make_csum(mpext) : 0), + ptr); } else { put_unaligned_be32(mpext->data_len << 16 | TCPOPT_NOP << 8 | TCPOPT_NOP, ptr); diff --git a/net/mptcp/pm.c b/net/mptcp/pm.c index 01809eef29b4..cdc2d79071f8 100644 --- a/net/mptcp/pm.c +++ b/net/mptcp/pm.c @@ -87,6 +87,9 @@ bool mptcp_pm_allow_new_subflow(struct mptcp_sock *msk) unsigned int subflows_max; int ret = 0; + if (mptcp_pm_is_userspace(msk)) + return mptcp_userspace_pm_active(msk); + subflows_max = mptcp_pm_get_subflows_max(msk); pr_debug("msk=%p subflows=%d max=%d allow=%d", msk, pm->subflows, @@ -179,7 +182,8 @@ void mptcp_pm_subflow_check_next(struct mptcp_sock *msk, const struct sock *ssk, bool update_subflows; update_subflows = (ssk->sk_state == TCP_CLOSE) && - (subflow->request_join || subflow->mp_join); + (subflow->request_join || subflow->mp_join) && + mptcp_pm_is_kernel(msk); if (!READ_ONCE(pm->work_pending) && !update_subflows) return; @@ -196,19 +200,28 @@ void mptcp_pm_subflow_check_next(struct mptcp_sock *msk, const struct sock *ssk, spin_unlock_bh(&pm->lock); } -void mptcp_pm_add_addr_received(struct mptcp_sock *msk, +void mptcp_pm_add_addr_received(const struct sock *ssk, const struct mptcp_addr_info *addr) { + struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(ssk); + struct mptcp_sock *msk = mptcp_sk(subflow->conn); struct mptcp_pm_data *pm = &msk->pm; pr_debug("msk=%p remote_id=%d accept=%d", msk, addr->id, READ_ONCE(pm->accept_addr)); - mptcp_event_addr_announced(msk, addr); + mptcp_event_addr_announced(ssk, addr); spin_lock_bh(&pm->lock); - if (!READ_ONCE(pm->accept_addr)) { + if (mptcp_pm_is_userspace(msk)) { + if (mptcp_userspace_pm_active(msk)) { + mptcp_pm_announce_addr(msk, addr, true); + mptcp_pm_add_addr_send_ack(msk); + } else { + __MPTCP_INC_STATS(sock_net((struct sock *)msk), MPTCP_MIB_ADDADDRDROP); + } + } else if (!READ_ONCE(pm->accept_addr)) { mptcp_pm_announce_addr(msk, addr, true); mptcp_pm_add_addr_send_ack(msk); } else if (mptcp_pm_schedule_work(msk, MPTCP_PM_ADD_ADDR_RECEIVED)) { @@ -262,19 +275,52 @@ void mptcp_pm_rm_addr_received(struct mptcp_sock *msk, spin_unlock_bh(&pm->lock); } -void mptcp_pm_mp_prio_received(struct sock *sk, u8 bkup) +void mptcp_pm_mp_prio_received(struct sock *ssk, u8 bkup) { - struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(sk); + struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(ssk); + struct sock *sk = subflow->conn; + struct mptcp_sock *msk; pr_debug("subflow->backup=%d, bkup=%d\n", subflow->backup, bkup); - subflow->backup = bkup; + msk = mptcp_sk(sk); + if (subflow->backup != bkup) { + subflow->backup = bkup; + mptcp_data_lock(sk); + if (!sock_owned_by_user(sk)) + msk->last_snd = NULL; + else + __set_bit(MPTCP_RESET_SCHEDULER, &msk->cb_flags); + mptcp_data_unlock(sk); + } - mptcp_event(MPTCP_EVENT_SUB_PRIORITY, mptcp_sk(subflow->conn), sk, GFP_ATOMIC); + mptcp_event(MPTCP_EVENT_SUB_PRIORITY, msk, ssk, GFP_ATOMIC); } void mptcp_pm_mp_fail_received(struct sock *sk, u64 fail_seq) { + struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(sk); + struct mptcp_sock *msk = mptcp_sk(subflow->conn); + struct sock *s = (struct sock *)msk; + pr_debug("fail_seq=%llu", fail_seq); + + if (mptcp_has_another_subflow(sk) || !READ_ONCE(msk->allow_infinite_fallback)) + return; + + if (!READ_ONCE(subflow->mp_fail_response_expect)) { + pr_debug("send MP_FAIL response and infinite map"); + + subflow->send_mp_fail = 1; + MPTCP_INC_STATS(sock_net(sk), MPTCP_MIB_MPFAILTX); + subflow->send_infinite_map = 1; + } else if (s && inet_sk_state_load(s) != TCP_CLOSE) { + pr_debug("MP_FAIL response received"); + + mptcp_data_lock(s); + if (inet_sk_state_load(s) != TCP_CLOSE) + sk_stop_timer(s, &s->sk_timer); + mptcp_data_unlock(s); + } } /* path manager helpers */ @@ -382,27 +428,48 @@ void mptcp_pm_subflow_chk_stale(const struct mptcp_sock *msk, struct sock *ssk) void mptcp_pm_data_reset(struct mptcp_sock *msk) { - msk->pm.add_addr_signaled = 0; - msk->pm.add_addr_accepted = 0; - msk->pm.local_addr_used = 0; - msk->pm.subflows = 0; - msk->pm.rm_list_tx.nr = 0; - msk->pm.rm_list_rx.nr = 0; - WRITE_ONCE(msk->pm.work_pending, false); - WRITE_ONCE(msk->pm.addr_signal, 0); - WRITE_ONCE(msk->pm.accept_addr, false); - WRITE_ONCE(msk->pm.accept_subflow, false); - WRITE_ONCE(msk->pm.remote_deny_join_id0, false); - msk->pm.status = 0; - bitmap_fill(msk->pm.id_avail_bitmap, MPTCP_PM_MAX_ADDR_ID + 1); + u8 pm_type = mptcp_get_pm_type(sock_net((struct sock *)msk)); + struct mptcp_pm_data *pm = &msk->pm; - mptcp_pm_nl_data_init(msk); + pm->add_addr_signaled = 0; + pm->add_addr_accepted = 0; + pm->local_addr_used = 0; + pm->subflows = 0; + pm->rm_list_tx.nr = 0; + pm->rm_list_rx.nr = 0; + WRITE_ONCE(pm->pm_type, pm_type); + + if (pm_type == MPTCP_PM_TYPE_KERNEL) { + bool subflows_allowed = !!mptcp_pm_get_subflows_max(msk); + + /* pm->work_pending must be only be set to 'true' when + * pm->pm_type is set to MPTCP_PM_TYPE_KERNEL + */ + WRITE_ONCE(pm->work_pending, + (!!mptcp_pm_get_local_addr_max(msk) && + subflows_allowed) || + !!mptcp_pm_get_add_addr_signal_max(msk)); + WRITE_ONCE(pm->accept_addr, + !!mptcp_pm_get_add_addr_accept_max(msk) && + subflows_allowed); + WRITE_ONCE(pm->accept_subflow, subflows_allowed); + } else { + WRITE_ONCE(pm->work_pending, 0); + WRITE_ONCE(pm->accept_addr, 0); + WRITE_ONCE(pm->accept_subflow, 0); + } + + WRITE_ONCE(pm->addr_signal, 0); + WRITE_ONCE(pm->remote_deny_join_id0, false); + pm->status = 0; + bitmap_fill(msk->pm.id_avail_bitmap, MPTCP_PM_MAX_ADDR_ID + 1); } void mptcp_pm_data_init(struct mptcp_sock *msk) { spin_lock_init(&msk->pm.lock); INIT_LIST_HEAD(&msk->pm.anno_list); + INIT_LIST_HEAD(&msk->pm.userspace_pm_local_addr_list); mptcp_pm_data_reset(msk); } diff --git a/net/mptcp/pm_netlink.c b/net/mptcp/pm_netlink.c index b5e8de6f7507..e099f2a12504 100644 --- a/net/mptcp/pm_netlink.c +++ b/net/mptcp/pm_netlink.c @@ -22,14 +22,6 @@ static struct genl_family mptcp_genl_family; static int pm_nl_pernet_id; -struct mptcp_pm_addr_entry { - struct list_head list; - struct mptcp_addr_info addr; - u8 flags; - int ifindex; - struct socket *lsk; -}; - struct mptcp_pm_add_entry { struct list_head list; struct mptcp_addr_info addr; @@ -55,8 +47,19 @@ struct pm_nl_pernet { #define MPTCP_PM_ADDR_MAX 8 #define ADD_ADDR_RETRANS_MAX 3 -static bool addresses_equal(const struct mptcp_addr_info *a, - const struct mptcp_addr_info *b, bool use_port) +static struct pm_nl_pernet *pm_nl_get_pernet(const struct net *net) +{ + return net_generic(net, pm_nl_pernet_id); +} + +static struct pm_nl_pernet * +pm_nl_get_pernet_from_msk(const struct mptcp_sock *msk) +{ + return pm_nl_get_pernet(sock_net((struct sock *)msk)); +} + +bool mptcp_addresses_equal(const struct mptcp_addr_info *a, + const struct mptcp_addr_info *b, bool use_port) { bool addr_equals = false; @@ -120,7 +123,7 @@ static bool lookup_subflow_by_saddr(const struct list_head *list, skc = (struct sock_common *)mptcp_subflow_tcp_sock(subflow); local_address(skc, &cur); - if (addresses_equal(&cur, saddr, saddr->port)) + if (mptcp_addresses_equal(&cur, saddr, saddr->port)) return true; } @@ -138,7 +141,7 @@ static bool lookup_subflow_by_daddr(const struct list_head *list, skc = (struct sock_common *)mptcp_subflow_tcp_sock(subflow); remote_address(skc, &cur); - if (addresses_equal(&cur, daddr, daddr->port)) + if (mptcp_addresses_equal(&cur, daddr, daddr->port)) return true; } @@ -206,43 +209,39 @@ select_signal_address(struct pm_nl_pernet *pernet, const struct mptcp_sock *msk) unsigned int mptcp_pm_get_add_addr_signal_max(const struct mptcp_sock *msk) { - const struct pm_nl_pernet *pernet; + const struct pm_nl_pernet *pernet = pm_nl_get_pernet_from_msk(msk); - pernet = net_generic(sock_net((const struct sock *)msk), pm_nl_pernet_id); return READ_ONCE(pernet->add_addr_signal_max); } EXPORT_SYMBOL_GPL(mptcp_pm_get_add_addr_signal_max); unsigned int mptcp_pm_get_add_addr_accept_max(const struct mptcp_sock *msk) { - struct pm_nl_pernet *pernet; + struct pm_nl_pernet *pernet = pm_nl_get_pernet_from_msk(msk); - pernet = net_generic(sock_net((struct sock *)msk), pm_nl_pernet_id); return READ_ONCE(pernet->add_addr_accept_max); } EXPORT_SYMBOL_GPL(mptcp_pm_get_add_addr_accept_max); unsigned int mptcp_pm_get_subflows_max(const struct mptcp_sock *msk) { - struct pm_nl_pernet *pernet; + struct pm_nl_pernet *pernet = pm_nl_get_pernet_from_msk(msk); - pernet = net_generic(sock_net((struct sock *)msk), pm_nl_pernet_id); return READ_ONCE(pernet->subflows_max); } EXPORT_SYMBOL_GPL(mptcp_pm_get_subflows_max); unsigned int mptcp_pm_get_local_addr_max(const struct mptcp_sock *msk) { - struct pm_nl_pernet *pernet; + struct pm_nl_pernet *pernet = pm_nl_get_pernet_from_msk(msk); - pernet = net_generic(sock_net((struct sock *)msk), pm_nl_pernet_id); return READ_ONCE(pernet->local_addr_max); } EXPORT_SYMBOL_GPL(mptcp_pm_get_local_addr_max); bool mptcp_pm_nl_check_work_pending(struct mptcp_sock *msk) { - struct pm_nl_pernet *pernet = net_generic(sock_net((struct sock *)msk), pm_nl_pernet_id); + struct pm_nl_pernet *pernet = pm_nl_get_pernet_from_msk(msk); if (msk->pm.subflows == mptcp_pm_get_subflows_max(msk) || (find_next_and_bit(pernet->id_bitmap, msk->pm.id_avail_bitmap, @@ -262,7 +261,7 @@ mptcp_lookup_anno_list_by_saddr(const struct mptcp_sock *msk, lockdep_assert_held(&msk->pm.lock); list_for_each_entry(entry, &msk->pm.anno_list, list) { - if (addresses_equal(&entry->addr, addr, true)) + if (mptcp_addresses_equal(&entry->addr, addr, true)) return entry; } @@ -279,7 +278,7 @@ bool mptcp_pm_sport_in_anno_list(struct mptcp_sock *msk, const struct sock *sk) spin_lock_bh(&msk->pm.lock); list_for_each_entry(entry, &msk->pm.anno_list, list) { - if (addresses_equal(&entry->addr, &saddr, true)) { + if (mptcp_addresses_equal(&entry->addr, &saddr, true)) { ret = true; goto out; } @@ -353,8 +352,8 @@ mptcp_pm_del_add_timer(struct mptcp_sock *msk, return entry; } -static bool mptcp_pm_alloc_anno_list(struct mptcp_sock *msk, - const struct mptcp_pm_addr_entry *entry) +bool mptcp_pm_alloc_anno_list(struct mptcp_sock *msk, + const struct mptcp_pm_addr_entry *entry) { struct mptcp_pm_add_entry *add_entry = NULL; struct sock *sk = (struct sock *)msk; @@ -362,8 +361,16 @@ static bool mptcp_pm_alloc_anno_list(struct mptcp_sock *msk, lockdep_assert_held(&msk->pm.lock); - if (mptcp_lookup_anno_list_by_saddr(msk, &entry->addr)) - return false; + add_entry = mptcp_lookup_anno_list_by_saddr(msk, &entry->addr); + + if (add_entry) { + if (mptcp_pm_is_kernel(msk)) + return false; + + sk_reset_timer(sk, &add_entry->add_timer, + jiffies + mptcp_get_add_addr_timeout(net)); + return true; + } add_entry = kmalloc(sizeof(*add_entry), GFP_ATOMIC); if (!add_entry) @@ -406,7 +413,7 @@ static bool lookup_address_in_vec(const struct mptcp_addr_info *addrs, unsigned int i; for (i = 0; i < nr; i++) { - if (addresses_equal(&addrs[i], addr, addr->port)) + if (mptcp_addresses_equal(&addrs[i], addr, addr->port)) return true; } @@ -442,7 +449,7 @@ static unsigned int fill_remote_addresses_vec(struct mptcp_sock *msk, bool fullm mptcp_for_each_subflow(msk, subflow) { ssk = mptcp_subflow_tcp_sock(subflow); remote_address((struct sock_common *)ssk, &addrs[i]); - if (deny_id0 && addresses_equal(&addrs[i], &remote, false)) + if (deny_id0 && mptcp_addresses_equal(&addrs[i], &remote, false)) continue; if (!lookup_address_in_vec(addrs, i, &addrs[i]) && @@ -475,7 +482,7 @@ __lookup_addr(struct pm_nl_pernet *pernet, const struct mptcp_addr_info *info, struct mptcp_pm_addr_entry *entry; list_for_each_entry(entry, &pernet->local_addr_list, list) { - if ((!lookup_by_id && addresses_equal(&entry->addr, info, true)) || + if ((!lookup_by_id && mptcp_addresses_equal(&entry->addr, info, true)) || (lookup_by_id && entry->addr.id == info->id)) return entry; } @@ -490,7 +497,7 @@ lookup_id_by_addr(const struct pm_nl_pernet *pernet, const struct mptcp_addr_inf rcu_read_lock(); list_for_each_entry(entry, &pernet->local_addr_list, list) { - if (addresses_equal(&entry->addr, addr, entry->addr.port)) { + if (mptcp_addresses_equal(&entry->addr, addr, entry->addr.port)) { ret = entry->addr.id; break; } @@ -508,7 +515,7 @@ static void mptcp_pm_create_subflow_or_signal_addr(struct mptcp_sock *msk) struct pm_nl_pernet *pernet; unsigned int subflows_max; - pernet = net_generic(sock_net(sk), pm_nl_pernet_id); + pernet = pm_nl_get_pernet(sock_net(sk)); add_addr_signal_max = mptcp_pm_get_add_addr_signal_max(msk); local_addr_max = mptcp_pm_get_local_addr_max(msk); @@ -604,7 +611,7 @@ static unsigned int fill_local_addresses_vec(struct mptcp_sock *msk, unsigned int subflows_max; int i = 0; - pernet = net_generic(sock_net(sk), pm_nl_pernet_id); + pernet = pm_nl_get_pernet_from_msk(msk); subflows_max = mptcp_pm_get_subflows_max(msk); rcu_read_lock(); @@ -724,9 +731,11 @@ static int mptcp_pm_nl_mp_prio_send_ack(struct mptcp_sock *msk, struct mptcp_addr_info local; local_address((struct sock_common *)ssk, &local); - if (!addresses_equal(&local, addr, addr->port)) + if (!mptcp_addresses_equal(&local, addr, addr->port)) continue; + if (subflow->backup != bkup) + msk->last_snd = NULL; subflow->backup = bkup; subflow->send_mp_prio = 1; subflow->request_bkup = bkup; @@ -796,6 +805,9 @@ static void mptcp_pm_nl_rm_addr_or_subflow(struct mptcp_sock *msk, if (!removed) continue; + if (!mptcp_pm_is_kernel(msk)) + continue; + if (rm_type == MPTCP_MIB_RMADDR) { msk->pm.add_addr_accepted--; WRITE_ONCE(msk->pm.accept_addr, true); @@ -889,9 +901,9 @@ static int mptcp_pm_nl_append_new_local_addr(struct pm_nl_pernet *pernet, * singled addresses */ list_for_each_entry(cur, &pernet->local_addr_list, list) { - if (addresses_equal(&cur->addr, &entry->addr, - address_use_port(entry) && - address_use_port(cur))) { + if (mptcp_addresses_equal(&cur->addr, &entry->addr, + address_use_port(entry) && + address_use_port(cur))) { /* allow replacing the exiting endpoint only if such * endpoint is an implicit one and the user-space * did not provide an endpoint id @@ -1018,14 +1030,17 @@ int mptcp_pm_nl_get_local_id(struct mptcp_sock *msk, struct sock_common *skc) */ local_address((struct sock_common *)msk, &msk_local); local_address((struct sock_common *)skc, &skc_local); - if (addresses_equal(&msk_local, &skc_local, false)) + if (mptcp_addresses_equal(&msk_local, &skc_local, false)) return 0; - pernet = net_generic(sock_net((struct sock *)msk), pm_nl_pernet_id); + if (mptcp_pm_is_userspace(msk)) + return mptcp_userspace_pm_get_local_id(msk, &skc_local); + + pernet = pm_nl_get_pernet_from_msk(msk); rcu_read_lock(); list_for_each_entry_rcu(entry, &pernet->local_addr_list, list) { - if (addresses_equal(&entry->addr, &skc_local, entry->addr.port)) { + if (mptcp_addresses_equal(&entry->addr, &skc_local, entry->addr.port)) { ret = entry->addr.id; break; } @@ -1052,18 +1067,6 @@ int mptcp_pm_nl_get_local_id(struct mptcp_sock *msk, struct sock_common *skc) return ret; } -void mptcp_pm_nl_data_init(struct mptcp_sock *msk) -{ - struct mptcp_pm_data *pm = &msk->pm; - bool subflows; - - subflows = !!mptcp_pm_get_subflows_max(msk); - WRITE_ONCE(pm->work_pending, (!!mptcp_pm_get_local_addr_max(msk) && subflows) || - !!mptcp_pm_get_add_addr_signal_max(msk)); - WRITE_ONCE(pm->accept_addr, !!mptcp_pm_get_add_addr_accept_max(msk) && subflows); - WRITE_ONCE(pm->accept_subflow, subflows); -} - #define MPTCP_PM_CMD_GRP_OFFSET 0 #define MPTCP_PM_EV_GRP_OFFSET 1 @@ -1091,6 +1094,10 @@ static const struct nla_policy mptcp_pm_policy[MPTCP_PM_ATTR_MAX + 1] = { NLA_POLICY_NESTED(mptcp_pm_addr_policy), [MPTCP_PM_ATTR_RCV_ADD_ADDRS] = { .type = NLA_U32, }, [MPTCP_PM_ATTR_SUBFLOWS] = { .type = NLA_U32, }, + [MPTCP_PM_ATTR_TOKEN] = { .type = NLA_U32, }, + [MPTCP_PM_ATTR_LOC_ID] = { .type = NLA_U8, }, + [MPTCP_PM_ATTR_ADDR_REMOTE] = + NLA_POLICY_NESTED(mptcp_pm_addr_policy), }; void mptcp_pm_nl_subflow_chk_stale(const struct mptcp_sock *msk, struct sock *ssk) @@ -1139,11 +1146,12 @@ static int mptcp_pm_family_to_addr(int family) return MPTCP_PM_ADDR_ATTR_ADDR4; } -static int mptcp_pm_parse_addr(struct nlattr *attr, struct genl_info *info, - bool require_family, - struct mptcp_pm_addr_entry *entry) +static int mptcp_pm_parse_pm_addr_attr(struct nlattr *tb[], + const struct nlattr *attr, + struct genl_info *info, + struct mptcp_addr_info *addr, + bool require_family) { - struct nlattr *tb[MPTCP_PM_ADDR_ATTR_MAX + 1]; int err, addr_addr; if (!attr) { @@ -1157,27 +1165,29 @@ static int mptcp_pm_parse_addr(struct nlattr *attr, struct genl_info *info, if (err) return err; - memset(entry, 0, sizeof(*entry)); + if (tb[MPTCP_PM_ADDR_ATTR_ID]) + addr->id = nla_get_u8(tb[MPTCP_PM_ADDR_ATTR_ID]); + if (!tb[MPTCP_PM_ADDR_ATTR_FAMILY]) { if (!require_family) - goto skip_family; + return err; NL_SET_ERR_MSG_ATTR(info->extack, attr, "missing family"); return -EINVAL; } - entry->addr.family = nla_get_u16(tb[MPTCP_PM_ADDR_ATTR_FAMILY]); - if (entry->addr.family != AF_INET + addr->family = nla_get_u16(tb[MPTCP_PM_ADDR_ATTR_FAMILY]); + if (addr->family != AF_INET #if IS_ENABLED(CONFIG_MPTCP_IPV6) - && entry->addr.family != AF_INET6 + && addr->family != AF_INET6 #endif ) { NL_SET_ERR_MSG_ATTR(info->extack, attr, "unknown address family"); return -EINVAL; } - addr_addr = mptcp_pm_family_to_addr(entry->addr.family); + addr_addr = mptcp_pm_family_to_addr(addr->family); if (!tb[addr_addr]) { NL_SET_ERR_MSG_ATTR(info->extack, attr, "missing address data"); @@ -1185,22 +1195,47 @@ static int mptcp_pm_parse_addr(struct nlattr *attr, struct genl_info *info, } #if IS_ENABLED(CONFIG_MPTCP_IPV6) - if (entry->addr.family == AF_INET6) - entry->addr.addr6 = nla_get_in6_addr(tb[addr_addr]); + if (addr->family == AF_INET6) + addr->addr6 = nla_get_in6_addr(tb[addr_addr]); else #endif - entry->addr.addr.s_addr = nla_get_in_addr(tb[addr_addr]); + addr->addr.s_addr = nla_get_in_addr(tb[addr_addr]); + + if (tb[MPTCP_PM_ADDR_ATTR_PORT]) + addr->port = htons(nla_get_u16(tb[MPTCP_PM_ADDR_ATTR_PORT])); + + return err; +} + +int mptcp_pm_parse_addr(struct nlattr *attr, struct genl_info *info, + struct mptcp_addr_info *addr) +{ + struct nlattr *tb[MPTCP_PM_ADDR_ATTR_MAX + 1]; + + memset(addr, 0, sizeof(*addr)); + + return mptcp_pm_parse_pm_addr_attr(tb, attr, info, addr, true); +} + +int mptcp_pm_parse_entry(struct nlattr *attr, struct genl_info *info, + bool require_family, + struct mptcp_pm_addr_entry *entry) +{ + struct nlattr *tb[MPTCP_PM_ADDR_ATTR_MAX + 1]; + int err; + + memset(entry, 0, sizeof(*entry)); + + err = mptcp_pm_parse_pm_addr_attr(tb, attr, info, &entry->addr, require_family); + if (err) + return err; -skip_family: if (tb[MPTCP_PM_ADDR_ATTR_IF_IDX]) { u32 val = nla_get_s32(tb[MPTCP_PM_ADDR_ATTR_IF_IDX]); entry->ifindex = val; } - if (tb[MPTCP_PM_ADDR_ATTR_ID]) - entry->addr.id = nla_get_u8(tb[MPTCP_PM_ADDR_ATTR_ID]); - if (tb[MPTCP_PM_ADDR_ATTR_FLAGS]) entry->flags = nla_get_u32(tb[MPTCP_PM_ADDR_ATTR_FLAGS]); @@ -1212,7 +1247,7 @@ skip_family: static struct pm_nl_pernet *genl_info_pm_nl(struct genl_info *info) { - return net_generic(genl_info_net(info), pm_nl_pernet_id); + return pm_nl_get_pernet(genl_info_net(info)); } static int mptcp_nl_add_subflow_or_signal_addr(struct net *net) @@ -1223,7 +1258,8 @@ static int mptcp_nl_add_subflow_or_signal_addr(struct net *net) while ((msk = mptcp_token_iter_next(net, &s_slot, &s_num)) != NULL) { struct sock *sk = (struct sock *)msk; - if (!READ_ONCE(msk->fully_established)) + if (!READ_ONCE(msk->fully_established) || + mptcp_pm_is_userspace(msk)) goto next; lock_sock(sk); @@ -1247,7 +1283,7 @@ static int mptcp_nl_cmd_add_addr(struct sk_buff *skb, struct genl_info *info) struct mptcp_pm_addr_entry addr, *entry; int ret; - ret = mptcp_pm_parse_addr(attr, info, true, &addr); + ret = mptcp_pm_parse_entry(attr, info, true, &addr); if (ret < 0) return ret; @@ -1296,17 +1332,25 @@ static int mptcp_nl_cmd_add_addr(struct sk_buff *skb, struct genl_info *info) return 0; } -int mptcp_pm_get_flags_and_ifindex_by_id(struct net *net, unsigned int id, +int mptcp_pm_get_flags_and_ifindex_by_id(struct mptcp_sock *msk, unsigned int id, u8 *flags, int *ifindex) { struct mptcp_pm_addr_entry *entry; + struct sock *sk = (struct sock *)msk; + struct net *net = sock_net(sk); *flags = 0; *ifindex = 0; if (id) { + if (mptcp_pm_is_userspace(msk)) + return mptcp_userspace_pm_get_flags_and_ifindex_by_id(msk, + id, + flags, + ifindex); + rcu_read_lock(); - entry = __lookup_addr_by_id(net_generic(net, pm_nl_pernet_id), id); + entry = __lookup_addr_by_id(pm_nl_get_pernet(net), id); if (entry) { *flags = entry->flags; *ifindex = entry->ifindex; @@ -1366,6 +1410,9 @@ static int mptcp_nl_remove_subflow_and_signal_addr(struct net *net, struct sock *sk = (struct sock *)msk; bool remove_subflow; + if (mptcp_pm_is_userspace(msk)) + goto next; + if (list_empty(&msk->conn_list)) { mptcp_pm_remove_anno_addr(msk, addr, false); goto next; @@ -1400,11 +1447,11 @@ static int mptcp_nl_remove_id_zero_address(struct net *net, struct sock *sk = (struct sock *)msk; struct mptcp_addr_info msk_local; - if (list_empty(&msk->conn_list)) + if (list_empty(&msk->conn_list) || mptcp_pm_is_userspace(msk)) goto next; local_address((struct sock_common *)msk, &msk_local); - if (!addresses_equal(&msk_local, addr, addr->port)) + if (!mptcp_addresses_equal(&msk_local, addr, addr->port)) goto next; lock_sock(sk); @@ -1430,7 +1477,7 @@ static int mptcp_nl_cmd_del_addr(struct sk_buff *skb, struct genl_info *info) unsigned int addr_max; int ret; - ret = mptcp_pm_parse_addr(attr, info, false, &addr); + ret = mptcp_pm_parse_entry(attr, info, false, &addr); if (ret < 0) return ret; @@ -1470,8 +1517,8 @@ static int mptcp_nl_cmd_del_addr(struct sk_buff *skb, struct genl_info *info) return ret; } -static void mptcp_pm_remove_addrs_and_subflows(struct mptcp_sock *msk, - struct list_head *rm_list) +void mptcp_pm_remove_addrs_and_subflows(struct mptcp_sock *msk, + struct list_head *rm_list) { struct mptcp_rm_list alist = { .nr = 0 }, slist = { .nr = 0 }; struct mptcp_pm_addr_entry *entry; @@ -1507,9 +1554,11 @@ static void mptcp_nl_remove_addrs_list(struct net *net, while ((msk = mptcp_token_iter_next(net, &s_slot, &s_num)) != NULL) { struct sock *sk = (struct sock *)msk; - lock_sock(sk); - mptcp_pm_remove_addrs_and_subflows(msk, rm_list); - release_sock(sk); + if (!mptcp_pm_is_userspace(msk)) { + lock_sock(sk); + mptcp_pm_remove_addrs_and_subflows(msk, rm_list); + release_sock(sk); + } sock_put(sk); cond_resched(); @@ -1602,7 +1651,7 @@ static int mptcp_nl_cmd_get_addr(struct sk_buff *skb, struct genl_info *info) void *reply; int ret; - ret = mptcp_pm_parse_addr(attr, info, false, &addr); + ret = mptcp_pm_parse_entry(attr, info, false, &addr); if (ret < 0) return ret; @@ -1653,7 +1702,7 @@ static int mptcp_nl_cmd_dump_addrs(struct sk_buff *msg, void *hdr; int i; - pernet = net_generic(net, pm_nl_pernet_id); + pernet = pm_nl_get_pernet(net); spin_lock_bh(&pernet->lock); for (i = id; i < MPTCP_PM_MAX_ADDR_ID + 1; i++) { @@ -1782,7 +1831,7 @@ static int mptcp_nl_set_flags(struct net *net, while ((msk = mptcp_token_iter_next(net, &s_slot, &s_num)) != NULL) { struct sock *sk = (struct sock *)msk; - if (list_empty(&msk->conn_list)) + if (list_empty(&msk->conn_list) || mptcp_pm_is_userspace(msk)) goto next; lock_sock(sk); @@ -1813,7 +1862,7 @@ static int mptcp_nl_cmd_set_flags(struct sk_buff *skb, struct genl_info *info) u8 bkup = 0, lookup_by_id = 0; int ret; - ret = mptcp_pm_parse_addr(attr, info, false, &addr); + ret = mptcp_pm_parse_entry(attr, info, false, &addr); if (ret < 0) return ret; @@ -1852,6 +1901,13 @@ static void mptcp_nl_mcast_send(struct net *net, struct sk_buff *nlskb, gfp_t gf nlskb, 0, MPTCP_PM_EV_GRP_OFFSET, gfp); } +bool mptcp_userspace_pm_active(const struct mptcp_sock *msk) +{ + return genl_has_listeners(&mptcp_genl_family, + sock_net((const struct sock *)msk), + MPTCP_PM_EV_GRP_OFFSET); +} + static int mptcp_event_add_subflow(struct sk_buff *skb, const struct sock *ssk) { const struct inet_sock *issk = inet_sk(ssk); @@ -1972,6 +2028,9 @@ static int mptcp_event_created(struct sk_buff *skb, if (err) return err; + if (nla_put_u8(skb, MPTCP_ATTR_SERVER_SIDE, READ_ONCE(msk->pm.server_side))) + return -EMSGSIZE; + return mptcp_event_add_subflow(skb, ssk); } @@ -2006,10 +2065,12 @@ nla_put_failure: kfree_skb(skb); } -void mptcp_event_addr_announced(const struct mptcp_sock *msk, +void mptcp_event_addr_announced(const struct sock *ssk, const struct mptcp_addr_info *info) { - struct net *net = sock_net((const struct sock *)msk); + struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(ssk); + struct mptcp_sock *msk = mptcp_sk(subflow->conn); + struct net *net = sock_net(ssk); struct nlmsghdr *nlh; struct sk_buff *skb; @@ -2031,7 +2092,10 @@ void mptcp_event_addr_announced(const struct mptcp_sock *msk, if (nla_put_u8(skb, MPTCP_ATTR_REM_ID, info->id)) goto nla_put_failure; - if (nla_put_be16(skb, MPTCP_ATTR_DPORT, info->port)) + if (nla_put_be16(skb, MPTCP_ATTR_DPORT, + info->port == 0 ? + inet_sk(ssk)->inet_dport : + info->port)) goto nla_put_failure; switch (info->family) { @@ -2148,6 +2212,26 @@ static const struct genl_small_ops mptcp_pm_ops[] = { .doit = mptcp_nl_cmd_set_flags, .flags = GENL_ADMIN_PERM, }, + { + .cmd = MPTCP_PM_CMD_ANNOUNCE, + .doit = mptcp_nl_cmd_announce, + .flags = GENL_ADMIN_PERM, + }, + { + .cmd = MPTCP_PM_CMD_REMOVE, + .doit = mptcp_nl_cmd_remove, + .flags = GENL_ADMIN_PERM, + }, + { + .cmd = MPTCP_PM_CMD_SUBFLOW_CREATE, + .doit = mptcp_nl_cmd_sf_create, + .flags = GENL_ADMIN_PERM, + }, + { + .cmd = MPTCP_PM_CMD_SUBFLOW_DESTROY, + .doit = mptcp_nl_cmd_sf_destroy, + .flags = GENL_ADMIN_PERM, + }, }; static struct genl_family mptcp_genl_family __ro_after_init = { @@ -2165,7 +2249,7 @@ static struct genl_family mptcp_genl_family __ro_after_init = { static int __net_init pm_nl_init_net(struct net *net) { - struct pm_nl_pernet *pernet = net_generic(net, pm_nl_pernet_id); + struct pm_nl_pernet *pernet = pm_nl_get_pernet(net); INIT_LIST_HEAD_RCU(&pernet->local_addr_list); @@ -2187,7 +2271,7 @@ static void __net_exit pm_nl_exit_net(struct list_head *net_list) struct net *net; list_for_each_entry(net, net_list, exit_list) { - struct pm_nl_pernet *pernet = net_generic(net, pm_nl_pernet_id); + struct pm_nl_pernet *pernet = pm_nl_get_pernet(net); /* net is removed from namespace list, can't race with * other modifiers, also netns core already waited for a diff --git a/net/mptcp/pm_userspace.c b/net/mptcp/pm_userspace.c new file mode 100644 index 000000000000..f56378e4f597 --- /dev/null +++ b/net/mptcp/pm_userspace.c @@ -0,0 +1,429 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Multipath TCP + * + * Copyright (c) 2022, Intel Corporation. + */ + +#include "protocol.h" + +void mptcp_free_local_addr_list(struct mptcp_sock *msk) +{ + struct mptcp_pm_addr_entry *entry, *tmp; + struct sock *sk = (struct sock *)msk; + LIST_HEAD(free_list); + + if (!mptcp_pm_is_userspace(msk)) + return; + + spin_lock_bh(&msk->pm.lock); + list_splice_init(&msk->pm.userspace_pm_local_addr_list, &free_list); + spin_unlock_bh(&msk->pm.lock); + + list_for_each_entry_safe(entry, tmp, &free_list, list) { + sock_kfree_s(sk, entry, sizeof(*entry)); + } +} + +int mptcp_userspace_pm_append_new_local_addr(struct mptcp_sock *msk, + struct mptcp_pm_addr_entry *entry) +{ + DECLARE_BITMAP(id_bitmap, MPTCP_PM_MAX_ADDR_ID + 1); + struct mptcp_pm_addr_entry *match = NULL; + struct sock *sk = (struct sock *)msk; + struct mptcp_pm_addr_entry *e; + bool addr_match = false; + bool id_match = false; + int ret = -EINVAL; + + bitmap_zero(id_bitmap, MPTCP_PM_MAX_ADDR_ID + 1); + + spin_lock_bh(&msk->pm.lock); + list_for_each_entry(e, &msk->pm.userspace_pm_local_addr_list, list) { + addr_match = mptcp_addresses_equal(&e->addr, &entry->addr, true); + if (addr_match && entry->addr.id == 0) + entry->addr.id = e->addr.id; + id_match = (e->addr.id == entry->addr.id); + if (addr_match && id_match) { + match = e; + break; + } else if (addr_match || id_match) { + break; + } + __set_bit(e->addr.id, id_bitmap); + } + + if (!match && !addr_match && !id_match) { + /* Memory for the entry is allocated from the + * sock option buffer. + */ + e = sock_kmalloc(sk, sizeof(*e), GFP_ATOMIC); + if (!e) { + spin_unlock_bh(&msk->pm.lock); + return -ENOMEM; + } + + *e = *entry; + if (!e->addr.id) + e->addr.id = find_next_zero_bit(id_bitmap, + MPTCP_PM_MAX_ADDR_ID + 1, + 1); + list_add_tail_rcu(&e->list, &msk->pm.userspace_pm_local_addr_list); + ret = e->addr.id; + } else if (match) { + ret = entry->addr.id; + } + + spin_unlock_bh(&msk->pm.lock); + return ret; +} + +int mptcp_userspace_pm_get_flags_and_ifindex_by_id(struct mptcp_sock *msk, + unsigned int id, + u8 *flags, int *ifindex) +{ + struct mptcp_pm_addr_entry *entry, *match = NULL; + + *flags = 0; + *ifindex = 0; + + spin_lock_bh(&msk->pm.lock); + list_for_each_entry(entry, &msk->pm.userspace_pm_local_addr_list, list) { + if (id == entry->addr.id) { + match = entry; + break; + } + } + spin_unlock_bh(&msk->pm.lock); + if (match) { + *flags = match->flags; + *ifindex = match->ifindex; + } + + return 0; +} + +int mptcp_userspace_pm_get_local_id(struct mptcp_sock *msk, + struct mptcp_addr_info *skc) +{ + struct mptcp_pm_addr_entry new_entry; + __be16 msk_sport = ((struct inet_sock *) + inet_sk((struct sock *)msk))->inet_sport; + + memset(&new_entry, 0, sizeof(struct mptcp_pm_addr_entry)); + new_entry.addr = *skc; + new_entry.addr.id = 0; + new_entry.flags = MPTCP_PM_ADDR_FLAG_IMPLICIT; + + if (new_entry.addr.port == msk_sport) + new_entry.addr.port = 0; + + return mptcp_userspace_pm_append_new_local_addr(msk, &new_entry); +} + +int mptcp_nl_cmd_announce(struct sk_buff *skb, struct genl_info *info) +{ + struct nlattr *token = info->attrs[MPTCP_PM_ATTR_TOKEN]; + struct nlattr *addr = info->attrs[MPTCP_PM_ATTR_ADDR]; + struct mptcp_pm_addr_entry addr_val; + struct mptcp_sock *msk; + int err = -EINVAL; + u32 token_val; + + if (!addr || !token) { + GENL_SET_ERR_MSG(info, "missing required inputs"); + return err; + } + + token_val = nla_get_u32(token); + + msk = mptcp_token_get_sock(sock_net(skb->sk), token_val); + if (!msk) { + NL_SET_ERR_MSG_ATTR(info->extack, token, "invalid token"); + return err; + } + + if (!mptcp_pm_is_userspace(msk)) { + GENL_SET_ERR_MSG(info, "invalid request; userspace PM not selected"); + goto announce_err; + } + + err = mptcp_pm_parse_entry(addr, info, true, &addr_val); + if (err < 0) { + GENL_SET_ERR_MSG(info, "error parsing local address"); + goto announce_err; + } + + if (addr_val.addr.id == 0 || !(addr_val.flags & MPTCP_PM_ADDR_FLAG_SIGNAL)) { + GENL_SET_ERR_MSG(info, "invalid addr id or flags"); + goto announce_err; + } + + err = mptcp_userspace_pm_append_new_local_addr(msk, &addr_val); + if (err < 0) { + GENL_SET_ERR_MSG(info, "did not match address and id"); + goto announce_err; + } + + lock_sock((struct sock *)msk); + spin_lock_bh(&msk->pm.lock); + + if (mptcp_pm_alloc_anno_list(msk, &addr_val)) { + mptcp_pm_announce_addr(msk, &addr_val.addr, false); + mptcp_pm_nl_addr_send_ack(msk); + } + + spin_unlock_bh(&msk->pm.lock); + release_sock((struct sock *)msk); + + err = 0; + announce_err: + sock_put((struct sock *)msk); + return err; +} + +int mptcp_nl_cmd_remove(struct sk_buff *skb, struct genl_info *info) +{ + struct nlattr *token = info->attrs[MPTCP_PM_ATTR_TOKEN]; + struct nlattr *id = info->attrs[MPTCP_PM_ATTR_LOC_ID]; + struct mptcp_pm_addr_entry *match = NULL; + struct mptcp_pm_addr_entry *entry; + struct mptcp_sock *msk; + LIST_HEAD(free_list); + int err = -EINVAL; + u32 token_val; + u8 id_val; + + if (!id || !token) { + GENL_SET_ERR_MSG(info, "missing required inputs"); + return err; + } + + id_val = nla_get_u8(id); + token_val = nla_get_u32(token); + + msk = mptcp_token_get_sock(sock_net(skb->sk), token_val); + if (!msk) { + NL_SET_ERR_MSG_ATTR(info->extack, token, "invalid token"); + return err; + } + + if (!mptcp_pm_is_userspace(msk)) { + GENL_SET_ERR_MSG(info, "invalid request; userspace PM not selected"); + goto remove_err; + } + + lock_sock((struct sock *)msk); + + list_for_each_entry(entry, &msk->pm.userspace_pm_local_addr_list, list) { + if (entry->addr.id == id_val) { + match = entry; + break; + } + } + + if (!match) { + GENL_SET_ERR_MSG(info, "address with specified id not found"); + release_sock((struct sock *)msk); + goto remove_err; + } + + list_move(&match->list, &free_list); + + mptcp_pm_remove_addrs_and_subflows(msk, &free_list); + + release_sock((struct sock *)msk); + + list_for_each_entry_safe(match, entry, &free_list, list) { + sock_kfree_s((struct sock *)msk, match, sizeof(*match)); + } + + err = 0; + remove_err: + sock_put((struct sock *)msk); + return err; +} + +int mptcp_nl_cmd_sf_create(struct sk_buff *skb, struct genl_info *info) +{ + struct nlattr *raddr = info->attrs[MPTCP_PM_ATTR_ADDR_REMOTE]; + struct nlattr *token = info->attrs[MPTCP_PM_ATTR_TOKEN]; + struct nlattr *laddr = info->attrs[MPTCP_PM_ATTR_ADDR]; + struct mptcp_addr_info addr_r; + struct mptcp_addr_info addr_l; + struct mptcp_sock *msk; + int err = -EINVAL; + struct sock *sk; + u32 token_val; + + if (!laddr || !raddr || !token) { + GENL_SET_ERR_MSG(info, "missing required inputs"); + return err; + } + + token_val = nla_get_u32(token); + + msk = mptcp_token_get_sock(genl_info_net(info), token_val); + if (!msk) { + NL_SET_ERR_MSG_ATTR(info->extack, token, "invalid token"); + return err; + } + + if (!mptcp_pm_is_userspace(msk)) { + GENL_SET_ERR_MSG(info, "invalid request; userspace PM not selected"); + goto create_err; + } + + err = mptcp_pm_parse_addr(laddr, info, &addr_l); + if (err < 0) { + NL_SET_ERR_MSG_ATTR(info->extack, laddr, "error parsing local addr"); + goto create_err; + } + + if (addr_l.id == 0) { + NL_SET_ERR_MSG_ATTR(info->extack, laddr, "missing local addr id"); + goto create_err; + } + + err = mptcp_pm_parse_addr(raddr, info, &addr_r); + if (err < 0) { + NL_SET_ERR_MSG_ATTR(info->extack, raddr, "error parsing remote addr"); + goto create_err; + } + + sk = &msk->sk.icsk_inet.sk; + lock_sock(sk); + + err = __mptcp_subflow_connect(sk, &addr_l, &addr_r); + + release_sock(sk); + + create_err: + sock_put((struct sock *)msk); + return err; +} + +static struct sock *mptcp_nl_find_ssk(struct mptcp_sock *msk, + const struct mptcp_addr_info *local, + const struct mptcp_addr_info *remote) +{ + struct sock *sk = &msk->sk.icsk_inet.sk; + struct mptcp_subflow_context *subflow; + struct sock *found = NULL; + + if (local->family != remote->family) + return NULL; + + lock_sock(sk); + + mptcp_for_each_subflow(msk, subflow) { + const struct inet_sock *issk; + struct sock *ssk; + + ssk = mptcp_subflow_tcp_sock(subflow); + + if (local->family != ssk->sk_family) + continue; + + issk = inet_sk(ssk); + + switch (ssk->sk_family) { + case AF_INET: + if (issk->inet_saddr != local->addr.s_addr || + issk->inet_daddr != remote->addr.s_addr) + continue; + break; +#if IS_ENABLED(CONFIG_MPTCP_IPV6) + case AF_INET6: { + const struct ipv6_pinfo *pinfo = inet6_sk(ssk); + + if (!ipv6_addr_equal(&local->addr6, &pinfo->saddr) || + !ipv6_addr_equal(&remote->addr6, &ssk->sk_v6_daddr)) + continue; + break; + } +#endif + default: + continue; + } + + if (issk->inet_sport == local->port && + issk->inet_dport == remote->port) { + found = ssk; + goto found; + } + } + +found: + release_sock(sk); + + return found; +} + +int mptcp_nl_cmd_sf_destroy(struct sk_buff *skb, struct genl_info *info) +{ + struct nlattr *raddr = info->attrs[MPTCP_PM_ATTR_ADDR_REMOTE]; + struct nlattr *token = info->attrs[MPTCP_PM_ATTR_TOKEN]; + struct nlattr *laddr = info->attrs[MPTCP_PM_ATTR_ADDR]; + struct mptcp_addr_info addr_l; + struct mptcp_addr_info addr_r; + struct mptcp_sock *msk; + struct sock *sk, *ssk; + int err = -EINVAL; + u32 token_val; + + if (!laddr || !raddr || !token) { + GENL_SET_ERR_MSG(info, "missing required inputs"); + return err; + } + + token_val = nla_get_u32(token); + + msk = mptcp_token_get_sock(genl_info_net(info), token_val); + if (!msk) { + NL_SET_ERR_MSG_ATTR(info->extack, token, "invalid token"); + return err; + } + + if (!mptcp_pm_is_userspace(msk)) { + GENL_SET_ERR_MSG(info, "invalid request; userspace PM not selected"); + goto destroy_err; + } + + err = mptcp_pm_parse_addr(laddr, info, &addr_l); + if (err < 0) { + NL_SET_ERR_MSG_ATTR(info->extack, laddr, "error parsing local addr"); + goto destroy_err; + } + + err = mptcp_pm_parse_addr(raddr, info, &addr_r); + if (err < 0) { + NL_SET_ERR_MSG_ATTR(info->extack, raddr, "error parsing remote addr"); + goto destroy_err; + } + + if (addr_l.family != addr_r.family) { + GENL_SET_ERR_MSG(info, "address families do not match"); + goto destroy_err; + } + + if (!addr_l.port || !addr_r.port) { + GENL_SET_ERR_MSG(info, "missing local or remote port"); + goto destroy_err; + } + + sk = &msk->sk.icsk_inet.sk; + ssk = mptcp_nl_find_ssk(msk, &addr_l, &addr_r); + if (ssk) { + struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(ssk); + + mptcp_subflow_shutdown(sk, ssk, RCV_SHUTDOWN | SEND_SHUTDOWN); + mptcp_close_ssk(sk, ssk, subflow); + err = 0; + } else { + err = -ESRCH; + } + + destroy_err: + sock_put((struct sock *)msk); + return err; +} diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c index 0cbea3b6d0a4..52ed2c0ac901 100644 --- a/net/mptcp/protocol.c +++ b/net/mptcp/protocol.c @@ -1229,6 +1229,22 @@ static void mptcp_update_data_checksum(struct sk_buff *skb, int added) mpext->csum = csum_fold(csum_block_add(csum, skb_checksum(skb, offset, added, 0), offset)); } +static void mptcp_update_infinite_map(struct mptcp_sock *msk, + struct sock *ssk, + struct mptcp_ext *mpext) +{ + if (!mpext) + return; + + mpext->infinite_map = 1; + mpext->data_len = 0; + + MPTCP_INC_STATS(sock_net(ssk), MPTCP_MIB_INFINITEMAPTX); + mptcp_subflow_ctx(ssk)->send_infinite_map = 0; + pr_fallback(msk); + __mptcp_do_fallback(msk); +} + static int mptcp_sendmsg_frag(struct sock *sk, struct sock *ssk, struct mptcp_data_frag *dfrag, struct mptcp_sendmsg_info *info) @@ -1360,6 +1376,8 @@ alloc_skb: out: if (READ_ONCE(msk->csum_enabled)) mptcp_update_data_checksum(skb, copy); + if (mptcp_subflow_ctx(ssk)->send_infinite_map) + mptcp_update_infinite_map(msk, ssk, mpext); trace_mptcp_sendmsg_frag(mpext); mptcp_subflow_ctx(ssk)->rel_write_seq += copy; return copy; @@ -1587,8 +1605,10 @@ void __mptcp_push_pending(struct sock *sk, unsigned int flags) out: /* ensure the rtx timer is running */ + mptcp_data_lock(sk); if (!mptcp_timer_pending(sk)) mptcp_reset_timer(sk); + mptcp_data_unlock(sk); if (copied) __mptcp_check_send_data_fin(sk); } @@ -2012,7 +2032,7 @@ static unsigned int mptcp_inq_hint(const struct sock *sk) } static int mptcp_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, - int nonblock, int flags, int *addr_len) + int flags, int *addr_len) { struct mptcp_sock *msk = mptcp_sk(sk); struct scm_timestamping_internal tss; @@ -2030,7 +2050,7 @@ static int mptcp_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, goto out_err; } - timeo = sock_rcvtimeo(sk, nonblock); + timeo = sock_rcvtimeo(sk, flags & MSG_DONTWAIT); len = min_t(size_t, len, INT_MAX); target = sock_rcvlowat(sk, flags & MSG_WAITALL, len); @@ -2149,10 +2169,38 @@ static void mptcp_retransmit_timer(struct timer_list *t) sock_put(sk); } +static struct mptcp_subflow_context * +mp_fail_response_expect_subflow(struct mptcp_sock *msk) +{ + struct mptcp_subflow_context *subflow, *ret = NULL; + + mptcp_for_each_subflow(msk, subflow) { + if (READ_ONCE(subflow->mp_fail_response_expect)) { + ret = subflow; + break; + } + } + + return ret; +} + +static void mptcp_check_mp_fail_response(struct mptcp_sock *msk) +{ + struct mptcp_subflow_context *subflow; + struct sock *sk = (struct sock *)msk; + + bh_lock_sock(sk); + subflow = mp_fail_response_expect_subflow(msk); + if (subflow) + __set_bit(MPTCP_FAIL_NO_RESPONSE, &msk->flags); + bh_unlock_sock(sk); +} + static void mptcp_timeout_timer(struct timer_list *t) { struct sock *sk = from_timer(sk, t, sk_timer); + mptcp_check_mp_fail_response(mptcp_sk(sk)); mptcp_schedule_work(sk); sock_put(sk); } @@ -2465,6 +2513,7 @@ static void __mptcp_retrans(struct sock *sk) dfrag->already_sent = max(dfrag->already_sent, info.sent); tcp_push(ssk, 0, info.mss_now, tcp_sk(ssk)->nonagle, info.size_goal); + WRITE_ONCE(msk->allow_infinite_fallback, false); } release_sock(ssk); @@ -2472,8 +2521,27 @@ static void __mptcp_retrans(struct sock *sk) reset_timer: mptcp_check_and_set_pending(sk); + mptcp_data_lock(sk); if (!mptcp_timer_pending(sk)) mptcp_reset_timer(sk); + mptcp_data_unlock(sk); +} + +static void mptcp_mp_fail_no_response(struct mptcp_sock *msk) +{ + struct mptcp_subflow_context *subflow; + struct sock *ssk; + bool slow; + + subflow = mp_fail_response_expect_subflow(msk); + if (subflow) { + pr_debug("MP_FAIL doesn't respond, reset the subflow"); + + ssk = mptcp_subflow_tcp_sock(subflow); + slow = lock_sock_fast(ssk); + mptcp_subflow_reset(ssk); + unlock_sock_fast(ssk, slow); + } } static void mptcp_worker(struct work_struct *work) @@ -2516,6 +2584,9 @@ static void mptcp_worker(struct work_struct *work) if (test_and_clear_bit(MPTCP_WORK_RTX, &msk->flags)) __mptcp_retrans(sk); + if (test_and_clear_bit(MPTCP_FAIL_NO_RESPONSE, &msk->flags)) + mptcp_mp_fail_no_response(msk); + unlock: release_sock(sk); sock_put(sk); @@ -2539,6 +2610,7 @@ static int __mptcp_init_sock(struct sock *sk) msk->first = NULL; inet_csk(sk)->icsk_sync_mss = mptcp_sync_mss; WRITE_ONCE(msk->csum_enabled, mptcp_is_checksum_enabled(sock_net(sk))); + WRITE_ONCE(msk->allow_infinite_fallback, true); msk->recovery = false; mptcp_pm_data_init(msk); @@ -2631,8 +2703,10 @@ void mptcp_subflow_shutdown(struct sock *sk, struct sock *ssk, int how) } else { pr_debug("Sending DATA_FIN on subflow %p", ssk); tcp_send_ack(ssk); + mptcp_data_lock(sk); if (!mptcp_timer_pending(sk)) mptcp_reset_timer(sk); + mptcp_data_unlock(sk); } break; } @@ -2733,8 +2807,10 @@ static void __mptcp_destroy_sock(struct sock *sk) /* join list will be eventually flushed (with rst) at sock lock release time*/ list_splice_init(&msk->conn_list, &conn_list); - sk_stop_timer(sk, &msk->sk.icsk_retransmit_timer); + mptcp_data_lock(sk); + mptcp_stop_timer(sk); sk_stop_timer(sk, &sk->sk_timer); + mptcp_data_unlock(sk); msk->pm.status = 0; /* clears msk->subflow, allowing the following loop to close @@ -2796,7 +2872,9 @@ cleanup: __mptcp_destroy_sock(sk); do_cancel_work = true; } else { + mptcp_data_lock(sk); sk_reset_timer(sk, &sk->sk_timer, jiffies + TCP_TIMEWAIT_LEN); + mptcp_data_unlock(sk); } release_sock(sk); if (do_cancel_work) @@ -2841,8 +2919,10 @@ static int mptcp_disconnect(struct sock *sk, int flags) __mptcp_close_ssk(sk, ssk, subflow, MPTCP_CF_FASTCLOSE); } - sk_stop_timer(sk, &msk->sk.icsk_retransmit_timer); + mptcp_data_lock(sk); + mptcp_stop_timer(sk); sk_stop_timer(sk, &sk->sk_timer); + mptcp_data_unlock(sk); if (mptcp_sk(sk)->token) mptcp_event(MPTCP_EVENT_CLOSED, mptcp_sk(sk), NULL, GFP_KERNEL); @@ -3017,6 +3097,7 @@ void mptcp_destroy_common(struct mptcp_sock *msk) msk->rmem_fwd_alloc = 0; mptcp_token_destroy(msk); mptcp_pm_free_anno_list(msk); + mptcp_free_local_addr_list(msk); } static void mptcp_destroy(struct sock *sk) @@ -3092,15 +3173,19 @@ static void mptcp_release_cb(struct sock *sk) spin_lock_bh(&sk->sk_lock.slock); } - /* be sure to set the current sk state before tacking actions - * depending on sk_state - */ - if (__test_and_clear_bit(MPTCP_CONNECTED, &msk->cb_flags)) - __mptcp_set_connected(sk); if (__test_and_clear_bit(MPTCP_CLEAN_UNA, &msk->cb_flags)) __mptcp_clean_una_wakeup(sk); - if (__test_and_clear_bit(MPTCP_ERROR_REPORT, &msk->cb_flags)) - __mptcp_error_report(sk); + if (unlikely(&msk->cb_flags)) { + /* be sure to set the current sk state before tacking actions + * depending on sk_state, that is processing MPTCP_ERROR_REPORT + */ + if (__test_and_clear_bit(MPTCP_CONNECTED, &msk->cb_flags)) + __mptcp_set_connected(sk); + if (__test_and_clear_bit(MPTCP_ERROR_REPORT, &msk->cb_flags)) + __mptcp_error_report(sk); + if (__test_and_clear_bit(MPTCP_RESET_SCHEDULER, &msk->cb_flags)) + msk->last_snd = NULL; + } __mptcp_update_rmem(sk); } @@ -3237,15 +3322,12 @@ bool mptcp_finish_join(struct sock *ssk) return false; } - if (!msk->pm.server_side) + if (!list_empty(&subflow->node)) goto out; if (!mptcp_pm_allow_new_subflow(msk)) goto err_prohibited; - if (WARN_ON_ONCE(!list_empty(&subflow->node))) - goto err_prohibited; - /* active connections are already on conn_list. * If we can't acquire msk socket lock here, let the release callback * handle it @@ -3271,6 +3353,7 @@ err_prohibited: } subflow->map_seq = READ_ONCE(msk->ack_seq); + WRITE_ONCE(msk->allow_infinite_fallback, false); out: mptcp_event(MPTCP_EVENT_SUB_ESTABLISHED, msk, ssk, GFP_ATOMIC); diff --git a/net/mptcp/protocol.h b/net/mptcp/protocol.h index 3c1a3036550f..f542aeaa5b09 100644 --- a/net/mptcp/protocol.h +++ b/net/mptcp/protocol.h @@ -11,6 +11,7 @@ #include <net/tcp.h> #include <net/inet_connection_sock.h> #include <uapi/linux/mptcp.h> +#include <net/genetlink.h> #define MPTCP_SUPPORTED_VERSION 1 @@ -116,6 +117,7 @@ #define MPTCP_WORK_EOF 3 #define MPTCP_FALLBACK_DONE 4 #define MPTCP_WORK_CLOSE_SUBFLOW 5 +#define MPTCP_FAIL_NO_RESPONSE 6 /* MPTCP socket release cb flags */ #define MPTCP_PUSH_PENDING 1 @@ -124,6 +126,7 @@ #define MPTCP_RETRANSMIT 4 #define MPTCP_FLUSH_JOIN_LIST 5 #define MPTCP_CONNECTED 6 +#define MPTCP_RESET_SCHEDULER 7 static inline bool before64(__u64 seq1, __u64 seq2) { @@ -182,6 +185,14 @@ enum mptcp_pm_status { */ }; +enum mptcp_pm_type { + MPTCP_PM_TYPE_KERNEL = 0, + MPTCP_PM_TYPE_USERSPACE, + + __MPTCP_PM_TYPE_NR, + __MPTCP_PM_TYPE_MAX = __MPTCP_PM_TYPE_NR - 1, +}; + /* Status bits below MPTCP_PM_ALREADY_ESTABLISHED need pm worker actions */ #define MPTCP_PM_WORK_MASK ((1 << MPTCP_PM_ALREADY_ESTABLISHED) - 1) @@ -198,6 +209,7 @@ struct mptcp_pm_data { struct mptcp_addr_info local; struct mptcp_addr_info remote; struct list_head anno_list; + struct list_head userspace_pm_local_addr_list; spinlock_t lock; /*protects the whole PM data */ @@ -210,6 +222,7 @@ struct mptcp_pm_data { u8 add_addr_signaled; u8 add_addr_accepted; u8 local_addr_used; + u8 pm_type; u8 subflows; u8 status; DECLARE_BITMAP(id_avail_bitmap, MPTCP_PM_MAX_ADDR_ID + 1); @@ -217,6 +230,14 @@ struct mptcp_pm_data { struct mptcp_rm_list rm_list_rx; }; +struct mptcp_pm_addr_entry { + struct list_head list; + struct mptcp_addr_info addr; + u8 flags; + int ifindex; + struct socket *lsk; +}; + struct mptcp_data_frag { struct list_head list; u64 data_seq; @@ -262,6 +283,7 @@ struct mptcp_sock { bool rcv_fastclose; bool use_64bit_ack; /* Set when we received a 64-bit DSN */ bool csum_enabled; + bool allow_infinite_fallback; u8 recvmsg_inq:1, cork:1, nodelay:1; @@ -439,12 +461,14 @@ struct mptcp_subflow_context { send_mp_prio : 1, send_mp_fail : 1, send_fastclose : 1, + send_infinite_map : 1, rx_eof : 1, can_ack : 1, /* only after processing the remote a key */ disposable : 1, /* ctx can be free at ulp release time */ stale : 1, /* unable to snd/rcv data, do not use for xmit */ local_id_valid : 1; /* local_id is correctly initialized */ enum mptcp_data_avail data_avail; + bool mp_fail_response_expect; u32 remote_nonce; u64 thmac; u32 local_nonce; @@ -571,6 +595,7 @@ unsigned int mptcp_get_add_addr_timeout(const struct net *net); int mptcp_is_checksum_enabled(const struct net *net); int mptcp_allow_join_id0(const struct net *net); unsigned int mptcp_stale_loss_cnt(const struct net *net); +int mptcp_get_pm_type(const struct net *net); void mptcp_subflow_fully_established(struct mptcp_subflow_context *subflow, struct mptcp_options_received *mp_opt); bool __mptcp_retransmit_pending_data(struct sock *sk); @@ -586,6 +611,9 @@ void mptcp_subflow_reset(struct sock *ssk); void mptcp_sock_graft(struct sock *sk, struct socket *parent); struct socket *__mptcp_nmpc_socket(const struct mptcp_sock *msk); +bool mptcp_addresses_equal(const struct mptcp_addr_info *a, + const struct mptcp_addr_info *b, bool use_port); + /* called with sk socket lock held */ int __mptcp_subflow_connect(struct sock *sk, const struct mptcp_addr_info *loc, const struct mptcp_addr_info *remote); @@ -728,6 +756,11 @@ u16 __mptcp_make_csum(u64 data_seq, u32 subflow_seq, u16 data_len, __wsum sum); void __init mptcp_pm_init(void); void mptcp_pm_data_init(struct mptcp_sock *msk); void mptcp_pm_data_reset(struct mptcp_sock *msk); +int mptcp_pm_parse_addr(struct nlattr *attr, struct genl_info *info, + struct mptcp_addr_info *addr); +int mptcp_pm_parse_entry(struct nlattr *attr, struct genl_info *info, + bool require_family, + struct mptcp_pm_addr_entry *entry); void mptcp_pm_subflow_chk_stale(const struct mptcp_sock *msk, struct sock *ssk); void mptcp_pm_nl_subflow_chk_stale(const struct mptcp_sock *msk, struct sock *ssk); void mptcp_pm_new_connection(struct mptcp_sock *msk, const struct sock *ssk, int server_side); @@ -738,7 +771,7 @@ void mptcp_pm_subflow_established(struct mptcp_sock *msk); bool mptcp_pm_nl_check_work_pending(struct mptcp_sock *msk); void mptcp_pm_subflow_check_next(struct mptcp_sock *msk, const struct sock *ssk, const struct mptcp_subflow_context *subflow); -void mptcp_pm_add_addr_received(struct mptcp_sock *msk, +void mptcp_pm_add_addr_received(const struct sock *ssk, const struct mptcp_addr_info *addr); void mptcp_pm_add_addr_echoed(struct mptcp_sock *msk, const struct mptcp_addr_info *addr); @@ -748,6 +781,8 @@ void mptcp_pm_rm_addr_received(struct mptcp_sock *msk, const struct mptcp_rm_list *rm_list); void mptcp_pm_mp_prio_received(struct sock *sk, u8 bkup); void mptcp_pm_mp_fail_received(struct sock *sk, u64 fail_seq); +bool mptcp_pm_alloc_anno_list(struct mptcp_sock *msk, + const struct mptcp_pm_addr_entry *entry); void mptcp_pm_free_anno_list(struct mptcp_sock *msk); bool mptcp_pm_sport_in_anno_list(struct mptcp_sock *msk, const struct sock *sk); struct mptcp_pm_add_entry * @@ -756,19 +791,34 @@ mptcp_pm_del_add_timer(struct mptcp_sock *msk, struct mptcp_pm_add_entry * mptcp_lookup_anno_list_by_saddr(const struct mptcp_sock *msk, const struct mptcp_addr_info *addr); -int mptcp_pm_get_flags_and_ifindex_by_id(struct net *net, unsigned int id, +int mptcp_pm_get_flags_and_ifindex_by_id(struct mptcp_sock *msk, + unsigned int id, u8 *flags, int *ifindex); +int mptcp_userspace_pm_get_flags_and_ifindex_by_id(struct mptcp_sock *msk, + unsigned int id, + u8 *flags, int *ifindex); int mptcp_pm_announce_addr(struct mptcp_sock *msk, const struct mptcp_addr_info *addr, bool echo); int mptcp_pm_remove_addr(struct mptcp_sock *msk, const struct mptcp_rm_list *rm_list); int mptcp_pm_remove_subflow(struct mptcp_sock *msk, const struct mptcp_rm_list *rm_list); +void mptcp_pm_remove_addrs_and_subflows(struct mptcp_sock *msk, + struct list_head *rm_list); + +int mptcp_userspace_pm_append_new_local_addr(struct mptcp_sock *msk, + struct mptcp_pm_addr_entry *entry); +void mptcp_free_local_addr_list(struct mptcp_sock *msk); +int mptcp_nl_cmd_announce(struct sk_buff *skb, struct genl_info *info); +int mptcp_nl_cmd_remove(struct sk_buff *skb, struct genl_info *info); +int mptcp_nl_cmd_sf_create(struct sk_buff *skb, struct genl_info *info); +int mptcp_nl_cmd_sf_destroy(struct sk_buff *skb, struct genl_info *info); void mptcp_event(enum mptcp_event_type type, const struct mptcp_sock *msk, const struct sock *ssk, gfp_t gfp); -void mptcp_event_addr_announced(const struct mptcp_sock *msk, const struct mptcp_addr_info *info); +void mptcp_event_addr_announced(const struct sock *ssk, const struct mptcp_addr_info *info); void mptcp_event_addr_removed(const struct mptcp_sock *msk, u8 id); +bool mptcp_userspace_pm_active(const struct mptcp_sock *msk); static inline bool mptcp_pm_should_add_signal(struct mptcp_sock *msk) { @@ -791,6 +841,16 @@ static inline bool mptcp_pm_should_rm_signal(struct mptcp_sock *msk) return READ_ONCE(msk->pm.addr_signal) & BIT(MPTCP_RM_ADDR_SIGNAL); } +static inline bool mptcp_pm_is_userspace(const struct mptcp_sock *msk) +{ + return READ_ONCE(msk->pm.pm_type) == MPTCP_PM_TYPE_USERSPACE; +} + +static inline bool mptcp_pm_is_kernel(const struct mptcp_sock *msk) +{ + return READ_ONCE(msk->pm.pm_type) == MPTCP_PM_TYPE_KERNEL; +} + static inline unsigned int mptcp_add_addr_len(int family, bool echo, bool port) { u8 len = TCPOLEN_MPTCP_ADD_ADDR_BASE; @@ -821,9 +881,9 @@ bool mptcp_pm_add_addr_signal(struct mptcp_sock *msk, const struct sk_buff *skb, bool mptcp_pm_rm_addr_signal(struct mptcp_sock *msk, unsigned int remaining, struct mptcp_rm_list *rm_list); int mptcp_pm_get_local_id(struct mptcp_sock *msk, struct sock_common *skc); +int mptcp_userspace_pm_get_local_id(struct mptcp_sock *msk, struct mptcp_addr_info *skc); void __init mptcp_pm_nl_init(void); -void mptcp_pm_nl_data_init(struct mptcp_sock *msk); void mptcp_pm_nl_work(struct mptcp_sock *msk); void mptcp_pm_nl_rm_subflow_received(struct mptcp_sock *msk, const struct mptcp_rm_list *rm_list); @@ -875,13 +935,28 @@ static inline void mptcp_do_fallback(struct sock *sk) #define pr_fallback(a) pr_debug("%s:fallback to TCP (msk=%p)", __func__, a) +static inline bool mptcp_check_infinite_map(struct sk_buff *skb) +{ + struct mptcp_ext *mpext; + + mpext = skb ? mptcp_get_ext(skb) : NULL; + if (mpext && mpext->infinite_map) + return true; + + return false; +} + +static inline bool is_active_ssk(struct mptcp_subflow_context *subflow) +{ + return (subflow->request_mptcp || subflow->request_join); +} + static inline bool subflow_simultaneous_connect(struct sock *sk) { struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(sk); - struct sock *parent = subflow->conn; return sk->sk_state == TCP_ESTABLISHED && - !mptcp_sk(parent)->pm.server_side && + is_active_ssk(subflow) && !subflow->conn_finished; } diff --git a/net/mptcp/sockopt.c b/net/mptcp/sockopt.c index f949d22f52bd..826b0c1dae98 100644 --- a/net/mptcp/sockopt.c +++ b/net/mptcp/sockopt.c @@ -853,15 +853,11 @@ out: void mptcp_diag_fill_info(struct mptcp_sock *msk, struct mptcp_info *info) { - struct sock *sk = &msk->sk.icsk_inet.sk; u32 flags = 0; - bool slow; u8 val; memset(info, 0, sizeof(*info)); - slow = lock_sock_fast(sk); - info->mptcpi_subflows = READ_ONCE(msk->pm.subflows); info->mptcpi_add_addr_signal = READ_ONCE(msk->pm.add_addr_signaled); info->mptcpi_add_addr_accepted = READ_ONCE(msk->pm.add_addr_accepted); @@ -882,8 +878,6 @@ void mptcp_diag_fill_info(struct mptcp_sock *msk, struct mptcp_info *info) info->mptcpi_snd_una = READ_ONCE(msk->snd_una); info->mptcpi_rcv_nxt = READ_ONCE(msk->ack_seq); info->mptcpi_csum_enabled = READ_ONCE(msk->csum_enabled); - - unlock_sock_fast(sk, slow); } EXPORT_SYMBOL_GPL(mptcp_diag_fill_info); diff --git a/net/mptcp/subflow.c b/net/mptcp/subflow.c index aba260f547da..6d59336a8e1e 100644 --- a/net/mptcp/subflow.c +++ b/net/mptcp/subflow.c @@ -62,7 +62,9 @@ static void subflow_generate_hmac(u64 key1, u64 key2, u32 nonce1, u32 nonce2, static bool mptcp_can_accept_new_subflow(const struct mptcp_sock *msk) { return mptcp_is_fully_established((void *)msk) && - READ_ONCE(msk->pm.accept_subflow); + ((mptcp_pm_is_userspace(msk) && + mptcp_userspace_pm_active(msk)) || + READ_ONCE(msk->pm.accept_subflow)); } /* validate received token and create truncated hmac and nonce for SYN-ACK */ @@ -441,6 +443,7 @@ static void subflow_finish_connect(struct sock *sk, const struct sk_buff *skb) subflow->backup = mp_opt.backup; subflow->thmac = mp_opt.thmac; subflow->remote_nonce = mp_opt.nonce; + subflow->remote_id = mp_opt.join_id; pr_debug("subflow=%p, thmac=%llu, remote_nonce=%u backup=%d", subflow, subflow->thmac, subflow->remote_nonce, subflow->backup); @@ -968,6 +971,7 @@ static enum mapping_status get_mapping_status(struct sock *ssk, { struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(ssk); bool csum_reqd = READ_ONCE(msk->csum_enabled); + struct sock *sk = (struct sock *)msk; struct mptcp_ext *mpext; struct sk_buff *skb; u16 data_len; @@ -1006,7 +1010,15 @@ static enum mapping_status get_mapping_status(struct sock *ssk, data_len = mpext->data_len; if (data_len == 0) { + pr_debug("infinite mapping received"); MPTCP_INC_STATS(sock_net(ssk), MPTCP_MIB_INFINITEMAPRX); + subflow->map_data_len = 0; + if (sk && inet_sk_state_load(sk) != TCP_CLOSE) { + mptcp_data_lock(sk); + if (inet_sk_state_load(sk) != TCP_CLOSE) + sk_stop_timer(sk, &sk->sk_timer); + mptcp_data_unlock(sk); + } return MAPPING_INVALID; } @@ -1203,35 +1215,45 @@ no_data: return false; fallback: - /* RFC 8684 section 3.7. */ - if (subflow->send_mp_fail) { - if (mptcp_has_another_subflow(ssk)) { - while ((skb = skb_peek(&ssk->sk_receive_queue))) - sk_eat_skb(ssk, skb); + if (!__mptcp_check_fallback(msk)) { + /* RFC 8684 section 3.7. */ + if (subflow->send_mp_fail) { + if (mptcp_has_another_subflow(ssk) || + !READ_ONCE(msk->allow_infinite_fallback)) { + ssk->sk_err = EBADMSG; + tcp_set_state(ssk, TCP_CLOSE); + subflow->reset_transient = 0; + subflow->reset_reason = MPTCP_RST_EMIDDLEBOX; + tcp_send_active_reset(ssk, GFP_ATOMIC); + while ((skb = skb_peek(&ssk->sk_receive_queue))) + sk_eat_skb(ssk, skb); + } else { + WRITE_ONCE(subflow->mp_fail_response_expect, true); + /* The data lock is acquired in __mptcp_move_skbs() */ + sk_reset_timer((struct sock *)msk, + &((struct sock *)msk)->sk_timer, + jiffies + TCP_RTO_MAX); + } + WRITE_ONCE(subflow->data_avail, MPTCP_SUBFLOW_NODATA); + return true; } - ssk->sk_err = EBADMSG; - tcp_set_state(ssk, TCP_CLOSE); - subflow->reset_transient = 0; - subflow->reset_reason = MPTCP_RST_EMIDDLEBOX; - tcp_send_active_reset(ssk, GFP_ATOMIC); - WRITE_ONCE(subflow->data_avail, MPTCP_SUBFLOW_NODATA); - return true; - } - if (subflow->mp_join || subflow->fully_established) { - /* fatal protocol error, close the socket. - * subflow_error_report() will introduce the appropriate barriers - */ - ssk->sk_err = EBADMSG; - tcp_set_state(ssk, TCP_CLOSE); - subflow->reset_transient = 0; - subflow->reset_reason = MPTCP_RST_EMPTCP; - tcp_send_active_reset(ssk, GFP_ATOMIC); - WRITE_ONCE(subflow->data_avail, MPTCP_SUBFLOW_NODATA); - return false; + if ((subflow->mp_join || subflow->fully_established) && subflow->map_data_len) { + /* fatal protocol error, close the socket. + * subflow_error_report() will introduce the appropriate barriers + */ + ssk->sk_err = EBADMSG; + tcp_set_state(ssk, TCP_CLOSE); + subflow->reset_transient = 0; + subflow->reset_reason = MPTCP_RST_EMPTCP; + tcp_send_active_reset(ssk, GFP_ATOMIC); + WRITE_ONCE(subflow->data_avail, MPTCP_SUBFLOW_NODATA); + return false; + } + + __mptcp_do_fallback(msk); } - __mptcp_do_fallback(msk); skb = skb_peek(&ssk->sk_receive_queue); subflow->map_valid = 1; subflow->map_seq = READ_ONCE(msk->ack_seq); @@ -1446,7 +1468,7 @@ int __mptcp_subflow_connect(struct sock *sk, const struct mptcp_addr_info *loc, if (local_id) subflow_set_local_id(subflow, local_id); - mptcp_pm_get_flags_and_ifindex_by_id(sock_net(sk), local_id, + mptcp_pm_get_flags_and_ifindex_by_id(msk, local_id, &flags, &ifindex); subflow->remote_key = msk->remote_key; subflow->local_key = msk->local_key; @@ -1483,6 +1505,7 @@ int __mptcp_subflow_connect(struct sock *sk, const struct mptcp_addr_info *loc, /* discard the subflow socket */ mptcp_sock_graft(ssk, sk->sk_socket); iput(SOCK_INODE(sf)); + WRITE_ONCE(msk->allow_infinite_fallback, false); return err; failed_unlink: diff --git a/net/netfilter/ipvs/ip_vs_ctl.c b/net/netfilter/ipvs/ip_vs_ctl.c index 7f645328b47f..efab2b06d373 100644 --- a/net/netfilter/ipvs/ip_vs_ctl.c +++ b/net/netfilter/ipvs/ip_vs_ctl.c @@ -1767,8 +1767,6 @@ static int ip_vs_zero_all(struct netns_ipvs *ipvs) #ifdef CONFIG_SYSCTL -static int three = 3; - static int proc_do_defense_mode(struct ctl_table *table, int write, void *buffer, size_t *lenp, loff_t *ppos) @@ -1977,7 +1975,7 @@ static struct ctl_table vs_vars[] = { .mode = 0644, .proc_handler = proc_dointvec_minmax, .extra1 = SYSCTL_ZERO, - .extra2 = &three, + .extra2 = SYSCTL_THREE, }, { .procname = "nat_icmp_send", diff --git a/net/netfilter/nf_conntrack_bpf.c b/net/netfilter/nf_conntrack_bpf.c index fe98673dd5ac..bc4d5cd63a94 100644 --- a/net/netfilter/nf_conntrack_bpf.c +++ b/net/netfilter/nf_conntrack_bpf.c @@ -38,6 +38,7 @@ * @l4proto - Layer 4 protocol * Values: * IPPROTO_TCP, IPPROTO_UDP + * @dir: - connection tracking tuple direction. * @reserved - Reserved member, will be reused for more options in future * Values: * 0 @@ -46,7 +47,8 @@ struct bpf_ct_opts { s32 netns_id; s32 error; u8 l4proto; - u8 reserved[3]; + u8 dir; + u8 reserved[2]; }; enum { @@ -56,10 +58,11 @@ enum { static struct nf_conn *__bpf_nf_ct_lookup(struct net *net, struct bpf_sock_tuple *bpf_tuple, u32 tuple_len, u8 protonum, - s32 netns_id) + s32 netns_id, u8 *dir) { struct nf_conntrack_tuple_hash *hash; struct nf_conntrack_tuple tuple; + struct nf_conn *ct; if (unlikely(protonum != IPPROTO_TCP && protonum != IPPROTO_UDP)) return ERR_PTR(-EPROTO); @@ -99,7 +102,12 @@ static struct nf_conn *__bpf_nf_ct_lookup(struct net *net, put_net(net); if (!hash) return ERR_PTR(-ENOENT); - return nf_ct_tuplehash_to_ctrack(hash); + + ct = nf_ct_tuplehash_to_ctrack(hash); + if (dir) + *dir = NF_CT_DIRECTION(hash); + + return ct; } __diag_push(); @@ -135,13 +143,13 @@ bpf_xdp_ct_lookup(struct xdp_md *xdp_ctx, struct bpf_sock_tuple *bpf_tuple, if (!opts) return NULL; if (!bpf_tuple || opts->reserved[0] || opts->reserved[1] || - opts->reserved[2] || opts__sz != NF_BPF_CT_OPTS_SZ) { + opts__sz != NF_BPF_CT_OPTS_SZ) { opts->error = -EINVAL; return NULL; } caller_net = dev_net(ctx->rxq->dev); nfct = __bpf_nf_ct_lookup(caller_net, bpf_tuple, tuple__sz, opts->l4proto, - opts->netns_id); + opts->netns_id, &opts->dir); if (IS_ERR(nfct)) { opts->error = PTR_ERR(nfct); return NULL; @@ -178,13 +186,13 @@ bpf_skb_ct_lookup(struct __sk_buff *skb_ctx, struct bpf_sock_tuple *bpf_tuple, if (!opts) return NULL; if (!bpf_tuple || opts->reserved[0] || opts->reserved[1] || - opts->reserved[2] || opts__sz != NF_BPF_CT_OPTS_SZ) { + opts__sz != NF_BPF_CT_OPTS_SZ) { opts->error = -EINVAL; return NULL; } caller_net = skb->dev ? dev_net(skb->dev) : sock_net(skb->sk); nfct = __bpf_nf_ct_lookup(caller_net, bpf_tuple, tuple__sz, opts->l4proto, - opts->netns_id); + opts->netns_id, &opts->dir); if (IS_ERR(nfct)) { opts->error = PTR_ERR(nfct); return NULL; diff --git a/net/netfilter/nf_conntrack_ecache.c b/net/netfilter/nf_conntrack_ecache.c index 07e65b4e92f8..0cb2da0a759a 100644 --- a/net/netfilter/nf_conntrack_ecache.c +++ b/net/netfilter/nf_conntrack_ecache.c @@ -96,8 +96,8 @@ static enum retry_state ecache_work_evict_list(struct ct_pcpu *pcpu) static void ecache_work(struct work_struct *work) { - struct nf_conntrack_net *cnet = container_of(work, struct nf_conntrack_net, ecache_dwork.work); - struct netns_ct *ctnet = cnet->ct_net; + struct nf_conntrack_net *cnet = container_of(work, struct nf_conntrack_net, ecache.dwork.work); + struct netns_ct *ctnet = cnet->ecache.ct_net; int cpu, delay = -1; struct ct_pcpu *pcpu; @@ -127,7 +127,7 @@ static void ecache_work(struct work_struct *work) ctnet->ecache_dwork_pending = delay > 0; if (delay >= 0) - schedule_delayed_work(&cnet->ecache_dwork, delay); + schedule_delayed_work(&cnet->ecache.dwork, delay); } static int __nf_conntrack_eventmask_report(struct nf_conntrack_ecache *e, @@ -293,12 +293,12 @@ void nf_conntrack_ecache_work(struct net *net, enum nf_ct_ecache_state state) struct nf_conntrack_net *cnet = nf_ct_pernet(net); if (state == NFCT_ECACHE_DESTROY_FAIL && - !delayed_work_pending(&cnet->ecache_dwork)) { - schedule_delayed_work(&cnet->ecache_dwork, HZ); + !delayed_work_pending(&cnet->ecache.dwork)) { + schedule_delayed_work(&cnet->ecache.dwork, HZ); net->ct.ecache_dwork_pending = true; } else if (state == NFCT_ECACHE_DESTROY_SENT) { net->ct.ecache_dwork_pending = false; - mod_delayed_work(system_wq, &cnet->ecache_dwork, 0); + mod_delayed_work(system_wq, &cnet->ecache.dwork, 0); } } @@ -310,8 +310,9 @@ void nf_conntrack_ecache_pernet_init(struct net *net) struct nf_conntrack_net *cnet = nf_ct_pernet(net); net->ct.sysctl_events = nf_ct_events; - cnet->ct_net = &net->ct; - INIT_DELAYED_WORK(&cnet->ecache_dwork, ecache_work); + + cnet->ecache.ct_net = &net->ct; + INIT_DELAYED_WORK(&cnet->ecache.dwork, ecache_work); BUILD_BUG_ON(__IPCT_MAX >= 16); /* e->ctmask is u16 */ } @@ -320,5 +321,5 @@ void nf_conntrack_ecache_pernet_fini(struct net *net) { struct nf_conntrack_net *cnet = nf_ct_pernet(net); - cancel_delayed_work_sync(&cnet->ecache_dwork); + cancel_delayed_work_sync(&cnet->ecache.dwork); } diff --git a/net/netfilter/nf_conntrack_netlink.c b/net/netfilter/nf_conntrack_netlink.c index 1ea2ad732d57..924d766e6c53 100644 --- a/net/netfilter/nf_conntrack_netlink.c +++ b/net/netfilter/nf_conntrack_netlink.c @@ -1708,6 +1708,47 @@ static int ctnetlink_done_list(struct netlink_callback *cb) return 0; } +static int ctnetlink_dump_one_entry(struct sk_buff *skb, + struct netlink_callback *cb, + struct nf_conn *ct, + bool dying) +{ + struct ctnetlink_list_dump_ctx *ctx = (void *)cb->ctx; + struct nfgenmsg *nfmsg = nlmsg_data(cb->nlh); + u8 l3proto = nfmsg->nfgen_family; + int res; + + if (l3proto && nf_ct_l3num(ct) != l3proto) + return 0; + + if (ctx->last) { + if (ct != ctx->last) + return 0; + + ctx->last = NULL; + } + + /* We can't dump extension info for the unconfirmed + * list because unconfirmed conntracks can have + * ct->ext reallocated (and thus freed). + * + * In the dying list case ct->ext can't be free'd + * until after we drop pcpu->lock. + */ + res = ctnetlink_fill_info(skb, NETLINK_CB(cb->skb).portid, + cb->nlh->nlmsg_seq, + NFNL_MSG_TYPE(cb->nlh->nlmsg_type), + ct, dying, 0); + if (res < 0) { + if (!refcount_inc_not_zero(&ct->ct_general.use)) + return 0; + + ctx->last = ct; + } + + return res; +} + static int ctnetlink_dump_list(struct sk_buff *skb, struct netlink_callback *cb, bool dying) { @@ -1715,12 +1756,9 @@ ctnetlink_dump_list(struct sk_buff *skb, struct netlink_callback *cb, bool dying struct nf_conn *ct, *last; struct nf_conntrack_tuple_hash *h; struct hlist_nulls_node *n; - struct nfgenmsg *nfmsg = nlmsg_data(cb->nlh); - u_int8_t l3proto = nfmsg->nfgen_family; - int res; - int cpu; struct hlist_nulls_head *list; struct net *net = sock_net(skb->sk); + int res, cpu; if (ctx->done) return 0; @@ -1739,30 +1777,10 @@ ctnetlink_dump_list(struct sk_buff *skb, struct netlink_callback *cb, bool dying restart: hlist_nulls_for_each_entry(h, n, list, hnnode) { ct = nf_ct_tuplehash_to_ctrack(h); - if (l3proto && nf_ct_l3num(ct) != l3proto) - continue; - if (ctx->last) { - if (ct != last) - continue; - ctx->last = NULL; - } - /* We can't dump extension info for the unconfirmed - * list because unconfirmed conntracks can have - * ct->ext reallocated (and thus freed). - * - * In the dying list case ct->ext can't be free'd - * until after we drop pcpu->lock. - */ - res = ctnetlink_fill_info(skb, NETLINK_CB(cb->skb).portid, - cb->nlh->nlmsg_seq, - NFNL_MSG_TYPE(cb->nlh->nlmsg_type), - ct, dying, 0); + res = ctnetlink_dump_one_entry(skb, cb, ct, dying); if (res < 0) { - if (!refcount_inc_not_zero(&ct->ct_general.use)) - continue; ctx->cpu = cpu; - ctx->last = ct; spin_unlock_bh(&pcpu->lock); goto out; } diff --git a/net/netfilter/nf_log_syslog.c b/net/netfilter/nf_log_syslog.c index 13234641cdb3..77bcb10fc586 100644 --- a/net/netfilter/nf_log_syslog.c +++ b/net/netfilter/nf_log_syslog.c @@ -40,6 +40,12 @@ struct arppayload { unsigned char ip_dst[4]; }; +/* Guard against containers flooding syslog. */ +static bool nf_log_allowed(const struct net *net) +{ + return net_eq(net, &init_net) || sysctl_nf_log_all_netns; +} + static void nf_log_dump_vlan(struct nf_log_buf *m, const struct sk_buff *skb) { u16 vid; @@ -133,8 +139,7 @@ static void nf_log_arp_packet(struct net *net, u_int8_t pf, { struct nf_log_buf *m; - /* FIXME: Disabled from containers until syslog ns is supported */ - if (!net_eq(net, &init_net) && !sysctl_nf_log_all_netns) + if (!nf_log_allowed(net)) return; m = nf_log_buf_open(); @@ -766,9 +771,9 @@ dump_ipv6_packet(struct net *net, struct nf_log_buf *m, nf_log_buf_add(m, "MARK=0x%x ", skb->mark); } -static void dump_ipv4_mac_header(struct nf_log_buf *m, - const struct nf_loginfo *info, - const struct sk_buff *skb) +static void dump_mac_header(struct nf_log_buf *m, + const struct nf_loginfo *info, + const struct sk_buff *skb) { struct net_device *dev = skb->dev; unsigned int logflags = 0; @@ -798,9 +803,26 @@ fallback: const unsigned char *p = skb_mac_header(skb); unsigned int i; - nf_log_buf_add(m, "%02x", *p++); - for (i = 1; i < dev->hard_header_len; i++, p++) - nf_log_buf_add(m, ":%02x", *p); + if (dev->type == ARPHRD_SIT) { + p -= ETH_HLEN; + + if (p < skb->head) + p = NULL; + } + + if (p) { + nf_log_buf_add(m, "%02x", *p++); + for (i = 1; i < dev->hard_header_len; i++) + nf_log_buf_add(m, ":%02x", *p++); + } + + if (dev->type == ARPHRD_SIT) { + const struct iphdr *iph = + (struct iphdr *)skb_mac_header(skb); + + nf_log_buf_add(m, " TUNNEL=%pI4->%pI4", &iph->saddr, + &iph->daddr); + } } nf_log_buf_add(m, " "); } @@ -814,8 +836,7 @@ static void nf_log_ip_packet(struct net *net, u_int8_t pf, { struct nf_log_buf *m; - /* FIXME: Disabled from containers until syslog ns is supported */ - if (!net_eq(net, &init_net) && !sysctl_nf_log_all_netns) + if (!nf_log_allowed(net)) return; m = nf_log_buf_open(); @@ -827,7 +848,7 @@ static void nf_log_ip_packet(struct net *net, u_int8_t pf, out, loginfo, prefix); if (in) - dump_ipv4_mac_header(m, loginfo, skb); + dump_mac_header(m, loginfo, skb); dump_ipv4_packet(net, m, loginfo, skb, 0); @@ -841,64 +862,6 @@ static struct nf_logger nf_ip_logger __read_mostly = { .me = THIS_MODULE, }; -static void dump_ipv6_mac_header(struct nf_log_buf *m, - const struct nf_loginfo *info, - const struct sk_buff *skb) -{ - struct net_device *dev = skb->dev; - unsigned int logflags = 0; - - if (info->type == NF_LOG_TYPE_LOG) - logflags = info->u.log.logflags; - - if (!(logflags & NF_LOG_MACDECODE)) - goto fallback; - - switch (dev->type) { - case ARPHRD_ETHER: - nf_log_buf_add(m, "MACSRC=%pM MACDST=%pM ", - eth_hdr(skb)->h_source, eth_hdr(skb)->h_dest); - nf_log_dump_vlan(m, skb); - nf_log_buf_add(m, "MACPROTO=%04x ", - ntohs(eth_hdr(skb)->h_proto)); - return; - default: - break; - } - -fallback: - nf_log_buf_add(m, "MAC="); - if (dev->hard_header_len && - skb->mac_header != skb->network_header) { - const unsigned char *p = skb_mac_header(skb); - unsigned int len = dev->hard_header_len; - unsigned int i; - - if (dev->type == ARPHRD_SIT) { - p -= ETH_HLEN; - - if (p < skb->head) - p = NULL; - } - - if (p) { - nf_log_buf_add(m, "%02x", *p++); - for (i = 1; i < len; i++) - nf_log_buf_add(m, ":%02x", *p++); - } - nf_log_buf_add(m, " "); - - if (dev->type == ARPHRD_SIT) { - const struct iphdr *iph = - (struct iphdr *)skb_mac_header(skb); - nf_log_buf_add(m, "TUNNEL=%pI4->%pI4 ", &iph->saddr, - &iph->daddr); - } - } else { - nf_log_buf_add(m, " "); - } -} - static void nf_log_ip6_packet(struct net *net, u_int8_t pf, unsigned int hooknum, const struct sk_buff *skb, const struct net_device *in, @@ -908,8 +871,7 @@ static void nf_log_ip6_packet(struct net *net, u_int8_t pf, { struct nf_log_buf *m; - /* FIXME: Disabled from containers until syslog ns is supported */ - if (!net_eq(net, &init_net) && !sysctl_nf_log_all_netns) + if (!nf_log_allowed(net)) return; m = nf_log_buf_open(); @@ -921,7 +883,7 @@ static void nf_log_ip6_packet(struct net *net, u_int8_t pf, loginfo, prefix); if (in) - dump_ipv6_mac_header(m, loginfo, skb); + dump_mac_header(m, loginfo, skb); dump_ipv6_packet(net, m, loginfo, skb, skb_network_offset(skb), 1); @@ -935,6 +897,32 @@ static struct nf_logger nf_ip6_logger __read_mostly = { .me = THIS_MODULE, }; +static void nf_log_unknown_packet(struct net *net, u_int8_t pf, + unsigned int hooknum, + const struct sk_buff *skb, + const struct net_device *in, + const struct net_device *out, + const struct nf_loginfo *loginfo, + const char *prefix) +{ + struct nf_log_buf *m; + + if (!nf_log_allowed(net)) + return; + + m = nf_log_buf_open(); + + if (!loginfo) + loginfo = &default_loginfo; + + nf_log_dump_packet_common(m, pf, hooknum, skb, in, out, loginfo, + prefix); + + dump_mac_header(m, loginfo, skb); + + nf_log_buf_close(m); +} + static void nf_log_netdev_packet(struct net *net, u_int8_t pf, unsigned int hooknum, const struct sk_buff *skb, @@ -954,6 +942,10 @@ static void nf_log_netdev_packet(struct net *net, u_int8_t pf, case htons(ETH_P_RARP): nf_log_arp_packet(net, pf, hooknum, skb, in, out, loginfo, prefix); break; + default: + nf_log_unknown_packet(net, pf, hooknum, skb, + in, out, loginfo, prefix); + break; } } diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c index 16c3a39689f4..f3ad02a399f8 100644 --- a/net/netfilter/nf_tables_api.c +++ b/net/netfilter/nf_tables_api.c @@ -8367,10 +8367,8 @@ static int nf_tables_commit_chain_prepare(struct net *net, struct nft_chain *cha if (chain->blob_next || !nft_is_active_next(net, chain)) return 0; - rule = list_entry(&chain->rules, struct nft_rule, list); - data_size = 0; - list_for_each_entry_continue(rule, &chain->rules, list) { + list_for_each_entry(rule, &chain->rules, list) { if (nft_is_active_next(net, rule)) { data_size += sizeof(*prule) + rule->dlen; if (data_size > INT_MAX) @@ -8387,7 +8385,7 @@ static int nf_tables_commit_chain_prepare(struct net *net, struct nft_chain *cha data_boundary = data + data_size; size = 0; - list_for_each_entry_continue(rule, &chain->rules, list) { + list_for_each_entry(rule, &chain->rules, list) { if (!nft_is_active_next(net, rule)) continue; diff --git a/net/netfilter/nfnetlink_cttimeout.c b/net/netfilter/nfnetlink_cttimeout.c index b0d8888a539b..eea486f32971 100644 --- a/net/netfilter/nfnetlink_cttimeout.c +++ b/net/netfilter/nfnetlink_cttimeout.c @@ -158,6 +158,7 @@ static int cttimeout_new_timeout(struct sk_buff *skb, timeout->timeout.l3num = l3num; timeout->timeout.l4proto = l4proto; refcount_set(&timeout->refcnt, 1); + __module_get(THIS_MODULE); list_add_tail_rcu(&timeout->head, &pernet->nfct_timeout_list); return 0; @@ -506,13 +507,8 @@ static struct nf_ct_timeout *ctnl_timeout_find_get(struct net *net, if (strncmp(timeout->name, name, CTNL_TIMEOUT_NAME_MAX) != 0) continue; - if (!try_module_get(THIS_MODULE)) + if (!refcount_inc_not_zero(&timeout->refcnt)) goto err; - - if (!refcount_inc_not_zero(&timeout->refcnt)) { - module_put(THIS_MODULE); - goto err; - } matching = timeout; break; } @@ -525,10 +521,10 @@ static void ctnl_timeout_put(struct nf_ct_timeout *t) struct ctnl_timeout *timeout = container_of(t, struct ctnl_timeout, timeout); - if (refcount_dec_and_test(&timeout->refcnt)) + if (refcount_dec_and_test(&timeout->refcnt)) { kfree_rcu(timeout, rcu_head); - - module_put(THIS_MODULE); + module_put(THIS_MODULE); + } } static const struct nfnl_callback cttimeout_cb[IPCTNL_MSG_TIMEOUT_MAX] = { diff --git a/net/netfilter/nft_bitwise.c b/net/netfilter/nft_bitwise.c index f590ee1c8a1b..83590afe3768 100644 --- a/net/netfilter/nft_bitwise.c +++ b/net/netfilter/nft_bitwise.c @@ -30,7 +30,7 @@ static void nft_bitwise_eval_bool(u32 *dst, const u32 *src, { unsigned int i; - for (i = 0; i < DIV_ROUND_UP(priv->len, 4); i++) + for (i = 0; i < DIV_ROUND_UP(priv->len, sizeof(u32)); i++) dst[i] = (src[i] & priv->mask.data[i]) ^ priv->xor.data[i]; } @@ -109,22 +109,23 @@ static int nft_bitwise_init_bool(struct nft_bitwise *priv, return err; if (mask.type != NFT_DATA_VALUE || mask.len != priv->len) { err = -EINVAL; - goto err1; + goto err_mask_release; } err = nft_data_init(NULL, &priv->xor, sizeof(priv->xor), &xor, tb[NFTA_BITWISE_XOR]); if (err < 0) - goto err1; + goto err_mask_release; if (xor.type != NFT_DATA_VALUE || xor.len != priv->len) { err = -EINVAL; - goto err2; + goto err_xor_release; } return 0; -err2: + +err_xor_release: nft_data_release(&priv->xor, xor.type); -err1: +err_mask_release: nft_data_release(&priv->mask, mask.type); return err; } diff --git a/net/netfilter/nft_fib.c b/net/netfilter/nft_fib.c index f198f2d9ef90..1f12d7ade606 100644 --- a/net/netfilter/nft_fib.c +++ b/net/netfilter/nft_fib.c @@ -35,6 +35,10 @@ int nft_fib_validate(const struct nft_ctx *ctx, const struct nft_expr *expr, case NFT_FIB_RESULT_OIF: case NFT_FIB_RESULT_OIFNAME: hooks = (1 << NF_INET_PRE_ROUTING); + if (priv->flags & NFTA_FIB_F_IIF) { + hooks |= (1 << NF_INET_LOCAL_IN) | + (1 << NF_INET_FORWARD); + } break; case NFT_FIB_RESULT_ADDRTYPE: if (priv->flags & NFTA_FIB_F_IIF) diff --git a/net/netlink/af_netlink.c b/net/netlink/af_netlink.c index 05a3795eac8e..1b5a9c2e1c29 100644 --- a/net/netlink/af_netlink.c +++ b/net/netlink/af_netlink.c @@ -1931,7 +1931,6 @@ static int netlink_recvmsg(struct socket *sock, struct msghdr *msg, size_t len, struct scm_cookie scm; struct sock *sk = sock->sk; struct netlink_sock *nlk = nlk_sk(sk); - int noblock = flags & MSG_DONTWAIT; size_t copied; struct sk_buff *skb, *data_skb; int err, ret; @@ -1941,7 +1940,7 @@ static int netlink_recvmsg(struct socket *sock, struct msghdr *msg, size_t len, copied = 0; - skb = skb_recv_datagram(sk, flags, noblock, &err); + skb = skb_recv_datagram(sk, flags, &err); if (skb == NULL) goto out; diff --git a/net/netrom/af_netrom.c b/net/netrom/af_netrom.c index fa9dc2ba3941..6f7f4392cffb 100644 --- a/net/netrom/af_netrom.c +++ b/net/netrom/af_netrom.c @@ -1159,7 +1159,8 @@ static int nr_recvmsg(struct socket *sock, struct msghdr *msg, size_t size, } /* Now we can treat all alike */ - if ((skb = skb_recv_datagram(sk, flags & ~MSG_DONTWAIT, flags & MSG_DONTWAIT, &er)) == NULL) { + skb = skb_recv_datagram(sk, flags, &er); + if (!skb) { release_sock(sk); return er; } diff --git a/net/nfc/core.c b/net/nfc/core.c index 5b286e1e0a6f..6ff3e10ff8e3 100644 --- a/net/nfc/core.c +++ b/net/nfc/core.c @@ -1166,6 +1166,7 @@ void nfc_unregister_device(struct nfc_dev *dev) if (dev->rfkill) { rfkill_unregister(dev->rfkill); rfkill_destroy(dev->rfkill); + dev->rfkill = NULL; } dev->shutting_down = true; device_unlock(&dev->dev); diff --git a/net/nfc/llcp_sock.c b/net/nfc/llcp_sock.c index 4ca35791c93b..77642d18a3b4 100644 --- a/net/nfc/llcp_sock.c +++ b/net/nfc/llcp_sock.c @@ -821,7 +821,6 @@ static int llcp_sock_sendmsg(struct socket *sock, struct msghdr *msg, static int llcp_sock_recvmsg(struct socket *sock, struct msghdr *msg, size_t len, int flags) { - int noblock = flags & MSG_DONTWAIT; struct sock *sk = sock->sk; unsigned int copied, rlen; struct sk_buff *skb, *cskb; @@ -842,7 +841,7 @@ static int llcp_sock_recvmsg(struct socket *sock, struct msghdr *msg, if (flags & (MSG_OOB)) return -EOPNOTSUPP; - skb = skb_recv_datagram(sk, flags, noblock, &err); + skb = skb_recv_datagram(sk, flags, &err); if (!skb) { pr_err("Recv datagram failed state %d %d %d", sk->sk_state, err, sock_error(sk)); diff --git a/net/nfc/rawsock.c b/net/nfc/rawsock.c index 0ca214ab5aef..8dd569765f96 100644 --- a/net/nfc/rawsock.c +++ b/net/nfc/rawsock.c @@ -238,7 +238,6 @@ static int rawsock_sendmsg(struct socket *sock, struct msghdr *msg, size_t len) static int rawsock_recvmsg(struct socket *sock, struct msghdr *msg, size_t len, int flags) { - int noblock = flags & MSG_DONTWAIT; struct sock *sk = sock->sk; struct sk_buff *skb; int copied; @@ -246,7 +245,7 @@ static int rawsock_recvmsg(struct socket *sock, struct msghdr *msg, size_t len, pr_debug("sock=%p sk=%p len=%zu flags=%d\n", sock, sk, len, flags); - skb = skb_recv_datagram(sk, flags, noblock, &rc); + skb = skb_recv_datagram(sk, flags, &rc); if (!skb) return rc; diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c index 002d2b9c69dd..677f9cfa9660 100644 --- a/net/packet/af_packet.c +++ b/net/packet/af_packet.c @@ -1924,12 +1924,20 @@ oom: static void packet_parse_headers(struct sk_buff *skb, struct socket *sock) { + int depth; + if ((!skb->protocol || skb->protocol == htons(ETH_P_ALL)) && sock->type == SOCK_RAW) { skb_reset_mac_header(skb); skb->protocol = dev_parse_header_protocol(skb); } + /* Move network header to the right position for VLAN tagged packets */ + if (likely(skb->dev->type == ARPHRD_ETHER) && + eth_type_vlan(skb->protocol) && + __vlan_get_protocol(skb, skb->protocol, &depth) != 0) + skb_set_network_header(skb, depth); + skb_probe_transport_header(skb); } @@ -3047,6 +3055,11 @@ static int packet_snd(struct socket *sock, struct msghdr *msg, size_t len) skb->mark = sockc.mark; skb->tstamp = sockc.transmit_time; + if (unlikely(extra_len == 4)) + skb->no_fcs = 1; + + packet_parse_headers(skb, sock); + if (has_vnet_hdr) { err = virtio_net_hdr_to_skb(skb, &vnet_hdr, vio_le()); if (err) @@ -3055,11 +3068,6 @@ static int packet_snd(struct socket *sock, struct msghdr *msg, size_t len) virtio_net_hdr_set_proto(skb, &vnet_hdr); } - packet_parse_headers(skb, sock); - - if (unlikely(extra_len == 4)) - skb->no_fcs = 1; - err = po->xmit(skb); if (unlikely(err != 0)) { if (err > 0) @@ -3426,7 +3434,7 @@ static int packet_recvmsg(struct socket *sock, struct msghdr *msg, size_t len, * but then it will block. */ - skb = skb_recv_datagram(sk, flags, flags & MSG_DONTWAIT, &err); + skb = skb_recv_datagram(sk, flags, &err); /* * An error occurred so return it. Because skb_recv_datagram() @@ -3469,7 +3477,7 @@ static int packet_recvmsg(struct socket *sock, struct msghdr *msg, size_t len, sll->sll_protocol = skb->protocol; } - sock_recv_ts_and_drops(msg, sk, skb); + sock_recv_cmsgs(msg, sk, skb); if (msg->msg_name) { const size_t max_len = min(sizeof(skb->cb), diff --git a/net/phonet/datagram.c b/net/phonet/datagram.c index 393e6aa7a592..ff5f49ab236e 100644 --- a/net/phonet/datagram.c +++ b/net/phonet/datagram.c @@ -112,7 +112,7 @@ static int pn_sendmsg(struct sock *sk, struct msghdr *msg, size_t len) } static int pn_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, - int noblock, int flags, int *addr_len) + int flags, int *addr_len) { struct sk_buff *skb = NULL; struct sockaddr_pn sa; @@ -123,7 +123,7 @@ static int pn_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, MSG_CMSG_COMPAT)) goto out_nofree; - skb = skb_recv_datagram(sk, flags, noblock, &rval); + skb = skb_recv_datagram(sk, flags, &rval); if (skb == NULL) goto out_nofree; diff --git a/net/phonet/pep.c b/net/phonet/pep.c index 65d463ad8770..83ea13a50690 100644 --- a/net/phonet/pep.c +++ b/net/phonet/pep.c @@ -772,7 +772,8 @@ static struct sock *pep_sock_accept(struct sock *sk, int flags, int *errp, u8 pipe_handle, enabled, n_sb; u8 aligned = 0; - skb = skb_recv_datagram(sk, 0, flags & O_NONBLOCK, errp); + skb = skb_recv_datagram(sk, (flags & O_NONBLOCK) ? MSG_DONTWAIT : 0, + errp); if (!skb) return NULL; @@ -1238,7 +1239,7 @@ struct sk_buff *pep_read(struct sock *sk) } static int pep_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, - int noblock, int flags, int *addr_len) + int flags, int *addr_len) { struct sk_buff *skb; int err; @@ -1267,7 +1268,7 @@ static int pep_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, return -EINVAL; } - skb = skb_recv_datagram(sk, flags, noblock, &err); + skb = skb_recv_datagram(sk, flags, &err); lock_sock(sk); if (skb == NULL) { if (err == -ENOTCONN && sk->sk_state == TCP_CLOSE_WAIT) diff --git a/net/qrtr/af_qrtr.c b/net/qrtr/af_qrtr.c index ec2322529727..5c2fb992803b 100644 --- a/net/qrtr/af_qrtr.c +++ b/net/qrtr/af_qrtr.c @@ -1035,8 +1035,7 @@ static int qrtr_recvmsg(struct socket *sock, struct msghdr *msg, return -EADDRNOTAVAIL; } - skb = skb_recv_datagram(sk, flags & ~MSG_DONTWAIT, - flags & MSG_DONTWAIT, &rc); + skb = skb_recv_datagram(sk, flags, &rc); if (!skb) { release_sock(sk); return rc; diff --git a/net/rose/af_rose.c b/net/rose/af_rose.c index 30a1cf4c16c6..bf2d986a6bc3 100644 --- a/net/rose/af_rose.c +++ b/net/rose/af_rose.c @@ -1230,7 +1230,8 @@ static int rose_recvmsg(struct socket *sock, struct msghdr *msg, size_t size, return -ENOTCONN; /* Now we can treat all alike */ - if ((skb = skb_recv_datagram(sk, flags & ~MSG_DONTWAIT, flags & MSG_DONTWAIT, &er)) == NULL) + skb = skb_recv_datagram(sk, flags, &er); + if (!skb) return er; qbit = (skb->data[0] & ROSE_Q_BIT) == ROSE_Q_BIT; diff --git a/net/sched/act_api.c b/net/sched/act_api.c index 4f51094da9da..da9733da9868 100644 --- a/net/sched/act_api.c +++ b/net/sched/act_api.c @@ -195,7 +195,7 @@ static int offload_action_init(struct flow_offload_action *fl_action, if (act->ops->offload_act_setup) { spin_lock_bh(&act->tcfa_lock); err = act->ops->offload_act_setup(act, fl_action, NULL, - false); + false, extack); spin_unlock_bh(&act->tcfa_lock); return err; } @@ -271,7 +271,7 @@ static int tcf_action_offload_add_ex(struct tc_action *action, if (err) goto fl_err; - err = tc_setup_action(&fl_action->action, actions); + err = tc_setup_action(&fl_action->action, actions, extack); if (err) { NL_SET_ERR_MSG_MOD(extack, "Failed to setup tc actions for offload"); diff --git a/net/sched/act_csum.c b/net/sched/act_csum.c index e0f515b774ca..22847ee009ef 100644 --- a/net/sched/act_csum.c +++ b/net/sched/act_csum.c @@ -696,7 +696,8 @@ static size_t tcf_csum_get_fill_size(const struct tc_action *act) } static int tcf_csum_offload_act_setup(struct tc_action *act, void *entry_data, - u32 *index_inc, bool bind) + u32 *index_inc, bool bind, + struct netlink_ext_ack *extack) { if (bind) { struct flow_action_entry *entry = entry_data; diff --git a/net/sched/act_ct.c b/net/sched/act_ct.c index b1f502fce595..8af9d6e5ba61 100644 --- a/net/sched/act_ct.c +++ b/net/sched/act_ct.c @@ -1584,7 +1584,8 @@ static void tcf_stats_update(struct tc_action *a, u64 bytes, u64 packets, } static int tcf_ct_offload_act_setup(struct tc_action *act, void *entry_data, - u32 *index_inc, bool bind) + u32 *index_inc, bool bind, + struct netlink_ext_ack *extack) { if (bind) { struct flow_action_entry *entry = entry_data; diff --git a/net/sched/act_gact.c b/net/sched/act_gact.c index bde6a6c01e64..ac29d1065232 100644 --- a/net/sched/act_gact.c +++ b/net/sched/act_gact.c @@ -253,7 +253,8 @@ static size_t tcf_gact_get_fill_size(const struct tc_action *act) } static int tcf_gact_offload_act_setup(struct tc_action *act, void *entry_data, - u32 *index_inc, bool bind) + u32 *index_inc, bool bind, + struct netlink_ext_ack *extack) { if (bind) { struct flow_action_entry *entry = entry_data; @@ -267,7 +268,17 @@ static int tcf_gact_offload_act_setup(struct tc_action *act, void *entry_data, } else if (is_tcf_gact_goto_chain(act)) { entry->id = FLOW_ACTION_GOTO; entry->chain_index = tcf_gact_goto_chain_index(act); + } else if (is_tcf_gact_continue(act)) { + NL_SET_ERR_MSG_MOD(extack, "Offload of \"continue\" action is not supported"); + return -EOPNOTSUPP; + } else if (is_tcf_gact_reclassify(act)) { + NL_SET_ERR_MSG_MOD(extack, "Offload of \"reclassify\" action is not supported"); + return -EOPNOTSUPP; + } else if (is_tcf_gact_pipe(act)) { + NL_SET_ERR_MSG_MOD(extack, "Offload of \"pipe\" action is not supported"); + return -EOPNOTSUPP; } else { + NL_SET_ERR_MSG_MOD(extack, "Unsupported generic action offload"); return -EOPNOTSUPP; } *index_inc = 1; diff --git a/net/sched/act_gate.c b/net/sched/act_gate.c index d56e73843a4b..fd5155274733 100644 --- a/net/sched/act_gate.c +++ b/net/sched/act_gate.c @@ -619,7 +619,8 @@ static int tcf_gate_get_entries(struct flow_action_entry *entry, } static int tcf_gate_offload_act_setup(struct tc_action *act, void *entry_data, - u32 *index_inc, bool bind) + u32 *index_inc, bool bind, + struct netlink_ext_ack *extack) { int err; diff --git a/net/sched/act_mirred.c b/net/sched/act_mirred.c index 39acd1d18609..ebb92fb072ab 100644 --- a/net/sched/act_mirred.c +++ b/net/sched/act_mirred.c @@ -460,7 +460,8 @@ static void tcf_offload_mirred_get_dev(struct flow_action_entry *entry, } static int tcf_mirred_offload_act_setup(struct tc_action *act, void *entry_data, - u32 *index_inc, bool bind) + u32 *index_inc, bool bind, + struct netlink_ext_ack *extack) { if (bind) { struct flow_action_entry *entry = entry_data; @@ -478,6 +479,7 @@ static int tcf_mirred_offload_act_setup(struct tc_action *act, void *entry_data, entry->id = FLOW_ACTION_MIRRED_INGRESS; tcf_offload_mirred_get_dev(entry, act); } else { + NL_SET_ERR_MSG_MOD(extack, "Unsupported mirred offload"); return -EOPNOTSUPP; } *index_inc = 1; diff --git a/net/sched/act_mpls.c b/net/sched/act_mpls.c index b9ff3459fdab..adabeccb63e1 100644 --- a/net/sched/act_mpls.c +++ b/net/sched/act_mpls.c @@ -385,7 +385,8 @@ static int tcf_mpls_search(struct net *net, struct tc_action **a, u32 index) } static int tcf_mpls_offload_act_setup(struct tc_action *act, void *entry_data, - u32 *index_inc, bool bind) + u32 *index_inc, bool bind, + struct netlink_ext_ack *extack) { if (bind) { struct flow_action_entry *entry = entry_data; @@ -410,7 +411,14 @@ static int tcf_mpls_offload_act_setup(struct tc_action *act, void *entry_data, entry->mpls_mangle.bos = tcf_mpls_bos(act); entry->mpls_mangle.ttl = tcf_mpls_ttl(act); break; + case TCA_MPLS_ACT_DEC_TTL: + NL_SET_ERR_MSG_MOD(extack, "Offload not supported when \"dec_ttl\" option is used"); + return -EOPNOTSUPP; + case TCA_MPLS_ACT_MAC_PUSH: + NL_SET_ERR_MSG_MOD(extack, "Offload not supported when \"mac_push\" option is used"); + return -EOPNOTSUPP; default: + NL_SET_ERR_MSG_MOD(extack, "Unsupported MPLS mode offload"); return -EOPNOTSUPP; } *index_inc = 1; diff --git a/net/sched/act_pedit.c b/net/sched/act_pedit.c index 31fcd279c177..e01ef7f109f4 100644 --- a/net/sched/act_pedit.c +++ b/net/sched/act_pedit.c @@ -488,7 +488,8 @@ static int tcf_pedit_search(struct net *net, struct tc_action **a, u32 index) } static int tcf_pedit_offload_act_setup(struct tc_action *act, void *entry_data, - u32 *index_inc, bool bind) + u32 *index_inc, bool bind, + struct netlink_ext_ack *extack) { if (bind) { struct flow_action_entry *entry = entry_data; @@ -503,6 +504,7 @@ static int tcf_pedit_offload_act_setup(struct tc_action *act, void *entry_data, entry->id = FLOW_ACTION_ADD; break; default: + NL_SET_ERR_MSG_MOD(extack, "Unsupported pedit command offload"); return -EOPNOTSUPP; } entry->mangle.htype = tcf_pedit_htype(act, k); diff --git a/net/sched/act_police.c b/net/sched/act_police.c index f4d917705263..79c8901f66ab 100644 --- a/net/sched/act_police.c +++ b/net/sched/act_police.c @@ -419,7 +419,8 @@ static int tcf_police_search(struct net *net, struct tc_action **a, u32 index) return tcf_idr_search(tn, a, index); } -static int tcf_police_act_to_flow_act(int tc_act, u32 *extval) +static int tcf_police_act_to_flow_act(int tc_act, u32 *extval, + struct netlink_ext_ack *extack) { int act_id = -EOPNOTSUPP; @@ -430,19 +431,28 @@ static int tcf_police_act_to_flow_act(int tc_act, u32 *extval) act_id = FLOW_ACTION_DROP; else if (tc_act == TC_ACT_PIPE) act_id = FLOW_ACTION_PIPE; + else if (tc_act == TC_ACT_RECLASSIFY) + NL_SET_ERR_MSG_MOD(extack, "Offload not supported when conform/exceed action is \"reclassify\""); + else + NL_SET_ERR_MSG_MOD(extack, "Unsupported conform/exceed action offload"); } else if (TC_ACT_EXT_CMP(tc_act, TC_ACT_GOTO_CHAIN)) { act_id = FLOW_ACTION_GOTO; *extval = tc_act & TC_ACT_EXT_VAL_MASK; } else if (TC_ACT_EXT_CMP(tc_act, TC_ACT_JUMP)) { act_id = FLOW_ACTION_JUMP; *extval = tc_act & TC_ACT_EXT_VAL_MASK; + } else if (tc_act == TC_ACT_UNSPEC) { + NL_SET_ERR_MSG_MOD(extack, "Offload not supported when conform/exceed action is \"continue\""); + } else { + NL_SET_ERR_MSG_MOD(extack, "Unsupported conform/exceed action offload"); } return act_id; } static int tcf_police_offload_act_setup(struct tc_action *act, void *entry_data, - u32 *index_inc, bool bind) + u32 *index_inc, bool bind, + struct netlink_ext_ack *extack) { if (bind) { struct flow_action_entry *entry = entry_data; @@ -466,14 +476,16 @@ static int tcf_police_offload_act_setup(struct tc_action *act, void *entry_data, entry->police.mtu = tcf_police_tcfp_mtu(act); act_id = tcf_police_act_to_flow_act(police->tcf_action, - &entry->police.exceed.extval); + &entry->police.exceed.extval, + extack); if (act_id < 0) return act_id; entry->police.exceed.act_id = act_id; act_id = tcf_police_act_to_flow_act(p->tcfp_result, - &entry->police.notexceed.extval); + &entry->police.notexceed.extval, + extack); if (act_id < 0) return act_id; diff --git a/net/sched/act_sample.c b/net/sched/act_sample.c index 9a22cdda6bbd..2f7f5e44d28c 100644 --- a/net/sched/act_sample.c +++ b/net/sched/act_sample.c @@ -291,7 +291,8 @@ static void tcf_offload_sample_get_group(struct flow_action_entry *entry, } static int tcf_sample_offload_act_setup(struct tc_action *act, void *entry_data, - u32 *index_inc, bool bind) + u32 *index_inc, bool bind, + struct netlink_ext_ack *extack) { if (bind) { struct flow_action_entry *entry = entry_data; diff --git a/net/sched/act_skbedit.c b/net/sched/act_skbedit.c index ceba11b198bb..e3bd11dfe1ca 100644 --- a/net/sched/act_skbedit.c +++ b/net/sched/act_skbedit.c @@ -23,6 +23,20 @@ static unsigned int skbedit_net_id; static struct tc_action_ops act_skbedit_ops; +static u16 tcf_skbedit_hash(struct tcf_skbedit_params *params, + struct sk_buff *skb) +{ + u16 queue_mapping = params->queue_mapping; + + if (params->flags & SKBEDIT_F_TXQ_SKBHASH) { + u32 hash = skb_get_hash(skb); + + queue_mapping += hash % params->mapping_mod; + } + + return netdev_cap_txqueue(skb->dev, queue_mapping); +} + static int tcf_skbedit_act(struct sk_buff *skb, const struct tc_action *a, struct tcf_result *res) { @@ -58,8 +72,12 @@ static int tcf_skbedit_act(struct sk_buff *skb, const struct tc_action *a, } } if (params->flags & SKBEDIT_F_QUEUE_MAPPING && - skb->dev->real_num_tx_queues > params->queue_mapping) - skb_set_queue_mapping(skb, params->queue_mapping); + skb->dev->real_num_tx_queues > params->queue_mapping) { +#ifdef CONFIG_NET_EGRESS + netdev_xmit_skip_txqueue(true); +#endif + skb_set_queue_mapping(skb, tcf_skbedit_hash(params, skb)); + } if (params->flags & SKBEDIT_F_MARK) { skb->mark &= ~params->mask; skb->mark |= params->mark & params->mask; @@ -92,6 +110,7 @@ static const struct nla_policy skbedit_policy[TCA_SKBEDIT_MAX + 1] = { [TCA_SKBEDIT_PTYPE] = { .len = sizeof(u16) }, [TCA_SKBEDIT_MASK] = { .len = sizeof(u32) }, [TCA_SKBEDIT_FLAGS] = { .len = sizeof(u64) }, + [TCA_SKBEDIT_QUEUE_MAPPING_MAX] = { .len = sizeof(u16) }, }; static int tcf_skbedit_init(struct net *net, struct nlattr *nla, @@ -108,6 +127,7 @@ static int tcf_skbedit_init(struct net *net, struct nlattr *nla, struct tcf_skbedit *d; u32 flags = 0, *priority = NULL, *mark = NULL, *mask = NULL; u16 *queue_mapping = NULL, *ptype = NULL; + u16 mapping_mod = 1; bool exists = false; int ret = 0, err; u32 index; @@ -153,6 +173,25 @@ static int tcf_skbedit_init(struct net *net, struct nlattr *nla, if (tb[TCA_SKBEDIT_FLAGS] != NULL) { u64 *pure_flags = nla_data(tb[TCA_SKBEDIT_FLAGS]); + if (*pure_flags & SKBEDIT_F_TXQ_SKBHASH) { + u16 *queue_mapping_max; + + if (!tb[TCA_SKBEDIT_QUEUE_MAPPING] || + !tb[TCA_SKBEDIT_QUEUE_MAPPING_MAX]) { + NL_SET_ERR_MSG_MOD(extack, "Missing required range of queue_mapping."); + return -EINVAL; + } + + queue_mapping_max = + nla_data(tb[TCA_SKBEDIT_QUEUE_MAPPING_MAX]); + if (*queue_mapping_max < *queue_mapping) { + NL_SET_ERR_MSG_MOD(extack, "The range of queue_mapping is invalid, max < min."); + return -EINVAL; + } + + mapping_mod = *queue_mapping_max - *queue_mapping + 1; + flags |= SKBEDIT_F_TXQ_SKBHASH; + } if (*pure_flags & SKBEDIT_F_INHERITDSFIELD) flags |= SKBEDIT_F_INHERITDSFIELD; } @@ -204,8 +243,10 @@ static int tcf_skbedit_init(struct net *net, struct nlattr *nla, params_new->flags = flags; if (flags & SKBEDIT_F_PRIORITY) params_new->priority = *priority; - if (flags & SKBEDIT_F_QUEUE_MAPPING) + if (flags & SKBEDIT_F_QUEUE_MAPPING) { params_new->queue_mapping = *queue_mapping; + params_new->mapping_mod = mapping_mod; + } if (flags & SKBEDIT_F_MARK) params_new->mark = *mark; if (flags & SKBEDIT_F_PTYPE) @@ -272,6 +313,13 @@ static int tcf_skbedit_dump(struct sk_buff *skb, struct tc_action *a, goto nla_put_failure; if (params->flags & SKBEDIT_F_INHERITDSFIELD) pure_flags |= SKBEDIT_F_INHERITDSFIELD; + if (params->flags & SKBEDIT_F_TXQ_SKBHASH) { + if (nla_put_u16(skb, TCA_SKBEDIT_QUEUE_MAPPING_MAX, + params->queue_mapping + params->mapping_mod - 1)) + goto nla_put_failure; + + pure_flags |= SKBEDIT_F_TXQ_SKBHASH; + } if (pure_flags != 0 && nla_put(skb, TCA_SKBEDIT_FLAGS, sizeof(pure_flags), &pure_flags)) goto nla_put_failure; @@ -321,6 +369,7 @@ static size_t tcf_skbedit_get_fill_size(const struct tc_action *act) return nla_total_size(sizeof(struct tc_skbedit)) + nla_total_size(sizeof(u32)) /* TCA_SKBEDIT_PRIORITY */ + nla_total_size(sizeof(u16)) /* TCA_SKBEDIT_QUEUE_MAPPING */ + + nla_total_size(sizeof(u16)) /* TCA_SKBEDIT_QUEUE_MAPPING_MAX */ + nla_total_size(sizeof(u32)) /* TCA_SKBEDIT_MARK */ + nla_total_size(sizeof(u16)) /* TCA_SKBEDIT_PTYPE */ + nla_total_size(sizeof(u32)) /* TCA_SKBEDIT_MASK */ @@ -328,7 +377,8 @@ static size_t tcf_skbedit_get_fill_size(const struct tc_action *act) } static int tcf_skbedit_offload_act_setup(struct tc_action *act, void *entry_data, - u32 *index_inc, bool bind) + u32 *index_inc, bool bind, + struct netlink_ext_ack *extack) { if (bind) { struct flow_action_entry *entry = entry_data; @@ -342,7 +392,14 @@ static int tcf_skbedit_offload_act_setup(struct tc_action *act, void *entry_data } else if (is_tcf_skbedit_priority(act)) { entry->id = FLOW_ACTION_PRIORITY; entry->priority = tcf_skbedit_priority(act); + } else if (is_tcf_skbedit_queue_mapping(act)) { + NL_SET_ERR_MSG_MOD(extack, "Offload not supported when \"queue_mapping\" option is used"); + return -EOPNOTSUPP; + } else if (is_tcf_skbedit_inheritdsfield(act)) { + NL_SET_ERR_MSG_MOD(extack, "Offload not supported when \"inheritdsfield\" option is used"); + return -EOPNOTSUPP; } else { + NL_SET_ERR_MSG_MOD(extack, "Unsupported skbedit option offload"); return -EOPNOTSUPP; } *index_inc = 1; diff --git a/net/sched/act_tunnel_key.c b/net/sched/act_tunnel_key.c index 23aba03d26a8..856dc23cef8c 100644 --- a/net/sched/act_tunnel_key.c +++ b/net/sched/act_tunnel_key.c @@ -808,7 +808,8 @@ static int tcf_tunnel_encap_get_tunnel(struct flow_action_entry *entry, static int tcf_tunnel_key_offload_act_setup(struct tc_action *act, void *entry_data, u32 *index_inc, - bool bind) + bool bind, + struct netlink_ext_ack *extack) { int err; @@ -823,6 +824,7 @@ static int tcf_tunnel_key_offload_act_setup(struct tc_action *act, } else if (is_tcf_tunnel_release(act)) { entry->id = FLOW_ACTION_TUNNEL_DECAP; } else { + NL_SET_ERR_MSG_MOD(extack, "Unsupported tunnel key mode offload"); return -EOPNOTSUPP; } *index_inc = 1; diff --git a/net/sched/act_vlan.c b/net/sched/act_vlan.c index 883454c4f921..68b5e772386a 100644 --- a/net/sched/act_vlan.c +++ b/net/sched/act_vlan.c @@ -369,7 +369,8 @@ static size_t tcf_vlan_get_fill_size(const struct tc_action *act) } static int tcf_vlan_offload_act_setup(struct tc_action *act, void *entry_data, - u32 *index_inc, bool bind) + u32 *index_inc, bool bind, + struct netlink_ext_ack *extack) { if (bind) { struct flow_action_entry *entry = entry_data; @@ -398,6 +399,7 @@ static int tcf_vlan_offload_act_setup(struct tc_action *act, void *entry_data, tcf_vlan_push_eth(entry->vlan_push_eth.src, entry->vlan_push_eth.dst, act); break; default: + NL_SET_ERR_MSG_MOD(extack, "Unsupported vlan action mode offload"); return -EOPNOTSUPP; } *index_inc = 1; diff --git a/net/sched/cls_api.c b/net/sched/cls_api.c index f0699f39afdb..9bb4d3dcc994 100644 --- a/net/sched/cls_api.c +++ b/net/sched/cls_api.c @@ -3513,20 +3513,25 @@ EXPORT_SYMBOL(tc_cleanup_offload_action); static int tc_setup_offload_act(struct tc_action *act, struct flow_action_entry *entry, - u32 *index_inc) + u32 *index_inc, + struct netlink_ext_ack *extack) { #ifdef CONFIG_NET_CLS_ACT - if (act->ops->offload_act_setup) - return act->ops->offload_act_setup(act, entry, index_inc, true); - else + if (act->ops->offload_act_setup) { + return act->ops->offload_act_setup(act, entry, index_inc, true, + extack); + } else { + NL_SET_ERR_MSG(extack, "Action does not support offload"); return -EOPNOTSUPP; + } #else return 0; #endif } int tc_setup_action(struct flow_action *flow_action, - struct tc_action *actions[]) + struct tc_action *actions[], + struct netlink_ext_ack *extack) { int i, j, index, err = 0; struct tc_action *act; @@ -3551,7 +3556,7 @@ int tc_setup_action(struct flow_action *flow_action, entry->hw_stats = tc_act_hw_stats(act->hw_stats); entry->hw_index = act->tcfa_index; index = 0; - err = tc_setup_offload_act(act, entry, &index); + err = tc_setup_offload_act(act, entry, &index, extack); if (!err) j += index; else @@ -3570,13 +3575,14 @@ err_out_locked: } int tc_setup_offload_action(struct flow_action *flow_action, - const struct tcf_exts *exts) + const struct tcf_exts *exts, + struct netlink_ext_ack *extack) { #ifdef CONFIG_NET_CLS_ACT if (!exts) return 0; - return tc_setup_action(flow_action, exts->actions); + return tc_setup_action(flow_action, exts->actions, extack); #else return 0; #endif diff --git a/net/sched/cls_flower.c b/net/sched/cls_flower.c index ed5e6f08e74a..dcca70144dff 100644 --- a/net/sched/cls_flower.c +++ b/net/sched/cls_flower.c @@ -72,6 +72,7 @@ struct fl_flow_key { } tp_range; struct flow_dissector_key_ct ct; struct flow_dissector_key_hash hash; + struct flow_dissector_key_num_of_vlans num_of_vlans; } __aligned(BITS_PER_LONG / 8); /* Ensure that we can do comparisons as longs. */ struct fl_flow_mask_range { @@ -464,14 +465,12 @@ static int fl_hw_replace_filter(struct tcf_proto *tp, cls_flower.rule->match.key = &f->mkey; cls_flower.classid = f->res.classid; - err = tc_setup_offload_action(&cls_flower.rule->action, &f->exts); + err = tc_setup_offload_action(&cls_flower.rule->action, &f->exts, + cls_flower.common.extack); if (err) { kfree(cls_flower.rule); - if (skip_sw) { - NL_SET_ERR_MSG_MOD(extack, "Failed to setup flow action"); - return err; - } - return 0; + + return skip_sw ? err : 0; } err = tc_setup_cb_add(block, tp, TC_SETUP_CLSFLOWER, &cls_flower, @@ -714,6 +713,7 @@ static const struct nla_policy fl_policy[TCA_FLOWER_MAX + 1] = { [TCA_FLOWER_FLAGS] = { .type = NLA_U32 }, [TCA_FLOWER_KEY_HASH] = { .type = NLA_U32 }, [TCA_FLOWER_KEY_HASH_MASK] = { .type = NLA_U32 }, + [TCA_FLOWER_KEY_NUM_OF_VLANS] = { .type = NLA_U8 }, }; @@ -1030,8 +1030,10 @@ static void fl_set_key_vlan(struct nlattr **tb, VLAN_PRIORITY_MASK; key_mask->vlan_priority = VLAN_PRIORITY_MASK; } - key_val->vlan_tpid = ethertype; - key_mask->vlan_tpid = cpu_to_be16(~0); + if (ethertype) { + key_val->vlan_tpid = ethertype; + key_mask->vlan_tpid = cpu_to_be16(~0); + } if (tb[vlan_next_eth_type_key]) { key_val->vlan_eth_type = nla_get_be16(tb[vlan_next_eth_type_key]); @@ -1581,6 +1583,26 @@ static int fl_set_key_ct(struct nlattr **tb, return 0; } +static bool is_vlan_key(struct nlattr *tb, __be16 *ethertype, + struct fl_flow_key *key, struct fl_flow_key *mask, + int vthresh) +{ + const bool good_num_of_vlans = key->num_of_vlans.num_of_vlans > vthresh; + + if (!tb) { + *ethertype = 0; + return good_num_of_vlans; + } + + *ethertype = nla_get_be16(tb); + if (good_num_of_vlans || eth_type_vlan(*ethertype)) + return true; + + key->basic.n_proto = *ethertype; + mask->basic.n_proto = cpu_to_be16(~0); + return false; +} + static int fl_set_key(struct net *net, struct nlattr **tb, struct fl_flow_key *key, struct fl_flow_key *mask, struct netlink_ext_ack *extack) @@ -1602,37 +1624,30 @@ static int fl_set_key(struct net *net, struct nlattr **tb, fl_set_key_val(tb, key->eth.src, TCA_FLOWER_KEY_ETH_SRC, mask->eth.src, TCA_FLOWER_KEY_ETH_SRC_MASK, sizeof(key->eth.src)); - - if (tb[TCA_FLOWER_KEY_ETH_TYPE]) { - ethertype = nla_get_be16(tb[TCA_FLOWER_KEY_ETH_TYPE]); - - if (eth_type_vlan(ethertype)) { - fl_set_key_vlan(tb, ethertype, TCA_FLOWER_KEY_VLAN_ID, - TCA_FLOWER_KEY_VLAN_PRIO, - TCA_FLOWER_KEY_VLAN_ETH_TYPE, - &key->vlan, &mask->vlan); - - if (tb[TCA_FLOWER_KEY_VLAN_ETH_TYPE]) { - ethertype = nla_get_be16(tb[TCA_FLOWER_KEY_VLAN_ETH_TYPE]); - if (eth_type_vlan(ethertype)) { - fl_set_key_vlan(tb, ethertype, - TCA_FLOWER_KEY_CVLAN_ID, - TCA_FLOWER_KEY_CVLAN_PRIO, - TCA_FLOWER_KEY_CVLAN_ETH_TYPE, - &key->cvlan, &mask->cvlan); - fl_set_key_val(tb, &key->basic.n_proto, - TCA_FLOWER_KEY_CVLAN_ETH_TYPE, - &mask->basic.n_proto, - TCA_FLOWER_UNSPEC, - sizeof(key->basic.n_proto)); - } else { - key->basic.n_proto = ethertype; - mask->basic.n_proto = cpu_to_be16(~0); - } - } - } else { - key->basic.n_proto = ethertype; - mask->basic.n_proto = cpu_to_be16(~0); + fl_set_key_val(tb, &key->num_of_vlans, + TCA_FLOWER_KEY_NUM_OF_VLANS, + &mask->num_of_vlans, + TCA_FLOWER_UNSPEC, + sizeof(key->num_of_vlans)); + + if (is_vlan_key(tb[TCA_FLOWER_KEY_ETH_TYPE], ðertype, key, mask, 0)) { + fl_set_key_vlan(tb, ethertype, TCA_FLOWER_KEY_VLAN_ID, + TCA_FLOWER_KEY_VLAN_PRIO, + TCA_FLOWER_KEY_VLAN_ETH_TYPE, + &key->vlan, &mask->vlan); + + if (is_vlan_key(tb[TCA_FLOWER_KEY_VLAN_ETH_TYPE], + ðertype, key, mask, 1)) { + fl_set_key_vlan(tb, ethertype, + TCA_FLOWER_KEY_CVLAN_ID, + TCA_FLOWER_KEY_CVLAN_PRIO, + TCA_FLOWER_KEY_CVLAN_ETH_TYPE, + &key->cvlan, &mask->cvlan); + fl_set_key_val(tb, &key->basic.n_proto, + TCA_FLOWER_KEY_CVLAN_ETH_TYPE, + &mask->basic.n_proto, + TCA_FLOWER_UNSPEC, + sizeof(key->basic.n_proto)); } } @@ -1906,6 +1921,8 @@ static void fl_init_dissector(struct flow_dissector *dissector, FLOW_DISSECTOR_KEY_CT, ct); FL_KEY_SET_IF_MASKED(mask, keys, cnt, FLOW_DISSECTOR_KEY_HASH, hash); + FL_KEY_SET_IF_MASKED(mask, keys, cnt, + FLOW_DISSECTOR_KEY_NUM_OF_VLANS, num_of_vlans); skb_flow_dissector_init(dissector, keys, cnt); } @@ -2362,11 +2379,11 @@ static int fl_reoffload(struct tcf_proto *tp, bool add, flow_setup_cb_t *cb, cls_flower.rule->match.mask = &f->mask->key; cls_flower.rule->match.key = &f->mkey; - err = tc_setup_offload_action(&cls_flower.rule->action, &f->exts); + err = tc_setup_offload_action(&cls_flower.rule->action, &f->exts, + cls_flower.common.extack); if (err) { kfree(cls_flower.rule); if (tc_skip_sw(f->flags)) { - NL_SET_ERR_MSG_MOD(extack, "Failed to setup flow action"); __fl_put(f); return err; } @@ -2994,6 +3011,11 @@ static int fl_dump_key(struct sk_buff *skb, struct net *net, sizeof(key->basic.n_proto))) goto nla_put_failure; + if (mask->num_of_vlans.num_of_vlans) { + if (nla_put_u8(skb, TCA_FLOWER_KEY_NUM_OF_VLANS, key->num_of_vlans.num_of_vlans)) + goto nla_put_failure; + } + if (fl_dump_key_mpls(skb, &key->mpls, &mask->mpls)) goto nla_put_failure; diff --git a/net/sched/cls_matchall.c b/net/sched/cls_matchall.c index ca5670fd5228..06cf22adbab7 100644 --- a/net/sched/cls_matchall.c +++ b/net/sched/cls_matchall.c @@ -97,16 +97,13 @@ static int mall_replace_hw_filter(struct tcf_proto *tp, cls_mall.command = TC_CLSMATCHALL_REPLACE; cls_mall.cookie = cookie; - err = tc_setup_offload_action(&cls_mall.rule->action, &head->exts); + err = tc_setup_offload_action(&cls_mall.rule->action, &head->exts, + cls_mall.common.extack); if (err) { kfree(cls_mall.rule); mall_destroy_hw_filter(tp, head, cookie, NULL); - if (skip_sw) - NL_SET_ERR_MSG_MOD(extack, "Failed to setup flow action"); - else - err = 0; - return err; + return skip_sw ? err : 0; } err = tc_setup_cb_add(block, tp, TC_SETUP_CLSMATCHALL, &cls_mall, @@ -302,14 +299,12 @@ static int mall_reoffload(struct tcf_proto *tp, bool add, flow_setup_cb_t *cb, TC_CLSMATCHALL_REPLACE : TC_CLSMATCHALL_DESTROY; cls_mall.cookie = (unsigned long)head; - err = tc_setup_offload_action(&cls_mall.rule->action, &head->exts); + err = tc_setup_offload_action(&cls_mall.rule->action, &head->exts, + cls_mall.common.extack); if (err) { kfree(cls_mall.rule); - if (add && tc_skip_sw(head->flags)) { - NL_SET_ERR_MSG_MOD(extack, "Failed to setup flow action"); - return err; - } - return 0; + + return add && tc_skip_sw(head->flags) ? err : 0; } err = tc_setup_cb_reoffload(block, tp, add, cb, TC_SETUP_CLSMATCHALL, diff --git a/net/sched/sch_generic.c b/net/sched/sch_generic.c index 5bab9f8b8f45..dba0b3e24af5 100644 --- a/net/sched/sch_generic.c +++ b/net/sched/sch_generic.c @@ -1019,22 +1019,14 @@ EXPORT_SYMBOL(qdisc_create_dflt); void qdisc_reset(struct Qdisc *qdisc) { const struct Qdisc_ops *ops = qdisc->ops; - struct sk_buff *skb, *tmp; trace_qdisc_reset(qdisc); if (ops->reset) ops->reset(qdisc); - skb_queue_walk_safe(&qdisc->gso_skb, skb, tmp) { - __skb_unlink(skb, &qdisc->gso_skb); - kfree_skb_list(skb); - } - - skb_queue_walk_safe(&qdisc->skb_bad_txq, skb, tmp) { - __skb_unlink(skb, &qdisc->skb_bad_txq); - kfree_skb_list(skb); - } + __skb_queue_purge(&qdisc->gso_skb); + __skb_queue_purge(&qdisc->skb_bad_txq); qdisc->q.qlen = 0; qdisc->qstats.backlog = 0; diff --git a/net/sctp/ipv6.c b/net/sctp/ipv6.c index 470dbdc27d58..d081858c2d07 100644 --- a/net/sctp/ipv6.c +++ b/net/sctp/ipv6.c @@ -926,7 +926,7 @@ static int sctp_inet6_af_supported(sa_family_t family, struct sctp_sock *sp) return 1; /* v4-mapped-v6 addresses */ case AF_INET: - if (!__ipv6_only_sock(sctp_opt2sk(sp))) + if (!ipv6_only_sock(sctp_opt2sk(sp))) return 1; fallthrough; default: @@ -952,7 +952,7 @@ static int sctp_inet6_cmp_addr(const union sctp_addr *addr1, return 0; /* If the socket is IPv6 only, v4 addrs will not match */ - if (__ipv6_only_sock(sk) && af1 != af2) + if (ipv6_only_sock(sk) && af1 != af2) return 0; /* Today, wildcard AF_INET/AF_INET6. */ diff --git a/net/sctp/socket.c b/net/sctp/socket.c index 7b0427658056..6d37d2dfb3da 100644 --- a/net/sctp/socket.c +++ b/net/sctp/socket.c @@ -2084,7 +2084,7 @@ static int sctp_skb_pull(struct sk_buff *skb, int len) * 5 for complete description of the flags. */ static int sctp_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, - int noblock, int flags, int *addr_len) + int flags, int *addr_len) { struct sctp_ulpevent *event = NULL; struct sctp_sock *sp = sctp_sk(sk); @@ -2093,9 +2093,8 @@ static int sctp_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, int err = 0; int skb_len; - pr_debug("%s: sk:%p, msghdr:%p, len:%zd, noblock:%d, flags:0x%x, " - "addr_len:%p)\n", __func__, sk, msg, len, noblock, flags, - addr_len); + pr_debug("%s: sk:%p, msghdr:%p, len:%zd, flags:0x%x, addr_len:%p)\n", + __func__, sk, msg, len, flags, addr_len); lock_sock(sk); @@ -2105,7 +2104,7 @@ static int sctp_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, goto out; } - skb = sctp_skb_recv_datagram(sk, flags, noblock, &err); + skb = sctp_skb_recv_datagram(sk, flags, &err); if (!skb) goto out; @@ -2129,7 +2128,7 @@ static int sctp_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, head_skb = event->chunk->head_skb; else head_skb = skb; - sock_recv_ts_and_drops(msg, sk, head_skb); + sock_recv_cmsgs(msg, sk, head_skb); if (sctp_ulpevent_is_notification(event)) { msg->msg_flags |= MSG_NOTIFICATION; sp->pf->event_msgname(event, msg->msg_name, addr_len); @@ -8978,14 +8977,13 @@ out: * Note: This is pretty much the same routine as in core/datagram.c * with a few changes to make lksctp work. */ -struct sk_buff *sctp_skb_recv_datagram(struct sock *sk, int flags, - int noblock, int *err) +struct sk_buff *sctp_skb_recv_datagram(struct sock *sk, int flags, int *err) { int error; struct sk_buff *skb; long timeo; - timeo = sock_rcvtimeo(sk, noblock); + timeo = sock_rcvtimeo(sk, flags & MSG_DONTWAIT); pr_debug("%s: timeo:%ld, max:%ld\n", __func__, timeo, MAX_SCHEDULE_TIMEOUT); @@ -9018,7 +9016,7 @@ struct sk_buff *sctp_skb_recv_datagram(struct sock *sk, int flags, break; if (sk_can_busy_loop(sk)) { - sk_busy_loop(sk, noblock); + sk_busy_loop(sk, flags & MSG_DONTWAIT); if (!skb_queue_empty_lockless(&sk->sk_receive_queue)) continue; diff --git a/net/sctp/ulpevent.c b/net/sctp/ulpevent.c index 0c3d2b4d7321..8920ca92a011 100644 --- a/net/sctp/ulpevent.c +++ b/net/sctp/ulpevent.c @@ -1063,7 +1063,7 @@ void sctp_ulpevent_read_nxtinfo(const struct sctp_ulpevent *event, struct sk_buff *skb; int err; - skb = sctp_skb_recv_datagram(sk, MSG_PEEK, 1, &err); + skb = sctp_skb_recv_datagram(sk, MSG_PEEK | MSG_DONTWAIT, &err); if (skb != NULL) { __sctp_ulpevent_read_nxtinfo(sctp_skb2event(skb), msghdr, skb); diff --git a/net/socket.c b/net/socket.c index 6887840682bb..f0c39c874665 100644 --- a/net/socket.c +++ b/net/socket.c @@ -930,13 +930,22 @@ static inline void sock_recv_drops(struct msghdr *msg, struct sock *sk, sizeof(__u32), &SOCK_SKB_CB(skb)->dropcount); } -void __sock_recv_ts_and_drops(struct msghdr *msg, struct sock *sk, - struct sk_buff *skb) +static void sock_recv_mark(struct msghdr *msg, struct sock *sk, + struct sk_buff *skb) +{ + if (sock_flag(sk, SOCK_RCVMARK) && skb) + put_cmsg(msg, SOL_SOCKET, SO_MARK, sizeof(__u32), + &skb->mark); +} + +void __sock_recv_cmsgs(struct msghdr *msg, struct sock *sk, + struct sk_buff *skb) { sock_recv_timestamp(msg, sk, skb); sock_recv_drops(msg, sk, skb); + sock_recv_mark(msg, sk, skb); } -EXPORT_SYMBOL_GPL(__sock_recv_ts_and_drops); +EXPORT_SYMBOL_GPL(__sock_recv_cmsgs); INDIRECT_CALLABLE_DECLARE(int inet_recvmsg(struct socket *, struct msghdr *, size_t, int)); diff --git a/net/sunrpc/svcsock.c b/net/sunrpc/svcsock.c index cc35ec433400..45336e68bf79 100644 --- a/net/sunrpc/svcsock.c +++ b/net/sunrpc/svcsock.c @@ -464,7 +464,7 @@ static int svc_udp_recvfrom(struct svc_rqst *rqstp) 0, 0, MSG_PEEK | MSG_DONTWAIT); if (err < 0) goto out_recv_err; - skb = skb_recv_udp(svsk->sk_sk, 0, 1, &err); + skb = skb_recv_udp(svsk->sk_sk, MSG_DONTWAIT, &err); if (!skb) goto out_recv_err; diff --git a/net/sunrpc/xprtsock.c b/net/sunrpc/xprtsock.c index 8ab64ea46870..5c91c5457197 100644 --- a/net/sunrpc/xprtsock.c +++ b/net/sunrpc/xprtsock.c @@ -1355,7 +1355,7 @@ static void xs_udp_data_receive(struct sock_xprt *transport) if (sk == NULL) goto out; for (;;) { - skb = skb_recv_udp(sk, 0, 1, &err); + skb = skb_recv_udp(sk, MSG_DONTWAIT, &err); if (skb == NULL) break; xs_udp_data_read_skb(&transport->xprt, sk, skb); diff --git a/net/tls/tls_device.c b/net/tls/tls_device.c index af875ad4a822..b12f81a2b44c 100644 --- a/net/tls/tls_device.c +++ b/net/tls/tls_device.c @@ -964,11 +964,9 @@ int tls_device_decrypted(struct sock *sk, struct tls_context *tls_ctx, tls_ctx->rx.rec_seq, rxm->full_len, is_encrypted, is_decrypted); - ctx->sw.decrypted |= is_decrypted; - if (unlikely(test_bit(TLS_RX_DEV_DEGRADED, &tls_ctx->flags))) { if (likely(is_encrypted || is_decrypted)) - return 0; + return is_decrypted; /* After tls_device_down disables the offload, the next SKB will * likely have initial fragments decrypted, and final ones not @@ -983,7 +981,7 @@ int tls_device_decrypted(struct sock *sk, struct tls_context *tls_ctx, */ if (is_decrypted) { ctx->resync_nh_reset = 1; - return 0; + return is_decrypted; } if (is_encrypted) { tls_device_core_ctrl_rx_resync(tls_ctx, ctx, sk, skb); diff --git a/net/tls/tls_sw.c b/net/tls/tls_sw.c index a8976ef95528..939d1673f508 100644 --- a/net/tls/tls_sw.c +++ b/net/tls/tls_sw.c @@ -44,6 +44,11 @@ #include <net/strparser.h> #include <net/tls.h> +struct tls_decrypt_arg { + bool zc; + bool async; +}; + noinline void tls_err_abort(struct sock *sk, int err) { WARN_ON_ONCE(err >= 0); @@ -128,32 +133,31 @@ static int skb_nsg(struct sk_buff *skb, int offset, int len) return __skb_nsg(skb, offset, len, 0); } -static int padding_length(struct tls_sw_context_rx *ctx, - struct tls_prot_info *prot, struct sk_buff *skb) +static int padding_length(struct tls_prot_info *prot, struct sk_buff *skb) { struct strp_msg *rxm = strp_msg(skb); + struct tls_msg *tlm = tls_msg(skb); int sub = 0; /* Determine zero-padding length */ if (prot->version == TLS_1_3_VERSION) { + int offset = rxm->full_len - TLS_TAG_SIZE - 1; char content_type = 0; int err; - int back = 17; while (content_type == 0) { - if (back > rxm->full_len - prot->prepend_size) + if (offset < prot->prepend_size) return -EBADMSG; - err = skb_copy_bits(skb, - rxm->offset + rxm->full_len - back, + err = skb_copy_bits(skb, rxm->offset + offset, &content_type, 1); if (err) return err; if (content_type) break; sub++; - back++; + offset--; } - ctx->control = content_type; + tlm->control = content_type; } return sub; } @@ -169,7 +173,6 @@ static void tls_decrypt_done(struct crypto_async_request *req, int err) struct scatterlist *sg; struct sk_buff *skb; unsigned int pages; - int pending; skb = (struct sk_buff *)req->data; tls_ctx = tls_get_ctx(skb->sk); @@ -185,17 +188,12 @@ static void tls_decrypt_done(struct crypto_async_request *req, int err) tls_err_abort(skb->sk, err); } else { struct strp_msg *rxm = strp_msg(skb); - int pad; - pad = padding_length(ctx, prot, skb); - if (pad < 0) { - ctx->async_wait.err = pad; - tls_err_abort(skb->sk, pad); - } else { - rxm->full_len -= pad; - rxm->offset += prot->prepend_size; - rxm->full_len -= prot->overhead_size; - } + /* No TLS 1.3 support with async crypto */ + WARN_ON(prot->tail_size); + + rxm->offset += prot->prepend_size; + rxm->full_len -= prot->overhead_size; } /* After using skb->sk to propagate sk through crypto async callback @@ -217,9 +215,7 @@ static void tls_decrypt_done(struct crypto_async_request *req, int err) kfree(aead_req); spin_lock_bh(&ctx->decrypt_compl_lock); - pending = atomic_dec_return(&ctx->decrypt_pending); - - if (!pending && ctx->async_notify) + if (!atomic_dec_return(&ctx->decrypt_pending)) complete(&ctx->async_wait.completion); spin_unlock_bh(&ctx->decrypt_compl_lock); } @@ -231,7 +227,7 @@ static int tls_do_decryption(struct sock *sk, char *iv_recv, size_t data_len, struct aead_request *aead_req, - bool async) + struct tls_decrypt_arg *darg) { struct tls_context *tls_ctx = tls_get_ctx(sk); struct tls_prot_info *prot = &tls_ctx->prot_info; @@ -244,7 +240,7 @@ static int tls_do_decryption(struct sock *sk, data_len + prot->tag_size, (u8 *)iv_recv); - if (async) { + if (darg->async) { /* Using skb->sk to push sk through to crypto async callback * handler. This allows propagating errors up to the socket * if needed. It _must_ be cleared in the async handler @@ -264,14 +260,15 @@ static int tls_do_decryption(struct sock *sk, ret = crypto_aead_decrypt(aead_req); if (ret == -EINPROGRESS) { - if (async) - return ret; + if (darg->async) + return 0; ret = crypto_wait_req(ret, &ctx->async_wait); } + darg->async = false; - if (async) - atomic_dec(&ctx->decrypt_pending); + if (ret == -EBADMSG) + TLS_INC_STATS(sock_net(sk), LINUX_MIB_TLSDECRYPTERROR); return ret; } @@ -1346,15 +1343,14 @@ static struct sk_buff *tls_wait_data(struct sock *sk, struct sk_psock *psock, return skb; } -static int tls_setup_from_iter(struct sock *sk, struct iov_iter *from, +static int tls_setup_from_iter(struct iov_iter *from, int length, int *pages_used, - unsigned int *size_used, struct scatterlist *to, int to_max_pages) { int rc = 0, i = 0, num_elem = *pages_used, maxpages; struct page *pages[MAX_SKB_FRAGS]; - unsigned int size = *size_used; + unsigned int size = 0; ssize_t copied, use; size_t offset; @@ -1397,8 +1393,7 @@ static int tls_setup_from_iter(struct sock *sk, struct iov_iter *from, sg_mark_end(&to[num_elem - 1]); out: if (rc) - iov_iter_revert(from, size - *size_used); - *size_used = size; + iov_iter_revert(from, size); *pages_used = num_elem; return rc; @@ -1415,12 +1410,13 @@ out: static int decrypt_internal(struct sock *sk, struct sk_buff *skb, struct iov_iter *out_iov, struct scatterlist *out_sg, - int *chunk, bool *zc, bool async) + struct tls_decrypt_arg *darg) { struct tls_context *tls_ctx = tls_get_ctx(sk); struct tls_sw_context_rx *ctx = tls_sw_ctx_rx(tls_ctx); struct tls_prot_info *prot = &tls_ctx->prot_info; struct strp_msg *rxm = strp_msg(skb); + struct tls_msg *tlm = tls_msg(skb); int n_sgin, n_sgout, nsg, mem_size, aead_size, err, pages = 0; struct aead_request *aead_req; struct sk_buff *unused; @@ -1431,7 +1427,7 @@ static int decrypt_internal(struct sock *sk, struct sk_buff *skb, prot->tail_size; int iv_offset = 0; - if (*zc && (out_iov || out_sg)) { + if (darg->zc && (out_iov || out_sg)) { if (out_iov) n_sgout = 1 + iov_iter_npages_cap(out_iov, INT_MAX, data_len); @@ -1441,7 +1437,7 @@ static int decrypt_internal(struct sock *sk, struct sk_buff *skb, rxm->full_len - prot->prepend_size); } else { n_sgout = 0; - *zc = false; + darg->zc = false; n_sgin = skb_cow_data(skb, 0, &unused); } @@ -1456,7 +1452,7 @@ static int decrypt_internal(struct sock *sk, struct sk_buff *skb, aead_size = sizeof(*aead_req) + crypto_aead_reqsize(ctx->aead_recv); mem_size = aead_size + (nsg * sizeof(struct scatterlist)); mem_size = mem_size + prot->aad_size; - mem_size = mem_size + crypto_aead_ivsize(ctx->aead_recv); + mem_size = mem_size + MAX_IV_SIZE; /* Allocate a single block of memory which contains * aead_req || sgin[] || sgout[] || aad || iv. @@ -1486,26 +1482,26 @@ static int decrypt_internal(struct sock *sk, struct sk_buff *skb, } /* Prepare IV */ - err = skb_copy_bits(skb, rxm->offset + TLS_HEADER_SIZE, - iv + iv_offset + prot->salt_size, - prot->iv_size); - if (err < 0) { - kfree(mem); - return err; - } if (prot->version == TLS_1_3_VERSION || - prot->cipher_type == TLS_CIPHER_CHACHA20_POLY1305) + prot->cipher_type == TLS_CIPHER_CHACHA20_POLY1305) { memcpy(iv + iv_offset, tls_ctx->rx.iv, prot->iv_size + prot->salt_size); - else + } else { + err = skb_copy_bits(skb, rxm->offset + TLS_HEADER_SIZE, + iv + iv_offset + prot->salt_size, + prot->iv_size); + if (err < 0) { + kfree(mem); + return err; + } memcpy(iv + iv_offset, tls_ctx->rx.iv, prot->salt_size); - + } xor_iv_with_seq(prot, iv + iv_offset, tls_ctx->rx.rec_seq); /* Prepare AAD */ tls_make_aad(aad, rxm->full_len - prot->overhead_size + prot->tail_size, - tls_ctx->rx.rec_seq, ctx->control, prot); + tls_ctx->rx.rec_seq, tlm->control, prot); /* Prepare sgin */ sg_init_table(sgin, n_sgin); @@ -1523,9 +1519,8 @@ static int decrypt_internal(struct sock *sk, struct sk_buff *skb, sg_init_table(sgout, n_sgout); sg_set_buf(&sgout[0], aad, prot->aad_size); - *chunk = 0; - err = tls_setup_from_iter(sk, out_iov, data_len, - &pages, chunk, &sgout[1], + err = tls_setup_from_iter(out_iov, data_len, + &pages, &sgout[1], (n_sgout - 1)); if (err < 0) goto fallback_to_reg_recv; @@ -1538,15 +1533,14 @@ static int decrypt_internal(struct sock *sk, struct sk_buff *skb, fallback_to_reg_recv: sgout = sgin; pages = 0; - *chunk = data_len; - *zc = false; + darg->zc = false; } /* Prepare and submit AEAD request */ err = tls_do_decryption(sk, skb, sgin, sgout, iv, - data_len, aead_req, async); - if (err == -EINPROGRESS) - return err; + data_len, aead_req, darg); + if (darg->async) + return 0; /* Release the pages in case iov was mapped to pages */ for (; pages > 0; pages--) @@ -1557,87 +1551,83 @@ fallback_to_reg_recv: } static int decrypt_skb_update(struct sock *sk, struct sk_buff *skb, - struct iov_iter *dest, int *chunk, bool *zc, - bool async) + struct iov_iter *dest, + struct tls_decrypt_arg *darg) { struct tls_context *tls_ctx = tls_get_ctx(sk); - struct tls_sw_context_rx *ctx = tls_sw_ctx_rx(tls_ctx); struct tls_prot_info *prot = &tls_ctx->prot_info; struct strp_msg *rxm = strp_msg(skb); - int pad, err = 0; + struct tls_msg *tlm = tls_msg(skb); + int pad, err; - if (!ctx->decrypted) { - if (tls_ctx->rx_conf == TLS_HW) { - err = tls_device_decrypted(sk, tls_ctx, skb, rxm); - if (err < 0) - return err; - } + if (tlm->decrypted) { + darg->zc = false; + darg->async = false; + return 0; + } - /* Still not decrypted after tls_device */ - if (!ctx->decrypted) { - err = decrypt_internal(sk, skb, dest, NULL, chunk, zc, - async); - if (err < 0) { - if (err == -EINPROGRESS) - tls_advance_record_sn(sk, prot, - &tls_ctx->rx); - else if (err == -EBADMSG) - TLS_INC_STATS(sock_net(sk), - LINUX_MIB_TLSDECRYPTERROR); - return err; - } - } else { - *zc = false; + if (tls_ctx->rx_conf == TLS_HW) { + err = tls_device_decrypted(sk, tls_ctx, skb, rxm); + if (err < 0) + return err; + if (err > 0) { + tlm->decrypted = 1; + darg->zc = false; + darg->async = false; + goto decrypt_done; } + } - pad = padding_length(ctx, prot, skb); - if (pad < 0) - return pad; + err = decrypt_internal(sk, skb, dest, NULL, darg); + if (err < 0) + return err; + if (darg->async) + goto decrypt_next; - rxm->full_len -= pad; - rxm->offset += prot->prepend_size; - rxm->full_len -= prot->overhead_size; - tls_advance_record_sn(sk, prot, &tls_ctx->rx); - ctx->decrypted = 1; - ctx->saved_data_ready(sk); - } else { - *zc = false; - } +decrypt_done: + pad = padding_length(prot, skb); + if (pad < 0) + return pad; - return err; + rxm->full_len -= pad; + rxm->offset += prot->prepend_size; + rxm->full_len -= prot->overhead_size; + tlm->decrypted = 1; +decrypt_next: + tls_advance_record_sn(sk, prot, &tls_ctx->rx); + + return 0; } int decrypt_skb(struct sock *sk, struct sk_buff *skb, struct scatterlist *sgout) { - bool zc = true; - int chunk; + struct tls_decrypt_arg darg = { .zc = true, }; - return decrypt_internal(sk, skb, NULL, sgout, &chunk, &zc, false); + return decrypt_internal(sk, skb, NULL, sgout, &darg); } -static bool tls_sw_advance_skb(struct sock *sk, struct sk_buff *skb, - unsigned int len) +static int tls_record_content_type(struct msghdr *msg, struct tls_msg *tlm, + u8 *control) { - struct tls_context *tls_ctx = tls_get_ctx(sk); - struct tls_sw_context_rx *ctx = tls_sw_ctx_rx(tls_ctx); - - if (skb) { - struct strp_msg *rxm = strp_msg(skb); - - if (len < rxm->full_len) { - rxm->offset += len; - rxm->full_len -= len; - return false; + int err; + + if (!*control) { + *control = tlm->control; + if (!*control) + return -EBADMSG; + + err = put_cmsg(msg, SOL_TLS, TLS_GET_RECORD_TYPE, + sizeof(*control), control); + if (*control != TLS_RECORD_TYPE_DATA) { + if (err || msg->msg_flags & MSG_CTRUNC) + return -EIO; } - consume_skb(skb); + } else if (*control != tlm->control) { + return 0; } - /* Finished with message */ - ctx->recv_pkt = NULL; - __strp_unpause(&ctx->strp); - - return true; + return 1; } /* This function traverses the rx_list in tls receive context to copies the @@ -1648,31 +1638,23 @@ static bool tls_sw_advance_skb(struct sock *sk, struct sk_buff *skb, static int process_rx_list(struct tls_sw_context_rx *ctx, struct msghdr *msg, u8 *control, - bool *cmsg, size_t skip, size_t len, bool zc, bool is_peek) { struct sk_buff *skb = skb_peek(&ctx->rx_list); - u8 ctrl = *control; - u8 msgc = *cmsg; struct tls_msg *tlm; ssize_t copied = 0; - - /* Set the record type in 'control' if caller didn't pass it */ - if (!ctrl && skb) { - tlm = tls_msg(skb); - ctrl = tlm->control; - } + int err; while (skip && skb) { struct strp_msg *rxm = strp_msg(skb); tlm = tls_msg(skb); - /* Cannot process a record of different type */ - if (ctrl != tlm->control) - return 0; + err = tls_record_content_type(msg, tlm, control); + if (err <= 0) + goto out; if (skip < rxm->full_len) break; @@ -1688,30 +1670,15 @@ static int process_rx_list(struct tls_sw_context_rx *ctx, tlm = tls_msg(skb); - /* Cannot process a record of different type */ - if (ctrl != tlm->control) - return 0; - - /* Set record type if not already done. For a non-data record, - * do not proceed if record type could not be copied. - */ - if (!msgc) { - int cerr = put_cmsg(msg, SOL_TLS, TLS_GET_RECORD_TYPE, - sizeof(ctrl), &ctrl); - msgc = true; - if (ctrl != TLS_RECORD_TYPE_DATA) { - if (cerr || msg->msg_flags & MSG_CTRUNC) - return -EIO; - - *cmsg = msgc; - } - } + err = tls_record_content_type(msg, tlm, control); + if (err <= 0) + goto out; if (!zc || (rxm->full_len - skip) > len) { - int err = skb_copy_datagram_msg(skb, rxm->offset + skip, + err = skb_copy_datagram_msg(skb, rxm->offset + skip, msg, chunk); if (err < 0) - return err; + goto out; } len = len - chunk; @@ -1738,21 +1705,21 @@ static int process_rx_list(struct tls_sw_context_rx *ctx, next_skb = skb_peek_next(skb, &ctx->rx_list); if (!is_peek) { - skb_unlink(skb, &ctx->rx_list); + __skb_unlink(skb, &ctx->rx_list); consume_skb(skb); } skb = next_skb; } + err = 0; - *control = ctrl; - return copied; +out: + return copied ? : err; } int tls_sw_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, - int nonblock, int flags, int *addr_len) { @@ -1766,16 +1733,13 @@ int tls_sw_recvmsg(struct sock *sk, struct tls_msg *tlm; struct sk_buff *skb; ssize_t copied = 0; - bool cmsg = false; + bool async = false; int target, err = 0; long timeo; bool is_kvec = iov_iter_is_kvec(&msg->msg_iter); bool is_peek = flags & MSG_PEEK; bool bpf_strp_enabled; - int num_async = 0; - int pending; - - flags |= nonblock; + bool zc_capable; if (unlikely(flags & MSG_ERRQUEUE)) return sock_recv_errqueue(sk, msg, len, SOL_IP, IP_RECVERR); @@ -1784,81 +1748,64 @@ int tls_sw_recvmsg(struct sock *sk, lock_sock(sk); bpf_strp_enabled = sk_psock_strp_enabled(psock); + /* If crypto failed the connection is broken */ + err = ctx->async_wait.err; + if (err) + goto end; + /* Process pending decrypted records. It must be non-zero-copy */ - err = process_rx_list(ctx, msg, &control, &cmsg, 0, len, false, - is_peek); - if (err < 0) { - tls_err_abort(sk, err); + err = process_rx_list(ctx, msg, &control, 0, len, false, is_peek); + if (err < 0) goto end; - } else { - copied = err; - } + copied = err; if (len <= copied) - goto recv_end; + goto end; target = sock_rcvlowat(sk, flags & MSG_WAITALL, len); len = len - copied; timeo = sock_rcvtimeo(sk, flags & MSG_DONTWAIT); + zc_capable = !bpf_strp_enabled && !is_kvec && !is_peek && + prot->version != TLS_1_3_VERSION; + decrypted = 0; while (len && (decrypted + copied < target || ctx->recv_pkt)) { - bool retain_skb = false; - bool zc = false; - int to_decrypt; - int chunk = 0; - bool async_capable; - bool async = false; + struct tls_decrypt_arg darg = {}; + int to_decrypt, chunk; skb = tls_wait_data(sk, psock, flags & MSG_DONTWAIT, timeo, &err); if (!skb) { if (psock) { - int ret = sk_msg_recvmsg(sk, psock, msg, len, - flags); - - if (ret > 0) { - decrypted += ret; - len -= ret; - continue; - } + chunk = sk_msg_recvmsg(sk, psock, msg, len, + flags); + if (chunk > 0) + goto leave_on_list; } goto recv_end; - } else { - tlm = tls_msg(skb); - if (prot->version == TLS_1_3_VERSION) - tlm->control = 0; - else - tlm->control = ctx->control; } rxm = strp_msg(skb); + tlm = tls_msg(skb); to_decrypt = rxm->full_len - prot->overhead_size; - if (to_decrypt <= len && !is_kvec && !is_peek && - ctx->control == TLS_RECORD_TYPE_DATA && - prot->version != TLS_1_3_VERSION && - !bpf_strp_enabled) - zc = true; + if (zc_capable && to_decrypt <= len && + tlm->control == TLS_RECORD_TYPE_DATA) + darg.zc = true; /* Do not use async mode if record is non-data */ - if (ctx->control == TLS_RECORD_TYPE_DATA && !bpf_strp_enabled) - async_capable = ctx->async_capable; + if (tlm->control == TLS_RECORD_TYPE_DATA && !bpf_strp_enabled) + darg.async = ctx->async_capable; else - async_capable = false; + darg.async = false; - err = decrypt_skb_update(sk, skb, &msg->msg_iter, - &chunk, &zc, async_capable); - if (err < 0 && err != -EINPROGRESS) { + err = decrypt_skb_update(sk, skb, &msg->msg_iter, &darg); + if (err < 0) { tls_err_abort(sk, -EBADMSG); goto recv_end; } - if (err == -EINPROGRESS) { - async = true; - num_async++; - } else if (prot->version == TLS_1_3_VERSION) { - tlm->control = ctx->control; - } + async |= darg.async; /* If the type of records being processed is not known yet, * set it to record type just dequeued. If it is already known, @@ -1867,131 +1814,105 @@ int tls_sw_recvmsg(struct sock *sk, * is known just after record is dequeued from stream parser. * For tls1.3, we disable async. */ - - if (!control) - control = tlm->control; - else if (control != tlm->control) + err = tls_record_content_type(msg, tlm, &control); + if (err <= 0) goto recv_end; - if (!cmsg) { - int cerr; - - cerr = put_cmsg(msg, SOL_TLS, TLS_GET_RECORD_TYPE, - sizeof(control), &control); - cmsg = true; - if (control != TLS_RECORD_TYPE_DATA) { - if (cerr || msg->msg_flags & MSG_CTRUNC) { - err = -EIO; - goto recv_end; - } - } + ctx->recv_pkt = NULL; + __strp_unpause(&ctx->strp); + __skb_queue_tail(&ctx->rx_list, skb); + + if (async) { + /* TLS 1.2-only, to_decrypt must be text length */ + chunk = min_t(int, to_decrypt, len); +leave_on_list: + decrypted += chunk; + len -= chunk; + continue; } + /* TLS 1.3 may have updated the length by more than overhead */ + chunk = rxm->full_len; - if (async) - goto pick_next_record; + if (!darg.zc) { + bool partially_consumed = chunk > len; - if (!zc) { if (bpf_strp_enabled) { err = sk_psock_tls_strp_read(psock, skb); if (err != __SK_PASS) { rxm->offset = rxm->offset + rxm->full_len; rxm->full_len = 0; + __skb_unlink(skb, &ctx->rx_list); if (err == __SK_DROP) consume_skb(skb); - ctx->recv_pkt = NULL; - __strp_unpause(&ctx->strp); continue; } } - if (rxm->full_len > len) { - retain_skb = true; + if (partially_consumed) chunk = len; - } else { - chunk = rxm->full_len; - } err = skb_copy_datagram_msg(skb, rxm->offset, msg, chunk); if (err < 0) goto recv_end; - if (!is_peek) { - rxm->offset = rxm->offset + chunk; - rxm->full_len = rxm->full_len - chunk; + if (is_peek) + goto leave_on_list; + + if (partially_consumed) { + rxm->offset += chunk; + rxm->full_len -= chunk; + goto leave_on_list; } } -pick_next_record: - if (chunk > len) - chunk = len; - decrypted += chunk; len -= chunk; - /* For async or peek case, queue the current skb */ - if (async || is_peek || retain_skb) { - skb_queue_tail(&ctx->rx_list, skb); - skb = NULL; - } + __skb_unlink(skb, &ctx->rx_list); + consume_skb(skb); - if (tls_sw_advance_skb(sk, skb, chunk)) { - /* Return full control message to - * userspace before trying to parse - * another message type - */ - msg->msg_flags |= MSG_EOR; - if (control != TLS_RECORD_TYPE_DATA) - goto recv_end; - } else { + /* Return full control message to userspace before trying + * to parse another message type + */ + msg->msg_flags |= MSG_EOR; + if (control != TLS_RECORD_TYPE_DATA) break; - } } recv_end: - if (num_async) { + if (async) { + int ret, pending; + /* Wait for all previously submitted records to be decrypted */ spin_lock_bh(&ctx->decrypt_compl_lock); - ctx->async_notify = true; + reinit_completion(&ctx->async_wait.completion); pending = atomic_read(&ctx->decrypt_pending); spin_unlock_bh(&ctx->decrypt_compl_lock); if (pending) { - err = crypto_wait_req(-EINPROGRESS, &ctx->async_wait); - if (err) { - /* one of async decrypt failed */ - tls_err_abort(sk, err); - copied = 0; + ret = crypto_wait_req(-EINPROGRESS, &ctx->async_wait); + if (ret) { + if (err >= 0 || err == -EINPROGRESS) + err = ret; decrypted = 0; goto end; } - } else { - reinit_completion(&ctx->async_wait.completion); } - /* There can be no concurrent accesses, since we have no - * pending decrypt operations - */ - WRITE_ONCE(ctx->async_notify, false); - /* Drain records from the rx_list & copy if required */ if (is_peek || is_kvec) - err = process_rx_list(ctx, msg, &control, &cmsg, copied, + err = process_rx_list(ctx, msg, &control, copied, decrypted, false, is_peek); else - err = process_rx_list(ctx, msg, &control, &cmsg, 0, + err = process_rx_list(ctx, msg, &control, 0, decrypted, true, is_peek); - if (err < 0) { - tls_err_abort(sk, err); - copied = 0; - goto end; - } + decrypted = max(err, 0); } copied += decrypted; end: release_sock(sk); - sk_defer_free_flush(sk); if (psock) sk_psock_put(sk, psock); return copied ? : err; @@ -2005,13 +1926,13 @@ ssize_t tls_sw_splice_read(struct socket *sock, loff_t *ppos, struct tls_sw_context_rx *ctx = tls_sw_ctx_rx(tls_ctx); struct strp_msg *rxm = NULL; struct sock *sk = sock->sk; + struct tls_msg *tlm; struct sk_buff *skb; ssize_t copied = 0; bool from_queue; int err = 0; long timeo; int chunk; - bool zc = false; lock_sock(sk); @@ -2021,26 +1942,29 @@ ssize_t tls_sw_splice_read(struct socket *sock, loff_t *ppos, if (from_queue) { skb = __skb_dequeue(&ctx->rx_list); } else { + struct tls_decrypt_arg darg = {}; + skb = tls_wait_data(sk, NULL, flags & SPLICE_F_NONBLOCK, timeo, &err); if (!skb) goto splice_read_end; - err = decrypt_skb_update(sk, skb, NULL, &chunk, &zc, false); + err = decrypt_skb_update(sk, skb, NULL, &darg); if (err < 0) { tls_err_abort(sk, -EBADMSG); goto splice_read_end; } } + rxm = strp_msg(skb); + tlm = tls_msg(skb); + /* splice does not support reading control messages */ - if (ctx->control != TLS_RECORD_TYPE_DATA) { + if (tlm->control != TLS_RECORD_TYPE_DATA) { err = -EINVAL; goto splice_read_end; } - rxm = strp_msg(skb); - chunk = min_t(unsigned int, rxm->full_len, len); copied = skb_splice_bits(skb, sk, rxm->offset, pipe, chunk, flags); if (copied < 0) @@ -2060,7 +1984,6 @@ ssize_t tls_sw_splice_read(struct socket *sock, loff_t *ppos, splice_read_end: release_sock(sk); - sk_defer_free_flush(sk); return copied ? : err; } @@ -2084,10 +2007,10 @@ bool tls_sw_sock_is_readable(struct sock *sk) static int tls_read_size(struct strparser *strp, struct sk_buff *skb) { struct tls_context *tls_ctx = tls_get_ctx(strp->sk); - struct tls_sw_context_rx *ctx = tls_sw_ctx_rx(tls_ctx); struct tls_prot_info *prot = &tls_ctx->prot_info; char header[TLS_HEADER_SIZE + MAX_IV_SIZE]; struct strp_msg *rxm = strp_msg(skb); + struct tls_msg *tlm = tls_msg(skb); size_t cipher_overhead; size_t data_len = 0; int ret; @@ -2104,11 +2027,11 @@ static int tls_read_size(struct strparser *strp, struct sk_buff *skb) /* Linearize header to local buffer */ ret = skb_copy_bits(skb, rxm->offset, header, prot->prepend_size); - if (ret < 0) goto read_failure; - ctx->control = header[0]; + tlm->decrypted = 0; + tlm->control = header[0]; data_len = ((header[4] & 0xFF) | (header[3] << 8)); @@ -2149,8 +2072,6 @@ static void tls_queue(struct strparser *strp, struct sk_buff *skb) struct tls_context *tls_ctx = tls_get_ctx(strp->sk); struct tls_sw_context_rx *ctx = tls_sw_ctx_rx(tls_ctx); - ctx->decrypted = 0; - ctx->recv_pkt = skb; strp_pause(strp); @@ -2241,7 +2162,7 @@ void tls_sw_release_resources_rx(struct sock *sk) if (ctx->aead_recv) { kfree_skb(ctx->recv_pkt); ctx->recv_pkt = NULL; - skb_queue_purge(&ctx->rx_list); + __skb_queue_purge(&ctx->rx_list); crypto_free_aead(ctx->aead_recv); strp_stop(&ctx->strp); /* If tls_sw_strparser_arm() was not called (cleanup paths) @@ -2501,7 +2422,7 @@ int tls_set_sw_offload(struct sock *sk, struct tls_context *ctx, int tx) /* Sanity-check the sizes for stack allocations. */ if (iv_size > MAX_IV_SIZE || nonce_size > MAX_IV_SIZE || - rec_seq_size > TLS_MAX_REC_SEQ_SIZE) { + rec_seq_size > TLS_MAX_REC_SEQ_SIZE || tag_size != TLS_TAG_SIZE) { rc = -EINVAL; goto free_priv; } diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c index e71a312faa1e..e1dd9e9c8452 100644 --- a/net/unix/af_unix.c +++ b/net/unix/af_unix.c @@ -1643,7 +1643,8 @@ static int unix_accept(struct socket *sock, struct socket *newsock, int flags, * so that no locks are necessary. */ - skb = skb_recv_datagram(sk, 0, flags&O_NONBLOCK, &err); + skb = skb_recv_datagram(sk, (flags & O_NONBLOCK) ? MSG_DONTWAIT : 0, + &err); if (!skb) { /* This means receive shutdown. */ if (err == 0) @@ -2483,8 +2484,7 @@ static int unix_dgram_recvmsg(struct socket *sock, struct msghdr *msg, size_t si const struct proto *prot = READ_ONCE(sk->sk_prot); if (prot != &unix_dgram_proto) - return prot->recvmsg(sk, msg, size, flags & MSG_DONTWAIT, - flags & ~MSG_DONTWAIT, NULL); + return prot->recvmsg(sk, msg, size, flags, NULL); #endif return __unix_dgram_recvmsg(sk, msg, size, flags); } @@ -2500,7 +2500,7 @@ static int unix_read_sock(struct sock *sk, read_descriptor_t *desc, int used, err; mutex_lock(&u->iolock); - skb = skb_recv_datagram(sk, 0, 1, &err); + skb = skb_recv_datagram(sk, MSG_DONTWAIT, &err); mutex_unlock(&u->iolock); if (!skb) return err; @@ -2916,8 +2916,7 @@ static int unix_stream_recvmsg(struct socket *sock, struct msghdr *msg, const struct proto *prot = READ_ONCE(sk->sk_prot); if (prot != &unix_stream_proto) - return prot->recvmsg(sk, msg, size, flags & MSG_DONTWAIT, - flags & ~MSG_DONTWAIT, NULL); + return prot->recvmsg(sk, msg, size, flags, NULL); #endif return unix_stream_read_generic(&state, true); } diff --git a/net/unix/unix_bpf.c b/net/unix/unix_bpf.c index 452376c6f419..7cf14c6b1725 100644 --- a/net/unix/unix_bpf.c +++ b/net/unix/unix_bpf.c @@ -48,8 +48,7 @@ static int __unix_recvmsg(struct sock *sk, struct msghdr *msg, } static int unix_bpf_recvmsg(struct sock *sk, struct msghdr *msg, - size_t len, int nonblock, int flags, - int *addr_len) + size_t len, int flags, int *addr_len) { struct unix_sock *u = unix_sk(sk); struct sk_psock *psock; @@ -73,7 +72,7 @@ msg_bytes_ready: long timeo; int data; - timeo = sock_rcvtimeo(sk, nonblock); + timeo = sock_rcvtimeo(sk, flags & MSG_DONTWAIT); data = unix_msg_wait_data(sk, psock, timeo); if (data) { if (!sk_psock_queue_empty(psock)) diff --git a/net/vmw_vsock/virtio_transport.c b/net/vmw_vsock/virtio_transport.c index ba1c8cc0c467..ad64f403536a 100644 --- a/net/vmw_vsock/virtio_transport.c +++ b/net/vmw_vsock/virtio_transport.c @@ -566,67 +566,28 @@ out: mutex_unlock(&vsock->rx_lock); } -static int virtio_vsock_probe(struct virtio_device *vdev) +static int virtio_vsock_vqs_init(struct virtio_vsock *vsock) { - vq_callback_t *callbacks[] = { - virtio_vsock_rx_done, - virtio_vsock_tx_done, - virtio_vsock_event_done, - }; + struct virtio_device *vdev = vsock->vdev; static const char * const names[] = { "rx", "tx", "event", }; - struct virtio_vsock *vsock = NULL; + vq_callback_t *callbacks[] = { + virtio_vsock_rx_done, + virtio_vsock_tx_done, + virtio_vsock_event_done, + }; int ret; - ret = mutex_lock_interruptible(&the_virtio_vsock_mutex); - if (ret) - return ret; - - /* Only one virtio-vsock device per guest is supported */ - if (rcu_dereference_protected(the_virtio_vsock, - lockdep_is_held(&the_virtio_vsock_mutex))) { - ret = -EBUSY; - goto out; - } - - vsock = kzalloc(sizeof(*vsock), GFP_KERNEL); - if (!vsock) { - ret = -ENOMEM; - goto out; - } - - vsock->vdev = vdev; - - ret = virtio_find_vqs(vsock->vdev, VSOCK_VQ_MAX, - vsock->vqs, callbacks, names, + ret = virtio_find_vqs(vdev, VSOCK_VQ_MAX, vsock->vqs, callbacks, names, NULL); if (ret < 0) - goto out; + return ret; virtio_vsock_update_guest_cid(vsock); - vsock->rx_buf_nr = 0; - vsock->rx_buf_max_nr = 0; - atomic_set(&vsock->queued_replies, 0); - - mutex_init(&vsock->tx_lock); - mutex_init(&vsock->rx_lock); - mutex_init(&vsock->event_lock); - spin_lock_init(&vsock->send_pkt_list_lock); - INIT_LIST_HEAD(&vsock->send_pkt_list); - INIT_WORK(&vsock->rx_work, virtio_transport_rx_work); - INIT_WORK(&vsock->tx_work, virtio_transport_tx_work); - INIT_WORK(&vsock->event_work, virtio_transport_event_work); - INIT_WORK(&vsock->send_pkt_work, virtio_transport_send_pkt_work); - - if (virtio_has_feature(vdev, VIRTIO_VSOCK_F_SEQPACKET)) - vsock->seqpacket_allow = true; - - vdev->priv = vsock; - virtio_device_ready(vdev); mutex_lock(&vsock->tx_lock); @@ -643,30 +604,15 @@ static int virtio_vsock_probe(struct virtio_device *vdev) vsock->event_run = true; mutex_unlock(&vsock->event_lock); - rcu_assign_pointer(the_virtio_vsock, vsock); - - mutex_unlock(&the_virtio_vsock_mutex); - return 0; - -out: - kfree(vsock); - mutex_unlock(&the_virtio_vsock_mutex); - return ret; } -static void virtio_vsock_remove(struct virtio_device *vdev) +static void virtio_vsock_vqs_del(struct virtio_vsock *vsock) { - struct virtio_vsock *vsock = vdev->priv; + struct virtio_device *vdev = vsock->vdev; struct virtio_vsock_pkt *pkt; - mutex_lock(&the_virtio_vsock_mutex); - - vdev->priv = NULL; - rcu_assign_pointer(the_virtio_vsock, NULL); - synchronize_rcu(); - - /* Reset all connected sockets when the device disappear */ + /* Reset all connected sockets when the VQs disappear */ vsock_for_each_connected_socket(&virtio_transport.transport, virtio_vsock_reset_sock); @@ -711,6 +657,78 @@ static void virtio_vsock_remove(struct virtio_device *vdev) /* Delete virtqueues and flush outstanding callbacks if any */ vdev->config->del_vqs(vdev); +} + +static int virtio_vsock_probe(struct virtio_device *vdev) +{ + struct virtio_vsock *vsock = NULL; + int ret; + + ret = mutex_lock_interruptible(&the_virtio_vsock_mutex); + if (ret) + return ret; + + /* Only one virtio-vsock device per guest is supported */ + if (rcu_dereference_protected(the_virtio_vsock, + lockdep_is_held(&the_virtio_vsock_mutex))) { + ret = -EBUSY; + goto out; + } + + vsock = kzalloc(sizeof(*vsock), GFP_KERNEL); + if (!vsock) { + ret = -ENOMEM; + goto out; + } + + vsock->vdev = vdev; + + vsock->rx_buf_nr = 0; + vsock->rx_buf_max_nr = 0; + atomic_set(&vsock->queued_replies, 0); + + mutex_init(&vsock->tx_lock); + mutex_init(&vsock->rx_lock); + mutex_init(&vsock->event_lock); + spin_lock_init(&vsock->send_pkt_list_lock); + INIT_LIST_HEAD(&vsock->send_pkt_list); + INIT_WORK(&vsock->rx_work, virtio_transport_rx_work); + INIT_WORK(&vsock->tx_work, virtio_transport_tx_work); + INIT_WORK(&vsock->event_work, virtio_transport_event_work); + INIT_WORK(&vsock->send_pkt_work, virtio_transport_send_pkt_work); + + if (virtio_has_feature(vdev, VIRTIO_VSOCK_F_SEQPACKET)) + vsock->seqpacket_allow = true; + + vdev->priv = vsock; + + ret = virtio_vsock_vqs_init(vsock); + if (ret < 0) + goto out; + + rcu_assign_pointer(the_virtio_vsock, vsock); + + mutex_unlock(&the_virtio_vsock_mutex); + + return 0; + +out: + kfree(vsock); + mutex_unlock(&the_virtio_vsock_mutex); + return ret; +} + +static void virtio_vsock_remove(struct virtio_device *vdev) +{ + struct virtio_vsock *vsock = vdev->priv; + + mutex_lock(&the_virtio_vsock_mutex); + + vdev->priv = NULL; + rcu_assign_pointer(the_virtio_vsock, NULL); + synchronize_rcu(); + + virtio_vsock_vqs_del(vsock); /* Other works can be queued before 'config->del_vqs()', so we flush * all works before to free the vsock object to avoid use after free. @@ -725,6 +743,49 @@ static void virtio_vsock_remove(struct virtio_device *vdev) kfree(vsock); } +#ifdef CONFIG_PM_SLEEP +static int virtio_vsock_freeze(struct virtio_device *vdev) +{ + struct virtio_vsock *vsock = vdev->priv; + + mutex_lock(&the_virtio_vsock_mutex); + + rcu_assign_pointer(the_virtio_vsock, NULL); + synchronize_rcu(); + + virtio_vsock_vqs_del(vsock); + + mutex_unlock(&the_virtio_vsock_mutex); + + return 0; +} + +static int virtio_vsock_restore(struct virtio_device *vdev) +{ + struct virtio_vsock *vsock = vdev->priv; + int ret; + + mutex_lock(&the_virtio_vsock_mutex); + + /* Only one virtio-vsock device per guest is supported */ + if (rcu_dereference_protected(the_virtio_vsock, + lockdep_is_held(&the_virtio_vsock_mutex))) { + ret = -EBUSY; + goto out; + } + + ret = virtio_vsock_vqs_init(vsock); + if (ret < 0) + goto out; + + rcu_assign_pointer(the_virtio_vsock, vsock); + +out: + mutex_unlock(&the_virtio_vsock_mutex); + return ret; +} +#endif /* CONFIG_PM_SLEEP */ + static struct virtio_device_id id_table[] = { { VIRTIO_ID_VSOCK, VIRTIO_DEV_ANY_ID }, { 0 }, @@ -742,6 +803,10 @@ static struct virtio_driver virtio_vsock_driver = { .id_table = id_table, .probe = virtio_vsock_probe, .remove = virtio_vsock_remove, +#ifdef CONFIG_PM_SLEEP + .freeze = virtio_vsock_freeze, + .restore = virtio_vsock_restore, +#endif }; static int __init virtio_vsock_init(void) diff --git a/net/vmw_vsock/vmci_transport.c b/net/vmw_vsock/vmci_transport.c index b17dc9745188..b14f0ed7427b 100644 --- a/net/vmw_vsock/vmci_transport.c +++ b/net/vmw_vsock/vmci_transport.c @@ -1732,19 +1732,16 @@ static int vmci_transport_dgram_dequeue(struct vsock_sock *vsk, int flags) { int err; - int noblock; struct vmci_datagram *dg; size_t payload_len; struct sk_buff *skb; - noblock = flags & MSG_DONTWAIT; - if (flags & MSG_OOB || flags & MSG_ERRQUEUE) return -EOPNOTSUPP; /* Retrieve the head sk_buff from the socket's receive queue. */ err = 0; - skb = skb_recv_datagram(&vsk->sk, flags, noblock, &err); + skb = skb_recv_datagram(&vsk->sk, flags, &err); if (!skb) return err; diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c index 21e808fcb676..945ed87d12e0 100644 --- a/net/wireless/nl80211.c +++ b/net/wireless/nl80211.c @@ -3710,6 +3710,7 @@ static int nl80211_send_iface(struct sk_buff *msg, u32 portid, u32 seq, int flag wdev_lock(wdev); switch (wdev->iftype) { case NL80211_IFTYPE_AP: + case NL80211_IFTYPE_P2P_GO: if (wdev->ssid_len && nla_put(msg, NL80211_ATTR_SSID, wdev->ssid_len, wdev->ssid)) goto nla_put_failure_locked; diff --git a/net/x25/af_x25.c b/net/x25/af_x25.c index 3a171828638b..6bc2ac8d8146 100644 --- a/net/x25/af_x25.c +++ b/net/x25/af_x25.c @@ -1315,8 +1315,7 @@ static int x25_recvmsg(struct socket *sock, struct msghdr *msg, size_t size, } else { /* Now we can treat all alike */ release_sock(sk); - skb = skb_recv_datagram(sk, flags & ~MSG_DONTWAIT, - flags & MSG_DONTWAIT, &rc); + skb = skb_recv_datagram(sk, flags, &rc); lock_sock(sk); if (!skb) goto out; diff --git a/net/xdp/xsk.c b/net/xdp/xsk.c index 3a9348030e20..e0a4526ab66b 100644 --- a/net/xdp/xsk.c +++ b/net/xdp/xsk.c @@ -184,7 +184,7 @@ static int __xsk_rcv(struct xdp_sock *xs, struct xdp_buff *xdp) xsk_xdp = xsk_buff_alloc(xs->pool); if (!xsk_xdp) { xs->rx_dropped++; - return -ENOSPC; + return -ENOMEM; } xsk_copy_xdp(xsk_xdp, xdp, len); @@ -217,7 +217,7 @@ static bool xsk_is_bound(struct xdp_sock *xs) static int xsk_rcv_check(struct xdp_sock *xs, struct xdp_buff *xdp) { if (!xsk_is_bound(xs)) - return -EINVAL; + return -ENXIO; if (xs->dev != xdp->rxq->dev || xs->queue_id != xdp->rxq->queue_index) return -EINVAL; diff --git a/net/xdp/xsk_queue.h b/net/xdp/xsk_queue.h index 801cda5d1938..a794410989cc 100644 --- a/net/xdp/xsk_queue.h +++ b/net/xdp/xsk_queue.h @@ -263,7 +263,7 @@ static inline u32 xskq_cons_nb_entries(struct xsk_queue *q, u32 max) static inline bool xskq_cons_has_entries(struct xsk_queue *q, u32 cnt) { - return xskq_cons_nb_entries(q, cnt) >= cnt ? true : false; + return xskq_cons_nb_entries(q, cnt) >= cnt; } static inline bool xskq_cons_peek_addr_unchecked(struct xsk_queue *q, u64 *addr) @@ -382,7 +382,7 @@ static inline int xskq_prod_reserve_desc(struct xsk_queue *q, u32 idx; if (xskq_prod_is_full(q)) - return -ENOSPC; + return -ENOBUFS; /* A, matches D */ idx = q->cached_prod++ & q->ring_mask; diff --git a/net/xdp/xskmap.c b/net/xdp/xskmap.c index 65b53fb3de13..acc8e52a4f5f 100644 --- a/net/xdp/xskmap.c +++ b/net/xdp/xskmap.c @@ -9,6 +9,7 @@ #include <net/xdp_sock.h> #include <linux/slab.h> #include <linux/sched.h> +#include <linux/btf_ids.h> #include "xsk.h" @@ -254,7 +255,7 @@ static bool xsk_map_meta_equal(const struct bpf_map *meta0, bpf_map_meta_equal(meta0, meta1); } -static int xsk_map_btf_id; +BTF_ID_LIST_SINGLE(xsk_map_btf_ids, struct, xsk_map) const struct bpf_map_ops xsk_map_ops = { .map_meta_equal = xsk_map_meta_equal, .map_alloc = xsk_map_alloc, @@ -266,7 +267,6 @@ const struct bpf_map_ops xsk_map_ops = { .map_update_elem = xsk_map_update_elem, .map_delete_elem = xsk_map_delete_elem, .map_check_btf = map_check_no_btf, - .map_btf_name = "xsk_map", - .map_btf_id = &xsk_map_btf_id, + .map_btf_id = &xsk_map_btf_ids[0], .map_redirect = xsk_map_redirect, }; diff --git a/net/xfrm/espintcp.c b/net/xfrm/espintcp.c index 1f08ebf7d80c..82d14eea1b5a 100644 --- a/net/xfrm/espintcp.c +++ b/net/xfrm/espintcp.c @@ -131,7 +131,7 @@ static int espintcp_parse(struct strparser *strp, struct sk_buff *skb) } static int espintcp_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, - int nonblock, int flags, int *addr_len) + int flags, int *addr_len) { struct espintcp_ctx *ctx = espintcp_getctx(sk); struct sk_buff *skb; @@ -139,8 +139,6 @@ static int espintcp_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, int copied; int off = 0; - flags |= nonblock ? MSG_DONTWAIT : 0; - skb = __skb_recv_datagram(sk, &ctx->ike_queue, flags, &off, &err); if (!skb) { if (err == -EAGAIN && sk->sk_shutdown & RCV_SHUTDOWN) |