aboutsummaryrefslogtreecommitdiffstats
path: root/net/sctp/socket.c (follow)
AgeCommit message (Collapse)AuthorFilesLines
2022-10-11treewide: use get_random_{u8,u16}() when possible, part 1Jason A. Donenfeld1-1/+1
Rather than truncate a 32-bit value to a 16-bit value or an 8-bit value, simply use the get_random_{u8,u16}() functions, which are faster than wasting the additional bytes from a 32-bit value. This was done mechanically with this coccinelle script: @@ expression E; identifier get_random_u32 =~ "get_random_int|prandom_u32|get_random_u32"; typedef u16; typedef __be16; typedef __le16; typedef u8; @@ ( - (get_random_u32() & 0xffff) + get_random_u16() | - (get_random_u32() & 0xff) + get_random_u8() | - (get_random_u32() % 65536) + get_random_u16() | - (get_random_u32() % 256) + get_random_u8() | - (get_random_u32() >> 16) + get_random_u16() | - (get_random_u32() >> 24) + get_random_u8() | - (u16)get_random_u32() + get_random_u16() | - (u8)get_random_u32() + get_random_u8() | - (__be16)get_random_u32() + (__be16)get_random_u16() | - (__le16)get_random_u32() + (__le16)get_random_u16() | - prandom_u32_max(65536) + get_random_u16() | - prandom_u32_max(256) + get_random_u8() | - E->inet_id = get_random_u32() + E->inet_id = get_random_u16() ) @@ identifier get_random_u32 =~ "get_random_int|prandom_u32|get_random_u32"; typedef u16; identifier v; @@ - u16 v = get_random_u32(); + u16 v = get_random_u16(); @@ identifier get_random_u32 =~ "get_random_int|prandom_u32|get_random_u32"; typedef u8; identifier v; @@ - u8 v = get_random_u32(); + u8 v = get_random_u8(); @@ identifier get_random_u32 =~ "get_random_int|prandom_u32|get_random_u32"; typedef u16; u16 v; @@ - v = get_random_u32(); + v = get_random_u16(); @@ identifier get_random_u32 =~ "get_random_int|prandom_u32|get_random_u32"; typedef u8; u8 v; @@ - v = get_random_u32(); + v = get_random_u8(); // Find a potential literal @literal_mask@ expression LITERAL; type T; identifier get_random_u32 =~ "get_random_int|prandom_u32|get_random_u32"; position p; @@ ((T)get_random_u32()@p & (LITERAL)) // Examine limits @script:python add_one@ literal << literal_mask.LITERAL; RESULT; @@ value = None if literal.startswith('0x'): value = int(literal, 16) elif literal[0] in '123456789': value = int(literal, 10) if value is None: print("I don't know how to handle %s" % (literal)) cocci.include_match(False) elif value < 256: coccinelle.RESULT = cocci.make_ident("get_random_u8") elif value < 65536: coccinelle.RESULT = cocci.make_ident("get_random_u16") else: print("Skipping large mask of %s" % (literal)) cocci.include_match(False) // Replace the literal mask with the calculated result. @plus_one@ expression literal_mask.LITERAL; position literal_mask.p; identifier add_one.RESULT; identifier FUNC; @@ - (FUNC()@p & (LITERAL)) + (RESULT() & LITERAL) Reviewed-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org> Reviewed-by: Kees Cook <keescook@chromium.org> Reviewed-by: Yury Norov <yury.norov@gmail.com> Acked-by: Jakub Kicinski <kuba@kernel.org> Acked-by: Toke Høiland-Jørgensen <toke@toke.dk> # for sch_cake Signed-off-by: Jason A. Donenfeld <Jason@zx2c4.com>
2022-10-11treewide: use prandom_u32_max() when possible, part 1Jason A. Donenfeld1-1/+1
Rather than incurring a division or requesting too many random bytes for the given range, use the prandom_u32_max() function, which only takes the minimum required bytes from the RNG and avoids divisions. This was done mechanically with this coccinelle script: @basic@ expression E; type T; identifier get_random_u32 =~ "get_random_int|prandom_u32|get_random_u32"; typedef u64; @@ ( - ((T)get_random_u32() % (E)) + prandom_u32_max(E) | - ((T)get_random_u32() & ((E) - 1)) + prandom_u32_max(E * XXX_MAKE_SURE_E_IS_POW2) | - ((u64)(E) * get_random_u32() >> 32) + prandom_u32_max(E) | - ((T)get_random_u32() & ~PAGE_MASK) + prandom_u32_max(PAGE_SIZE) ) @multi_line@ identifier get_random_u32 =~ "get_random_int|prandom_u32|get_random_u32"; identifier RAND; expression E; @@ - RAND = get_random_u32(); ... when != RAND - RAND %= (E); + RAND = prandom_u32_max(E); // Find a potential literal @literal_mask@ expression LITERAL; type T; identifier get_random_u32 =~ "get_random_int|prandom_u32|get_random_u32"; position p; @@ ((T)get_random_u32()@p & (LITERAL)) // Add one to the literal. @script:python add_one@ literal << literal_mask.LITERAL; RESULT; @@ value = None if literal.startswith('0x'): value = int(literal, 16) elif literal[0] in '123456789': value = int(literal, 10) if value is None: print("I don't know how to handle %s" % (literal)) cocci.include_match(False) elif value == 2**32 - 1 or value == 2**31 - 1 or value == 2**24 - 1 or value == 2**16 - 1 or value == 2**8 - 1: print("Skipping 0x%x for cleanup elsewhere" % (value)) cocci.include_match(False) elif value & (value + 1) != 0: print("Skipping 0x%x because it's not a power of two minus one" % (value)) cocci.include_match(False) elif literal.startswith('0x'): coccinelle.RESULT = cocci.make_expr("0x%x" % (value + 1)) else: coccinelle.RESULT = cocci.make_expr("%d" % (value + 1)) // Replace the literal mask with the calculated result. @plus_one@ expression literal_mask.LITERAL; position literal_mask.p; expression add_one.RESULT; identifier FUNC; @@ - (FUNC()@p & (LITERAL)) + prandom_u32_max(RESULT) @collapse_ret@ type T; identifier VAR; expression E; @@ { - T VAR; - VAR = (E); - return VAR; + return E; } @drop_var@ type T; identifier VAR; @@ { - T VAR; ... when != VAR } Reviewed-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org> Reviewed-by: Kees Cook <keescook@chromium.org> Reviewed-by: Yury Norov <yury.norov@gmail.com> Reviewed-by: KP Singh <kpsingh@kernel.org> Reviewed-by: Jan Kara <jack@suse.cz> # for ext4 and sbitmap Reviewed-by: Christoph Böhmwalder <christoph.boehmwalder@linbit.com> # for drbd Acked-by: Jakub Kicinski <kuba@kernel.org> Acked-by: Heiko Carstens <hca@linux.ibm.com> # for s390 Acked-by: Ulf Hansson <ulf.hansson@linaro.org> # for mmc Acked-by: Darrick J. Wong <djwong@kernel.org> # for xfs Signed-off-by: Jason A. Donenfeld <Jason@zx2c4.com>
2022-06-10net: keep sk->sk_forward_alloc as small as possibleEric Dumazet1-5/+0
Currently, tcp_memory_allocated can hit tcp_mem[] limits quite fast. Each TCP socket can forward allocate up to 2 MB of memory, even after flow became less active. 10,000 sockets can have reserved 20 GB of memory, and we have no shrinker in place to reclaim that. Instead of trying to reclaim the extra allocations in some places, just keep sk->sk_forward_alloc values as small as possible. This should not impact performance too much now we have per-cpu reserves: Changes to tcp_memory_allocated should not be too frequent. For sockets not using SO_RESERVE_MEM: - idle sockets (no packets in tx/rx queues) have zero forward alloc. - non idle sockets have a forward alloc smaller than one page. Note: - Removal of SK_RECLAIM_CHUNK and SK_RECLAIM_THRESHOLD is left to MPTCP maintainers as a follow up. Signed-off-by: Eric Dumazet <edumazet@google.com> Reviewed-by: Shakeel Butt <shakeelb@google.com> Acked-by: Soheil Hassas Yeganeh <soheil@google.com> Signed-off-by: Jakub Kicinski <kuba@kernel.org>
2022-06-10net: add per_cpu_fw_alloc field to struct protoEric Dumazet1-0/+7
Each protocol having a ->memory_allocated pointer gets a corresponding per-cpu reserve, that following patches will use. Instead of having reserved bytes per socket, we want to have per-cpu reserves. Signed-off-by: Eric Dumazet <edumazet@google.com> Reviewed-by: Shakeel Butt <shakeelb@google.com> Acked-by: Soheil Hassas Yeganeh <soheil@google.com> Signed-off-by: Jakub Kicinski <kuba@kernel.org>
2022-04-28net: SO_RCVMARK socket option for SO_MARK with recvmsg()Erin MacNeil1-1/+1
Adding a new socket option, SO_RCVMARK, to indicate that SO_MARK should be included in the ancillary data returned by recvmsg(). Renamed the sock_recv_ts_and_drops() function to sock_recv_cmsgs(). Signed-off-by: Erin MacNeil <lnx.erin@gmail.com> Reviewed-by: Eric Dumazet <edumazet@google.com> Reviewed-by: David Ahern <dsahern@kernel.org> Acked-by: Marc Kleine-Budde <mkl@pengutronix.de> Link: https://lore.kernel.org/r/20220427200259.2564-1-lnx.erin@gmail.com Signed-off-by: Jakub Kicinski <kuba@kernel.org>
2022-04-15Merge git://git.kernel.org/pub/scm/linux/kernel/git/netdev/netPaolo Abeni1-1/+1
2022-04-12net: remove noblock parameter from recvmsg() entitiesOliver Hartkopp1-9/+7
The internal recvmsg() functions have two parameters 'flags' and 'noblock' that were merged inside skb_recv_datagram(). As a follow up patch to commit f4b41f062c42 ("net: remove noblock parameter from skb_recv_datagram()") this patch removes the separate 'noblock' parameter for recvmsg(). Analogue to the referenced patch for skb_recv_datagram() the 'flags' and 'noblock' parameters are unnecessarily split up with e.g. err = sk->sk_prot->recvmsg(sk, msg, size, flags & MSG_DONTWAIT, flags & ~MSG_DONTWAIT, &addr_len); or in err = INDIRECT_CALL_2(sk->sk_prot->recvmsg, tcp_recvmsg, udp_recvmsg, sk, msg, size, flags & MSG_DONTWAIT, flags & ~MSG_DONTWAIT, &addr_len); instead of simply using only flags all the time and check for MSG_DONTWAIT where needed (to preserve for the formerly separated no(n)block condition). Signed-off-by: Oliver Hartkopp <socketcan@hartkopp.net> Link: https://lore.kernel.org/r/20220411124955.154876-1-socketcan@hartkopp.net Signed-off-by: Paolo Abeni <pabeni@redhat.com>
2022-04-11sctp: Initialize daddr on peeled off socketPetr Malat1-1/+1
Function sctp_do_peeloff() wrongly initializes daddr of the original socket instead of the peeled off socket, which makes getpeername() return zeroes instead of the primary address. Initialize the new socket instead. Fixes: d570ee490fb1 ("[SCTP]: Correctly set daddr for IPv6 sockets during peeloff") Signed-off-by: Petr Malat <oss@malat.biz> Acked-by: Marcelo Ricardo Leitner <marcelo.leitner@gmail.com> Link: https://lore.kernel.org/r/20220409063611.673193-1-oss@malat.biz Signed-off-by: Jakub Kicinski <kuba@kernel.org>
2022-01-05Merge git://git.kernel.org/pub/scm/linux/kernel/git/netdev/netJakub Kicinski1-7/+15
No conflicts. Signed-off-by: Jakub Kicinski <kuba@kernel.org>
2022-01-02sctp: hold endpoint before calling cb in sctp_transport_lookup_processXin Long1-7/+15
The same fix in commit 5ec7d18d1813 ("sctp: use call_rcu to free endpoint") is also needed for dumping one asoc and sock after the lookup. Fixes: 86fdb3448cc1 ("sctp: ensure ep is not destroyed before doing the dump") Signed-off-by: Xin Long <lucien.xin@gmail.com> Signed-off-by: David S. Miller <davem@davemloft.net>
2021-12-30Merge git://git.kernel.org/pub/scm/linux/kernel/git/netdev/netJakub Kicinski1-8/+15
drivers/net/ethernet/mellanox/mlx5/core/en_tc.c commit 077cdda764c7 ("net/mlx5e: TC, Fix memory leak with rules with internal port") commit 31108d142f36 ("net/mlx5: Fix some error handling paths in 'mlx5e_tc_add_fdb_flow()'") commit 4390c6edc0fb ("net/mlx5: Fix some error handling paths in 'mlx5e_tc_add_fdb_flow()'") https://lore.kernel.org/all/20211229065352.30178-1-saeed@kernel.org/ net/smc/smc_wr.c commit 49dc9013e34b ("net/smc: Use the bitmap API when applicable") commit 349d43127dac ("net/smc: fix kernel panic caused by race of smc_sock") bitmap_zero()/memset() is removed by the fix Signed-off-by: Jakub Kicinski <kuba@kernel.org>
2021-12-25sctp: use call_rcu to free endpointXin Long1-8/+15
This patch is to delay the endpoint free by calling call_rcu() to fix another use-after-free issue in sctp_sock_dump(): BUG: KASAN: use-after-free in __lock_acquire+0x36d9/0x4c20 Call Trace: __lock_acquire+0x36d9/0x4c20 kernel/locking/lockdep.c:3218 lock_acquire+0x1ed/0x520 kernel/locking/lockdep.c:3844 __raw_spin_lock_bh include/linux/spinlock_api_smp.h:135 [inline] _raw_spin_lock_bh+0x31/0x40 kernel/locking/spinlock.c:168 spin_lock_bh include/linux/spinlock.h:334 [inline] __lock_sock+0x203/0x350 net/core/sock.c:2253 lock_sock_nested+0xfe/0x120 net/core/sock.c:2774 lock_sock include/net/sock.h:1492 [inline] sctp_sock_dump+0x122/0xb20 net/sctp/diag.c:324 sctp_for_each_transport+0x2b5/0x370 net/sctp/socket.c:5091 sctp_diag_dump+0x3ac/0x660 net/sctp/diag.c:527 __inet_diag_dump+0xa8/0x140 net/ipv4/inet_diag.c:1049 inet_diag_dump+0x9b/0x110 net/ipv4/inet_diag.c:1065 netlink_dump+0x606/0x1080 net/netlink/af_netlink.c:2244 __netlink_dump_start+0x59a/0x7c0 net/netlink/af_netlink.c:2352 netlink_dump_start include/linux/netlink.h:216 [inline] inet_diag_handler_cmd+0x2ce/0x3f0 net/ipv4/inet_diag.c:1170 __sock_diag_cmd net/core/sock_diag.c:232 [inline] sock_diag_rcv_msg+0x31d/0x410 net/core/sock_diag.c:263 netlink_rcv_skb+0x172/0x440 net/netlink/af_netlink.c:2477 sock_diag_rcv+0x2a/0x40 net/core/sock_diag.c:274 This issue occurs when asoc is peeled off and the old sk is freed after getting it by asoc->base.sk and before calling lock_sock(sk). To prevent the sk free, as a holder of the sk, ep should be alive when calling lock_sock(). This patch uses call_rcu() and moves sock_put and ep free into sctp_endpoint_destroy_rcu(), so that it's safe to try to hold the ep under rcu_read_lock in sctp_transport_traverse_process(). If sctp_endpoint_hold() returns true, it means this ep is still alive and we have held it and can continue to dump it; If it returns false, it means this ep is dead and can be freed after rcu_read_unlock, and we should skip it. In sctp_sock_dump(), after locking the sk, if this ep is different from tsp->asoc->ep, it means during this dumping, this asoc was peeled off before calling lock_sock(), and the sk should be skipped; If this ep is the same with tsp->asoc->ep, it means no peeloff happens on this asoc, and due to lock_sock, no peeloff will happen either until release_sock. Note that delaying endpoint free won't delay the port release, as the port release happens in sctp_endpoint_destroy() before calling call_rcu(). Also, freeing endpoint by call_rcu() makes it safe to access the sk by asoc->base.sk in sctp_assocs_seq_show() and sctp_rcv(). Thanks Jones to bring this issue up. v1->v2: - improve the changelog. - add kfree(ep) into sctp_endpoint_destroy_rcu(), as Jakub noticed. Reported-by: syzbot+9276d76e83e3bcde6c99@syzkaller.appspotmail.com Reported-by: Lee Jones <lee.jones@linaro.org> Fixes: d25adbeb0cdb ("sctp: fix an use-after-free issue in sctp_sock_dump") Signed-off-by: Xin Long <lucien.xin@gmail.com> Signed-off-by: David S. Miller <davem@davemloft.net>
2021-12-23sctp: move hlist_node and hashent out of sctp_ep_commonXin Long1-3/+3
Struct sctp_ep_common is included in both asoc and ep, but hlist_node and hashent are only needed by ep after asoc_hashtable was dropped by Commit b5eff7128366 ("sctp: drop the old assoc hashtable of sctp"). So it is better to move hlist_node and hashent from sctp_ep_common to sctp_endpoint, and it saves some space for each asoc. Signed-off-by: Xin Long <lucien.xin@gmail.com> Signed-off-by: David S. Miller <davem@davemloft.net>
2021-11-16net: drop nopreempt requirement on sock_prot_inuse_add()Eric Dumazet1-5/+0
This is distracting really, let's make this simpler, because many callers had to take care of this by themselves, even if on x86 this adds more code than really needed. Signed-off-by: Eric Dumazet <edumazet@google.com> Signed-off-by: David S. Miller <davem@davemloft.net>
2021-11-03security: pass asoc to sctp_assoc_request and sctp_sk_cloneXin Long1-3/+2
This patch is to move secid and peer_secid from endpoint to association, and pass asoc to sctp_assoc_request and sctp_sk_clone instead of ep. As ep is the local endpoint and asoc represents a connection, and in SCTP one sk/ep could have multiple asoc/connection, saving secid/peer_secid for new asoc will overwrite the old asoc's. Note that since asoc can be passed as NULL, security_sctp_assoc_request() is moved to the place right after the new_asoc is created in sctp_sf_do_5_1B_init() and sctp_sf_do_unexpected_init(). v1->v2: - fix the description of selinux_netlbl_skbuff_setsid(), as Jakub noticed. - fix the annotation in selinux_sctp_assoc_request(), as Richard Noticed. Fixes: 72e89f50084c ("security: Add support for SCTP security hooks") Reported-by: Prashanth Prahlad <pprahlad@redhat.com> Reviewed-by: Richard Haines <richard_c_haines@btinternet.com> Tested-by: Richard Haines <richard_c_haines@btinternet.com> Signed-off-by: Xin Long <lucien.xin@gmail.com> Signed-off-by: David S. Miller <davem@davemloft.net>
2021-07-18sctp: trim optlen when it's a huge value in sctp_setsockoptXin Long1-0/+4
After commit ca84bd058dae ("sctp: copy the optval from user space in sctp_setsockopt"), it does memory allocation in sctp_setsockopt with the optlen, and it would fail the allocation and return error if the optlen from user space is a huge value. This breaks some sockopts, like SCTP_HMAC_IDENT, SCTP_RESET_STREAMS and SCTP_AUTH_KEY, as when processing these sockopts before, optlen would be trimmed to a biggest value it needs when optlen is a huge value, instead of failing the allocation and returning error. This patch is to fix the allocation failure when it's a huge optlen from user space by trimming it to the biggest size sctp sockopt may need when necessary, and this biggest size is from sctp_setsockopt_reset_streams() for SCTP_RESET_STREAMS, which is bigger than those for SCTP_HMAC_IDENT and SCTP_AUTH_KEY. Fixes: ca84bd058dae ("sctp: copy the optval from user space in sctp_setsockopt") Signed-off-by: Xin Long <lucien.xin@gmail.com> Signed-off-by: David S. Miller <davem@davemloft.net>
2021-06-22sctp: enable PLPMTUD when the transport is readyXin Long1-1/+5
sctp_transport_pl_reset() is called whenever any of these 3 members in transport is changed: - probe_interval - param_flags & SPP_PMTUD_ENABLE - state == ACTIVE If all are true, start the PLPMTUD when it's not yet started. If any of these is false, stop the PLPMTUD when it's already running. sctp_transport_pl_update() is called when the transport dst has changed. It will restart the PLPMTUD probe. Again, the pathmtu won't change but use the dst's mtu until the Search phase is done. Note that after using PLPMTUD, the pathmtu is only initialized with the dst mtu when the transport dst changes. At other time it is updated by pl.pmtu. So sctp_transport_pmtu_check() will be called only when PLPMTUD is disabled in sctp_packet_config(). After this patch, the PLPMTUD feature from RFC8899 will be activated and can be used by users. Signed-off-by: Xin Long <lucien.xin@gmail.com> Acked-by: Marcelo Ricardo Leitner <marcelo.leitner@gmail.com> Signed-off-by: David S. Miller <davem@davemloft.net>
2021-06-22sctp: add SCTP_PLPMTUD_PROBE_INTERVAL sockopt for sock/asoc/transportXin Long1-0/+118
With this socket option, users can change probe_interval for a transport, asoc or sock after it's created. Note that if the change is for an asoc, also apply the change to each transport in this asoc. Signed-off-by: Xin Long <lucien.xin@gmail.com> Acked-by: Marcelo Ricardo Leitner <marcelo.leitner@gmail.com> Signed-off-by: David S. Miller <davem@davemloft.net>
2021-06-22sctp: add probe_interval in sysctl and sock/asoc/transportXin Long1-0/+1
PLPMTUD can be enabled by doing 'sysctl -w net.sctp.probe_interval=n'. 'n' is the interval for PLPMTUD probe timer in milliseconds, and it can't be less than 5000 if it's not 0. All asoc/transport's PLPMTUD in a new socket will be enabled by default. Signed-off-by: Xin Long <lucien.xin@gmail.com> Acked-by: Marcelo Ricardo Leitner <marcelo.leitner@gmail.com> Signed-off-by: David S. Miller <davem@davemloft.net>
2021-05-25sctp: add the missing setting for asoc encap_portXin Long1-0/+1
This patch is to add the missing setting back for asoc encap_port. Fixes: 8dba29603b5c ("sctp: add SCTP_REMOTE_UDP_ENCAPS_PORT sockopt") Signed-off-by: Xin Long <lucien.xin@gmail.com> Signed-off-by: David S. Miller <davem@davemloft.net>
2021-05-03sctp: delay auto_asconf init until binding the first addrXin Long1-14/+17
As Or Cohen described: If sctp_destroy_sock is called without sock_net(sk)->sctp.addr_wq_lock held and sp->do_auto_asconf is true, then an element is removed from the auto_asconf_splist without any proper locking. This can happen in the following functions: 1. In sctp_accept, if sctp_sock_migrate fails. 2. In inet_create or inet6_create, if there is a bpf program attached to BPF_CGROUP_INET_SOCK_CREATE which denies creation of the sctp socket. This patch is to fix it by moving the auto_asconf init out of sctp_init_sock(), by which inet_create()/inet6_create() won't need to operate it in sctp_destroy_sock() when calling sk_common_release(). It also makes more sense to do auto_asconf init while binding the first addr, as auto_asconf actually requires an ANY addr bind, see it in sctp_addr_wq_timeout_handler(). This addresses CVE-2021-23133. Fixes: 610236587600 ("bpf: Add new cgroup attach type to enable sock modifications") Reported-by: Or Cohen <orcohen@paloaltonetworks.com> Signed-off-by: Xin Long <lucien.xin@gmail.com> Signed-off-by: David S. Miller <davem@davemloft.net>
2021-05-03Revert "net/sctp: fix race condition in sctp_destroy_sock"Xin Long1-5/+8
This reverts commit b166a20b07382b8bc1dcee2a448715c9c2c81b5b. This one has to be reverted as it introduced a dead lock, as syzbot reported: CPU0 CPU1 ---- ---- lock(&net->sctp.addr_wq_lock); lock(slock-AF_INET6); lock(&net->sctp.addr_wq_lock); lock(slock-AF_INET6); CPU0 is the thread of sctp_addr_wq_timeout_handler(), and CPU1 is that of sctp_close(). The original issue this commit fixed will be fixed in the next patch. Reported-by: syzbot+959223586843e69a2674@syzkaller.appspotmail.com Signed-off-by: Xin Long <lucien.xin@gmail.com> Signed-off-by: David S. Miller <davem@davemloft.net>
2021-04-17Merge git://git.kernel.org/pub/scm/linux/kernel/git/netdev/netJakub Kicinski1-8/+5
drivers/net/ethernet/stmicro/stmmac/stmmac_main.c - keep the ZC code, drop the code related to reinit net/bridge/netfilter/ebtables.c - fix build after move to net_generic Signed-off-by: Jakub Kicinski <kuba@kernel.org>
2021-04-13net/sctp: fix race condition in sctp_destroy_sockOr Cohen1-8/+5
If sctp_destroy_sock is called without sock_net(sk)->sctp.addr_wq_lock held and sp->do_auto_asconf is true, then an element is removed from the auto_asconf_splist without any proper locking. This can happen in the following functions: 1. In sctp_accept, if sctp_sock_migrate fails. 2. In inet_create or inet6_create, if there is a bpf program attached to BPF_CGROUP_INET_SOCK_CREATE which denies creation of the sctp socket. The bug is fixed by acquiring addr_wq_lock in sctp_destroy_sock instead of sctp_close. This addresses CVE-2021-23133. Reported-by: Or Cohen <orcohen@paloaltonetworks.com> Reviewed-by: Xin Long <lucien.xin@gmail.com> Fixes: 610236587600 ("bpf: Add new cgroup attach type to enable sock modifications") Signed-off-by: Or Cohen <orcohen@paloaltonetworks.com> Acked-by: Marcelo Ricardo Leitner <marcelo.leitner@gmail.com> Signed-off-by: David S. Miller <davem@davemloft.net>
2021-03-28net: sctp: Fix some typosLu Wei1-1/+1
Modify "unkown" to "unknown" in net/sctp/sm_make_chunk.c and Modify "orginal" to "original" in net/sctp/socket.c. Reported-by: Hulk Robot <hulkci@huawei.com> Signed-off-by: Lu Wei <luwei32@huawei.com> Signed-off-by: David S. Miller <davem@davemloft.net>
2020-10-30sctp: add udphdr to overhead when udp_port is setXin Long1-0/+1
sctp_mtu_payload() is for calculating the frag size before making chunks from a msg. So we should only add udphdr size to overhead when udp socks are listening, as only then sctp can handle the incoming sctp over udp packets and outgoing sctp over udp packets will be possible. Note that we can't do this according to transport->encap_port, as different transports may be set to different values, while the chunks were made before choosing the transport, we could not be able to meet all rfc6951#section-5.6 recommends. v1->v2: - Add udp_port for sctp_sock to avoid a potential race issue, it will be used in xmit path in the next patch. Signed-off-by: Xin Long <lucien.xin@gmail.com> Acked-by: Marcelo Ricardo Leitner <marcelo.leitner@gmail.com> Signed-off-by: Jakub Kicinski <kuba@kernel.org>
2020-10-30sctp: add SCTP_REMOTE_UDP_ENCAPS_PORT sockoptXin Long1-0/+114
This patch is to implement: rfc6951#section-6.1: Get or Set the Remote UDP Encapsulation Port Number with the param of the struct: struct sctp_udpencaps { sctp_assoc_t sue_assoc_id; struct sockaddr_storage sue_address; uint16_t sue_port; }; the encap_port of sock, assoc or transport can be changed by users, which also means it allows the different transports of the same asoc to have different encap_port value. v1->v2: - no change. v2->v3: - fix the endian warning when setting values between encap_port and sue_port. Signed-off-by: Xin Long <lucien.xin@gmail.com> Acked-by: Marcelo Ricardo Leitner <marcelo.leitner@gmail.com> Signed-off-by: Jakub Kicinski <kuba@kernel.org>
2020-10-30sctp: add encap_port for netns sock asoc and transportXin Long1-0/+1
encap_port is added as per netns/sock/assoc/transport, and the latter one's encap_port inherits the former one's by default. The transport's encap_port value would mostly decide if one packet should go out with udp encapsulated or not. This patch also allows users to set netns' encap_port by sysctl. v1->v2: - Change to define encap_port as __be16 for sctp_sock, asoc and transport. v2->v3: - No change. v3->v4: - Add 'encap_port' entry in ip-sysctl.rst. v4->v5: - Improve the description of encap_port in ip-sysctl.rst. Signed-off-by: Xin Long <lucien.xin@gmail.com> Acked-by: Marcelo Ricardo Leitner <marcelo.leitner@gmail.com> Signed-off-by: Jakub Kicinski <kuba@kernel.org>
2020-09-20net: sctp: Fix IPv6 ancestor_size calc in sctp_copy_descendantHenry Ptasinski1-6/+3
When calculating ancestor_size with IPv6 enabled, simply using sizeof(struct ipv6_pinfo) doesn't account for extra bytes needed for alignment in the struct sctp6_sock. On x86, there aren't any extra bytes, but on ARM the ipv6_pinfo structure is aligned on an 8-byte boundary so there were 4 pad bytes that were omitted from the ancestor_size calculation. This would lead to corruption of the pd_lobby pointers, causing an oops when trying to free the sctp structure on socket close. Fixes: 636d25d557d1 ("sctp: not copy sctp_sock pd_lobby in sctp_copy_descendant") Signed-off-by: Henry Ptasinski <hptasinski@google.com> Acked-by: Marcelo Ricardo Leitner <marcelo.leitner@gmail.com> Signed-off-by: David S. Miller <davem@davemloft.net>
2020-08-24sctp: not disable bh in the whole sctp_get_port_local()Xin Long1-10/+6
With disabling bh in the whole sctp_get_port_local(), when snum == 0 and too many ports have been used, the do-while loop will take the cpu for a long time and cause cpu stuck: [ ] watchdog: BUG: soft lockup - CPU#11 stuck for 22s! [ ] RIP: 0010:native_queued_spin_lock_slowpath+0x4de/0x940 [ ] Call Trace: [ ] _raw_spin_lock+0xc1/0xd0 [ ] sctp_get_port_local+0x527/0x650 [sctp] [ ] sctp_do_bind+0x208/0x5e0 [sctp] [ ] sctp_autobind+0x165/0x1e0 [sctp] [ ] sctp_connect_new_asoc+0x355/0x480 [sctp] [ ] __sctp_connect+0x360/0xb10 [sctp] There's no need to disable bh in the whole function of sctp_get_port_local. So fix this cpu stuck by removing local_bh_disable() called at the beginning, and using spin_lock_bh() instead. The same thing was actually done for inet_csk_get_port() in Commit ea8add2b1903 ("tcp/dccp: better use of ephemeral ports in bind()"). Thanks to Marcelo for pointing the buggy code out. v1->v2: - use cond_resched() to yield cpu to other tasks if needed, as Eric noticed. Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2") Reported-by: Ying Xu <yinxu@redhat.com> Signed-off-by: Xin Long <lucien.xin@gmail.com> Acked-by: Marcelo Ricardo Leitner <marcelo.leitner@gmail.com> Signed-off-by: David S. Miller <davem@davemloft.net>
2020-07-24sctp: fix slab-out-of-bounds in SCTP_DELAYED_SACK processingChristoph Hellwig1-21/+29
This sockopt accepts two kinds of parameters, using struct sctp_sack_info and struct sctp_assoc_value. The mentioned commit didn't notice an implicit cast from the smaller (latter) struct to the bigger one (former) when copying the data from the user space, which now leads to an attempt to write beyond the buffer (because it assumes the storing buffer is bigger than the parameter itself). Fix it by allocating a sctp_sack_info on stack and filling it out based on the small struct for the compat case. Changelog stole from an earlier patch from Marcelo Ricardo Leitner. Fixes: ebb25defdc17 ("sctp: pass a kernel pointer to sctp_setsockopt_delayed_ack") Reported-by: syzbot+0e4699d000d8b874d8dc@syzkaller.appspotmail.com Signed-off-by: Christoph Hellwig <hch@lst.de> Acked-by: Marcelo Ricardo Leitner <marcelo.leitner@gmail.com> Signed-off-by: David S. Miller <davem@davemloft.net>
2020-07-24net: pass a sockptr_t into ->setsockoptChristoph Hellwig1-2/+2
Rework the remaining setsockopt code to pass a sockptr_t instead of a plain user pointer. This removes the last remaining set_fs(KERNEL_DS) outside of architecture specific code. Signed-off-by: Christoph Hellwig <hch@lst.de> Acked-by: Stefan Schmidt <stefan@datenfreihafen.org> [ieee802154] Acked-by: Matthieu Baerts <matthieu.baerts@tessares.net> Signed-off-by: David S. Miller <davem@davemloft.net>
2020-07-19sctp: remove the out_nounlock label in sctp_setsockoptChristoph Hellwig1-4/+2
This is just used once, and a direct return for the redirect to the AF case is much easier to follow than jumping to the end of a very long function. Signed-off-by: Christoph Hellwig <hch@lst.de> Signed-off-by: David S. Miller <davem@davemloft.net>
2020-07-19sctp: pass a kernel pointer to sctp_setsockopt_pf_exposeChristoph Hellwig1-14/+8
Use the kernel pointer that sctp_setsockopt has available instead of directly handling the user pointer. Signed-off-by: Christoph Hellwig <hch@lst.de> Signed-off-by: David S. Miller <davem@davemloft.net>
2020-07-19sctp: pass a kernel pointer to sctp_setsockopt_ecn_supportedChristoph Hellwig1-12/+6
Use the kernel pointer that sctp_setsockopt has available instead of directly handling the user pointer. Signed-off-by: Christoph Hellwig <hch@lst.de> Signed-off-by: David S. Miller <davem@davemloft.net>
2020-07-19sctp: pass a kernel pointer to sctp_setsockopt_auth_supportedChristoph Hellwig1-13/+7
Use the kernel pointer that sctp_setsockopt has available instead of directly handling the user pointer. Signed-off-by: Christoph Hellwig <hch@lst.de> Signed-off-by: David S. Miller <davem@davemloft.net>
2020-07-19sctp: pass a kernel pointer to sctp_setsockopt_eventChristoph Hellwig1-12/+6
Use the kernel pointer that sctp_setsockopt has available instead of directly handling the user pointer. Signed-off-by: Christoph Hellwig <hch@lst.de> Signed-off-by: David S. Miller <davem@davemloft.net>
2020-07-19sctp: pass a kernel pointer to sctp_setsockopt_eventChristoph Hellwig1-20/+15
Use the kernel pointer that sctp_setsockopt has available instead of directly handling the user pointer. Signed-off-by: Christoph Hellwig <hch@lst.de> Signed-off-by: David S. Miller <davem@davemloft.net>
2020-07-19sctp: pass a kernel pointer to sctp_setsockopt_reuse_portChristoph Hellwig1-8/+3
Use the kernel pointer that sctp_setsockopt has available instead of directly handling the user pointer. Signed-off-by: Christoph Hellwig <hch@lst.de> Signed-off-by: David S. Miller <davem@davemloft.net>
2020-07-19sctp: pass a kernel pointer to sctp_setsockopt_interleaving_supportedChristoph Hellwig1-24/+10
Use the kernel pointer that sctp_setsockopt has available instead of directly handling the user pointer. Signed-off-by: Christoph Hellwig <hch@lst.de> Signed-off-by: David S. Miller <davem@davemloft.net>
2020-07-19sctp: pass a kernel pointer to sctp_setsockopt_scheduler_valueChristoph Hellwig1-16/+10
Use the kernel pointer that sctp_setsockopt has available instead of directly handling the user pointer. Signed-off-by: Christoph Hellwig <hch@lst.de> Signed-off-by: David S. Miller <davem@davemloft.net>
2020-07-19sctp: pass a kernel pointer to sctp_setsockopt_schedulerChristoph Hellwig1-19/+14
Use the kernel pointer that sctp_setsockopt has available instead of directly handling the user pointer. Signed-off-by: Christoph Hellwig <hch@lst.de> Signed-off-by: David S. Miller <davem@davemloft.net>
2020-07-19sctp: pass a kernel pointer to sctp_setsockopt_add_streamsChristoph Hellwig1-17/+7
Use the kernel pointer that sctp_setsockopt has available instead of directly handling the user pointer. Signed-off-by: Christoph Hellwig <hch@lst.de> Signed-off-by: David S. Miller <davem@davemloft.net>
2020-07-19sctp: pass a kernel pointer to sctp_setsockopt_reset_assocChristoph Hellwig1-18/+7
Use the kernel pointer that sctp_setsockopt has available instead of directly handling the user pointer. Signed-off-by: Christoph Hellwig <hch@lst.de> Signed-off-by: David S. Miller <davem@davemloft.net>
2020-07-19sctp: pass a kernel pointer to sctp_setsockopt_reset_streamsChristoph Hellwig1-15/+5
Use the kernel pointer that sctp_setsockopt has available instead of directly handling the user pointer. Signed-off-by: Christoph Hellwig <hch@lst.de> Signed-off-by: David S. Miller <davem@davemloft.net>
2020-07-19sctp: pass a kernel pointer to sctp_setsockopt_enable_strresetChristoph Hellwig1-20/+14
Use the kernel pointer that sctp_setsockopt has available instead of directly handling the user pointer. Signed-off-by: Christoph Hellwig <hch@lst.de> Signed-off-by: David S. Miller <davem@davemloft.net>
2020-07-19sctp: pass a kernel pointer to sctp_setsockopt_reconfig_supportedChristoph Hellwig1-12/+6
Use the kernel pointer that sctp_setsockopt has available instead of directly handling the user pointer. Signed-off-by: Christoph Hellwig <hch@lst.de> Signed-off-by: David S. Miller <davem@davemloft.net>
2020-07-19sctp: pass a kernel pointer to sctp_setsockopt_default_prinfoChristoph Hellwig1-25/+20
Use the kernel pointer that sctp_setsockopt has available instead of directly handling the user pointer. Signed-off-by: Christoph Hellwig <hch@lst.de> Signed-off-by: David S. Miller <davem@davemloft.net>
2020-07-19sctp: pass a kernel pointer to sctp_setsockopt_pr_supportedChristoph Hellwig1-10/+6
Use the kernel pointer that sctp_setsockopt has available instead of directly handling the user pointer. Signed-off-by: Christoph Hellwig <hch@lst.de> Signed-off-by: David S. Miller <davem@davemloft.net>
2020-07-19sctp: pass a kernel pointer to sctp_setsockopt_recvnxtinfoChristoph Hellwig1-8/+3
Use the kernel pointer that sctp_setsockopt has available instead of directly handling the user pointer. Signed-off-by: Christoph Hellwig <hch@lst.de> Signed-off-by: David S. Miller <davem@davemloft.net>