aboutsummaryrefslogtreecommitdiffstatshomepage
path: root/net/smc/af_smc.c (follow)
AgeCommit message (Collapse)AuthorFilesLines
2025-04-11smc: Fix lockdep false-positive for IPPROTO_SMC.Kuniyuki Iwashima1-0/+5
SMC consists of two sockets: smc_sock and kernel TCP socket. Currently, there are two ways of creating the sockets, and syzbot reported a lockdep splat [0] for the newer way introduced by commit d25a92ccae6b ("net/smc: Introduce IPPROTO_SMC"). socket(AF_SMC , SOCK_STREAM, SMCPROTO_SMC or SMCPROTO_SMC6) socket(AF_INET or AF_INET6, SOCK_STREAM, IPPROTO_SMC) When a socket is allocated, sock_lock_init() sets a lockdep lock class to sk->sk_lock.slock based on its protocol family. In the IPPROTO_SMC case, AF_INET or AF_INET6 lock class is assigned to smc_sock. The repro sets IPV6_JOIN_ANYCAST for IPv6 UDP and SMC socket and exercises smc_switch_to_fallback() for IPPROTO_SMC. 1. smc_switch_to_fallback() is called under lock_sock() and holds smc->clcsock_release_lock. sk_lock-AF_INET6 -> &smc->clcsock_release_lock (sk_lock-AF_SMC) 2. Setting IPV6_JOIN_ANYCAST to SMC holds smc->clcsock_release_lock and calls setsockopt() for the kernel TCP socket, which holds RTNL and the kernel socket's lock_sock(). &smc->clcsock_release_lock -> rtnl_mutex (-> k-sk_lock-AF_INET6) 3. Setting IPV6_JOIN_ANYCAST to UDP holds RTNL and lock_sock(). rtnl_mutex -> sk_lock-AF_INET6 Then, lockdep detects a false-positive circular locking, .-> sk_lock-AF_INET6 -> &smc->clcsock_release_lock -> rtnl_mutex -. `-----------------------------------------------------------------' but IPPROTO_SMC should have the same locking rule as AF_SMC. sk_lock-AF_SMC -> &smc->clcsock_release_lock -> rtnl_mutex -> k-sk_lock-AF_INET6 Let's set the same lock class for smc_sock. Given AF_SMC uses the same lock class for SMCPROTO_SMC and SMCPROTO_SMC6, we do not need to separate the class for AF_INET and AF_INET6. [0]: WARNING: possible circular locking dependency detected 6.14.0-rc3-syzkaller-00267-gff202c5028a1 #0 Not tainted syz.4.1528/11571 is trying to acquire lock: ffffffff8fef8de8 (rtnl_mutex){+.+.}-{4:4}, at: ipv6_sock_ac_close+0xd9/0x110 net/ipv6/anycast.c:220 but task is already holding lock: ffff888027f596a8 (&smc->clcsock_release_lock){+.+.}-{4:4}, at: smc_clcsock_release+0x75/0xe0 net/smc/smc_close.c:30 which lock already depends on the new lock. the existing dependency chain (in reverse order) is: -> #2 (&smc->clcsock_release_lock){+.+.}-{4:4}: __mutex_lock_common kernel/locking/mutex.c:585 [inline] __mutex_lock+0x19b/0xb10 kernel/locking/mutex.c:730 smc_switch_to_fallback+0x2d/0xa00 net/smc/af_smc.c:903 smc_sendmsg+0x13d/0x520 net/smc/af_smc.c:2781 sock_sendmsg_nosec net/socket.c:718 [inline] __sock_sendmsg net/socket.c:733 [inline] ____sys_sendmsg+0xaaf/0xc90 net/socket.c:2573 ___sys_sendmsg+0x135/0x1e0 net/socket.c:2627 __sys_sendmsg+0x16e/0x220 net/socket.c:2659 do_syscall_x64 arch/x86/entry/common.c:52 [inline] do_syscall_64+0xcd/0x250 arch/x86/entry/common.c:83 entry_SYSCALL_64_after_hwframe+0x77/0x7f -> #1 (sk_lock-AF_INET6){+.+.}-{0:0}: lock_sock_nested+0x3a/0xf0 net/core/sock.c:3645 lock_sock include/net/sock.h:1624 [inline] sockopt_lock_sock net/core/sock.c:1133 [inline] sockopt_lock_sock+0x54/0x70 net/core/sock.c:1124 do_ipv6_setsockopt+0x2160/0x4520 net/ipv6/ipv6_sockglue.c:567 ipv6_setsockopt+0xcb/0x170 net/ipv6/ipv6_sockglue.c:993 udpv6_setsockopt+0x7d/0xd0 net/ipv6/udp.c:1850 do_sock_setsockopt+0x222/0x480 net/socket.c:2303 __sys_setsockopt+0x1a0/0x230 net/socket.c:2328 __do_sys_setsockopt net/socket.c:2334 [inline] __se_sys_setsockopt net/socket.c:2331 [inline] __x64_sys_setsockopt+0xbd/0x160 net/socket.c:2331 do_syscall_x64 arch/x86/entry/common.c:52 [inline] do_syscall_64+0xcd/0x250 arch/x86/entry/common.c:83 entry_SYSCALL_64_after_hwframe+0x77/0x7f -> #0 (rtnl_mutex){+.+.}-{4:4}: check_prev_add kernel/locking/lockdep.c:3163 [inline] check_prevs_add kernel/locking/lockdep.c:3282 [inline] validate_chain kernel/locking/lockdep.c:3906 [inline] __lock_acquire+0x249e/0x3c40 kernel/locking/lockdep.c:5228 lock_acquire.part.0+0x11b/0x380 kernel/locking/lockdep.c:5851 __mutex_lock_common kernel/locking/mutex.c:585 [inline] __mutex_lock+0x19b/0xb10 kernel/locking/mutex.c:730 ipv6_sock_ac_close+0xd9/0x110 net/ipv6/anycast.c:220 inet6_release+0x47/0x70 net/ipv6/af_inet6.c:485 __sock_release net/socket.c:647 [inline] sock_release+0x8e/0x1d0 net/socket.c:675 smc_clcsock_release+0xb7/0xe0 net/smc/smc_close.c:34 __smc_release+0x5c2/0x880 net/smc/af_smc.c:301 smc_release+0x1fc/0x5f0 net/smc/af_smc.c:344 __sock_release+0xb0/0x270 net/socket.c:647 sock_close+0x1c/0x30 net/socket.c:1398 __fput+0x3ff/0xb70 fs/file_table.c:464 task_work_run+0x14e/0x250 kernel/task_work.c:227 resume_user_mode_work include/linux/resume_user_mode.h:50 [inline] exit_to_user_mode_loop kernel/entry/common.c:114 [inline] exit_to_user_mode_prepare include/linux/entry-common.h:329 [inline] __syscall_exit_to_user_mode_work kernel/entry/common.c:207 [inline] syscall_exit_to_user_mode+0x27b/0x2a0 kernel/entry/common.c:218 do_syscall_64+0xda/0x250 arch/x86/entry/common.c:89 entry_SYSCALL_64_after_hwframe+0x77/0x7f other info that might help us debug this: Chain exists of: rtnl_mutex --> sk_lock-AF_INET6 --> &smc->clcsock_release_lock Possible unsafe locking scenario: CPU0 CPU1 ---- ---- lock(&smc->clcsock_release_lock); lock(sk_lock-AF_INET6); lock(&smc->clcsock_release_lock); lock(rtnl_mutex); *** DEADLOCK *** 2 locks held by syz.4.1528/11571: #0: ffff888077e88208 (&sb->s_type->i_mutex_key#10){+.+.}-{4:4}, at: inode_lock include/linux/fs.h:877 [inline] #0: ffff888077e88208 (&sb->s_type->i_mutex_key#10){+.+.}-{4:4}, at: __sock_release+0x86/0x270 net/socket.c:646 #1: ffff888027f596a8 (&smc->clcsock_release_lock){+.+.}-{4:4}, at: smc_clcsock_release+0x75/0xe0 net/smc/smc_close.c:30 stack backtrace: CPU: 0 UID: 0 PID: 11571 Comm: syz.4.1528 Not tainted 6.14.0-rc3-syzkaller-00267-gff202c5028a1 #0 Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 02/12/2025 Call Trace: <TASK> __dump_stack lib/dump_stack.c:94 [inline] dump_stack_lvl+0x116/0x1f0 lib/dump_stack.c:120 print_circular_bug+0x490/0x760 kernel/locking/lockdep.c:2076 check_noncircular+0x31a/0x400 kernel/locking/lockdep.c:2208 check_prev_add kernel/locking/lockdep.c:3163 [inline] check_prevs_add kernel/locking/lockdep.c:3282 [inline] validate_chain kernel/locking/lockdep.c:3906 [inline] __lock_acquire+0x249e/0x3c40 kernel/locking/lockdep.c:5228 lock_acquire.part.0+0x11b/0x380 kernel/locking/lockdep.c:5851 __mutex_lock_common kernel/locking/mutex.c:585 [inline] __mutex_lock+0x19b/0xb10 kernel/locking/mutex.c:730 ipv6_sock_ac_close+0xd9/0x110 net/ipv6/anycast.c:220 inet6_release+0x47/0x70 net/ipv6/af_inet6.c:485 __sock_release net/socket.c:647 [inline] sock_release+0x8e/0x1d0 net/socket.c:675 smc_clcsock_release+0xb7/0xe0 net/smc/smc_close.c:34 __smc_release+0x5c2/0x880 net/smc/af_smc.c:301 smc_release+0x1fc/0x5f0 net/smc/af_smc.c:344 __sock_release+0xb0/0x270 net/socket.c:647 sock_close+0x1c/0x30 net/socket.c:1398 __fput+0x3ff/0xb70 fs/file_table.c:464 task_work_run+0x14e/0x250 kernel/task_work.c:227 resume_user_mode_work include/linux/resume_user_mode.h:50 [inline] exit_to_user_mode_loop kernel/entry/common.c:114 [inline] exit_to_user_mode_prepare include/linux/entry-common.h:329 [inline] __syscall_exit_to_user_mode_work kernel/entry/common.c:207 [inline] syscall_exit_to_user_mode+0x27b/0x2a0 kernel/entry/common.c:218 do_syscall_64+0xda/0x250 arch/x86/entry/common.c:89 entry_SYSCALL_64_after_hwframe+0x77/0x7f RIP: 0033:0x7f8b4b38d169 Code: ff ff c3 66 2e 0f 1f 84 00 00 00 00 00 0f 1f 40 00 48 89 f8 48 89 f7 48 89 d6 48 89 ca 4d 89 c2 4d 89 c8 4c 8b 4c 24 08 0f 05 <48> 3d 01 f0 ff ff 73 01 c3 48 c7 c1 a8 ff ff ff f7 d8 64 89 01 48 RSP: 002b:00007ffe4efd22d8 EFLAGS: 00000246 ORIG_RAX: 00000000000001b4 RAX: 0000000000000000 RBX: 00000000000b14a3 RCX: 00007f8b4b38d169 RDX: 0000000000000000 RSI: 000000000000001e RDI: 0000000000000003 RBP: 00007f8b4b5a7ba0 R08: 0000000000000001 R09: 000000114efd25cf R10: 00007f8b4b200000 R11: 0000000000000246 R12: 00007f8b4b5a5fac R13: 00007f8b4b5a5fa0 R14: ffffffffffffffff R15: 00007ffe4efd23f0 </TASK> Fixes: d25a92ccae6b ("net/smc: Introduce IPPROTO_SMC") Reported-by: syzbot+be6f4b383534d88989f7@syzkaller.appspotmail.com Closes: https://syzkaller.appspot.com/bug?extid=be6f4b383534d88989f7 Signed-off-by: Kuniyuki Iwashima <kuniyu@amazon.com> Reviewed-by: Wenjia Zhang <wenjia@linux.ibm.com> Link: https://patch.msgid.link/20250407170332.26959-1-kuniyu@amazon.com Signed-off-by: Jakub Kicinski <kuba@kernel.org>
2025-02-21net: better track kernel sockets lifetimeEric Dumazet1-4/+1
While kernel sockets are dismantled during pernet_operations->exit(), their freeing can be delayed by any tx packets still held in qdisc or device queues, due to skb_set_owner_w() prior calls. This then trigger the following warning from ref_tracker_dir_exit() [1] To fix this, make sure that kernel sockets own a reference on net->passive. Add sk_net_refcnt_upgrade() helper, used whenever a kernel socket is converted to a refcounted one. [1] [ 136.263918][ T35] ref_tracker: net notrefcnt@ffff8880638f01e0 has 1/2 users at [ 136.263918][ T35] sk_alloc+0x2b3/0x370 [ 136.263918][ T35] inet6_create+0x6ce/0x10f0 [ 136.263918][ T35] __sock_create+0x4c0/0xa30 [ 136.263918][ T35] inet_ctl_sock_create+0xc2/0x250 [ 136.263918][ T35] igmp6_net_init+0x39/0x390 [ 136.263918][ T35] ops_init+0x31e/0x590 [ 136.263918][ T35] setup_net+0x287/0x9e0 [ 136.263918][ T35] copy_net_ns+0x33f/0x570 [ 136.263918][ T35] create_new_namespaces+0x425/0x7b0 [ 136.263918][ T35] unshare_nsproxy_namespaces+0x124/0x180 [ 136.263918][ T35] ksys_unshare+0x57d/0xa70 [ 136.263918][ T35] __x64_sys_unshare+0x38/0x40 [ 136.263918][ T35] do_syscall_64+0xf3/0x230 [ 136.263918][ T35] entry_SYSCALL_64_after_hwframe+0x77/0x7f [ 136.263918][ T35] [ 136.343488][ T35] ref_tracker: net notrefcnt@ffff8880638f01e0 has 1/2 users at [ 136.343488][ T35] sk_alloc+0x2b3/0x370 [ 136.343488][ T35] inet6_create+0x6ce/0x10f0 [ 136.343488][ T35] __sock_create+0x4c0/0xa30 [ 136.343488][ T35] inet_ctl_sock_create+0xc2/0x250 [ 136.343488][ T35] ndisc_net_init+0xa7/0x2b0 [ 136.343488][ T35] ops_init+0x31e/0x590 [ 136.343488][ T35] setup_net+0x287/0x9e0 [ 136.343488][ T35] copy_net_ns+0x33f/0x570 [ 136.343488][ T35] create_new_namespaces+0x425/0x7b0 [ 136.343488][ T35] unshare_nsproxy_namespaces+0x124/0x180 [ 136.343488][ T35] ksys_unshare+0x57d/0xa70 [ 136.343488][ T35] __x64_sys_unshare+0x38/0x40 [ 136.343488][ T35] do_syscall_64+0xf3/0x230 [ 136.343488][ T35] entry_SYSCALL_64_after_hwframe+0x77/0x7f Fixes: 0cafd77dcd03 ("net: add a refcount tracker for kernel sockets") Reported-by: syzbot+30a19e01a97420719891@syzkaller.appspotmail.com Closes: https://lore.kernel.org/netdev/67b72aeb.050a0220.14d86d.0283.GAE@google.com/T/#u Signed-off-by: Eric Dumazet <edumazet@google.com> Reviewed-by: Kuniyuki Iwashima <kuniyu@amazon.com> Link: https://patch.msgid.link/20250220131854.4048077-1-edumazet@google.com Signed-off-by: Jakub Kicinski <kuba@kernel.org>
2025-01-13net/smc: fix data error when recvmsg with MSG_PEEK flagGuangguan Wang1-1/+1
When recvmsg with MSG_PEEK flag, the data will be copied to user's buffer without advancing consume cursor and without reducing the length of rx available data. Once the expected peek length is larger than the value of bytes_to_rcv, in the loop of do while in smc_rx_recvmsg, the first loop will copy bytes_to_rcv bytes of data from the position local_tx_ctrl.cons, the second loop will copy the min(bytes_to_rcv, read_remaining) bytes from the position local_tx_ctrl.cons again because of the lacking of process with advancing consume cursor and reducing the length of available data. So do the subsequent loops. The data copied in the second loop and the subsequent loops will result in data error, as it should not be copied if no more data arrives and it should be copied from the position advancing bytes_to_rcv bytes from the local_tx_ctrl.cons if more data arrives. This issue can be reproduce by the following python script: server.py: import socket import time server_ip = '0.0.0.0' server_port = 12346 server_socket = socket.socket(socket.AF_INET, socket.SOCK_STREAM) server_socket.bind((server_ip, server_port)) server_socket.listen(1) print('Server is running and listening for connections...') conn, addr = server_socket.accept() print('Connected by', addr) while True: data = conn.recv(1024) if not data: break print('Received request:', data.decode()) conn.sendall(b'Hello, client!\n') time.sleep(5) conn.sendall(b'Hello, again!\n') conn.close() client.py: import socket server_ip = '<server ip>' server_port = 12346 resp=b'Hello, client!\nHello, again!\n' client_socket = socket.socket(socket.AF_INET, socket.SOCK_STREAM) client_socket.connect((server_ip, server_port)) request = 'Hello, server!' client_socket.sendall(request.encode()) peek_data = client_socket.recv(len(resp), socket.MSG_PEEK | socket.MSG_WAITALL) print('Peeked data:', peek_data.decode()) client_socket.close() Fixes: 952310ccf2d8 ("smc: receive data from RMBE") Reported-by: D. Wythe <alibuda@linux.alibaba.com> Signed-off-by: Guangguan Wang <guangguan.wang@linux.alibaba.com> Link: https://patch.msgid.link/20250104143201.35529-1-guangguan.wang@linux.alibaba.com Signed-off-by: Jakub Kicinski <kuba@kernel.org>
2024-12-19Merge git://git.kernel.org/pub/scm/linux/kernel/git/netdev/netJakub Kicinski1-2/+16
Cross-merge networking fixes after downstream PR (net-6.13-rc4). No conflicts. Adjacent changes: drivers/net/ethernet/renesas/rswitch.h 32fd46f5b69e ("net: renesas: rswitch: remove speed from gwca structure") 922b4b955a03 ("net: renesas: rswitch: rework ts tags management") Signed-off-by: Jakub Kicinski <kuba@kernel.org>
2024-12-15net/smc: check smcd_v2_ext_offset when receiving proposal msgGuangguan Wang1-0/+2
When receiving proposal msg in server, the field smcd_v2_ext_offset in proposal msg is from the remote client and can not be fully trusted. Once the value of smcd_v2_ext_offset exceed the max value, there has the chance to access wrong address, and crash may happen. This patch checks the value of smcd_v2_ext_offset before using it. Fixes: 5c21c4ccafe8 ("net/smc: determine accepted ISM devices") Signed-off-by: Guangguan Wang <guangguan.wang@linux.alibaba.com> Reviewed-by: Wen Gu <guwen@linux.alibaba.com> Reviewed-by: D. Wythe <alibuda@linux.alibaba.com> Signed-off-by: David S. Miller <davem@davemloft.net>
2024-12-15net/smc: check v2_ext_offset/eid_cnt/ism_gid_cnt when receiving proposal msgGuangguan Wang1-1/+2
When receiving proposal msg in server, the fields v2_ext_offset/ eid_cnt/ism_gid_cnt in proposal msg are from the remote client and can not be fully trusted. Especially the field v2_ext_offset, once exceed the max value, there has the chance to access wrong address, and crash may happen. This patch checks the fields v2_ext_offset/eid_cnt/ism_gid_cnt before using them. Fixes: 8c3dca341aea ("net/smc: build and send V2 CLC proposal") Signed-off-by: Guangguan Wang <guangguan.wang@linux.alibaba.com> Reviewed-by: Wen Gu <guwen@linux.alibaba.com> Reviewed-by: D. Wythe <alibuda@linux.alibaba.com> Signed-off-by: David S. Miller <davem@davemloft.net>
2024-12-15net/smc: check iparea_offset and ipv6_prefixes_cnt when receiving proposal msgGuangguan Wang1-1/+5
When receiving proposal msg in server, the field iparea_offset and the field ipv6_prefixes_cnt in proposal msg are from the remote client and can not be fully trusted. Especially the field iparea_offset, once exceed the max value, there has the chance to access wrong address, and crash may happen. This patch checks iparea_offset and ipv6_prefixes_cnt before using them. Fixes: e7b7a64a8493 ("smc: support variable CLC proposal messages") Signed-off-by: Guangguan Wang <guangguan.wang@linux.alibaba.com> Reviewed-by: Wen Gu <guwen@linux.alibaba.com> Reviewed-by: D. Wythe <alibuda@linux.alibaba.com> Signed-off-by: David S. Miller <davem@davemloft.net>
2024-12-15net/smc: check sndbuf_space again after NOSPACE flag is set in smc_pollGuangguan Wang1-0/+7
When application sending data more than sndbuf_space, there have chances application will sleep in epoll_wait, and will never be wakeup again. This is caused by a race between smc_poll and smc_cdc_tx_handler. application tasklet smc_tx_sendmsg(len > sndbuf_space) | epoll_wait for EPOLL_OUT,timeout=0 | smc_poll | if (!smc->conn.sndbuf_space) | | smc_cdc_tx_handler | atomic_add sndbuf_space | smc_tx_sndbuf_nonfull | if (!test_bit SOCK_NOSPACE) | do not sk_write_space; set_bit SOCK_NOSPACE; | return mask=0; | Application will sleep in epoll_wait as smc_poll returns 0. And smc_cdc_tx_handler will not call sk_write_space because the SOCK_NOSPACE has not be set. If there is no inflight cdc msg, sk_write_space will not be called any more, and application will sleep in epoll_wait forever. So check sndbuf_space again after NOSPACE flag is set to break the race. Fixes: 8dce2786a290 ("net/smc: smc_poll improvements") Signed-off-by: Guangguan Wang <guangguan.wang@linux.alibaba.com> Suggested-by: Paolo Abeni <pabeni@redhat.com> Signed-off-by: David S. Miller <davem@davemloft.net>
2024-12-12net/smc: support ipv4 mapped ipv6 addr client for smc-r v2Guangguan Wang1-1/+4
AF_INET6 is not supported for smc-r v2 client before, even if the ipv6 addr is ipv4 mapped. Thus, when using AF_INET6, smc-r connection will fallback to tcp, especially for java applications running smc-r. This patch support ipv4 mapped ipv6 addr client for smc-r v2. Clients using real global ipv6 addr is still not supported yet. Signed-off-by: Guangguan Wang <guangguan.wang@linux.alibaba.com> Reviewed-by: Wen Gu <guwen@linux.alibaba.com> Reviewed-by: Dust Li <dust.li@linux.alibaba.com> Reviewed-by: D. Wythe <alibuda@linux.alibaba.com> Reviewed-by: Wenjia Zhang <wenjia@linux.ibm.com> Reviewed-by: Halil Pasic <pasic@linux.ibm.com> Signed-off-by: Paolo Abeni <pabeni@redhat.com>
2024-12-03net/smc: fix LGR and link use-after-free issueWen Gu1-0/+2
We encountered a LGR/link use-after-free issue, which manifested as the LGR/link refcnt reaching 0 early and entering the clear process, making resource access unsafe. refcount_t: addition on 0; use-after-free. WARNING: CPU: 14 PID: 107447 at lib/refcount.c:25 refcount_warn_saturate+0x9c/0x140 Workqueue: events smc_lgr_terminate_work [smc] Call trace: refcount_warn_saturate+0x9c/0x140 __smc_lgr_terminate.part.45+0x2a8/0x370 [smc] smc_lgr_terminate_work+0x28/0x30 [smc] process_one_work+0x1b8/0x420 worker_thread+0x158/0x510 kthread+0x114/0x118 or refcount_t: underflow; use-after-free. WARNING: CPU: 6 PID: 93140 at lib/refcount.c:28 refcount_warn_saturate+0xf0/0x140 Workqueue: smc_hs_wq smc_listen_work [smc] Call trace: refcount_warn_saturate+0xf0/0x140 smcr_link_put+0x1cc/0x1d8 [smc] smc_conn_free+0x110/0x1b0 [smc] smc_conn_abort+0x50/0x60 [smc] smc_listen_find_device+0x75c/0x790 [smc] smc_listen_work+0x368/0x8a0 [smc] process_one_work+0x1b8/0x420 worker_thread+0x158/0x510 kthread+0x114/0x118 It is caused by repeated release of LGR/link refcnt. One suspect is that smc_conn_free() is called repeatedly because some smc_conn_free() from server listening path are not protected by sock lock. e.g. Calls under socklock | smc_listen_work ------------------------------------------------------- lock_sock(sk) | smc_conn_abort smc_conn_free | \- smc_conn_free \- smcr_link_put | \- smcr_link_put (duplicated) release_sock(sk) So here add sock lock protection in smc_listen_work() path, making it exclusive with other connection operations. Fixes: 3b2dec2603d5 ("net/smc: restructure client and server code in af_smc") Co-developed-by: Guangguan Wang <guangguan.wang@linux.alibaba.com> Signed-off-by: Guangguan Wang <guangguan.wang@linux.alibaba.com> Co-developed-by: Kai <KaiShen@linux.alibaba.com> Signed-off-by: Kai <KaiShen@linux.alibaba.com> Signed-off-by: Wen Gu <guwen@linux.alibaba.com> Reviewed-by: Wenjia Zhang <wenjia@linux.ibm.com> Signed-off-by: Paolo Abeni <pabeni@redhat.com>
2024-12-03net/smc: initialize close_work early to avoid warningWen Gu1-3/+1
We encountered a warning that close_work was canceled before initialization. WARNING: CPU: 7 PID: 111103 at kernel/workqueue.c:3047 __flush_work+0x19e/0x1b0 Workqueue: events smc_lgr_terminate_work [smc] RIP: 0010:__flush_work+0x19e/0x1b0 Call Trace: ? __wake_up_common+0x7a/0x190 ? work_busy+0x80/0x80 __cancel_work_timer+0xe3/0x160 smc_close_cancel_work+0x1a/0x70 [smc] smc_close_active_abort+0x207/0x360 [smc] __smc_lgr_terminate.part.38+0xc8/0x180 [smc] process_one_work+0x19e/0x340 worker_thread+0x30/0x370 ? process_one_work+0x340/0x340 kthread+0x117/0x130 ? __kthread_cancel_work+0x50/0x50 ret_from_fork+0x22/0x30 This is because when smc_close_cancel_work is triggered, e.g. the RDMA driver is rmmod and the LGR is terminated, the conn->close_work is flushed before initialization, resulting in WARN_ON(!work->func). __smc_lgr_terminate | smc_connect_{rdma|ism} ------------------------------------------------------------- | smc_conn_create | \- smc_lgr_register_conn for conn in lgr->conns_all | \- smc_conn_kill | \- smc_close_active_abort | \- smc_close_cancel_work | \- cancel_work_sync | \- __flush_work | (close_work) | | smc_close_init | \- INIT_WORK(&close_work) So fix this by initializing close_work before establishing the connection. Fixes: 46c28dbd4c23 ("net/smc: no socket state changes in tasklet context") Fixes: 413498440e30 ("net/smc: add SMC-D support in af_smc") Signed-off-by: Wen Gu <guwen@linux.alibaba.com> Reviewed-by: Wenjia Zhang <wenjia@linux.ibm.com> Reviewed-by: Alexandra Winter <wintera@linux.ibm.com> Signed-off-by: Paolo Abeni <pabeni@redhat.com>
2024-11-07net/smc: do not leave a dangling sk pointer in __smc_create()Eric Dumazet1-1/+3
Thanks to commit 4bbd360a5084 ("socket: Print pf->create() when it does not clear sock->sk on failure."), syzbot found an issue with AF_SMC: smc_create must clear sock->sk on failure, family: 43, type: 1, protocol: 0 WARNING: CPU: 0 PID: 5827 at net/socket.c:1565 __sock_create+0x96f/0xa30 net/socket.c:1563 Modules linked in: CPU: 0 UID: 0 PID: 5827 Comm: syz-executor259 Not tainted 6.12.0-rc6-next-20241106-syzkaller #0 Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 09/13/2024 RIP: 0010:__sock_create+0x96f/0xa30 net/socket.c:1563 Code: 03 00 74 08 4c 89 e7 e8 4f 3b 85 f8 49 8b 34 24 48 c7 c7 40 89 0c 8d 8b 54 24 04 8b 4c 24 0c 44 8b 44 24 08 e8 32 78 db f7 90 <0f> 0b 90 90 e9 d3 fd ff ff 89 e9 80 e1 07 fe c1 38 c1 0f 8c ee f7 RSP: 0018:ffffc90003e4fda0 EFLAGS: 00010246 RAX: 099c6f938c7f4700 RBX: 1ffffffff1a595fd RCX: ffff888034823c00 RDX: 0000000000000000 RSI: 0000000000000000 RDI: 0000000000000000 RBP: 00000000ffffffe9 R08: ffffffff81567052 R09: 1ffff920007c9f50 R10: dffffc0000000000 R11: fffff520007c9f51 R12: ffffffff8d2cafe8 R13: 1ffffffff1a595fe R14: ffffffff9a789c40 R15: ffff8880764298c0 FS: 000055557b518380(0000) GS:ffff8880b8600000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 00007fa62ff43225 CR3: 0000000031628000 CR4: 00000000003526f0 DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 Call Trace: <TASK> sock_create net/socket.c:1616 [inline] __sys_socket_create net/socket.c:1653 [inline] __sys_socket+0x150/0x3c0 net/socket.c:1700 __do_sys_socket net/socket.c:1714 [inline] __se_sys_socket net/socket.c:1712 [inline] For reference, see commit 2d859aff775d ("Merge branch 'do-not-leave-dangling-sk-pointers-in-pf-create-functions'") Fixes: d25a92ccae6b ("net/smc: Introduce IPPROTO_SMC") Signed-off-by: Eric Dumazet <edumazet@google.com> Cc: Ignat Korchagin <ignat@cloudflare.com> Cc: D. Wythe <alibuda@linux.alibaba.com> Cc: Dust Li <dust.li@linux.alibaba.com> Reviewed-by: Kuniyuki Iwashima <kuniyu@amazon.com> Reviewed-by: Wenjia Zhang <wenjia@linux.ibm.com> Link: https://patch.msgid.link/20241106221922.1544045-1-edumazet@google.com Signed-off-by: Jakub Kicinski <kuba@kernel.org>
2024-08-01Merge git://git.kernel.org/pub/scm/linux/kernel/git/netdev/netJakub Kicinski1-3/+4
Cross-merge networking fixes after downstream PR. No conflicts or adjacent changes. Link: https://patch.msgid.link/20240801131917.34494-1-pabeni@redhat.com Signed-off-by: Jakub Kicinski <kuba@kernel.org>
2024-07-31net/smc: remove redundant code in smc_connect_check_aclcZhengchao Shao1-4/+0
When the SMC client perform CLC handshake, it will check whether the clc header type is correct in receiving SMC_CLC_ACCEPT packet. The specific invoking path is as follows: __smc_connect smc_connect_clc smc_clc_wait_msg smc_clc_msg_hdr_valid smc_clc_msg_acc_conf_valid Therefore, the smc_connect_check_aclc interface invoked by __smc_connect does not need to check type again. Signed-off-by: Zhengchao Shao <shaozhengchao@huawei.com> Reviewed-by: Simon Horman <horms@kernel.org> Signed-off-by: David S. Miller <davem@davemloft.net>
2024-07-31net/smc: remove the fallback in __smc_connectZhengchao Shao1-4/+0
When the SMC client begins to connect to server, smcd_version is set to SMC_V1 + SMC_V2. If fail to get VLAN ID, only SMC_V2 information is left in smcd_version. And smcd_version will not be changed to 0. Therefore, remove the fallback caused by the failure to get VLAN ID. Signed-off-by: Zhengchao Shao <shaozhengchao@huawei.com> Reviewed-by: Simon Horman <horms@kernel.org> Signed-off-by: David S. Miller <davem@davemloft.net>
2024-07-30net/smc: prevent UAF in inet_create()D. Wythe1-3/+4
Following syzbot repro crashes the kernel: socketpair(0x2, 0x1, 0x100, &(0x7f0000000140)) (fail_nth: 13) Fix this by not calling sk_common_release() from smc_create_clcsk(). Stack trace: socket: no more sockets ------------[ cut here ]------------ refcount_t: underflow; use-after-free. WARNING: CPU: 1 PID: 5092 at lib/refcount.c:28 refcount_warn_saturate+0x15a/0x1d0 lib/refcount.c:28 Modules linked in: CPU: 1 PID: 5092 Comm: syz-executor424 Not tainted 6.10.0-syzkaller-04483-g0be9ae5486cd #0 Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 06/27/2024 RIP: 0010:refcount_warn_saturate+0x15a/0x1d0 lib/refcount.c:28 Code: 80 f3 1f 8c e8 e7 69 a8 fc 90 0f 0b 90 90 eb 99 e8 cb 4f e6 fc c6 05 8a 8d e8 0a 01 90 48 c7 c7 e0 f3 1f 8c e8 c7 69 a8 fc 90 <0f> 0b 90 90 e9 76 ff ff ff e8 a8 4f e6 fc c6 05 64 8d e8 0a 01 90 RSP: 0018:ffffc900034cfcf0 EFLAGS: 00010246 RAX: 3b9fcde1c862f700 RBX: ffff888022918b80 RCX: ffff88807b39bc00 RDX: 0000000000000000 RSI: 0000000000000000 RDI: 0000000000000000 RBP: 0000000000000003 R08: ffffffff815878a2 R09: fffffbfff1c39d94 R10: dffffc0000000000 R11: fffffbfff1c39d94 R12: 00000000ffffffe9 R13: 1ffff11004523165 R14: ffff888022918b28 R15: ffff888022918b00 FS: 00005555870e7380(0000) GS:ffff8880b9500000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 0000000020000140 CR3: 000000007582e000 CR4: 00000000003506f0 DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 Call Trace: <TASK> inet_create+0xbaf/0xe70 __sock_create+0x490/0x920 net/socket.c:1571 sock_create net/socket.c:1622 [inline] __sys_socketpair+0x2ca/0x720 net/socket.c:1769 __do_sys_socketpair net/socket.c:1822 [inline] __se_sys_socketpair net/socket.c:1819 [inline] __x64_sys_socketpair+0x9b/0xb0 net/socket.c:1819 do_syscall_x64 arch/x86/entry/common.c:52 [inline] do_syscall_64+0xf3/0x230 arch/x86/entry/common.c:83 entry_SYSCALL_64_after_hwframe+0x77/0x7f RIP: 0033:0x7fbcb9259669 Code: 28 00 00 00 75 05 48 83 c4 28 c3 e8 a1 1a 00 00 90 48 89 f8 48 89 f7 48 89 d6 48 89 ca 4d 89 c2 4d 89 c8 4c 8b 4c 24 08 0f 05 <48> 3d 01 f0 ff ff 73 01 c3 48 c7 c1 b8 ff ff ff f7 d8 64 89 01 48 RSP: 002b:00007fffe931c6d8 EFLAGS: 00000246 ORIG_RAX: 0000000000000035 RAX: ffffffffffffffda RBX: 00007fffe931c6f0 RCX: 00007fbcb9259669 RDX: 0000000000000100 RSI: 0000000000000001 RDI: 0000000000000002 RBP: 0000000000000002 R08: 00007fffe931c476 R09: 00000000000000a0 R10: 0000000020000140 R11: 0000000000000246 R12: 00007fffe931c6ec R13: 431bde82d7b634db R14: 0000000000000001 R15: 0000000000000001 </TASK> Link: https://lore.kernel.org/r/20240723175809.537291-1-edumazet@google.com/ Fixes: d25a92ccae6b ("net/smc: Introduce IPPROTO_SMC") Reported-by: syzbot <syzkaller@googlegroups.com> Signed-off-by: D. Wythe <alibuda@linux.alibaba.com> Reviewed-by: Eric Dumazet <edumazet@google.com> Reviewed-by: Wenjia Zhang <wenjia@linux.ibm.com> Link: https://patch.msgid.link/1722224415-30999-1-git-send-email-alibuda@linux.alibaba.com Signed-off-by: Paolo Abeni <pabeni@redhat.com>
2024-06-17net/smc: Introduce IPPROTO_SMCD. Wythe1-2/+14
This patch allows to create smc socket via AF_INET, similar to the following code, /* create v4 smc sock */ v4 = socket(AF_INET, SOCK_STREAM, IPPROTO_SMC); /* create v6 smc sock */ v6 = socket(AF_INET6, SOCK_STREAM, IPPROTO_SMC); There are several reasons why we believe it is appropriate here: 1. For smc sockets, it actually use IPv4 (AF-INET) or IPv6 (AF-INET6) address. There is no AF_SMC address at all. 2. Create smc socket in the AF_INET(6) path, which allows us to reuse the infrastructure of AF_INET(6) path, such as common ebpf hooks. Otherwise, smc have to implement it again in AF_SMC path. Signed-off-by: D. Wythe <alibuda@linux.alibaba.com> Reviewed-by: Wenjia Zhang <wenjia@linux.ibm.com> Reviewed-by: Dust Li <dust.li@linux.alibaba.com> Tested-by: Niklas Schnelle <schnelle@linux.ibm.com> Tested-by: Wenjia Zhang <wenjia@linux.ibm.com> Signed-off-by: David S. Miller <davem@davemloft.net>
2024-06-17net/smc: expose smc proto operationsD. Wythe1-30/+30
Externalize smc proto operations (smc_xxx) to allow access from files other than af_smc.c This is in preparation for the subsequent implementation of the AF_INET version of SMC. Signed-off-by: D. Wythe <alibuda@linux.alibaba.com> Reviewed-by: Wenjia Zhang <wenjia@linux.ibm.com> Reviewed-by: Dust Li <dust.li@linux.alibaba.com> Tested-by: Niklas Schnelle <schnelle@linux.ibm.com> Tested-by: Wenjia Zhang <wenjia@linux.ibm.com> Signed-off-by: David S. Miller <davem@davemloft.net>
2024-06-17net/smc: refactoring initialization of smc sockD. Wythe1-38/+48
This patch aims to isolate the shared components of SMC socket allocation by introducing smc_sk_init() for sock initialization and __smc_create_clcsk() for the initialization of clcsock. This is in preparation for the subsequent implementation of the AF_INET version of SMC. Signed-off-by: D. Wythe <alibuda@linux.alibaba.com> Reviewed-by: Tony Lu <tonylu@linux.alibaba.com> Reviewed-by: Wenjia Zhang <wenjia@linux.ibm.com> Reviewed-by: Dust Li <dust.li@linux.alibaba.com> Tested-by: Niklas Schnelle <schnelle@linux.ibm.com> Tested-by: Wenjia Zhang <wenjia@linux.ibm.com> Signed-off-by: David S. Miller <davem@davemloft.net>
2024-06-05net/smc: avoid overwriting when adjusting sock bufsizesWen Gu1-20/+2
When copying smc settings to clcsock, avoid setting clcsock's sk_sndbuf to sysctl_tcp_wmem[1], since this may overwrite the value set by tcp_sndbuf_expand() in TCP connection establishment. And the other setting sk_{snd|rcv}buf to sysctl value in smc_adjust_sock_bufsizes() can also be omitted since the initialization of smc sock and clcsock has set sk_{snd|rcv}buf to smc.sysctl_{w|r}mem or ipv4_sysctl_tcp_{w|r}mem[1]. Fixes: 30c3c4a4497c ("net/smc: Use correct buffer sizes when switching between TCP and SMC") Link: https://lore.kernel.org/r/5eaf3858-e7fd-4db8-83e8-3d7a3e0e9ae2@linux.alibaba.com Signed-off-by: Wen Gu <guwen@linux.alibaba.com> Reviewed-by: Wenjia Zhang <wenjia@linux.ibm.com> Reviewed-by: Gerd Bayer <gbayer@linux.ibm.com>, too. Signed-off-by: David S. Miller <davem@davemloft.net>
2024-05-13net: change proto and proto_ops accept typeJens Axboe1-3/+3
Rather than pass in flags, error pointer, and whether this is a kernel invocation or not, add a struct proto_accept_arg struct as the argument. This then holds all of these arguments, and prepares accept for being able to pass back more information. No functional changes in this patch. Acked-by: Jakub Kicinski <kuba@kernel.org> Signed-off-by: Jens Axboe <axboe@kernel.dk>
2024-04-30net/smc: {at|de}tach sndbuf to peer DMB if supportedWen Gu1-0/+16
If the device used by SMC-D supports merging local sndbuf to peer DMB, then create sndbuf descriptor and attach it to peer DMB once peer token is obtained, and detach and free the sndbuf descriptor when the connection is freed. Signed-off-by: Wen Gu <guwen@linux.alibaba.com> Reviewed-by: Wenjia Zhang <wenjia@linux.ibm.com> Reviewed-and-tested-by: Jan Karcher <jaka@linux.ibm.com> Signed-off-by: Paolo Abeni <pabeni@redhat.com>
2024-04-30net/smc: introduce loopback-ism for SMC intra-OS shortcutWen Gu1-1/+11
This introduces a kind of Emulated-ISM device named loopback-ism for SMCv2.1. The loopback-ism device is currently exclusive for SMC usage, and aims to provide an SMC shortcut for sockets within the same kernel, leading to improved intra-OS traffic performance. Configuration of this feature is managed through the config SMC_LO. Signed-off-by: Wen Gu <guwen@linux.alibaba.com> Reviewed-by: Gerd Bayer <gbayer@linux.ibm.com> Reviewed-by: Wenjia Zhang <wenjia@linux.ibm.com> Reviewed-and-tested-by: Jan Karcher <jaka@linux.ibm.com> Signed-off-by: Paolo Abeni <pabeni@redhat.com>
2024-03-29net/smc: make smc_hash_sk/smc_unhash_sk staticZhengchao Shao1-4/+2
smc_hash_sk and smc_unhash_sk are only used in af_smc.c, so make them static and remove the output symbol. They can be called under the path .prot->hash()/unhash(). Signed-off-by: Zhengchao Shao <shaozhengchao@huawei.com> Reviewed-by: Wen Gu <guwen@linux.alibaba.com> Reviewed-by: Simon Horman <horms@kernel.org> Signed-off-by: David S. Miller <davem@davemloft.net>
2024-02-15Merge git://git.kernel.org/pub/scm/linux/kernel/git/netdev/netJakub Kicinski1-0/+1
Cross-merge networking fixes after downstream PR. No conflicts. Adjacent changes: net/core/dev.c 9f30831390ed ("net: add rcu safety to rtnl_prop_list_size()") 723de3ebef03 ("net: free altname using an RCU callback") net/unix/garbage.c 11498715f266 ("af_unix: Remove io_uring code for GC.") 25236c91b5ab ("af_unix: Fix task hung while purging oob_skb in GC.") drivers/net/ethernet/renesas/ravb_main.c ed4adc07207d ("net: ravb: Count packets instead of descriptors in GbEth RX path" ) c2da9408579d ("ravb: Add Rx checksum offload support for GbEth") net/mptcp/protocol.c bdd70eb68913 ("mptcp: drop the push_pending field") 28e5c1380506 ("mptcp: annotate lockless accesses around read-mostly fields") Signed-off-by: Jakub Kicinski <kuba@kernel.org>
2024-02-14net: smc: fix spurious error message from __sock_release()Dmitry Antipov1-0/+1
Commit 67f562e3e147 ("net/smc: transfer fasync_list in case of fallback") leaves the socket's fasync list pointer within a container socket as well. When the latter is destroyed, '__sock_release()' warns about its non-empty fasync list, which is a dangling pointer to previously freed fasync list of an underlying TCP socket. Fix this spurious warning by nullifying fasync list of a container socket. Fixes: 67f562e3e147 ("net/smc: transfer fasync_list in case of fallback") Signed-off-by: Dmitry Antipov <dmantipov@yandex.ru> Signed-off-by: David S. Miller <davem@davemloft.net>
2024-02-07net/smc: change the term virtual ISM to Emulated-ISMWen Gu1-11/+11
According to latest release of SMCv2.1[1], the term 'virtual ISM' has been changed to 'Emulated-ISM' to avoid the ambiguity of the word 'virtual' in different contexts. So the names or comments in the code need be modified accordingly. [1] https://www.ibm.com/support/pages/node/7112343 Signed-off-by: Wen Gu <guwen@linux.alibaba.com> Reviewed-by: Simon Horman <horms@kernel.org> Reviewed-by: Wenjia Zhang <wenjia@linux.ibm.com> Link: https://lore.kernel.org/r/20240205033317.127269-1-guwen@linux.alibaba.com Signed-off-by: Jakub Kicinski <kuba@kernel.org>
2023-12-26net/smc: compatible with 128-bits extended GID of virtual ISM deviceWen Gu1-13/+53
According to virtual ISM support feature defined by SMCv2.1, GIDs of virtual ISM device are UUIDs defined by RFC4122, which are 128-bits long. So some adaptation work is required. And note that the GIDs of existing platform firmware ISM devices still remain 64-bits long. Signed-off-by: Wen Gu <guwen@linux.alibaba.com> Reviewed-by: Alexandra Winter <wintera@linux.ibm.com> Signed-off-by: David S. Miller <davem@davemloft.net>
2023-12-26net/smc: unify the structs of accept or confirm message for v1 and v2Wen Gu1-35/+17
The structs of CLC accept and confirm messages for SMCv1 and SMCv2 are separately defined and often casted to each other in the code, which may increase the risk of errors caused by future divergence of them. So unify them into one struct for better maintainability. Suggested-by: Alexandra Winter <wintera@linux.ibm.com> Signed-off-by: Wen Gu <guwen@linux.alibaba.com> Reviewed-by: Alexandra Winter <wintera@linux.ibm.com> Signed-off-by: David S. Miller <davem@davemloft.net>
2023-12-07Merge git://git.kernel.org/pub/scm/linux/kernel/git/netdev/netJakub Kicinski1-2/+2
Cross-merge networking fixes after downstream PR. Conflicts: drivers/net/ethernet/stmicro/stmmac/dwmac5.c drivers/net/ethernet/stmicro/stmmac/dwmac5.h drivers/net/ethernet/stmicro/stmmac/dwxgmac2_core.c drivers/net/ethernet/stmicro/stmmac/hwif.h 37e4b8df27bc ("net: stmmac: fix FPE events losing") c3f3b97238f6 ("net: stmmac: Refactor EST implementation") https://lore.kernel.org/all/20231206110306.01e91114@canb.auug.org.au/ Adjacent changes: net/ipv4/tcp_ao.c 9396c4ee93f9 ("net/tcp: Don't store TCP-AO maclen on reqsk") 7b0f570f879a ("tcp: Move TCP-AO bits from cookie_v[46]_check() to tcp_ao_syncookie().") Signed-off-by: Jakub Kicinski <kuba@kernel.org>
2023-12-07net/smc: fix missing byte order conversion in CLC handshakeWen Gu1-2/+2
The byte order conversions of ISM GID and DMB token are missing in process of CLC accept and confirm. So fix it. Fixes: 3d9725a6a133 ("net/smc: common routine for CLC accept and confirm") Signed-off-by: Wen Gu <guwen@linux.alibaba.com> Reviewed-by: Tony Lu <tonylu@linux.alibaba.com> Reviewed-by: Alexandra Winter <wintera@linux.ibm.com> Reviewed-by: Wenjia Zhang <wenjia@linux.ibm.com> Link: https://lore.kernel.org/r/1701882157-87956-1-git-send-email-guwen@linux.alibaba.com Signed-off-by: Jakub Kicinski <kuba@kernel.org>
2023-11-24net/smc: add sysctl for max links per lgr for SMC-R v2.1Guangguan Wang1-1/+1
Add a new sysctl: net.smc.smcr_max_links_per_lgr, which is used to control the preferred max links per lgr for SMC-R v2.1. The default value of this sysctl is 2, and the acceptable value ranges from 1 to 2. Signed-off-by: Guangguan Wang <guangguan.wang@linux.alibaba.com> Reviewed-by: Dust Li <dust.li@linux.alibaba.com> Signed-off-by: David S. Miller <davem@davemloft.net>
2023-11-22net/smc: avoid data corruption caused by declineD. Wythe1-2/+6
We found a data corruption issue during testing of SMC-R on Redis applications. The benchmark has a low probability of reporting a strange error as shown below. "Error: Protocol error, got "\xe2" as reply type byte" Finally, we found that the retrieved error data was as follows: 0xE2 0xD4 0xC3 0xD9 0x04 0x00 0x2C 0x20 0xA6 0x56 0x00 0x16 0x3E 0x0C 0xCB 0x04 0x02 0x01 0x00 0x00 0x20 0x00 0x00 0x00 0x00 0x00 0x00 0x00 0x00 0x00 0x00 0x00 0x00 0x00 0x00 0x00 0x00 0x00 0xE2 It is quite obvious that this is a SMC DECLINE message, which means that the applications received SMC protocol message. We found that this was caused by the following situations: client server ¦ clc proposal -------------> ¦ clc accept <------------- ¦ clc confirm -------------> wait llc confirm send llc confirm ¦failed llc confirm ¦ x------ (after 2s)timeout wait llc confirm rsp wait decline (after 1s) timeout (after 2s) timeout ¦ decline --------------> ¦ decline <-------------- As a result, a decline message was sent in the implementation, and this message was read from TCP by the already-fallback connection. This patch double the client timeout as 2x of the server value, With this simple change, the Decline messages should never cross or collide (during Confirm link timeout). This issue requires an immediate solution, since the protocol updates involve a more long-term solution. Fixes: 0fb0b02bd6fd ("net/smc: adapt SMC client code to use the LLC flow") Signed-off-by: D. Wythe <alibuda@linux.alibaba.com> Reviewed-by: Wen Gu <guwen@linux.alibaba.com> Reviewed-by: Wenjia Zhang <wenjia@linux.ibm.com> Signed-off-by: David S. Miller <davem@davemloft.net>
2023-11-06net/smc: fix dangling sock under state SMC_APPFINCLOSEWAITD. Wythe1-2/+2
Considering scenario: smc_cdc_rx_handler __smc_release sock_set_flag smc_close_active() sock_set_flag __set_bit(DEAD) __set_bit(DONE) Dues to __set_bit is not atomic, the DEAD or DONE might be lost. if the DEAD flag lost, the state SMC_CLOSED will be never be reached in smc_close_passive_work: if (sock_flag(sk, SOCK_DEAD) && smc_close_sent_any_close(conn)) { sk->sk_state = SMC_CLOSED; } else { /* just shutdown, but not yet closed locally */ sk->sk_state = SMC_APPFINCLOSEWAIT; } Replace sock_set_flags or __set_bit to set_bit will fix this problem. Since set_bit is atomic. Fixes: b38d732477e4 ("smc: socket closing and linkgroup cleanup") Signed-off-by: D. Wythe <alibuda@linux.alibaba.com> Reviewed-by: Dust Li <dust.li@linux.alibaba.com> Signed-off-by: David S. Miller <davem@davemloft.net>
2023-10-19Merge git://git.kernel.org/pub/scm/linux/kernel/git/netdev/netJakub Kicinski1-2/+3
Cross-merge networking fixes after downstream PR. net/mac80211/key.c 02e0e426a2fb ("wifi: mac80211: fix error path key leak") 2a8b665e6bcc ("wifi: mac80211: remove key_mtx") 7d6904bf26b9 ("Merge wireless into wireless-next") https://lore.kernel.org/all/20231012113648.46eea5ec@canb.auug.org.au/ Adjacent changes: drivers/net/ethernet/ti/Kconfig a602ee3176a8 ("net: ethernet: ti: Fix mixed module-builtin object") 98bdeae9502b ("net: cpmac: remove driver to prepare for platform removal") Signed-off-by: Jakub Kicinski <kuba@kernel.org>
2023-10-16net/smc: return the right falback reason when prefix checks failDust Li1-1/+1
In the smc_listen_work(), if smc_listen_prfx_check() failed, the real reason: SMC_CLC_DECL_DIFFPREFIX was dropped, and SMC_CLC_DECL_NOSMCDEV was returned. Althrough this is also kind of SMC_CLC_DECL_NOSMCDEV, but return the real reason is much friendly for debugging. Fixes: e49300a6bf62 ("net/smc: add listen processing for SMC-Rv2") Signed-off-by: Dust Li <dust.li@linux.alibaba.com> Reviewed-by: Alexandra Winter <wintera@linux.ibm.com> Reviewed-by: Wenjia Zhang <wenjia@linux.ibm.com> Link: https://lore.kernel.org/r/20231012123729.29307-1-dust.li@linux.alibaba.com Signed-off-by: Jakub Kicinski <kuba@kernel.org>
2023-10-13net/smc: fix smc clc failed issue when netdevice not in init_netAlbert Huang1-1/+2
If the netdevice is within a container and communicates externally through network technologies such as VxLAN, we won't be able to find routing information in the init_net namespace. To address this issue, we need to add a struct net parameter to the smc_ib_find_route function. This allow us to locate the routing information within the corresponding net namespace, ensuring the correct completion of the SMC CLC interaction. Fixes: e5c4744cfb59 ("net/smc: add SMC-Rv2 connection establishment") Signed-off-by: Albert Huang <huangjie.albert@bytedance.com> Reviewed-by: Dust Li <dust.li@linux.alibaba.com> Reviewed-by: Wenjia Zhang <wenjia@linux.ibm.com> Link: https://lore.kernel.org/r/20231011074851.95280-1-huangjie.albert@bytedance.com Signed-off-by: Jakub Kicinski <kuba@kernel.org>
2023-10-01net: implement lockless SO_PRIORITYEric Dumazet1-1/+1
This is a followup of 8bf43be799d4 ("net: annotate data-races around sk->sk_priority"). sk->sk_priority can be read and written without holding the socket lock. Signed-off-by: Eric Dumazet <edumazet@google.com> Reviewed-by: Wenjia Zhang <wenjia@linux.ibm.com> Signed-off-by: David S. Miller <davem@davemloft.net>
2023-08-21net: annotate data-races around sk->sk_lingertimeEric Dumazet1-1/+1
sk_getsockopt() runs locklessly. This means sk->sk_lingertime can be read while other threads are changing its value. Other reads also happen without socket lock being held, and must be annotated. Remove preprocessor logic using BITS_PER_LONG, compilers are smart enough to figure this by themselves. v2: fixed a clang W=1 (-Wtautological-constant-out-of-range-compare) warning (Jakub) Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2") Signed-off-by: Eric Dumazet <edumazet@google.com> Signed-off-by: David S. Miller <davem@davemloft.net>
2023-08-19net/smc: support max links per lgr negotiation in clc handshakeGuangguan Wang1-17/+27
Support max links per lgr negotiation in clc handshake for SMCR v2.1, which is one of smc v2.1 features. Server makes decision for the final value of max links based on the client preferred max links and self-preferred max links. Here use the minimum value of the client preferred max links and server preferred max links. Client Server Proposal(max links(client preferred)) --------------------------------------> Accept(max links(accepted value)) accepted value=min(client preferred, server preferred) <------------------------------------- Confirm(max links(accepted value)) -------------------------------------> Signed-off-by: Guangguan Wang <guangguan.wang@linux.alibaba.com> Reviewed-by: Tony Lu <tonylu@linux.alibaba.com> Reviewed-by: Jan Karcher <jaka@linux.ibm.com> Signed-off-by: David S. Miller <davem@davemloft.net>
2023-08-19net/smc: support max connections per lgr negotiationGuangguan Wang1-0/+1
Support max connections per lgr negotiation for SMCR v2.1, which is one of smc v2.1 features. Server makes decision for the final value of max conns based on the client preferred max conns and self-preferred max conns. Here use the minimum value of client preferred max conns and server preferred max conns. Client Server Proposal(max conns(client preferred)) ------------------------------------> Accept(max conns(accepted value)) accepted value=min(client preferred, server preferred) <----------------------------------- Confirm(max conns(accepted value)) -----------------------------------> Signed-off-by: Guangguan Wang <guangguan.wang@linux.alibaba.com> Reviewed-by: Tony Lu <tonylu@linux.alibaba.com> Reviewed-by: Jan Karcher <jaka@linux.ibm.com> Signed-off-by: David S. Miller <davem@davemloft.net>
2023-08-19net/smc: support smc v2.x features validateGuangguan Wang1-0/+18
Support SMC v2.x features validate for SMC v2.1. This is the frame code for SMC v2.x features validate, and will take effects only when the negotiated release version is v2.1 or later. For Server, v2.x features' validation should be done in smc_clc_srv_ v2x_features_validate when receiving v2.1 or later CLC Proposal Message, such as max conns, max links negotiation, the decision of the final value of max conns and max links should be made in this function. And final check for server when receiving v2.1 or later CLC Confirm Message should be done in smc_clc_v2x_features_confirm_check. For client, v2.x features' validation should be done in smc_clc_clnt_ v2x_features_validate when receiving v2.1 or later CLC Accept Message, for example, the decision to accpt the accepted value or to decline should be made in this function. Signed-off-by: Guangguan Wang <guangguan.wang@linux.alibaba.com> Reviewed-by: Tony Lu <tonylu@linux.alibaba.com> Reviewed-by: Jan Karcher <jaka@linux.ibm.com> Signed-off-by: David S. Miller <davem@davemloft.net>
2023-08-19net/smc: add vendor unique experimental options area in clc handshakeGuangguan Wang1-1/+1
Add vendor unique experimental options area in clc handshake. In clc accept and confirm msg, vendor unique experimental options use the 16-Bytes reserved field, which defined in struct smc_clc_fce_gid_ext in previous version. Because of the struct smc_clc_first_contact_ext is widely used and limit the scope of modification, this patch moves the 16-Bytes reserved field out of struct smc_clc_fce_gid_ext, and followed with the struct smc_clc_first_contact_ext in a new struct names struct smc_clc_first_contact_ext_v2x. For SMC-R first connection, in previous version, the struct smc_clc_ first_contact_ext and the 16-Bytes reserved field has already been included in clc accept and confirm msg. Thus, this patch use struct smc_clc_first_contact_ext_v2x instead of the struct smc_clc_first_ contact_ext and the 16-Bytes reserved field in SMC-R clc accept and confirm msg is compatible with previous version. For SMC-D first connection, in previous version, only the struct smc_ clc_first_contact_ext is included in clc accept and confirm msg, and the 16-Bytes reserved field is not included. Thus, when the negotiated smc release version is the version before v2.1, we still use struct smc_clc_first_contact_ext for compatible consideration. If the negotiated smc release version is v2.1 or later, use struct smc_clc_first_contact_ ext_v2x instead. Signed-off-by: Guangguan Wang <guangguan.wang@linux.alibaba.com> Reviewed-by: Tony Lu <tonylu@linux.alibaba.com> Reviewed-by: Jan Karcher <jaka@linux.ibm.com> Signed-off-by: David S. Miller <davem@davemloft.net>
2023-08-19net/smc: support smc release version negotiation in clc handshakeGuangguan Wang1-4/+17
Support smc release version negotiation in clc handshake based on SMC v2, where no negotiation process for different releases, but for different versions. The latest smc release version was updated to v2.1. And currently there are two release versions of SMCv2, v2.0 and v2.1. In the release version negotiation, client sends the preferred release version by CLC Proposal Message, server makes decision for which release version to use based on the client preferred release version and self-supported release version (here choose the minimum release version of the client preferred and server latest supported), then the decision returns to client by CLC Accept Message. Client confirms the decision by CLC Confirm Message. Client Server Proposal(preferred release version) ------------------------------------> Accept(accpeted release version) min(client preferred, server latest supported) <------------------------------------ Confirm(accpeted release version) ------------------------------------> Signed-off-by: Guangguan Wang <guangguan.wang@linux.alibaba.com> Reviewed-by: Tony Lu <tonylu@linux.alibaba.com> Reviewed-by: Jan Karcher <jaka@linux.ibm.com> Signed-off-by: David S. Miller <davem@davemloft.net>
2023-08-09net/smc: Use correct buffer sizes when switching between TCP and SMCGerd Bayer1-22/+51
Tuning of the effective buffer size through setsockopts was working for SMC traffic only but not for TCP fall-back connections even before commit 0227f058aa29 ("net/smc: Unbind r/w buffer size from clcsock and make them tunable"). That change made it apparent that TCP fall-back connections would use net.smc.[rw]mem as buffer size instead of net.ipv4_tcp_[rw]mem. Amend the code that copies attributes between the (TCP) clcsock and the SMC socket and adjust buffer sizes appropriately: - Copy over sk_userlocks so that both sockets agree on whether tuning via setsockopt is active. - When falling back to TCP use sk_sndbuf or sk_rcvbuf as specified with setsockopt. Otherwise, use the sysctl value for TCP/IPv4. - Likewise, use either values from setsockopt or from sysctl for SMC (duplicated) on successful SMC connect. In smc_tcp_listen_work() drop the explicit copy of buffer sizes as that is taken care of by the attribute copy. Fixes: 0227f058aa29 ("net/smc: Unbind r/w buffer size from clcsock and make them tunable") Reviewed-by: Wenjia Zhang <wenjia@linux.ibm.com> Reviewed-by: Tony Lu <tonylu@linux.alibaba.com> Signed-off-by: Gerd Bayer <gbayer@linux.ibm.com> Signed-off-by: David S. Miller <davem@davemloft.net>
2023-08-09net/smc: Fix setsockopt and sysctl to specify same buffer size againGerd Bayer1-2/+2
Commit 0227f058aa29 ("net/smc: Unbind r/w buffer size from clcsock and make them tunable") introduced the net.smc.rmem and net.smc.wmem sysctls to specify the size of buffers to be used for SMC type connections. This created a regression for users that specified the buffer size via setsockopt() as the effective buffer size was now doubled. Re-introduce the division by 2 in the SMC buffer create code and level this out by duplicating the net.smc.[rw]mem values used for initializing sk_rcvbuf/sk_sndbuf at socket creation time. This gives users of both methods (setsockopt or sysctl) the effective buffer size that they expect. Initialize net.smc.[rw]mem from its own constant of 64kB, respectively. Internal performance tests show that this value is a good compromise between throughput/latency and memory consumption. Also, this decouples it from any tuning that was done to net.ipv4.tcp_[rw]mem[1] before the module for SMC protocol was loaded. Check that no more than INT_MAX / 2 is assigned to net.smc.[rw]mem, in order to avoid any overflow condition when that is doubled for use in sk_sndbuf or sk_rcvbuf. While at it, drop the confusing sk_buf_size variable from __smc_buf_create and name "compressed" buffer size variables more consistently. Background: Before the commit mentioned above, SMC's buffer allocator in __smc_buf_create() always used half of the sockets' sk_rcvbuf/sk_sndbuf value as initial value to search for appropriate buffers. If the search resorted to using a bigger buffer when all buffers of the specified size were busy, the duplicate of the used effective buffer size is stored back to sk_rcvbuf/sk_sndbuf. When available, buffers of exactly the size that a user had specified as input to setsockopt() were used, despite setsockopt()'s documentation in "man 7 socket" talking of a mandatory duplication: [...] SO_SNDBUF Sets or gets the maximum socket send buffer in bytes. The kernel doubles this value (to allow space for book‐ keeping overhead) when it is set using setsockopt(2), and this doubled value is returned by getsockopt(2). The default value is set by the /proc/sys/net/core/wmem_default file and the maximum allowed value is set by the /proc/sys/net/core/wmem_max file. The minimum (doubled) value for this option is 2048. [...] Fixes: 0227f058aa29 ("net/smc: Unbind r/w buffer size from clcsock and make them tunable") Co-developed-by: Jan Karcher <jaka@linux.ibm.com> Signed-off-by: Jan Karcher <jaka@linux.ibm.com> Reviewed-by: Wenjia Zhang <wenjia@linux.ibm.com> Reviewed-by: Tony Lu <tonylu@linux.alibaba.com> Signed-off-by: Gerd Bayer <gbayer@linux.ibm.com> Signed-off-by: David S. Miller <davem@davemloft.net>
2023-07-29net: annotate data-races around sk->sk_markEric Dumazet1-1/+1
sk->sk_mark is often read while another thread could change the value. Fixes: 4a19ec5800fc ("[NET]: Introducing socket mark socket option.") Signed-off-by: Eric Dumazet <edumazet@google.com> Signed-off-by: David S. Miller <davem@davemloft.net>
2023-06-24smc: Drop smc_sendpage() in favour of smc_sendmsg() + MSG_SPLICE_PAGESDavid Howells1-29/+0
Drop the smc_sendpage() code as smc_sendmsg() just passes the call down to the underlying TCP socket and smc_tx_sendpage() is just a wrapper around its sendmsg implementation. Signed-off-by: David Howells <dhowells@redhat.com> cc: Karsten Graul <kgraul@linux.ibm.com> cc: Wenjia Zhang <wenjia@linux.ibm.com> cc: Jan Karcher <jaka@linux.ibm.com> cc: "D. Wythe" <alibuda@linux.alibaba.com> cc: Tony Lu <tonylu@linux.alibaba.com> cc: Wen Gu <guwen@linux.alibaba.com> cc: Jens Axboe <axboe@kernel.dk> cc: Matthew Wilcox <willy@infradead.org> Link: https://lore.kernel.org/r/20230623225513.2732256-10-dhowells@redhat.com Signed-off-by: Jakub Kicinski <kuba@kernel.org>
2023-05-19net/smc: Reset connection when trying to use SMCRv2 fails.Wen Gu1-2/+7
We found a crash when using SMCRv2 with 2 Mellanox ConnectX-4. It can be reproduced by: - smc_run nginx - smc_run wrk -t 32 -c 500 -d 30 http://<ip>:<port> BUG: kernel NULL pointer dereference, address: 0000000000000014 #PF: supervisor read access in kernel mode #PF: error_code(0x0000) - not-present page PGD 8000000108713067 P4D 8000000108713067 PUD 151127067 PMD 0 Oops: 0000 [#1] PREEMPT SMP PTI CPU: 4 PID: 2441 Comm: kworker/4:249 Kdump: loaded Tainted: G W E 6.4.0-rc1+ #42 Workqueue: smc_hs_wq smc_listen_work [smc] RIP: 0010:smc_clc_send_confirm_accept+0x284/0x580 [smc] RSP: 0018:ffffb8294b2d7c78 EFLAGS: 00010a06 RAX: ffff8f1873238880 RBX: ffffb8294b2d7dc8 RCX: 0000000000000000 RDX: 00000000000000b4 RSI: 0000000000000001 RDI: 0000000000b40c00 RBP: ffffb8294b2d7db8 R08: ffff8f1815c5860c R09: 0000000000000000 R10: 0000000000000400 R11: 0000000000000000 R12: ffff8f1846f56180 R13: ffff8f1815c5860c R14: 0000000000000001 R15: 0000000000000001 FS: 0000000000000000(0000) GS:ffff8f1aefd00000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 0000000000000014 CR3: 00000001027a0001 CR4: 00000000003706e0 DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 Call Trace: <TASK> ? mlx5_ib_map_mr_sg+0xa1/0xd0 [mlx5_ib] ? smcr_buf_map_link+0x24b/0x290 [smc] ? __smc_buf_create+0x4ee/0x9b0 [smc] smc_clc_send_accept+0x4c/0xb0 [smc] smc_listen_work+0x346/0x650 [smc] ? __schedule+0x279/0x820 process_one_work+0x1e5/0x3f0 worker_thread+0x4d/0x2f0 ? __pfx_worker_thread+0x10/0x10 kthread+0xe5/0x120 ? __pfx_kthread+0x10/0x10 ret_from_fork+0x2c/0x50 </TASK> During the CLC handshake, server sequentially tries available SMCRv2 and SMCRv1 devices in smc_listen_work(). If an SMCRv2 device is found. SMCv2 based link group and link will be assigned to the connection. Then assumed that some buffer assignment errors happen later in the CLC handshake, such as RMB registration failure, server will give up SMCRv2 and try SMCRv1 device instead. But the resources assigned to the connection won't be reset. When server tries SMCRv1 device, the connection creation process will be executed again. Since conn->lnk has been assigned when trying SMCRv2, it will not be set to the correct SMCRv1 link in smcr_lgr_conn_assign_link(). So in such situation, conn->lgr points to correct SMCRv1 link group but conn->lnk points to the SMCRv2 link mistakenly. Then in smc_clc_send_confirm_accept(), conn->rmb_desc->mr[link->link_idx] will be accessed. Since the link->link_idx is not correct, the related MR may not have been initialized, so crash happens. | Try SMCRv2 device first | |-> conn->lgr: assign existed SMCRv2 link group; | |-> conn->link: assign existed SMCRv2 link (link_idx may be 1 in SMC_LGR_SYMMETRIC); | |-> sndbuf & RMB creation fails, quit; | | Try SMCRv1 device then | |-> conn->lgr: create SMCRv1 link group and assign; | |-> conn->link: keep SMCRv2 link mistakenly; | |-> sndbuf & RMB creation succeed, only RMB->mr[link_idx = 0] | initialized. | | Then smc_clc_send_confirm_accept() accesses | conn->rmb_desc->mr[conn->link->link_idx, which is 1], then crash. v This patch tries to fix this by cleaning conn->lnk before assigning link. In addition, it is better to reset the connection and clean the resources assigned if trying SMCRv2 failed in buffer creation or registration. Fixes: e49300a6bf62 ("net/smc: add listen processing for SMC-Rv2") Link: https://lore.kernel.org/r/20220523055056.2078994-1-liuyacan@corp.netease.com/ Signed-off-by: Wen Gu <guwen@linux.alibaba.com> Reviewed-by: Tony Lu <tonylu@linux.alibaba.com> Signed-off-by: David S. Miller <davem@davemloft.net>
2023-04-12smc: Fix use-after-free in tcp_write_timer_handler().Kuniyuki Iwashima1-0/+11
With Eric's ref tracker, syzbot finally found a repro for use-after-free in tcp_write_timer_handler() by kernel TCP sockets. [0] If SMC creates a kernel socket in __smc_create(), the kernel socket is supposed to be freed in smc_clcsock_release() by calling sock_release() when we close() the parent SMC socket. However, at the end of smc_clcsock_release(), the kernel socket's sk_state might not be TCP_CLOSE. This means that we have not called inet_csk_destroy_sock() in __tcp_close() and have not stopped the TCP timers. The kernel socket's TCP timers can be fired later, so we need to hold a refcnt for net as we do for MPTCP subflows in mptcp_subflow_create_socket(). [0]: leaked reference. sk_alloc (./include/net/net_namespace.h:335 net/core/sock.c:2108) inet_create (net/ipv4/af_inet.c:319 net/ipv4/af_inet.c:244) __sock_create (net/socket.c:1546) smc_create (net/smc/af_smc.c:3269 net/smc/af_smc.c:3284) __sock_create (net/socket.c:1546) __sys_socket (net/socket.c:1634 net/socket.c:1618 net/socket.c:1661) __x64_sys_socket (net/socket.c:1672) do_syscall_64 (arch/x86/entry/common.c:50 arch/x86/entry/common.c:80) entry_SYSCALL_64_after_hwframe (arch/x86/entry/entry_64.S:120) ================================================================== BUG: KASAN: slab-use-after-free in tcp_write_timer_handler (net/ipv4/tcp_timer.c:378 net/ipv4/tcp_timer.c:624 net/ipv4/tcp_timer.c:594) Read of size 1 at addr ffff888052b65e0d by task syzrepro/18091 CPU: 0 PID: 18091 Comm: syzrepro Tainted: G W 6.3.0-rc4-01174-gb5d54eb5899a #7 Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.16.0-1.amzn2022.0.1 04/01/2014 Call Trace: <IRQ> dump_stack_lvl (lib/dump_stack.c:107) print_report (mm/kasan/report.c:320 mm/kasan/report.c:430) kasan_report (mm/kasan/report.c:538) tcp_write_timer_handler (net/ipv4/tcp_timer.c:378 net/ipv4/tcp_timer.c:624 net/ipv4/tcp_timer.c:594) tcp_write_timer (./include/linux/spinlock.h:390 net/ipv4/tcp_timer.c:643) call_timer_fn (./arch/x86/include/asm/jump_label.h:27 ./include/linux/jump_label.h:207 ./include/trace/events/timer.h:127 kernel/time/timer.c:1701) __run_timers.part.0 (kernel/time/timer.c:1752 kernel/time/timer.c:2022) run_timer_softirq (kernel/time/timer.c:2037) __do_softirq (./arch/x86/include/asm/jump_label.h:27 ./include/linux/jump_label.h:207 ./include/trace/events/irq.h:142 kernel/softirq.c:572) __irq_exit_rcu (kernel/softirq.c:445 kernel/softirq.c:650) irq_exit_rcu (kernel/softirq.c:664) sysvec_apic_timer_interrupt (arch/x86/kernel/apic/apic.c:1107 (discriminator 14)) </IRQ> Fixes: ac7138746e14 ("smc: establish new socket family") Reported-by: syzbot+7e1e1bdb852961150198@syzkaller.appspotmail.com Link: https://lore.kernel.org/netdev/000000000000a3f51805f8bcc43a@google.com/ Signed-off-by: Kuniyuki Iwashima <kuniyu@amazon.com> Reviewed-by: Tony Lu <tonylu@linux.alibaba.com> Signed-off-by: David S. Miller <davem@davemloft.net>