From db10538a4b997a77a1fd561adaaa58afc7dcfa2f Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Thu, 28 May 2020 07:12:18 +0200 Subject: tcp: add tcp_sock_set_cork Add a helper to directly set the TCP_CORK sockopt from kernel space without going through a fake uaccess. Cleanup the callers to avoid pointless wrappers now that this is a simple function call. Signed-off-by: Christoph Hellwig Signed-off-by: David S. Miller --- include/linux/tcp.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/tcp.h b/include/linux/tcp.h index bf44e85d709d..889eeb2256c2 100644 --- a/include/linux/tcp.h +++ b/include/linux/tcp.h @@ -497,4 +497,6 @@ static inline u16 tcp_mss_clamp(const struct tcp_sock *tp, u16 mss) int tcp_skb_shift(struct sk_buff *to, struct sk_buff *from, int pcount, int shiftlen); +void tcp_sock_set_cork(struct sock *sk, bool on); + #endif /* _LINUX_TCP_H */ -- cgit v1.3-6-gb490 From 12abc5ee7873a085cc280240822b8ac53c86fecd Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Thu, 28 May 2020 07:12:19 +0200 Subject: tcp: add tcp_sock_set_nodelay Add a helper to directly set the TCP_NODELAY sockopt from kernel space without going through a fake uaccess. Cleanup the callers to avoid pointless wrappers now that this is a simple function call. Signed-off-by: Christoph Hellwig Acked-by: Sagi Grimberg Acked-by: Jason Gunthorpe Signed-off-by: David S. Miller --- drivers/block/drbd/drbd_int.h | 7 ------ drivers/block/drbd/drbd_main.c | 2 +- drivers/block/drbd/drbd_receiver.c | 4 ++-- drivers/infiniband/sw/siw/siw_cm.c | 24 ++++--------------- drivers/nvme/host/tcp.c | 9 +------ drivers/nvme/target/tcp.c | 12 ++-------- drivers/target/iscsi/iscsi_target_login.c | 15 +++--------- fs/cifs/connect.c | 10 ++------ fs/dlm/lowcomms.c | 8 ++----- fs/ocfs2/cluster/tcp.c | 20 ++-------------- include/linux/tcp.h | 1 + net/ceph/messenger.c | 11 ++------- net/ipv4/tcp.c | 39 ++++++++++++++++++++----------- net/rds/tcp.c | 11 +-------- net/rds/tcp.h | 1 - net/rds/tcp_listen.c | 2 +- 16 files changed, 49 insertions(+), 127 deletions(-) (limited to 'include/linux') diff --git a/drivers/block/drbd/drbd_int.h b/drivers/block/drbd/drbd_int.h index 3550adc93c68..e24bba87c8e0 100644 --- a/drivers/block/drbd/drbd_int.h +++ b/drivers/block/drbd/drbd_int.h @@ -1570,13 +1570,6 @@ extern void drbd_set_recv_tcq(struct drbd_device *device, int tcq_enabled); extern void _drbd_clear_done_ee(struct drbd_device *device, struct list_head *to_be_freed); extern int drbd_connected(struct drbd_peer_device *); -static inline void drbd_tcp_nodelay(struct socket *sock) -{ - int val = 1; - (void) kernel_setsockopt(sock, SOL_TCP, TCP_NODELAY, - (char*)&val, sizeof(val)); -} - static inline void drbd_tcp_quickack(struct socket *sock) { int val = 2; diff --git a/drivers/block/drbd/drbd_main.c b/drivers/block/drbd/drbd_main.c index c094c3c2c5d4..45fbd526c453 100644 --- a/drivers/block/drbd/drbd_main.c +++ b/drivers/block/drbd/drbd_main.c @@ -660,7 +660,7 @@ static int __send_command(struct drbd_connection *connection, int vnr, /* DRBD protocol "pings" are latency critical. * This is supposed to trigger tcp_push_pending_frames() */ if (!err && (cmd == P_PING || cmd == P_PING_ACK)) - drbd_tcp_nodelay(sock->socket); + tcp_sock_set_nodelay(sock->socket->sk); return err; } diff --git a/drivers/block/drbd/drbd_receiver.c b/drivers/block/drbd/drbd_receiver.c index 55ea907ad33c..20a5e94494ac 100644 --- a/drivers/block/drbd/drbd_receiver.c +++ b/drivers/block/drbd/drbd_receiver.c @@ -1051,8 +1051,8 @@ randomize: /* we don't want delays. * we use TCP_CORK where appropriate, though */ - drbd_tcp_nodelay(sock.socket); - drbd_tcp_nodelay(msock.socket); + tcp_sock_set_nodelay(sock.socket->sk); + tcp_sock_set_nodelay(msock.socket->sk); connection->data.socket = sock.socket; connection->meta.socket = msock.socket; diff --git a/drivers/infiniband/sw/siw/siw_cm.c b/drivers/infiniband/sw/siw/siw_cm.c index d1860f3e8740..1662216be66d 100644 --- a/drivers/infiniband/sw/siw/siw_cm.c +++ b/drivers/infiniband/sw/siw/siw_cm.c @@ -947,16 +947,8 @@ static void siw_accept_newconn(struct siw_cep *cep) siw_cep_get(new_cep); new_s->sk->sk_user_data = new_cep; - if (siw_tcp_nagle == false) { - int val = 1; - - rv = kernel_setsockopt(new_s, SOL_TCP, TCP_NODELAY, - (char *)&val, sizeof(val)); - if (rv) { - siw_dbg_cep(cep, "setsockopt NODELAY error: %d\n", rv); - goto error; - } - } + if (siw_tcp_nagle == false) + tcp_sock_set_nodelay(new_s->sk); new_cep->state = SIW_EPSTATE_AWAIT_MPAREQ; rv = siw_cm_queue_work(new_cep, SIW_CM_WORK_MPATIMEOUT); @@ -1386,16 +1378,8 @@ int siw_connect(struct iw_cm_id *id, struct iw_cm_conn_param *params) siw_dbg_qp(qp, "kernel_bindconnect: error %d\n", rv); goto error; } - if (siw_tcp_nagle == false) { - int val = 1; - - rv = kernel_setsockopt(s, SOL_TCP, TCP_NODELAY, (char *)&val, - sizeof(val)); - if (rv) { - siw_dbg_qp(qp, "setsockopt NODELAY error: %d\n", rv); - goto error; - } - } + if (siw_tcp_nagle == false) + tcp_sock_set_nodelay(s->sk); cep = siw_cep_alloc(sdev); if (!cep) { rv = -ENOMEM; diff --git a/drivers/nvme/host/tcp.c b/drivers/nvme/host/tcp.c index a307972d33a0..4e4a750ecdb9 100644 --- a/drivers/nvme/host/tcp.c +++ b/drivers/nvme/host/tcp.c @@ -1346,14 +1346,7 @@ static int nvme_tcp_alloc_queue(struct nvme_ctrl *nctrl, } /* Set TCP no delay */ - opt = 1; - ret = kernel_setsockopt(queue->sock, IPPROTO_TCP, - TCP_NODELAY, (char *)&opt, sizeof(opt)); - if (ret) { - dev_err(nctrl->device, - "failed to set TCP_NODELAY sock opt %d\n", ret); - goto err_sock; - } + tcp_sock_set_nodelay(queue->sock->sk); /* * Cleanup whatever is sitting in the TCP transmit queue on socket diff --git a/drivers/nvme/target/tcp.c b/drivers/nvme/target/tcp.c index f3088156d01d..55bc4c3c0a74 100644 --- a/drivers/nvme/target/tcp.c +++ b/drivers/nvme/target/tcp.c @@ -1580,7 +1580,7 @@ static int nvmet_tcp_add_port(struct nvmet_port *nport) { struct nvmet_tcp_port *port; __kernel_sa_family_t af; - int opt, ret; + int ret; port = kzalloc(sizeof(*port), GFP_KERNEL); if (!port) @@ -1625,15 +1625,7 @@ static int nvmet_tcp_add_port(struct nvmet_port *nport) port->data_ready = port->sock->sk->sk_data_ready; port->sock->sk->sk_data_ready = nvmet_tcp_listen_data_ready; sock_set_reuseaddr(port->sock->sk); - - opt = 1; - ret = kernel_setsockopt(port->sock, IPPROTO_TCP, - TCP_NODELAY, (char *)&opt, sizeof(opt)); - if (ret) { - pr_err("failed to set TCP_NODELAY sock opt %d\n", ret); - goto err_sock; - } - + tcp_sock_set_nodelay(port->sock->sk); if (so_priority > 0) sock_set_priority(port->sock->sk, so_priority); diff --git a/drivers/target/iscsi/iscsi_target_login.c b/drivers/target/iscsi/iscsi_target_login.c index 91acb3f07b4c..b561b07a869a 100644 --- a/drivers/target/iscsi/iscsi_target_login.c +++ b/drivers/target/iscsi/iscsi_target_login.c @@ -897,20 +897,11 @@ int iscsit_setup_np( /* * Set SO_REUSEADDR, and disable Nagel Algorithm with TCP_NODELAY. */ - /* FIXME: Someone please explain why this is endian-safe */ - opt = 1; - if (np->np_network_transport == ISCSI_TCP) { - ret = kernel_setsockopt(sock, IPPROTO_TCP, TCP_NODELAY, - (char *)&opt, sizeof(opt)); - if (ret < 0) { - pr_err("kernel_setsockopt() for TCP_NODELAY" - " failed: %d\n", ret); - goto fail; - } - } - + if (np->np_network_transport == ISCSI_TCP) + tcp_sock_set_nodelay(sock->sk); sock_set_reuseaddr(sock->sk); + opt = 1; ret = kernel_setsockopt(sock, IPPROTO_IP, IP_FREEBIND, (char *)&opt, sizeof(opt)); if (ret < 0) { diff --git a/fs/cifs/connect.c b/fs/cifs/connect.c index 28268ed461b8..ad8fb53b3682 100644 --- a/fs/cifs/connect.c +++ b/fs/cifs/connect.c @@ -3929,14 +3929,8 @@ generic_ip_connect(struct TCP_Server_Info *server) socket->sk->sk_rcvbuf = 140 * 1024; } - if (server->tcp_nodelay) { - int val = 1; - rc = kernel_setsockopt(socket, SOL_TCP, TCP_NODELAY, - (char *)&val, sizeof(val)); - if (rc) - cifs_dbg(FYI, "set TCP_NODELAY socket option error %d\n", - rc); - } + if (server->tcp_nodelay) + tcp_sock_set_nodelay(socket->sk); cifs_dbg(FYI, "sndbuf %d rcvbuf %d rcvtimeo 0x%lx\n", socket->sk->sk_sndbuf, diff --git a/fs/dlm/lowcomms.c b/fs/dlm/lowcomms.c index 2822a430a2b4..69333728d871 100644 --- a/fs/dlm/lowcomms.c +++ b/fs/dlm/lowcomms.c @@ -1011,7 +1011,6 @@ static void tcp_connect_to_sock(struct connection *con) struct sockaddr_storage saddr, src_addr; int addr_len; struct socket *sock = NULL; - int one = 1; int result; if (con->nodeid == 0) { @@ -1060,8 +1059,7 @@ static void tcp_connect_to_sock(struct connection *con) log_print("connecting to %d", con->nodeid); /* Turn off Nagle's algorithm */ - kernel_setsockopt(sock, SOL_TCP, TCP_NODELAY, (char *)&one, - sizeof(one)); + tcp_sock_set_nodelay(sock->sk); result = sock->ops->connect(sock, (struct sockaddr *)&saddr, addr_len, O_NONBLOCK); @@ -1103,7 +1101,6 @@ static struct socket *tcp_create_listen_sock(struct connection *con, { struct socket *sock = NULL; int result = 0; - int one = 1; int addr_len; if (dlm_local_addr[0]->ss_family == AF_INET) @@ -1120,8 +1117,7 @@ static struct socket *tcp_create_listen_sock(struct connection *con, } /* Turn off Nagle's algorithm */ - kernel_setsockopt(sock, SOL_TCP, TCP_NODELAY, (char *)&one, - sizeof(one)); + tcp_sock_set_nodelay(sock->sk); sock_set_reuseaddr(sock->sk); diff --git a/fs/ocfs2/cluster/tcp.c b/fs/ocfs2/cluster/tcp.c index 2c512b40a940..4c70fe9d19ab 100644 --- a/fs/ocfs2/cluster/tcp.c +++ b/fs/ocfs2/cluster/tcp.c @@ -1441,14 +1441,6 @@ static void o2net_rx_until_empty(struct work_struct *work) sc_put(sc); } -static int o2net_set_nodelay(struct socket *sock) -{ - int val = 1; - - return kernel_setsockopt(sock, SOL_TCP, TCP_NODELAY, - (void *)&val, sizeof(val)); -} - static int o2net_set_usertimeout(struct socket *sock) { int user_timeout = O2NET_TCP_USER_TIMEOUT; @@ -1636,11 +1628,7 @@ static void o2net_start_connect(struct work_struct *work) goto out; } - ret = o2net_set_nodelay(sc->sc_sock); - if (ret) { - mlog(ML_ERROR, "setting TCP_NODELAY failed with %d\n", ret); - goto out; - } + tcp_sock_set_nodelay(sc->sc_sock->sk); ret = o2net_set_usertimeout(sock); if (ret) { @@ -1832,11 +1820,7 @@ static int o2net_accept_one(struct socket *sock, int *more) *more = 1; new_sock->sk->sk_allocation = GFP_ATOMIC; - ret = o2net_set_nodelay(new_sock); - if (ret) { - mlog(ML_ERROR, "setting TCP_NODELAY failed with %d\n", ret); - goto out; - } + tcp_sock_set_nodelay(new_sock->sk); ret = o2net_set_usertimeout(new_sock); if (ret) { diff --git a/include/linux/tcp.h b/include/linux/tcp.h index 889eeb2256c2..9e42c7fe50a8 100644 --- a/include/linux/tcp.h +++ b/include/linux/tcp.h @@ -498,5 +498,6 @@ int tcp_skb_shift(struct sk_buff *to, struct sk_buff *from, int pcount, int shiftlen); void tcp_sock_set_cork(struct sock *sk, bool on); +void tcp_sock_set_nodelay(struct sock *sk); #endif /* _LINUX_TCP_H */ diff --git a/net/ceph/messenger.c b/net/ceph/messenger.c index f8ca5edc5f2c..27d6ab11f9ee 100644 --- a/net/ceph/messenger.c +++ b/net/ceph/messenger.c @@ -490,15 +490,8 @@ static int ceph_tcp_connect(struct ceph_connection *con) return ret; } - if (ceph_test_opt(from_msgr(con->msgr), TCP_NODELAY)) { - int optval = 1; - - ret = kernel_setsockopt(sock, SOL_TCP, TCP_NODELAY, - (char *)&optval, sizeof(optval)); - if (ret) - pr_err("kernel_setsockopt(TCP_NODELAY) failed: %d", - ret); - } + if (ceph_test_opt(from_msgr(con->msgr), TCP_NODELAY)) + tcp_sock_set_nodelay(sock->sk); con->sock = sock; return 0; diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index e6cf702e16d6..a65f293a19fa 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -2832,6 +2832,30 @@ void tcp_sock_set_cork(struct sock *sk, bool on) } EXPORT_SYMBOL(tcp_sock_set_cork); +/* TCP_NODELAY is weaker than TCP_CORK, so that this option on corked socket is + * remembered, but it is not activated until cork is cleared. + * + * However, when TCP_NODELAY is set we make an explicit push, which overrides + * even TCP_CORK for currently queued segments. + */ +static void __tcp_sock_set_nodelay(struct sock *sk, bool on) +{ + if (on) { + tcp_sk(sk)->nonagle |= TCP_NAGLE_OFF|TCP_NAGLE_PUSH; + tcp_push_pending_frames(sk); + } else { + tcp_sk(sk)->nonagle &= ~TCP_NAGLE_OFF; + } +} + +void tcp_sock_set_nodelay(struct sock *sk) +{ + lock_sock(sk); + __tcp_sock_set_nodelay(sk, true); + release_sock(sk); +} +EXPORT_SYMBOL(tcp_sock_set_nodelay); + /* * Socket option code for TCP. */ @@ -2929,20 +2953,7 @@ static int do_tcp_setsockopt(struct sock *sk, int level, break; case TCP_NODELAY: - if (val) { - /* TCP_NODELAY is weaker than TCP_CORK, so that - * this option on corked socket is remembered, but - * it is not activated until cork is cleared. - * - * However, when TCP_NODELAY is set we make - * an explicit push, which overrides even TCP_CORK - * for currently queued segments. - */ - tp->nonagle |= TCP_NAGLE_OFF|TCP_NAGLE_PUSH; - tcp_push_pending_frames(sk); - } else { - tp->nonagle &= ~TCP_NAGLE_OFF; - } + __tcp_sock_set_nodelay(sk, val); break; case TCP_THIN_LINEAR_TIMEOUTS: diff --git a/net/rds/tcp.c b/net/rds/tcp.c index 46782fac4c16..43db0eca911f 100644 --- a/net/rds/tcp.c +++ b/net/rds/tcp.c @@ -89,15 +89,6 @@ static struct ctl_table rds_tcp_sysctl_table[] = { { } }; -/* doing it this way avoids calling tcp_sk() */ -void rds_tcp_nonagle(struct socket *sock) -{ - int val = 1; - - kernel_setsockopt(sock, SOL_TCP, TCP_NODELAY, (void *)&val, - sizeof(val)); -} - u32 rds_tcp_write_seq(struct rds_tcp_connection *tc) { /* seq# of the last byte of data in tcp send buffer */ @@ -502,7 +493,7 @@ void rds_tcp_tune(struct socket *sock) struct net *net = sock_net(sk); struct rds_tcp_net *rtn = net_generic(net, rds_tcp_netid); - rds_tcp_nonagle(sock); + tcp_sock_set_nodelay(sock->sk); lock_sock(sk); if (rtn->sndbuf_size > 0) { sk->sk_sndbuf = rtn->sndbuf_size; diff --git a/net/rds/tcp.h b/net/rds/tcp.h index d640e210b97b..f6d75d8cb167 100644 --- a/net/rds/tcp.h +++ b/net/rds/tcp.h @@ -50,7 +50,6 @@ struct rds_tcp_statistics { /* tcp.c */ void rds_tcp_tune(struct socket *sock); -void rds_tcp_nonagle(struct socket *sock); void rds_tcp_set_callbacks(struct socket *sock, struct rds_conn_path *cp); void rds_tcp_reset_callbacks(struct socket *sock, struct rds_conn_path *cp); void rds_tcp_restore_callbacks(struct socket *sock, diff --git a/net/rds/tcp_listen.c b/net/rds/tcp_listen.c index d8bd13276959..6f90ea077adc 100644 --- a/net/rds/tcp_listen.c +++ b/net/rds/tcp_listen.c @@ -288,7 +288,7 @@ struct socket *rds_tcp_listen_init(struct net *net, bool isv6) } sock->sk->sk_reuse = SK_CAN_REUSE; - rds_tcp_nonagle(sock); + tcp_sock_set_nodelay(sock->sk); write_lock_bh(&sock->sk->sk_callback_lock); sock->sk->sk_user_data = sock->sk->sk_data_ready; -- cgit v1.3-6-gb490 From ddd061b8daed3ce0c01109a69c9a2a9f9669f01a Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Thu, 28 May 2020 07:12:20 +0200 Subject: tcp: add tcp_sock_set_quickack Add a helper to directly set the TCP_QUICKACK sockopt from kernel space without going through a fake uaccess. Cleanup the callers to avoid pointless wrappers now that this is a simple function call. Signed-off-by: Christoph Hellwig Signed-off-by: David S. Miller --- drivers/block/drbd/drbd_int.h | 7 ------- drivers/block/drbd/drbd_receiver.c | 5 ++--- include/linux/tcp.h | 1 + net/ipv4/tcp.c | 39 +++++++++++++++++++++++++------------- 4 files changed, 29 insertions(+), 23 deletions(-) (limited to 'include/linux') diff --git a/drivers/block/drbd/drbd_int.h b/drivers/block/drbd/drbd_int.h index e24bba87c8e0..14345a87c7cc 100644 --- a/drivers/block/drbd/drbd_int.h +++ b/drivers/block/drbd/drbd_int.h @@ -1570,13 +1570,6 @@ extern void drbd_set_recv_tcq(struct drbd_device *device, int tcq_enabled); extern void _drbd_clear_done_ee(struct drbd_device *device, struct list_head *to_be_freed); extern int drbd_connected(struct drbd_peer_device *); -static inline void drbd_tcp_quickack(struct socket *sock) -{ - int val = 2; - (void) kernel_setsockopt(sock, SOL_TCP, TCP_QUICKACK, - (char*)&val, sizeof(val)); -} - /* sets the number of 512 byte sectors of our virtual device */ void drbd_set_my_capacity(struct drbd_device *device, sector_t size); diff --git a/drivers/block/drbd/drbd_receiver.c b/drivers/block/drbd/drbd_receiver.c index 20a5e94494ac..3a3f2b6a821f 100644 --- a/drivers/block/drbd/drbd_receiver.c +++ b/drivers/block/drbd/drbd_receiver.c @@ -1223,7 +1223,7 @@ static int drbd_recv_header_maybe_unplug(struct drbd_connection *connection, str * quickly as possible, and let remote TCP know what we have * received so far. */ if (err == -EAGAIN) { - drbd_tcp_quickack(connection->data.socket); + tcp_sock_set_quickack(connection->data.socket->sk, 2); drbd_unplug_all_devices(connection); } if (err > 0) { @@ -4959,8 +4959,7 @@ static int receive_UnplugRemote(struct drbd_connection *connection, struct packe { /* Make sure we've acked all the TCP data associated * with the data requests being unplugged */ - drbd_tcp_quickack(connection->data.socket); - + tcp_sock_set_quickack(connection->data.socket->sk, 2); return 0; } diff --git a/include/linux/tcp.h b/include/linux/tcp.h index 9e42c7fe50a8..2eaf8320b9db 100644 --- a/include/linux/tcp.h +++ b/include/linux/tcp.h @@ -499,5 +499,6 @@ int tcp_skb_shift(struct sk_buff *to, struct sk_buff *from, int pcount, void tcp_sock_set_cork(struct sock *sk, bool on); void tcp_sock_set_nodelay(struct sock *sk); +void tcp_sock_set_quickack(struct sock *sk, int val); #endif /* _LINUX_TCP_H */ diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index a65f293a19fa..27b5e7a4e2ef 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -2856,6 +2856,31 @@ void tcp_sock_set_nodelay(struct sock *sk) } EXPORT_SYMBOL(tcp_sock_set_nodelay); +static void __tcp_sock_set_quickack(struct sock *sk, int val) +{ + if (!val) { + inet_csk_enter_pingpong_mode(sk); + return; + } + + inet_csk_exit_pingpong_mode(sk); + if ((1 << sk->sk_state) & (TCPF_ESTABLISHED | TCPF_CLOSE_WAIT) && + inet_csk_ack_scheduled(sk)) { + inet_csk(sk)->icsk_ack.pending |= ICSK_ACK_PUSHED; + tcp_cleanup_rbuf(sk, 1); + if (!(val & 1)) + inet_csk_enter_pingpong_mode(sk); + } +} + +void tcp_sock_set_quickack(struct sock *sk, int val) +{ + lock_sock(sk); + __tcp_sock_set_quickack(sk, val); + release_sock(sk); +} +EXPORT_SYMBOL(tcp_sock_set_quickack); + /* * Socket option code for TCP. */ @@ -3096,19 +3121,7 @@ static int do_tcp_setsockopt(struct sock *sk, int level, break; case TCP_QUICKACK: - if (!val) { - inet_csk_enter_pingpong_mode(sk); - } else { - inet_csk_exit_pingpong_mode(sk); - if ((1 << sk->sk_state) & - (TCPF_ESTABLISHED | TCPF_CLOSE_WAIT) && - inet_csk_ack_scheduled(sk)) { - icsk->icsk_ack.pending |= ICSK_ACK_PUSHED; - tcp_cleanup_rbuf(sk, 1); - if (!(val & 1)) - inet_csk_enter_pingpong_mode(sk); - } - } + __tcp_sock_set_quickack(sk, val); break; #ifdef CONFIG_TCP_MD5SIG -- cgit v1.3-6-gb490 From 557eadfcc5ee8f8fa98a795e05ed21db58a65db5 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Thu, 28 May 2020 07:12:21 +0200 Subject: tcp: add tcp_sock_set_syncnt Add a helper to directly set the TCP_SYNCNT sockopt from kernel space without going through a fake uaccess. Signed-off-by: Christoph Hellwig Acked-by: Sagi Grimberg Signed-off-by: David S. Miller --- drivers/nvme/host/tcp.c | 9 +-------- include/linux/tcp.h | 1 + net/ipv4/tcp.c | 12 ++++++++++++ 3 files changed, 14 insertions(+), 8 deletions(-) (limited to 'include/linux') diff --git a/drivers/nvme/host/tcp.c b/drivers/nvme/host/tcp.c index 4e4a750ecdb9..2872584f52f6 100644 --- a/drivers/nvme/host/tcp.c +++ b/drivers/nvme/host/tcp.c @@ -1336,14 +1336,7 @@ static int nvme_tcp_alloc_queue(struct nvme_ctrl *nctrl, } /* Single syn retry */ - opt = 1; - ret = kernel_setsockopt(queue->sock, IPPROTO_TCP, TCP_SYNCNT, - (char *)&opt, sizeof(opt)); - if (ret) { - dev_err(nctrl->device, - "failed to set TCP_SYNCNT sock opt %d\n", ret); - goto err_sock; - } + tcp_sock_set_syncnt(queue->sock->sk, 1); /* Set TCP no delay */ tcp_sock_set_nodelay(queue->sock->sk); diff --git a/include/linux/tcp.h b/include/linux/tcp.h index 2eaf8320b9db..6aa4ae5ebf3d 100644 --- a/include/linux/tcp.h +++ b/include/linux/tcp.h @@ -500,5 +500,6 @@ int tcp_skb_shift(struct sk_buff *to, struct sk_buff *from, int pcount, void tcp_sock_set_cork(struct sock *sk, bool on); void tcp_sock_set_nodelay(struct sock *sk); void tcp_sock_set_quickack(struct sock *sk, int val); +int tcp_sock_set_syncnt(struct sock *sk, int val); #endif /* _LINUX_TCP_H */ diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index 27b5e7a4e2ef..d2c67ae1da07 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -2881,6 +2881,18 @@ void tcp_sock_set_quickack(struct sock *sk, int val) } EXPORT_SYMBOL(tcp_sock_set_quickack); +int tcp_sock_set_syncnt(struct sock *sk, int val) +{ + if (val < 1 || val > MAX_TCP_SYNCNT) + return -EINVAL; + + lock_sock(sk); + inet_csk(sk)->icsk_syn_retries = val; + release_sock(sk); + return 0; +} +EXPORT_SYMBOL(tcp_sock_set_syncnt); + /* * Socket option code for TCP. */ -- cgit v1.3-6-gb490 From c488aeadcbd002a992593e6090d54e8ac27c4310 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Thu, 28 May 2020 07:12:22 +0200 Subject: tcp: add tcp_sock_set_user_timeout Add a helper to directly set the TCP_USER_TIMEOUT sockopt from kernel space without going through a fake uaccess. Signed-off-by: Christoph Hellwig Signed-off-by: David S. Miller --- fs/ocfs2/cluster/tcp.c | 22 ++-------------------- include/linux/tcp.h | 1 + net/ipv4/tcp.c | 8 ++++++++ net/sunrpc/xprtsock.c | 3 +-- 4 files changed, 12 insertions(+), 22 deletions(-) (limited to 'include/linux') diff --git a/fs/ocfs2/cluster/tcp.c b/fs/ocfs2/cluster/tcp.c index 4c70fe9d19ab..79a231719460 100644 --- a/fs/ocfs2/cluster/tcp.c +++ b/fs/ocfs2/cluster/tcp.c @@ -1441,14 +1441,6 @@ static void o2net_rx_until_empty(struct work_struct *work) sc_put(sc); } -static int o2net_set_usertimeout(struct socket *sock) -{ - int user_timeout = O2NET_TCP_USER_TIMEOUT; - - return kernel_setsockopt(sock, SOL_TCP, TCP_USER_TIMEOUT, - (void *)&user_timeout, sizeof(user_timeout)); -} - static void o2net_initialize_handshake(void) { o2net_hand->o2hb_heartbeat_timeout_ms = cpu_to_be32( @@ -1629,12 +1621,7 @@ static void o2net_start_connect(struct work_struct *work) } tcp_sock_set_nodelay(sc->sc_sock->sk); - - ret = o2net_set_usertimeout(sock); - if (ret) { - mlog(ML_ERROR, "set TCP_USER_TIMEOUT failed with %d\n", ret); - goto out; - } + tcp_sock_set_user_timeout(sock->sk, O2NET_TCP_USER_TIMEOUT); o2net_register_callbacks(sc->sc_sock->sk, sc); @@ -1821,12 +1808,7 @@ static int o2net_accept_one(struct socket *sock, int *more) new_sock->sk->sk_allocation = GFP_ATOMIC; tcp_sock_set_nodelay(new_sock->sk); - - ret = o2net_set_usertimeout(new_sock); - if (ret) { - mlog(ML_ERROR, "set TCP_USER_TIMEOUT failed with %d\n", ret); - goto out; - } + tcp_sock_set_user_timeout(new_sock->sk, O2NET_TCP_USER_TIMEOUT); ret = new_sock->ops->getname(new_sock, (struct sockaddr *) &sin, 1); if (ret < 0) diff --git a/include/linux/tcp.h b/include/linux/tcp.h index 6aa4ae5ebf3d..de682143efe4 100644 --- a/include/linux/tcp.h +++ b/include/linux/tcp.h @@ -501,5 +501,6 @@ void tcp_sock_set_cork(struct sock *sk, bool on); void tcp_sock_set_nodelay(struct sock *sk); void tcp_sock_set_quickack(struct sock *sk, int val); int tcp_sock_set_syncnt(struct sock *sk, int val); +void tcp_sock_set_user_timeout(struct sock *sk, u32 val); #endif /* _LINUX_TCP_H */ diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index d2c67ae1da07..0004bd9ae7b0 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -2893,6 +2893,14 @@ int tcp_sock_set_syncnt(struct sock *sk, int val) } EXPORT_SYMBOL(tcp_sock_set_syncnt); +void tcp_sock_set_user_timeout(struct sock *sk, u32 val) +{ + lock_sock(sk); + inet_csk(sk)->icsk_user_timeout = val; + release_sock(sk); +} +EXPORT_SYMBOL(tcp_sock_set_user_timeout); + /* * Socket option code for TCP. */ diff --git a/net/sunrpc/xprtsock.c b/net/sunrpc/xprtsock.c index 399848c2bcb2..231fd6162f68 100644 --- a/net/sunrpc/xprtsock.c +++ b/net/sunrpc/xprtsock.c @@ -2115,8 +2115,7 @@ static void xs_tcp_set_socket_timeouts(struct rpc_xprt *xprt, (char *)&keepcnt, sizeof(keepcnt)); /* TCP user timeout (see RFC5482) */ - kernel_setsockopt(sock, SOL_TCP, TCP_USER_TIMEOUT, - (char *)&timeo, sizeof(timeo)); + tcp_sock_set_user_timeout(sock->sk, timeo); } static void xs_tcp_set_connect_timeout(struct rpc_xprt *xprt, -- cgit v1.3-6-gb490 From 71c48eb81c9ecb6fed49dc33e7c9b621fdcb7bf8 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Thu, 28 May 2020 07:12:23 +0200 Subject: tcp: add tcp_sock_set_keepidle Add a helper to directly set the TCP_KEEP_IDLE sockopt from kernel space without going through a fake uaccess. Signed-off-by: Christoph Hellwig Signed-off-by: David S. Miller --- include/linux/tcp.h | 1 + net/ipv4/tcp.c | 49 ++++++++++++++++++++++++++++++++++--------------- net/rds/tcp_listen.c | 5 +---- net/sunrpc/xprtsock.c | 3 +-- 4 files changed, 37 insertions(+), 21 deletions(-) (limited to 'include/linux') diff --git a/include/linux/tcp.h b/include/linux/tcp.h index de682143efe4..5724dd84a85e 100644 --- a/include/linux/tcp.h +++ b/include/linux/tcp.h @@ -498,6 +498,7 @@ int tcp_skb_shift(struct sk_buff *to, struct sk_buff *from, int pcount, int shiftlen); void tcp_sock_set_cork(struct sock *sk, bool on); +int tcp_sock_set_keepidle(struct sock *sk, int val); void tcp_sock_set_nodelay(struct sock *sk); void tcp_sock_set_quickack(struct sock *sk, int val); int tcp_sock_set_syncnt(struct sock *sk, int val); diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index 0004bd9ae7b0..bdf0ff933351 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -2901,6 +2901,39 @@ void tcp_sock_set_user_timeout(struct sock *sk, u32 val) } EXPORT_SYMBOL(tcp_sock_set_user_timeout); +static int __tcp_sock_set_keepidle(struct sock *sk, int val) +{ + struct tcp_sock *tp = tcp_sk(sk); + + if (val < 1 || val > MAX_TCP_KEEPIDLE) + return -EINVAL; + + tp->keepalive_time = val * HZ; + if (sock_flag(sk, SOCK_KEEPOPEN) && + !((1 << sk->sk_state) & (TCPF_CLOSE | TCPF_LISTEN))) { + u32 elapsed = keepalive_time_elapsed(tp); + + if (tp->keepalive_time > elapsed) + elapsed = tp->keepalive_time - elapsed; + else + elapsed = 0; + inet_csk_reset_keepalive_timer(sk, elapsed); + } + + return 0; +} + +int tcp_sock_set_keepidle(struct sock *sk, int val) +{ + int err; + + lock_sock(sk); + err = __tcp_sock_set_keepidle(sk, val); + release_sock(sk); + return err; +} +EXPORT_SYMBOL(tcp_sock_set_keepidle); + /* * Socket option code for TCP. */ @@ -3070,21 +3103,7 @@ static int do_tcp_setsockopt(struct sock *sk, int level, break; case TCP_KEEPIDLE: - if (val < 1 || val > MAX_TCP_KEEPIDLE) - err = -EINVAL; - else { - tp->keepalive_time = val * HZ; - if (sock_flag(sk, SOCK_KEEPOPEN) && - !((1 << sk->sk_state) & - (TCPF_CLOSE | TCPF_LISTEN))) { - u32 elapsed = keepalive_time_elapsed(tp); - if (tp->keepalive_time > elapsed) - elapsed = tp->keepalive_time - elapsed; - else - elapsed = 0; - inet_csk_reset_keepalive_timer(sk, elapsed); - } - } + err = __tcp_sock_set_keepidle(sk, val); break; case TCP_KEEPINTVL: if (val < 1 || val > MAX_TCP_KEEPINTVL) diff --git a/net/rds/tcp_listen.c b/net/rds/tcp_listen.c index 6f90ea077adc..79f9adc00811 100644 --- a/net/rds/tcp_listen.c +++ b/net/rds/tcp_listen.c @@ -52,10 +52,7 @@ int rds_tcp_keepalive(struct socket *sock) if (ret < 0) goto bail; - ret = kernel_setsockopt(sock, IPPROTO_TCP, TCP_KEEPIDLE, - (char *)&keepidle, sizeof(keepidle)); - if (ret < 0) - goto bail; + tcp_sock_set_keepidle(sock->sk, keepidle); /* KEEPINTVL is the interval between successive probes. We follow * the model in xs_tcp_finish_connecting() and re-use keepidle. diff --git a/net/sunrpc/xprtsock.c b/net/sunrpc/xprtsock.c index 231fd6162f68..473290f7c5c0 100644 --- a/net/sunrpc/xprtsock.c +++ b/net/sunrpc/xprtsock.c @@ -2107,8 +2107,7 @@ static void xs_tcp_set_socket_timeouts(struct rpc_xprt *xprt, /* TCP Keepalive options */ sock_set_keepalive(sock->sk); - kernel_setsockopt(sock, SOL_TCP, TCP_KEEPIDLE, - (char *)&keepidle, sizeof(keepidle)); + tcp_sock_set_keepidle(sock->sk, keepidle); kernel_setsockopt(sock, SOL_TCP, TCP_KEEPINTVL, (char *)&keepidle, sizeof(keepidle)); kernel_setsockopt(sock, SOL_TCP, TCP_KEEPCNT, -- cgit v1.3-6-gb490 From d41ecaac903c9f4658a71d4e7a708673cfb5abba Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Thu, 28 May 2020 07:12:24 +0200 Subject: tcp: add tcp_sock_set_keepintvl Add a helper to directly set the TCP_KEEPINTVL sockopt from kernel space without going through a fake uaccess. Signed-off-by: Christoph Hellwig Signed-off-by: David S. Miller --- include/linux/tcp.h | 1 + net/ipv4/tcp.c | 12 ++++++++++++ net/rds/tcp_listen.c | 4 +--- net/sunrpc/xprtsock.c | 3 +-- 4 files changed, 15 insertions(+), 5 deletions(-) (limited to 'include/linux') diff --git a/include/linux/tcp.h b/include/linux/tcp.h index 5724dd84a85e..1f9bada00faa 100644 --- a/include/linux/tcp.h +++ b/include/linux/tcp.h @@ -499,6 +499,7 @@ int tcp_skb_shift(struct sk_buff *to, struct sk_buff *from, int pcount, void tcp_sock_set_cork(struct sock *sk, bool on); int tcp_sock_set_keepidle(struct sock *sk, int val); +int tcp_sock_set_keepintvl(struct sock *sk, int val); void tcp_sock_set_nodelay(struct sock *sk); void tcp_sock_set_quickack(struct sock *sk, int val); int tcp_sock_set_syncnt(struct sock *sk, int val); diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index bdf0ff933351..7eb083e09786 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -2934,6 +2934,18 @@ int tcp_sock_set_keepidle(struct sock *sk, int val) } EXPORT_SYMBOL(tcp_sock_set_keepidle); +int tcp_sock_set_keepintvl(struct sock *sk, int val) +{ + if (val < 1 || val > MAX_TCP_KEEPINTVL) + return -EINVAL; + + lock_sock(sk); + tcp_sk(sk)->keepalive_intvl = val * HZ; + release_sock(sk); + return 0; +} +EXPORT_SYMBOL(tcp_sock_set_keepintvl); + /* * Socket option code for TCP. */ diff --git a/net/rds/tcp_listen.c b/net/rds/tcp_listen.c index 79f9adc00811..9ad555c48d15 100644 --- a/net/rds/tcp_listen.c +++ b/net/rds/tcp_listen.c @@ -53,12 +53,10 @@ int rds_tcp_keepalive(struct socket *sock) goto bail; tcp_sock_set_keepidle(sock->sk, keepidle); - /* KEEPINTVL is the interval between successive probes. We follow * the model in xs_tcp_finish_connecting() and re-use keepidle. */ - ret = kernel_setsockopt(sock, IPPROTO_TCP, TCP_KEEPINTVL, - (char *)&keepidle, sizeof(keepidle)); + tcp_sock_set_keepintvl(sock->sk, keepidle); bail: return ret; } diff --git a/net/sunrpc/xprtsock.c b/net/sunrpc/xprtsock.c index 473290f7c5c0..5ca64e12af0c 100644 --- a/net/sunrpc/xprtsock.c +++ b/net/sunrpc/xprtsock.c @@ -2108,8 +2108,7 @@ static void xs_tcp_set_socket_timeouts(struct rpc_xprt *xprt, /* TCP Keepalive options */ sock_set_keepalive(sock->sk); tcp_sock_set_keepidle(sock->sk, keepidle); - kernel_setsockopt(sock, SOL_TCP, TCP_KEEPINTVL, - (char *)&keepidle, sizeof(keepidle)); + tcp_sock_set_keepintvl(sock->sk, keepidle); kernel_setsockopt(sock, SOL_TCP, TCP_KEEPCNT, (char *)&keepcnt, sizeof(keepcnt)); -- cgit v1.3-6-gb490 From 480aeb9639d6a077c611b303a22f9b1e5937d081 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Thu, 28 May 2020 07:12:25 +0200 Subject: tcp: add tcp_sock_set_keepcnt Add a helper to directly set the TCP_KEEPCNT sockopt from kernel space without going through a fake uaccess. Signed-off-by: Christoph Hellwig Signed-off-by: David S. Miller --- include/linux/tcp.h | 1 + net/ipv4/tcp.c | 12 ++++++++++++ net/rds/tcp.h | 2 +- net/rds/tcp_listen.c | 17 +++-------------- net/sunrpc/xprtsock.c | 3 +-- 5 files changed, 18 insertions(+), 17 deletions(-) (limited to 'include/linux') diff --git a/include/linux/tcp.h b/include/linux/tcp.h index 1f9bada00faa..9aac824c523c 100644 --- a/include/linux/tcp.h +++ b/include/linux/tcp.h @@ -498,6 +498,7 @@ int tcp_skb_shift(struct sk_buff *to, struct sk_buff *from, int pcount, int shiftlen); void tcp_sock_set_cork(struct sock *sk, bool on); +int tcp_sock_set_keepcnt(struct sock *sk, int val); int tcp_sock_set_keepidle(struct sock *sk, int val); int tcp_sock_set_keepintvl(struct sock *sk, int val); void tcp_sock_set_nodelay(struct sock *sk); diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index 7eb083e09786..15d47d5e7951 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -2946,6 +2946,18 @@ int tcp_sock_set_keepintvl(struct sock *sk, int val) } EXPORT_SYMBOL(tcp_sock_set_keepintvl); +int tcp_sock_set_keepcnt(struct sock *sk, int val) +{ + if (val < 1 || val > MAX_TCP_KEEPCNT) + return -EINVAL; + + lock_sock(sk); + tcp_sk(sk)->keepalive_probes = val; + release_sock(sk); + return 0; +} +EXPORT_SYMBOL(tcp_sock_set_keepcnt); + /* * Socket option code for TCP. */ diff --git a/net/rds/tcp.h b/net/rds/tcp.h index f6d75d8cb167..bad9cf49d565 100644 --- a/net/rds/tcp.h +++ b/net/rds/tcp.h @@ -70,7 +70,7 @@ struct socket *rds_tcp_listen_init(struct net *net, bool isv6); void rds_tcp_listen_stop(struct socket *sock, struct work_struct *acceptor); void rds_tcp_listen_data_ready(struct sock *sk); int rds_tcp_accept_one(struct socket *sock); -int rds_tcp_keepalive(struct socket *sock); +void rds_tcp_keepalive(struct socket *sock); void *rds_tcp_listen_sock_def_readable(struct net *net); /* tcp_recv.c */ diff --git a/net/rds/tcp_listen.c b/net/rds/tcp_listen.c index 9ad555c48d15..101cf14215a0 100644 --- a/net/rds/tcp_listen.c +++ b/net/rds/tcp_listen.c @@ -38,27 +38,19 @@ #include "rds.h" #include "tcp.h" -int rds_tcp_keepalive(struct socket *sock) +void rds_tcp_keepalive(struct socket *sock) { /* values below based on xs_udp_default_timeout */ int keepidle = 5; /* send a probe 'keepidle' secs after last data */ int keepcnt = 5; /* number of unack'ed probes before declaring dead */ - int ret = 0; sock_set_keepalive(sock->sk); - - ret = kernel_setsockopt(sock, IPPROTO_TCP, TCP_KEEPCNT, - (char *)&keepcnt, sizeof(keepcnt)); - if (ret < 0) - goto bail; - + tcp_sock_set_keepcnt(sock->sk, keepcnt); tcp_sock_set_keepidle(sock->sk, keepidle); /* KEEPINTVL is the interval between successive probes. We follow * the model in xs_tcp_finish_connecting() and re-use keepidle. */ tcp_sock_set_keepintvl(sock->sk, keepidle); -bail: - return ret; } /* rds_tcp_accept_one_path(): if accepting on cp_index > 0, make sure the @@ -140,10 +132,7 @@ int rds_tcp_accept_one(struct socket *sock) new_sock->ops = sock->ops; __module_get(new_sock->ops->owner); - ret = rds_tcp_keepalive(new_sock); - if (ret < 0) - goto out; - + rds_tcp_keepalive(new_sock); rds_tcp_tune(new_sock); inet = inet_sk(new_sock->sk); diff --git a/net/sunrpc/xprtsock.c b/net/sunrpc/xprtsock.c index 5ca64e12af0c..0d3ec055bc12 100644 --- a/net/sunrpc/xprtsock.c +++ b/net/sunrpc/xprtsock.c @@ -2109,8 +2109,7 @@ static void xs_tcp_set_socket_timeouts(struct rpc_xprt *xprt, sock_set_keepalive(sock->sk); tcp_sock_set_keepidle(sock->sk, keepidle); tcp_sock_set_keepintvl(sock->sk, keepidle); - kernel_setsockopt(sock, SOL_TCP, TCP_KEEPCNT, - (char *)&keepcnt, sizeof(keepcnt)); + tcp_sock_set_keepcnt(sock->sk, keepcnt); /* TCP user timeout (see RFC5482) */ tcp_sock_set_user_timeout(sock->sk, timeo); -- cgit v1.3-6-gb490