diff options
Diffstat (limited to 'drivers/staging/lustre/lnet')
66 files changed, 0 insertions, 49828 deletions
diff --git a/drivers/staging/lustre/lnet/Kconfig b/drivers/staging/lustre/lnet/Kconfig deleted file mode 100644 index ad049e6f24e4..000000000000 --- a/drivers/staging/lustre/lnet/Kconfig +++ /dev/null @@ -1,46 +0,0 @@ -config LNET - tristate "Lustre networking subsystem (LNet)" - depends on INET - help - The Lustre network layer, also known as LNet, is a networking abstaction - level API that was initially created to allow Lustre Filesystem to utilize - very different networks like tcp and ib verbs in a uniform way. In the - case of Lustre routers only the LNet layer is required. Lately other - projects are also looking into using LNet as their networking API as well. - -config LNET_MAX_PAYLOAD - int "Lustre lnet max transfer payload (default 1MB)" - depends on LNET - default "1048576" - help - This option defines the maximum size of payload in bytes that lnet - can put into its transport. - - If unsure, use default. - -config LNET_SELFTEST - tristate "Lustre networking self testing" - depends on LNET - help - Choose Y here if you want to do lnet self testing. To compile this - as a module, choose M here: the module will be called lnet_selftest. - - To compile this as a kernel modules, choose M here and it will be - called lnet_selftest. - - If unsure, say N. - - See also http://wiki.lustre.org/ - -config LNET_XPRT_IB - tristate "LNET infiniband support" - depends on LNET && PCI && INFINIBAND && INFINIBAND_ADDR_TRANS - default LNET && INFINIBAND - help - This option allows the LNET users to use infiniband as an - RDMA-enabled transport. - - To compile this as a kernel module, choose M here and it will be - called ko2iblnd. - - If unsure, say N. 
diff --git a/drivers/staging/lustre/lnet/Makefile b/drivers/staging/lustre/lnet/Makefile deleted file mode 100644 index 0a380fe88ce8..000000000000 --- a/drivers/staging/lustre/lnet/Makefile +++ /dev/null @@ -1 +0,0 @@ -obj-$(CONFIG_LNET) += libcfs/ lnet/ klnds/ selftest/ diff --git a/drivers/staging/lustre/lnet/klnds/Makefile b/drivers/staging/lustre/lnet/klnds/Makefile deleted file mode 100644 index c23e4f67f837..000000000000 --- a/drivers/staging/lustre/lnet/klnds/Makefile +++ /dev/null @@ -1 +0,0 @@ -obj-$(CONFIG_LNET) += o2iblnd/ socklnd/ diff --git a/drivers/staging/lustre/lnet/klnds/o2iblnd/Makefile b/drivers/staging/lustre/lnet/klnds/o2iblnd/Makefile deleted file mode 100644 index 4affe1d79948..000000000000 --- a/drivers/staging/lustre/lnet/klnds/o2iblnd/Makefile +++ /dev/null @@ -1,5 +0,0 @@ -subdir-ccflags-y += -I$(srctree)/drivers/staging/lustre/include -subdir-ccflags-y += -I$(srctree)/drivers/staging/lustre/lustre/include - -obj-$(CONFIG_LNET_XPRT_IB) += ko2iblnd.o -ko2iblnd-y := o2iblnd.o o2iblnd_cb.o o2iblnd_modparams.o diff --git a/drivers/staging/lustre/lnet/klnds/o2iblnd/o2iblnd.c b/drivers/staging/lustre/lnet/klnds/o2iblnd/o2iblnd.c deleted file mode 100644 index 7ae2955c4db6..000000000000 --- a/drivers/staging/lustre/lnet/klnds/o2iblnd/o2iblnd.c +++ /dev/null @@ -1,2952 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -/* - * GPL HEADER START - * - * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 only, - * as published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, but - * WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * General Public License version 2 for more details (a copy is included - * in the LICENSE file that accompanied this code). 
- * - * You should have received a copy of the GNU General Public License - * version 2 along with this program; If not, see - * http://www.gnu.org/licenses/gpl-2.0.html - * - * GPL HEADER END - */ -/* - * Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved. - * Use is subject to license terms. - * - * Copyright (c) 2011, 2015, Intel Corporation. - */ -/* - * This file is part of Lustre, http://www.lustre.org/ - * Lustre is a trademark of Sun Microsystems, Inc. - * - * lnet/klnds/o2iblnd/o2iblnd.c - * - * Author: Eric Barton <eric@bartonsoftware.com> - */ - -#include <asm/div64.h> -#include <asm/page.h> -#include "o2iblnd.h" - -static struct lnet_lnd the_o2iblnd; - -struct kib_data kiblnd_data; - -static __u32 kiblnd_cksum(void *ptr, int nob) -{ - char *c = ptr; - __u32 sum = 0; - - while (nob-- > 0) - sum = ((sum << 1) | (sum >> 31)) + *c++; - - /* ensure I don't return 0 (== no checksum) */ - return !sum ? 1 : sum; -} - -static char *kiblnd_msgtype2str(int type) -{ - switch (type) { - case IBLND_MSG_CONNREQ: - return "CONNREQ"; - - case IBLND_MSG_CONNACK: - return "CONNACK"; - - case IBLND_MSG_NOOP: - return "NOOP"; - - case IBLND_MSG_IMMEDIATE: - return "IMMEDIATE"; - - case IBLND_MSG_PUT_REQ: - return "PUT_REQ"; - - case IBLND_MSG_PUT_NAK: - return "PUT_NAK"; - - case IBLND_MSG_PUT_ACK: - return "PUT_ACK"; - - case IBLND_MSG_PUT_DONE: - return "PUT_DONE"; - - case IBLND_MSG_GET_REQ: - return "GET_REQ"; - - case IBLND_MSG_GET_DONE: - return "GET_DONE"; - - default: - return "???"; - } -} - -static int kiblnd_msgtype2size(int type) -{ - const int hdr_size = offsetof(struct kib_msg, ibm_u); - - switch (type) { - case IBLND_MSG_CONNREQ: - case IBLND_MSG_CONNACK: - return hdr_size + sizeof(struct kib_connparams); - - case IBLND_MSG_NOOP: - return hdr_size; - - case IBLND_MSG_IMMEDIATE: - return offsetof(struct kib_msg, ibm_u.immediate.ibim_payload[0]); - - case IBLND_MSG_PUT_REQ: - return hdr_size + sizeof(struct kib_putreq_msg); - - case 
IBLND_MSG_PUT_ACK: - return hdr_size + sizeof(struct kib_putack_msg); - - case IBLND_MSG_GET_REQ: - return hdr_size + sizeof(struct kib_get_msg); - - case IBLND_MSG_PUT_NAK: - case IBLND_MSG_PUT_DONE: - case IBLND_MSG_GET_DONE: - return hdr_size + sizeof(struct kib_completion_msg); - default: - return -1; - } -} - -static int kiblnd_unpack_rd(struct kib_msg *msg, int flip) -{ - struct kib_rdma_desc *rd; - int msg_size; - int nob; - int n; - int i; - - LASSERT(msg->ibm_type == IBLND_MSG_GET_REQ || - msg->ibm_type == IBLND_MSG_PUT_ACK); - - rd = msg->ibm_type == IBLND_MSG_GET_REQ ? - &msg->ibm_u.get.ibgm_rd : - &msg->ibm_u.putack.ibpam_rd; - - if (flip) { - __swab32s(&rd->rd_key); - __swab32s(&rd->rd_nfrags); - } - - n = rd->rd_nfrags; - - nob = offsetof(struct kib_msg, ibm_u) + - kiblnd_rd_msg_size(rd, msg->ibm_type, n); - - if (msg->ibm_nob < nob) { - CERROR("Short %s: %d(%d)\n", - kiblnd_msgtype2str(msg->ibm_type), msg->ibm_nob, nob); - return 1; - } - - msg_size = kiblnd_rd_size(rd); - if (msg_size <= 0 || msg_size > LNET_MAX_PAYLOAD) { - CERROR("Bad msg_size: %d, should be 0 < n <= %d\n", - msg_size, LNET_MAX_PAYLOAD); - return 1; - } - - if (!flip) - return 0; - - for (i = 0; i < n; i++) { - __swab32s(&rd->rd_frags[i].rf_nob); - __swab64s(&rd->rd_frags[i].rf_addr); - } - - return 0; -} - -void kiblnd_pack_msg(struct lnet_ni *ni, struct kib_msg *msg, int version, - int credits, lnet_nid_t dstnid, __u64 dststamp) -{ - struct kib_net *net = ni->ni_data; - - /* - * CAVEAT EMPTOR! all message fields not set here should have been - * initialised previously. 
- */ - msg->ibm_magic = IBLND_MSG_MAGIC; - msg->ibm_version = version; - /* ibm_type */ - msg->ibm_credits = credits; - /* ibm_nob */ - msg->ibm_cksum = 0; - msg->ibm_srcnid = ni->ni_nid; - msg->ibm_srcstamp = net->ibn_incarnation; - msg->ibm_dstnid = dstnid; - msg->ibm_dststamp = dststamp; - - if (*kiblnd_tunables.kib_cksum) { - /* NB ibm_cksum zero while computing cksum */ - msg->ibm_cksum = kiblnd_cksum(msg, msg->ibm_nob); - } -} - -int kiblnd_unpack_msg(struct kib_msg *msg, int nob) -{ - const int hdr_size = offsetof(struct kib_msg, ibm_u); - __u32 msg_cksum; - __u16 version; - int msg_nob; - int flip; - - /* 6 bytes are enough to have received magic + version */ - if (nob < 6) { - CERROR("Short message: %d\n", nob); - return -EPROTO; - } - - if (msg->ibm_magic == IBLND_MSG_MAGIC) { - flip = 0; - } else if (msg->ibm_magic == __swab32(IBLND_MSG_MAGIC)) { - flip = 1; - } else { - CERROR("Bad magic: %08x\n", msg->ibm_magic); - return -EPROTO; - } - - version = flip ? __swab16(msg->ibm_version) : msg->ibm_version; - if (version != IBLND_MSG_VERSION && - version != IBLND_MSG_VERSION_1) { - CERROR("Bad version: %x\n", version); - return -EPROTO; - } - - if (nob < hdr_size) { - CERROR("Short message: %d\n", nob); - return -EPROTO; - } - - msg_nob = flip ? __swab32(msg->ibm_nob) : msg->ibm_nob; - if (msg_nob > nob) { - CERROR("Short message: got %d, wanted %d\n", nob, msg_nob); - return -EPROTO; - } - - /* - * checksum must be computed with ibm_cksum zero and BEFORE anything - * gets flipped - */ - msg_cksum = flip ? 
__swab32(msg->ibm_cksum) : msg->ibm_cksum; - msg->ibm_cksum = 0; - if (msg_cksum && - msg_cksum != kiblnd_cksum(msg, msg_nob)) { - CERROR("Bad checksum\n"); - return -EPROTO; - } - - msg->ibm_cksum = msg_cksum; - - if (flip) { - /* leave magic unflipped as a clue to peer endianness */ - msg->ibm_version = version; - BUILD_BUG_ON(sizeof(msg->ibm_type) != 1); - BUILD_BUG_ON(sizeof(msg->ibm_credits) != 1); - msg->ibm_nob = msg_nob; - __swab64s(&msg->ibm_srcnid); - __swab64s(&msg->ibm_srcstamp); - __swab64s(&msg->ibm_dstnid); - __swab64s(&msg->ibm_dststamp); - } - - if (msg->ibm_srcnid == LNET_NID_ANY) { - CERROR("Bad src nid: %s\n", libcfs_nid2str(msg->ibm_srcnid)); - return -EPROTO; - } - - if (msg_nob < kiblnd_msgtype2size(msg->ibm_type)) { - CERROR("Short %s: %d(%d)\n", kiblnd_msgtype2str(msg->ibm_type), - msg_nob, kiblnd_msgtype2size(msg->ibm_type)); - return -EPROTO; - } - - switch (msg->ibm_type) { - default: - CERROR("Unknown message type %x\n", msg->ibm_type); - return -EPROTO; - - case IBLND_MSG_NOOP: - case IBLND_MSG_IMMEDIATE: - case IBLND_MSG_PUT_REQ: - break; - - case IBLND_MSG_PUT_ACK: - case IBLND_MSG_GET_REQ: - if (kiblnd_unpack_rd(msg, flip)) - return -EPROTO; - break; - - case IBLND_MSG_PUT_NAK: - case IBLND_MSG_PUT_DONE: - case IBLND_MSG_GET_DONE: - if (flip) - __swab32s(&msg->ibm_u.completion.ibcm_status); - break; - - case IBLND_MSG_CONNREQ: - case IBLND_MSG_CONNACK: - if (flip) { - __swab16s(&msg->ibm_u.connparams.ibcp_queue_depth); - __swab16s(&msg->ibm_u.connparams.ibcp_max_frags); - __swab32s(&msg->ibm_u.connparams.ibcp_max_msg_size); - } - break; - } - return 0; -} - -int kiblnd_create_peer(struct lnet_ni *ni, struct kib_peer **peerp, - lnet_nid_t nid) -{ - struct kib_peer *peer; - struct kib_net *net = ni->ni_data; - int cpt = lnet_cpt_of_nid(nid); - unsigned long flags; - - LASSERT(net); - LASSERT(nid != LNET_NID_ANY); - - peer = kzalloc_cpt(sizeof(*peer), GFP_NOFS, cpt); - if (!peer) { - CERROR("Cannot allocate peer\n"); - return -ENOMEM; 
- } - - peer->ibp_ni = ni; - peer->ibp_nid = nid; - peer->ibp_error = 0; - peer->ibp_last_alive = 0; - peer->ibp_max_frags = kiblnd_cfg_rdma_frags(peer->ibp_ni); - peer->ibp_queue_depth = ni->ni_peertxcredits; - atomic_set(&peer->ibp_refcount, 1); /* 1 ref for caller */ - - INIT_LIST_HEAD(&peer->ibp_list); /* not in the peer table yet */ - INIT_LIST_HEAD(&peer->ibp_conns); - INIT_LIST_HEAD(&peer->ibp_tx_queue); - - write_lock_irqsave(&kiblnd_data.kib_global_lock, flags); - - /* always called with a ref on ni, which prevents ni being shutdown */ - LASSERT(!net->ibn_shutdown); - - /* npeers only grows with the global lock held */ - atomic_inc(&net->ibn_npeers); - - write_unlock_irqrestore(&kiblnd_data.kib_global_lock, flags); - - *peerp = peer; - return 0; -} - -void kiblnd_destroy_peer(struct kib_peer *peer) -{ - struct kib_net *net = peer->ibp_ni->ni_data; - - LASSERT(net); - LASSERT(!atomic_read(&peer->ibp_refcount)); - LASSERT(!kiblnd_peer_active(peer)); - LASSERT(kiblnd_peer_idle(peer)); - LASSERT(list_empty(&peer->ibp_tx_queue)); - - kfree(peer); - - /* - * NB a peer's connections keep a reference on their peer until - * they are destroyed, so we can be assured that _all_ state to do - * with this peer has been cleaned up when its refcount drops to - * zero. 
- */ - atomic_dec(&net->ibn_npeers); -} - -struct kib_peer *kiblnd_find_peer_locked(lnet_nid_t nid) -{ - /* - * the caller is responsible for accounting the additional reference - * that this creates - */ - struct list_head *peer_list = kiblnd_nid2peerlist(nid); - struct list_head *tmp; - struct kib_peer *peer; - - list_for_each(tmp, peer_list) { - peer = list_entry(tmp, struct kib_peer, ibp_list); - LASSERT(!kiblnd_peer_idle(peer)); - - if (peer->ibp_nid != nid) - continue; - - CDEBUG(D_NET, "got peer [%p] -> %s (%d) version: %x\n", - peer, libcfs_nid2str(nid), - atomic_read(&peer->ibp_refcount), - peer->ibp_version); - return peer; - } - return NULL; -} - -void kiblnd_unlink_peer_locked(struct kib_peer *peer) -{ - LASSERT(list_empty(&peer->ibp_conns)); - - LASSERT(kiblnd_peer_active(peer)); - list_del_init(&peer->ibp_list); - /* lose peerlist's ref */ - kiblnd_peer_decref(peer); -} - -static int kiblnd_get_peer_info(struct lnet_ni *ni, int index, - lnet_nid_t *nidp, int *count) -{ - struct kib_peer *peer; - struct list_head *ptmp; - int i; - unsigned long flags; - - read_lock_irqsave(&kiblnd_data.kib_global_lock, flags); - - for (i = 0; i < kiblnd_data.kib_peer_hash_size; i++) { - list_for_each(ptmp, &kiblnd_data.kib_peers[i]) { - peer = list_entry(ptmp, struct kib_peer, ibp_list); - LASSERT(!kiblnd_peer_idle(peer)); - - if (peer->ibp_ni != ni) - continue; - - if (index-- > 0) - continue; - - *nidp = peer->ibp_nid; - *count = atomic_read(&peer->ibp_refcount); - - read_unlock_irqrestore(&kiblnd_data.kib_global_lock, - flags); - return 0; - } - } - - read_unlock_irqrestore(&kiblnd_data.kib_global_lock, flags); - return -ENOENT; -} - -static void kiblnd_del_peer_locked(struct kib_peer *peer) -{ - struct list_head *ctmp; - struct list_head *cnxt; - struct kib_conn *conn; - - if (list_empty(&peer->ibp_conns)) { - kiblnd_unlink_peer_locked(peer); - } else { - list_for_each_safe(ctmp, cnxt, &peer->ibp_conns) { - conn = list_entry(ctmp, struct kib_conn, ibc_list); - - 
kiblnd_close_conn_locked(conn, 0); - } - /* NB closing peer's last conn unlinked it. */ - } - /* - * NB peer now unlinked; might even be freed if the peer table had the - * last ref on it. - */ -} - -static int kiblnd_del_peer(struct lnet_ni *ni, lnet_nid_t nid) -{ - LIST_HEAD(zombies); - struct list_head *ptmp; - struct list_head *pnxt; - struct kib_peer *peer; - int lo; - int hi; - int i; - unsigned long flags; - int rc = -ENOENT; - - write_lock_irqsave(&kiblnd_data.kib_global_lock, flags); - - if (nid != LNET_NID_ANY) { - lo = kiblnd_nid2peerlist(nid) - kiblnd_data.kib_peers; - hi = kiblnd_nid2peerlist(nid) - kiblnd_data.kib_peers; - } else { - lo = 0; - hi = kiblnd_data.kib_peer_hash_size - 1; - } - - for (i = lo; i <= hi; i++) { - list_for_each_safe(ptmp, pnxt, &kiblnd_data.kib_peers[i]) { - peer = list_entry(ptmp, struct kib_peer, ibp_list); - LASSERT(!kiblnd_peer_idle(peer)); - - if (peer->ibp_ni != ni) - continue; - - if (!(nid == LNET_NID_ANY || peer->ibp_nid == nid)) - continue; - - if (!list_empty(&peer->ibp_tx_queue)) { - LASSERT(list_empty(&peer->ibp_conns)); - - list_splice_init(&peer->ibp_tx_queue, - &zombies); - } - - kiblnd_del_peer_locked(peer); - rc = 0; /* matched something */ - } - } - - write_unlock_irqrestore(&kiblnd_data.kib_global_lock, flags); - - kiblnd_txlist_done(ni, &zombies, -EIO); - - return rc; -} - -static struct kib_conn *kiblnd_get_conn_by_idx(struct lnet_ni *ni, int index) -{ - struct kib_peer *peer; - struct list_head *ptmp; - struct kib_conn *conn; - struct list_head *ctmp; - int i; - unsigned long flags; - - read_lock_irqsave(&kiblnd_data.kib_global_lock, flags); - - for (i = 0; i < kiblnd_data.kib_peer_hash_size; i++) { - list_for_each(ptmp, &kiblnd_data.kib_peers[i]) { - peer = list_entry(ptmp, struct kib_peer, ibp_list); - LASSERT(!kiblnd_peer_idle(peer)); - - if (peer->ibp_ni != ni) - continue; - - list_for_each(ctmp, &peer->ibp_conns) { - if (index-- > 0) - continue; - - conn = list_entry(ctmp, struct kib_conn, - 
ibc_list); - kiblnd_conn_addref(conn); - read_unlock_irqrestore( - &kiblnd_data.kib_global_lock, - flags); - return conn; - } - } - } - - read_unlock_irqrestore(&kiblnd_data.kib_global_lock, flags); - return NULL; -} - -int kiblnd_translate_mtu(int value) -{ - switch (value) { - default: - return -1; - case 0: - return 0; - case 256: - return IB_MTU_256; - case 512: - return IB_MTU_512; - case 1024: - return IB_MTU_1024; - case 2048: - return IB_MTU_2048; - case 4096: - return IB_MTU_4096; - } -} - -static void kiblnd_setup_mtu_locked(struct rdma_cm_id *cmid) -{ - int mtu; - - /* XXX There is no path record for iWARP, set by netdev->change_mtu? */ - if (!cmid->route.path_rec) - return; - - mtu = kiblnd_translate_mtu(*kiblnd_tunables.kib_ib_mtu); - LASSERT(mtu >= 0); - if (mtu) - cmid->route.path_rec->mtu = mtu; -} - -static int kiblnd_get_completion_vector(struct kib_conn *conn, int cpt) -{ - cpumask_var_t *mask; - int vectors; - int off; - int i; - lnet_nid_t nid = conn->ibc_peer->ibp_nid; - - vectors = conn->ibc_cmid->device->num_comp_vectors; - if (vectors <= 1) - return 0; - - mask = cfs_cpt_cpumask(lnet_cpt_table(), cpt); - if (!mask) - return 0; - - /* hash NID to CPU id in this partition... */ - off = do_div(nid, cpumask_weight(*mask)); - for_each_cpu(i, *mask) { - if (!off--) - return i % vectors; - } - - LBUG(); - return 1; -} - -struct kib_conn *kiblnd_create_conn(struct kib_peer *peer, struct rdma_cm_id *cmid, - int state, int version) -{ - /* - * CAVEAT EMPTOR: - * If the new conn is created successfully it takes over the caller's - * ref on 'peer'. It also "owns" 'cmid' and destroys it when it itself - * is destroyed. On failure, the caller's ref on 'peer' remains and - * she must dispose of 'cmid'. (Actually I'd block forever if I tried - * to destroy 'cmid' here since I'm called from the CM which still has - * its ref on 'cmid'). 
- */ - rwlock_t *glock = &kiblnd_data.kib_global_lock; - struct kib_net *net = peer->ibp_ni->ni_data; - struct kib_dev *dev; - struct ib_qp_init_attr *init_qp_attr; - struct kib_sched_info *sched; - struct ib_cq_init_attr cq_attr = {}; - struct kib_conn *conn; - struct ib_cq *cq; - unsigned long flags; - int cpt; - int rc; - int i; - - LASSERT(net); - LASSERT(!in_interrupt()); - - dev = net->ibn_dev; - - cpt = lnet_cpt_of_nid(peer->ibp_nid); - sched = kiblnd_data.kib_scheds[cpt]; - - LASSERT(sched->ibs_nthreads > 0); - - init_qp_attr = kzalloc_cpt(sizeof(*init_qp_attr), GFP_NOFS, cpt); - if (!init_qp_attr) { - CERROR("Can't allocate qp_attr for %s\n", - libcfs_nid2str(peer->ibp_nid)); - goto failed_0; - } - - conn = kzalloc_cpt(sizeof(*conn), GFP_NOFS, cpt); - if (!conn) { - CERROR("Can't allocate connection for %s\n", - libcfs_nid2str(peer->ibp_nid)); - goto failed_1; - } - - conn->ibc_state = IBLND_CONN_INIT; - conn->ibc_version = version; - conn->ibc_peer = peer; /* I take the caller's ref */ - cmid->context = conn; /* for future CM callbacks */ - conn->ibc_cmid = cmid; - conn->ibc_max_frags = peer->ibp_max_frags; - conn->ibc_queue_depth = peer->ibp_queue_depth; - - INIT_LIST_HEAD(&conn->ibc_early_rxs); - INIT_LIST_HEAD(&conn->ibc_tx_noops); - INIT_LIST_HEAD(&conn->ibc_tx_queue); - INIT_LIST_HEAD(&conn->ibc_tx_queue_rsrvd); - INIT_LIST_HEAD(&conn->ibc_tx_queue_nocred); - INIT_LIST_HEAD(&conn->ibc_active_txs); - spin_lock_init(&conn->ibc_lock); - - conn->ibc_connvars = kzalloc_cpt(sizeof(*conn->ibc_connvars), GFP_NOFS, cpt); - if (!conn->ibc_connvars) { - CERROR("Can't allocate in-progress connection state\n"); - goto failed_2; - } - - write_lock_irqsave(glock, flags); - if (dev->ibd_failover) { - write_unlock_irqrestore(glock, flags); - CERROR("%s: failover in progress\n", dev->ibd_ifname); - goto failed_2; - } - - if (dev->ibd_hdev->ibh_ibdev != cmid->device) { - /* wakeup failover thread and teardown connection */ - if (kiblnd_dev_can_failover(dev)) { - 
list_add_tail(&dev->ibd_fail_list, - &kiblnd_data.kib_failed_devs); - wake_up(&kiblnd_data.kib_failover_waitq); - } - - write_unlock_irqrestore(glock, flags); - CERROR("cmid HCA(%s), kib_dev(%s) need failover\n", - cmid->device->name, dev->ibd_ifname); - goto failed_2; - } - - kiblnd_hdev_addref_locked(dev->ibd_hdev); - conn->ibc_hdev = dev->ibd_hdev; - - kiblnd_setup_mtu_locked(cmid); - - write_unlock_irqrestore(glock, flags); - - conn->ibc_rxs = kzalloc_cpt(IBLND_RX_MSGS(conn) * sizeof(struct kib_rx), - GFP_NOFS, cpt); - if (!conn->ibc_rxs) { - CERROR("Cannot allocate RX buffers\n"); - goto failed_2; - } - - rc = kiblnd_alloc_pages(&conn->ibc_rx_pages, cpt, - IBLND_RX_MSG_PAGES(conn)); - if (rc) - goto failed_2; - - kiblnd_map_rx_descs(conn); - - cq_attr.cqe = IBLND_CQ_ENTRIES(conn); - cq_attr.comp_vector = kiblnd_get_completion_vector(conn, cpt); - cq = ib_create_cq(cmid->device, - kiblnd_cq_completion, kiblnd_cq_event, conn, - &cq_attr); - if (IS_ERR(cq)) { - CERROR("Failed to create CQ with %d CQEs: %ld\n", - IBLND_CQ_ENTRIES(conn), PTR_ERR(cq)); - goto failed_2; - } - - conn->ibc_cq = cq; - - rc = ib_req_notify_cq(cq, IB_CQ_NEXT_COMP); - if (rc) { - CERROR("Can't request completion notification: %d\n", rc); - goto failed_2; - } - - init_qp_attr->event_handler = kiblnd_qp_event; - init_qp_attr->qp_context = conn; - init_qp_attr->cap.max_send_wr = IBLND_SEND_WRS(conn); - init_qp_attr->cap.max_recv_wr = IBLND_RECV_WRS(conn); - init_qp_attr->cap.max_send_sge = 1; - init_qp_attr->cap.max_recv_sge = 1; - init_qp_attr->sq_sig_type = IB_SIGNAL_REQ_WR; - init_qp_attr->qp_type = IB_QPT_RC; - init_qp_attr->send_cq = cq; - init_qp_attr->recv_cq = cq; - - conn->ibc_sched = sched; - - rc = rdma_create_qp(cmid, conn->ibc_hdev->ibh_pd, init_qp_attr); - if (rc) { - CERROR("Can't create QP: %d, send_wr: %d, recv_wr: %d\n", - rc, init_qp_attr->cap.max_send_wr, - init_qp_attr->cap.max_recv_wr); - goto failed_2; - } - - kfree(init_qp_attr); - - /* 1 ref for caller and each rxmsg 
*/ - atomic_set(&conn->ibc_refcount, 1 + IBLND_RX_MSGS(conn)); - conn->ibc_nrx = IBLND_RX_MSGS(conn); - - /* post receives */ - for (i = 0; i < IBLND_RX_MSGS(conn); i++) { - rc = kiblnd_post_rx(&conn->ibc_rxs[i], - IBLND_POSTRX_NO_CREDIT); - if (rc) { - CERROR("Can't post rxmsg: %d\n", rc); - - /* Make posted receives complete */ - kiblnd_abort_receives(conn); - - /* - * correct # of posted buffers - * NB locking needed now I'm racing with completion - */ - spin_lock_irqsave(&sched->ibs_lock, flags); - conn->ibc_nrx -= IBLND_RX_MSGS(conn) - i; - spin_unlock_irqrestore(&sched->ibs_lock, flags); - - /* - * cmid will be destroyed by CM(ofed) after cm_callback - * returned, so we can't refer it anymore - * (by kiblnd_connd()->kiblnd_destroy_conn) - */ - rdma_destroy_qp(conn->ibc_cmid); - conn->ibc_cmid = NULL; - - /* Drop my own and unused rxbuffer refcounts */ - while (i++ <= IBLND_RX_MSGS(conn)) - kiblnd_conn_decref(conn); - - return NULL; - } - } - - /* Init successful! */ - LASSERT(state == IBLND_CONN_ACTIVE_CONNECT || - state == IBLND_CONN_PASSIVE_WAIT); - conn->ibc_state = state; - - /* 1 more conn */ - atomic_inc(&net->ibn_nconns); - return conn; - - failed_2: - kiblnd_destroy_conn(conn); - kfree(conn); - failed_1: - kfree(init_qp_attr); - failed_0: - return NULL; -} - -void kiblnd_destroy_conn(struct kib_conn *conn) -{ - struct rdma_cm_id *cmid = conn->ibc_cmid; - struct kib_peer *peer = conn->ibc_peer; - int rc; - - LASSERT(!in_interrupt()); - LASSERT(!atomic_read(&conn->ibc_refcount)); - LASSERT(list_empty(&conn->ibc_early_rxs)); - LASSERT(list_empty(&conn->ibc_tx_noops)); - LASSERT(list_empty(&conn->ibc_tx_queue)); - LASSERT(list_empty(&conn->ibc_tx_queue_rsrvd)); - LASSERT(list_empty(&conn->ibc_tx_queue_nocred)); - LASSERT(list_empty(&conn->ibc_active_txs)); - LASSERT(!conn->ibc_noops_posted); - LASSERT(!conn->ibc_nsends_posted); - - switch (conn->ibc_state) { - default: - /* conn must be completely disengaged from the network */ - LBUG(); - - case 
IBLND_CONN_DISCONNECTED: - /* connvars should have been freed already */ - LASSERT(!conn->ibc_connvars); - break; - - case IBLND_CONN_INIT: - break; - } - - /* conn->ibc_cmid might be destroyed by CM already */ - if (cmid && cmid->qp) - rdma_destroy_qp(cmid); - - if (conn->ibc_cq) { - rc = ib_destroy_cq(conn->ibc_cq); - if (rc) - CWARN("Error destroying CQ: %d\n", rc); - } - - if (conn->ibc_rx_pages) - kiblnd_unmap_rx_descs(conn); - - kfree(conn->ibc_rxs); - kfree(conn->ibc_connvars); - - if (conn->ibc_hdev) - kiblnd_hdev_decref(conn->ibc_hdev); - - /* See CAVEAT EMPTOR above in kiblnd_create_conn */ - if (conn->ibc_state != IBLND_CONN_INIT) { - struct kib_net *net = peer->ibp_ni->ni_data; - - kiblnd_peer_decref(peer); - rdma_destroy_id(cmid); - atomic_dec(&net->ibn_nconns); - } -} - -int kiblnd_close_peer_conns_locked(struct kib_peer *peer, int why) -{ - struct kib_conn *conn; - struct list_head *ctmp; - struct list_head *cnxt; - int count = 0; - - list_for_each_safe(ctmp, cnxt, &peer->ibp_conns) { - conn = list_entry(ctmp, struct kib_conn, ibc_list); - - CDEBUG(D_NET, "Closing conn -> %s, version: %x, reason: %d\n", - libcfs_nid2str(peer->ibp_nid), - conn->ibc_version, why); - - kiblnd_close_conn_locked(conn, why); - count++; - } - - return count; -} - -int kiblnd_close_stale_conns_locked(struct kib_peer *peer, - int version, __u64 incarnation) -{ - struct kib_conn *conn; - struct list_head *ctmp; - struct list_head *cnxt; - int count = 0; - - list_for_each_safe(ctmp, cnxt, &peer->ibp_conns) { - conn = list_entry(ctmp, struct kib_conn, ibc_list); - - if (conn->ibc_version == version && - conn->ibc_incarnation == incarnation) - continue; - - CDEBUG(D_NET, - "Closing stale conn -> %s version: %x, incarnation:%#llx(%x, %#llx)\n", - libcfs_nid2str(peer->ibp_nid), - conn->ibc_version, conn->ibc_incarnation, - version, incarnation); - - kiblnd_close_conn_locked(conn, -ESTALE); - count++; - } - - return count; -} - -static int kiblnd_close_matching_conns(struct lnet_ni 
*ni, lnet_nid_t nid) -{ - struct kib_peer *peer; - struct list_head *ptmp; - struct list_head *pnxt; - int lo; - int hi; - int i; - unsigned long flags; - int count = 0; - - write_lock_irqsave(&kiblnd_data.kib_global_lock, flags); - - if (nid != LNET_NID_ANY) { - lo = kiblnd_nid2peerlist(nid) - kiblnd_data.kib_peers; - hi = kiblnd_nid2peerlist(nid) - kiblnd_data.kib_peers; - } else { - lo = 0; - hi = kiblnd_data.kib_peer_hash_size - 1; - } - - for (i = lo; i <= hi; i++) { - list_for_each_safe(ptmp, pnxt, &kiblnd_data.kib_peers[i]) { - peer = list_entry(ptmp, struct kib_peer, ibp_list); - LASSERT(!kiblnd_peer_idle(peer)); - - if (peer->ibp_ni != ni) - continue; - - if (!(nid == LNET_NID_ANY || nid == peer->ibp_nid)) - continue; - - count += kiblnd_close_peer_conns_locked(peer, 0); - } - } - - write_unlock_irqrestore(&kiblnd_data.kib_global_lock, flags); - - /* wildcards always succeed */ - if (nid == LNET_NID_ANY) - return 0; - - return !count ? -ENOENT : 0; -} - -static int kiblnd_ctl(struct lnet_ni *ni, unsigned int cmd, void *arg) -{ - struct libcfs_ioctl_data *data = arg; - int rc = -EINVAL; - - switch (cmd) { - case IOC_LIBCFS_GET_PEER: { - lnet_nid_t nid = 0; - int count = 0; - - rc = kiblnd_get_peer_info(ni, data->ioc_count, - &nid, &count); - data->ioc_nid = nid; - data->ioc_count = count; - break; - } - - case IOC_LIBCFS_DEL_PEER: { - rc = kiblnd_del_peer(ni, data->ioc_nid); - break; - } - case IOC_LIBCFS_GET_CONN: { - struct kib_conn *conn; - - rc = 0; - conn = kiblnd_get_conn_by_idx(ni, data->ioc_count); - if (!conn) { - rc = -ENOENT; - break; - } - - LASSERT(conn->ibc_cmid); - data->ioc_nid = conn->ibc_peer->ibp_nid; - if (!conn->ibc_cmid->route.path_rec) - data->ioc_u32[0] = 0; /* iWarp has no path MTU */ - else - data->ioc_u32[0] = - ib_mtu_enum_to_int(conn->ibc_cmid->route.path_rec->mtu); - kiblnd_conn_decref(conn); - break; - } - case IOC_LIBCFS_CLOSE_CONNECTION: { - rc = kiblnd_close_matching_conns(ni, data->ioc_nid); - break; - } - - default: - 
break; - } - - return rc; -} - -static void kiblnd_query(struct lnet_ni *ni, lnet_nid_t nid, - unsigned long *when) -{ - unsigned long last_alive = 0; - unsigned long now = cfs_time_current(); - rwlock_t *glock = &kiblnd_data.kib_global_lock; - struct kib_peer *peer; - unsigned long flags; - - read_lock_irqsave(glock, flags); - - peer = kiblnd_find_peer_locked(nid); - if (peer) - last_alive = peer->ibp_last_alive; - - read_unlock_irqrestore(glock, flags); - - if (last_alive) - *when = last_alive; - - /* - * peer is not persistent in hash, trigger peer creation - * and connection establishment with a NULL tx - */ - if (!peer) - kiblnd_launch_tx(ni, NULL, nid); - - CDEBUG(D_NET, "Peer %s %p, alive %ld secs ago\n", - libcfs_nid2str(nid), peer, - last_alive ? cfs_duration_sec(now - last_alive) : -1); -} - -static void kiblnd_free_pages(struct kib_pages *p) -{ - int npages = p->ibp_npages; - int i; - - for (i = 0; i < npages; i++) { - if (p->ibp_pages[i]) - __free_page(p->ibp_pages[i]); - } - - kfree(p); -} - -int kiblnd_alloc_pages(struct kib_pages **pp, int cpt, int npages) -{ - struct kib_pages *p; - int i; - - p = kzalloc_cpt(offsetof(struct kib_pages, ibp_pages[npages]), - GFP_NOFS, cpt); - if (!p) { - CERROR("Can't allocate descriptor for %d pages\n", npages); - return -ENOMEM; - } - - p->ibp_npages = npages; - - for (i = 0; i < npages; i++) { - p->ibp_pages[i] = alloc_pages_node( - cfs_cpt_spread_node(lnet_cpt_table(), cpt), - GFP_NOFS, 0); - if (!p->ibp_pages[i]) { - CERROR("Can't allocate page %d of %d\n", i, npages); - kiblnd_free_pages(p); - return -ENOMEM; - } - } - - *pp = p; - return 0; -} - -void kiblnd_unmap_rx_descs(struct kib_conn *conn) -{ - struct kib_rx *rx; - int i; - - LASSERT(conn->ibc_rxs); - LASSERT(conn->ibc_hdev); - - for (i = 0; i < IBLND_RX_MSGS(conn); i++) { - rx = &conn->ibc_rxs[i]; - - LASSERT(rx->rx_nob >= 0); /* not posted */ - - kiblnd_dma_unmap_single(conn->ibc_hdev->ibh_ibdev, - KIBLND_UNMAP_ADDR(rx, rx_msgunmap, - rx->rx_msgaddr), 
- IBLND_MSG_SIZE, DMA_FROM_DEVICE); - } - - kiblnd_free_pages(conn->ibc_rx_pages); - - conn->ibc_rx_pages = NULL; -} - -void kiblnd_map_rx_descs(struct kib_conn *conn) -{ - struct kib_rx *rx; - struct page *pg; - int pg_off; - int ipg; - int i; - - for (pg_off = ipg = i = 0; i < IBLND_RX_MSGS(conn); i++) { - pg = conn->ibc_rx_pages->ibp_pages[ipg]; - rx = &conn->ibc_rxs[i]; - - rx->rx_conn = conn; - rx->rx_msg = (struct kib_msg *)(((char *)page_address(pg)) + pg_off); - - rx->rx_msgaddr = kiblnd_dma_map_single(conn->ibc_hdev->ibh_ibdev, - rx->rx_msg, - IBLND_MSG_SIZE, - DMA_FROM_DEVICE); - LASSERT(!kiblnd_dma_mapping_error(conn->ibc_hdev->ibh_ibdev, - rx->rx_msgaddr)); - KIBLND_UNMAP_ADDR_SET(rx, rx_msgunmap, rx->rx_msgaddr); - - CDEBUG(D_NET, "rx %d: %p %#llx(%#llx)\n", - i, rx->rx_msg, rx->rx_msgaddr, - (__u64)(page_to_phys(pg) + pg_off)); - - pg_off += IBLND_MSG_SIZE; - LASSERT(pg_off <= PAGE_SIZE); - - if (pg_off == PAGE_SIZE) { - pg_off = 0; - ipg++; - LASSERT(ipg <= IBLND_RX_MSG_PAGES(conn)); - } - } -} - -static void kiblnd_unmap_tx_pool(struct kib_tx_pool *tpo) -{ - struct kib_hca_dev *hdev = tpo->tpo_hdev; - struct kib_tx *tx; - int i; - - LASSERT(!tpo->tpo_pool.po_allocated); - - if (!hdev) - return; - - for (i = 0; i < tpo->tpo_pool.po_size; i++) { - tx = &tpo->tpo_tx_descs[i]; - kiblnd_dma_unmap_single(hdev->ibh_ibdev, - KIBLND_UNMAP_ADDR(tx, tx_msgunmap, - tx->tx_msgaddr), - IBLND_MSG_SIZE, DMA_TO_DEVICE); - } - - kiblnd_hdev_decref(hdev); - tpo->tpo_hdev = NULL; -} - -static struct kib_hca_dev *kiblnd_current_hdev(struct kib_dev *dev) -{ - struct kib_hca_dev *hdev; - unsigned long flags; - int i = 0; - - read_lock_irqsave(&kiblnd_data.kib_global_lock, flags); - while (dev->ibd_failover) { - read_unlock_irqrestore(&kiblnd_data.kib_global_lock, flags); - if (!(i++ % 50)) - CDEBUG(D_NET, "%s: Wait for failover\n", - dev->ibd_ifname); - set_current_state(TASK_INTERRUPTIBLE); - schedule_timeout(HZ / 100); - - read_lock_irqsave(&kiblnd_data.kib_global_lock, 
flags); - } - - kiblnd_hdev_addref_locked(dev->ibd_hdev); - hdev = dev->ibd_hdev; - - read_unlock_irqrestore(&kiblnd_data.kib_global_lock, flags); - - return hdev; -} - -static void kiblnd_map_tx_pool(struct kib_tx_pool *tpo) -{ - struct kib_pages *txpgs = tpo->tpo_tx_pages; - struct kib_pool *pool = &tpo->tpo_pool; - struct kib_net *net = pool->po_owner->ps_net; - struct kib_dev *dev; - struct page *page; - struct kib_tx *tx; - int page_offset; - int ipage; - int i; - - LASSERT(net); - - dev = net->ibn_dev; - - /* pre-mapped messages are not bigger than 1 page */ - BUILD_BUG_ON(IBLND_MSG_SIZE > PAGE_SIZE); - - /* No fancy arithmetic when we do the buffer calculations */ - BUILD_BUG_ON(PAGE_SIZE % IBLND_MSG_SIZE); - - tpo->tpo_hdev = kiblnd_current_hdev(dev); - - for (ipage = page_offset = i = 0; i < pool->po_size; i++) { - page = txpgs->ibp_pages[ipage]; - tx = &tpo->tpo_tx_descs[i]; - - tx->tx_msg = (struct kib_msg *)(((char *)page_address(page)) + - page_offset); - - tx->tx_msgaddr = kiblnd_dma_map_single( - tpo->tpo_hdev->ibh_ibdev, tx->tx_msg, - IBLND_MSG_SIZE, DMA_TO_DEVICE); - LASSERT(!kiblnd_dma_mapping_error(tpo->tpo_hdev->ibh_ibdev, - tx->tx_msgaddr)); - KIBLND_UNMAP_ADDR_SET(tx, tx_msgunmap, tx->tx_msgaddr); - - list_add(&tx->tx_list, &pool->po_free_list); - - page_offset += IBLND_MSG_SIZE; - LASSERT(page_offset <= PAGE_SIZE); - - if (page_offset == PAGE_SIZE) { - page_offset = 0; - ipage++; - LASSERT(ipage <= txpgs->ibp_npages); - } - } -} - -static void kiblnd_destroy_fmr_pool(struct kib_fmr_pool *fpo) -{ - LASSERT(!fpo->fpo_map_count); - - if (fpo->fpo_is_fmr) { - if (fpo->fmr.fpo_fmr_pool) - ib_destroy_fmr_pool(fpo->fmr.fpo_fmr_pool); - } else { - struct kib_fast_reg_descriptor *frd, *tmp; - int i = 0; - - list_for_each_entry_safe(frd, tmp, &fpo->fast_reg.fpo_pool_list, - frd_list) { - list_del(&frd->frd_list); - ib_dereg_mr(frd->frd_mr); - kfree(frd); - i++; - } - if (i < fpo->fast_reg.fpo_pool_size) - CERROR("FastReg pool still has %d regions 
registered\n", - fpo->fast_reg.fpo_pool_size - i); - } - - if (fpo->fpo_hdev) - kiblnd_hdev_decref(fpo->fpo_hdev); - - kfree(fpo); -} - -static void kiblnd_destroy_fmr_pool_list(struct list_head *head) -{ - struct kib_fmr_pool *fpo, *tmp; - - list_for_each_entry_safe(fpo, tmp, head, fpo_list) { - list_del(&fpo->fpo_list); - kiblnd_destroy_fmr_pool(fpo); - } -} - -static int -kiblnd_fmr_pool_size(struct lnet_ioctl_config_o2iblnd_tunables *tunables, - int ncpts) -{ - int size = tunables->lnd_fmr_pool_size / ncpts; - - return max(IBLND_FMR_POOL, size); -} - -static int -kiblnd_fmr_flush_trigger(struct lnet_ioctl_config_o2iblnd_tunables *tunables, - int ncpts) -{ - int size = tunables->lnd_fmr_flush_trigger / ncpts; - - return max(IBLND_FMR_POOL_FLUSH, size); -} - -static int kiblnd_alloc_fmr_pool(struct kib_fmr_poolset *fps, struct kib_fmr_pool *fpo) -{ - struct ib_fmr_pool_param param = { - .max_pages_per_fmr = LNET_MAX_PAYLOAD / PAGE_SIZE, - .page_shift = PAGE_SHIFT, - .access = (IB_ACCESS_LOCAL_WRITE | - IB_ACCESS_REMOTE_WRITE), - .pool_size = fps->fps_pool_size, - .dirty_watermark = fps->fps_flush_trigger, - .flush_function = NULL, - .flush_arg = NULL, - .cache = !!fps->fps_cache }; - int rc = 0; - - fpo->fmr.fpo_fmr_pool = ib_create_fmr_pool(fpo->fpo_hdev->ibh_pd, - ¶m); - if (IS_ERR(fpo->fmr.fpo_fmr_pool)) { - rc = PTR_ERR(fpo->fmr.fpo_fmr_pool); - if (rc != -ENOSYS) - CERROR("Failed to create FMR pool: %d\n", rc); - else - CERROR("FMRs are not supported\n"); - } - - return rc; -} - -static int kiblnd_alloc_freg_pool(struct kib_fmr_poolset *fps, struct kib_fmr_pool *fpo) -{ - struct kib_fast_reg_descriptor *frd, *tmp; - int i, rc; - - INIT_LIST_HEAD(&fpo->fast_reg.fpo_pool_list); - fpo->fast_reg.fpo_pool_size = 0; - for (i = 0; i < fps->fps_pool_size; i++) { - frd = kzalloc_cpt(sizeof(*frd), GFP_NOFS, fps->fps_cpt); - if (!frd) { - CERROR("Failed to allocate a new fast_reg descriptor\n"); - rc = -ENOMEM; - goto out; - } - - frd->frd_mr = 
ib_alloc_mr(fpo->fpo_hdev->ibh_pd, - IB_MR_TYPE_MEM_REG, - LNET_MAX_PAYLOAD / PAGE_SIZE); - if (IS_ERR(frd->frd_mr)) { - rc = PTR_ERR(frd->frd_mr); - CERROR("Failed to allocate ib_alloc_mr: %d\n", rc); - frd->frd_mr = NULL; - goto out_middle; - } - - frd->frd_valid = true; - - list_add_tail(&frd->frd_list, &fpo->fast_reg.fpo_pool_list); - fpo->fast_reg.fpo_pool_size++; - } - - return 0; - -out_middle: - if (frd->frd_mr) - ib_dereg_mr(frd->frd_mr); - kfree(frd); - -out: - list_for_each_entry_safe(frd, tmp, &fpo->fast_reg.fpo_pool_list, - frd_list) { - list_del(&frd->frd_list); - ib_dereg_mr(frd->frd_mr); - kfree(frd); - } - - return rc; -} - -static int kiblnd_create_fmr_pool(struct kib_fmr_poolset *fps, - struct kib_fmr_pool **pp_fpo) -{ - struct kib_dev *dev = fps->fps_net->ibn_dev; - struct ib_device_attr *dev_attr; - struct kib_fmr_pool *fpo; - int rc; - - fpo = kzalloc_cpt(sizeof(*fpo), GFP_NOFS, fps->fps_cpt); - if (!fpo) - return -ENOMEM; - - fpo->fpo_hdev = kiblnd_current_hdev(dev); - dev_attr = &fpo->fpo_hdev->ibh_ibdev->attrs; - - /* Check for FMR or FastReg support */ - fpo->fpo_is_fmr = 0; - if (fpo->fpo_hdev->ibh_ibdev->alloc_fmr && - fpo->fpo_hdev->ibh_ibdev->dealloc_fmr && - fpo->fpo_hdev->ibh_ibdev->map_phys_fmr && - fpo->fpo_hdev->ibh_ibdev->unmap_fmr) { - LCONSOLE_INFO("Using FMR for registration\n"); - fpo->fpo_is_fmr = 1; - } else if (dev_attr->device_cap_flags & IB_DEVICE_MEM_MGT_EXTENSIONS) { - LCONSOLE_INFO("Using FastReg for registration\n"); - } else { - rc = -ENOSYS; - LCONSOLE_ERROR_MSG(rc, "IB device does not support FMRs nor FastRegs, can't register memory\n"); - goto out_fpo; - } - - if (fpo->fpo_is_fmr) - rc = kiblnd_alloc_fmr_pool(fps, fpo); - else - rc = kiblnd_alloc_freg_pool(fps, fpo); - if (rc) - goto out_fpo; - - fpo->fpo_deadline = cfs_time_shift(IBLND_POOL_DEADLINE); - fpo->fpo_owner = fps; - *pp_fpo = fpo; - - return 0; - -out_fpo: - kiblnd_hdev_decref(fpo->fpo_hdev); - kfree(fpo); - return rc; -} - -static void 
kiblnd_fail_fmr_poolset(struct kib_fmr_poolset *fps, - struct list_head *zombies) -{ - if (!fps->fps_net) /* initialized? */ - return; - - spin_lock(&fps->fps_lock); - - while (!list_empty(&fps->fps_pool_list)) { - struct kib_fmr_pool *fpo = list_entry(fps->fps_pool_list.next, - struct kib_fmr_pool, fpo_list); - fpo->fpo_failed = 1; - list_del(&fpo->fpo_list); - if (!fpo->fpo_map_count) - list_add(&fpo->fpo_list, zombies); - else - list_add(&fpo->fpo_list, &fps->fps_failed_pool_list); - } - - spin_unlock(&fps->fps_lock); -} - -static void kiblnd_fini_fmr_poolset(struct kib_fmr_poolset *fps) -{ - if (fps->fps_net) { /* initialized? */ - kiblnd_destroy_fmr_pool_list(&fps->fps_failed_pool_list); - kiblnd_destroy_fmr_pool_list(&fps->fps_pool_list); - } -} - -static int -kiblnd_init_fmr_poolset(struct kib_fmr_poolset *fps, int cpt, int ncpts, - struct kib_net *net, - struct lnet_ioctl_config_o2iblnd_tunables *tunables) -{ - struct kib_fmr_pool *fpo; - int rc; - - memset(fps, 0, sizeof(*fps)); - - fps->fps_net = net; - fps->fps_cpt = cpt; - - fps->fps_pool_size = kiblnd_fmr_pool_size(tunables, ncpts); - fps->fps_flush_trigger = kiblnd_fmr_flush_trigger(tunables, ncpts); - fps->fps_cache = tunables->lnd_fmr_cache; - - spin_lock_init(&fps->fps_lock); - INIT_LIST_HEAD(&fps->fps_pool_list); - INIT_LIST_HEAD(&fps->fps_failed_pool_list); - - rc = kiblnd_create_fmr_pool(fps, &fpo); - if (!rc) - list_add_tail(&fpo->fpo_list, &fps->fps_pool_list); - - return rc; -} - -static int kiblnd_fmr_pool_is_idle(struct kib_fmr_pool *fpo, unsigned long now) -{ - if (fpo->fpo_map_count) /* still in use */ - return 0; - if (fpo->fpo_failed) - return 1; - return cfs_time_aftereq(now, fpo->fpo_deadline); -} - -static int -kiblnd_map_tx_pages(struct kib_tx *tx, struct kib_rdma_desc *rd) -{ - __u64 *pages = tx->tx_pages; - struct kib_hca_dev *hdev; - int npages; - int size; - int i; - - hdev = tx->tx_pool->tpo_hdev; - - for (i = 0, npages = 0; i < rd->rd_nfrags; i++) { - for (size = 0; size < 
rd->rd_frags[i].rf_nob; - size += hdev->ibh_page_size) { - pages[npages++] = (rd->rd_frags[i].rf_addr & - hdev->ibh_page_mask) + size; - } - } - - return npages; -} - -void kiblnd_fmr_pool_unmap(struct kib_fmr *fmr, int status) -{ - LIST_HEAD(zombies); - struct kib_fmr_pool *fpo = fmr->fmr_pool; - struct kib_fmr_poolset *fps; - unsigned long now = cfs_time_current(); - struct kib_fmr_pool *tmp; - int rc; - - if (!fpo) - return; - - fps = fpo->fpo_owner; - if (fpo->fpo_is_fmr) { - if (fmr->fmr_pfmr) { - rc = ib_fmr_pool_unmap(fmr->fmr_pfmr); - LASSERT(!rc); - fmr->fmr_pfmr = NULL; - } - - if (status) { - rc = ib_flush_fmr_pool(fpo->fmr.fpo_fmr_pool); - LASSERT(!rc); - } - } else { - struct kib_fast_reg_descriptor *frd = fmr->fmr_frd; - - if (frd) { - frd->frd_valid = false; - spin_lock(&fps->fps_lock); - list_add_tail(&frd->frd_list, &fpo->fast_reg.fpo_pool_list); - spin_unlock(&fps->fps_lock); - fmr->fmr_frd = NULL; - } - } - fmr->fmr_pool = NULL; - - spin_lock(&fps->fps_lock); - fpo->fpo_map_count--; /* decref the pool */ - - list_for_each_entry_safe(fpo, tmp, &fps->fps_pool_list, fpo_list) { - /* the first pool is persistent */ - if (fps->fps_pool_list.next == &fpo->fpo_list) - continue; - - if (kiblnd_fmr_pool_is_idle(fpo, now)) { - list_move(&fpo->fpo_list, &zombies); - fps->fps_version++; - } - } - spin_unlock(&fps->fps_lock); - - if (!list_empty(&zombies)) - kiblnd_destroy_fmr_pool_list(&zombies); -} - -int kiblnd_fmr_pool_map(struct kib_fmr_poolset *fps, struct kib_tx *tx, - struct kib_rdma_desc *rd, __u32 nob, __u64 iov, - struct kib_fmr *fmr) -{ - __u64 *pages = tx->tx_pages; - bool is_rx = (rd != tx->tx_rd); - bool tx_pages_mapped = false; - struct kib_fmr_pool *fpo; - int npages = 0; - __u64 version; - int rc; - - again: - spin_lock(&fps->fps_lock); - version = fps->fps_version; - list_for_each_entry(fpo, &fps->fps_pool_list, fpo_list) { - fpo->fpo_deadline = cfs_time_shift(IBLND_POOL_DEADLINE); - fpo->fpo_map_count++; - - if (fpo->fpo_is_fmr) { - struct 
ib_pool_fmr *pfmr; - - spin_unlock(&fps->fps_lock); - - if (!tx_pages_mapped) { - npages = kiblnd_map_tx_pages(tx, rd); - tx_pages_mapped = 1; - } - - pfmr = ib_fmr_pool_map_phys(fpo->fmr.fpo_fmr_pool, - pages, npages, iov); - if (likely(!IS_ERR(pfmr))) { - fmr->fmr_key = is_rx ? pfmr->fmr->rkey : - pfmr->fmr->lkey; - fmr->fmr_frd = NULL; - fmr->fmr_pfmr = pfmr; - fmr->fmr_pool = fpo; - return 0; - } - rc = PTR_ERR(pfmr); - } else { - if (!list_empty(&fpo->fast_reg.fpo_pool_list)) { - struct kib_fast_reg_descriptor *frd; - struct ib_reg_wr *wr; - struct ib_mr *mr; - int n; - - frd = list_first_entry(&fpo->fast_reg.fpo_pool_list, - struct kib_fast_reg_descriptor, - frd_list); - list_del(&frd->frd_list); - spin_unlock(&fps->fps_lock); - - mr = frd->frd_mr; - - if (!frd->frd_valid) { - __u32 key = is_rx ? mr->rkey : mr->lkey; - struct ib_send_wr *inv_wr; - - inv_wr = &frd->frd_inv_wr; - memset(inv_wr, 0, sizeof(*inv_wr)); - inv_wr->opcode = IB_WR_LOCAL_INV; - inv_wr->wr_id = IBLND_WID_MR; - inv_wr->ex.invalidate_rkey = key; - - /* Bump the key */ - key = ib_inc_rkey(key); - ib_update_fast_reg_key(mr, key); - } - - n = ib_map_mr_sg(mr, tx->tx_frags, - tx->tx_nfrags, NULL, PAGE_SIZE); - if (unlikely(n != tx->tx_nfrags)) { - CERROR("Failed to map mr %d/%d elements\n", - n, tx->tx_nfrags); - return n < 0 ? n : -EINVAL; - } - - mr->iova = iov; - - /* Prepare FastReg WR */ - wr = &frd->frd_fastreg_wr; - memset(wr, 0, sizeof(*wr)); - wr->wr.opcode = IB_WR_REG_MR; - wr->wr.wr_id = IBLND_WID_MR; - wr->wr.num_sge = 0; - wr->wr.send_flags = 0; - wr->mr = mr; - wr->key = is_rx ? mr->rkey : mr->lkey; - wr->access = (IB_ACCESS_LOCAL_WRITE | - IB_ACCESS_REMOTE_WRITE); - - fmr->fmr_key = is_rx ? 
mr->rkey : mr->lkey; - fmr->fmr_frd = frd; - fmr->fmr_pfmr = NULL; - fmr->fmr_pool = fpo; - return 0; - } - spin_unlock(&fps->fps_lock); - rc = -EBUSY; - } - - spin_lock(&fps->fps_lock); - fpo->fpo_map_count--; - if (rc != -EAGAIN) { - spin_unlock(&fps->fps_lock); - return rc; - } - - /* EAGAIN and ... */ - if (version != fps->fps_version) { - spin_unlock(&fps->fps_lock); - goto again; - } - } - - if (fps->fps_increasing) { - spin_unlock(&fps->fps_lock); - CDEBUG(D_NET, "Another thread is allocating new FMR pool, waiting for her to complete\n"); - schedule(); - goto again; - } - - if (time_before(cfs_time_current(), fps->fps_next_retry)) { - /* someone failed recently */ - spin_unlock(&fps->fps_lock); - return -EAGAIN; - } - - fps->fps_increasing = 1; - spin_unlock(&fps->fps_lock); - - CDEBUG(D_NET, "Allocate new FMR pool\n"); - rc = kiblnd_create_fmr_pool(fps, &fpo); - spin_lock(&fps->fps_lock); - fps->fps_increasing = 0; - if (!rc) { - fps->fps_version++; - list_add_tail(&fpo->fpo_list, &fps->fps_pool_list); - } else { - fps->fps_next_retry = cfs_time_shift(IBLND_POOL_RETRY); - } - spin_unlock(&fps->fps_lock); - - goto again; -} - -static void kiblnd_fini_pool(struct kib_pool *pool) -{ - LASSERT(list_empty(&pool->po_free_list)); - LASSERT(!pool->po_allocated); - - CDEBUG(D_NET, "Finalize %s pool\n", pool->po_owner->ps_name); -} - -static void kiblnd_init_pool(struct kib_poolset *ps, struct kib_pool *pool, int size) -{ - CDEBUG(D_NET, "Initialize %s pool\n", ps->ps_name); - - memset(pool, 0, sizeof(*pool)); - INIT_LIST_HEAD(&pool->po_free_list); - pool->po_deadline = cfs_time_shift(IBLND_POOL_DEADLINE); - pool->po_owner = ps; - pool->po_size = size; -} - -static void kiblnd_destroy_pool_list(struct list_head *head) -{ - struct kib_pool *pool; - - while (!list_empty(head)) { - pool = list_entry(head->next, struct kib_pool, po_list); - list_del(&pool->po_list); - - LASSERT(pool->po_owner); - pool->po_owner->ps_pool_destroy(pool); - } -} - -static void 
kiblnd_fail_poolset(struct kib_poolset *ps, struct list_head *zombies) -{ - if (!ps->ps_net) /* initialized? */ - return; - - spin_lock(&ps->ps_lock); - while (!list_empty(&ps->ps_pool_list)) { - struct kib_pool *po = list_entry(ps->ps_pool_list.next, - struct kib_pool, po_list); - po->po_failed = 1; - list_del(&po->po_list); - if (!po->po_allocated) - list_add(&po->po_list, zombies); - else - list_add(&po->po_list, &ps->ps_failed_pool_list); - } - spin_unlock(&ps->ps_lock); -} - -static void kiblnd_fini_poolset(struct kib_poolset *ps) -{ - if (ps->ps_net) { /* initialized? */ - kiblnd_destroy_pool_list(&ps->ps_failed_pool_list); - kiblnd_destroy_pool_list(&ps->ps_pool_list); - } -} - -static int kiblnd_init_poolset(struct kib_poolset *ps, int cpt, - struct kib_net *net, char *name, int size, - kib_ps_pool_create_t po_create, - kib_ps_pool_destroy_t po_destroy, - kib_ps_node_init_t nd_init, - kib_ps_node_fini_t nd_fini) -{ - struct kib_pool *pool; - int rc; - - memset(ps, 0, sizeof(*ps)); - - ps->ps_cpt = cpt; - ps->ps_net = net; - ps->ps_pool_create = po_create; - ps->ps_pool_destroy = po_destroy; - ps->ps_node_init = nd_init; - ps->ps_node_fini = nd_fini; - ps->ps_pool_size = size; - if (strlcpy(ps->ps_name, name, sizeof(ps->ps_name)) - >= sizeof(ps->ps_name)) - return -E2BIG; - spin_lock_init(&ps->ps_lock); - INIT_LIST_HEAD(&ps->ps_pool_list); - INIT_LIST_HEAD(&ps->ps_failed_pool_list); - - rc = ps->ps_pool_create(ps, size, &pool); - if (!rc) - list_add(&pool->po_list, &ps->ps_pool_list); - else - CERROR("Failed to create the first pool for %s\n", ps->ps_name); - - return rc; -} - -static int kiblnd_pool_is_idle(struct kib_pool *pool, unsigned long now) -{ - if (pool->po_allocated) /* still in use */ - return 0; - if (pool->po_failed) - return 1; - return cfs_time_aftereq(now, pool->po_deadline); -} - -void kiblnd_pool_free_node(struct kib_pool *pool, struct list_head *node) -{ - LIST_HEAD(zombies); - struct kib_poolset *ps = pool->po_owner; - struct kib_pool 
*tmp; - unsigned long now = cfs_time_current(); - - spin_lock(&ps->ps_lock); - - if (ps->ps_node_fini) - ps->ps_node_fini(pool, node); - - LASSERT(pool->po_allocated > 0); - list_add(node, &pool->po_free_list); - pool->po_allocated--; - - list_for_each_entry_safe(pool, tmp, &ps->ps_pool_list, po_list) { - /* the first pool is persistent */ - if (ps->ps_pool_list.next == &pool->po_list) - continue; - - if (kiblnd_pool_is_idle(pool, now)) - list_move(&pool->po_list, &zombies); - } - spin_unlock(&ps->ps_lock); - - if (!list_empty(&zombies)) - kiblnd_destroy_pool_list(&zombies); -} - -struct list_head *kiblnd_pool_alloc_node(struct kib_poolset *ps) -{ - struct list_head *node; - struct kib_pool *pool; - unsigned int interval = 1; - unsigned long time_before; - unsigned int trips = 0; - int rc; - - again: - spin_lock(&ps->ps_lock); - list_for_each_entry(pool, &ps->ps_pool_list, po_list) { - if (list_empty(&pool->po_free_list)) - continue; - - pool->po_allocated++; - pool->po_deadline = cfs_time_shift(IBLND_POOL_DEADLINE); - node = pool->po_free_list.next; - list_del(node); - - if (ps->ps_node_init) { - /* still hold the lock */ - ps->ps_node_init(pool, node); - } - spin_unlock(&ps->ps_lock); - return node; - } - - /* no available tx pool and ... */ - if (ps->ps_increasing) { - /* another thread is allocating a new pool */ - spin_unlock(&ps->ps_lock); - trips++; - CDEBUG(D_NET, "Another thread is allocating new %s pool, waiting %d HZs for her to complete. 
trips = %d\n", - ps->ps_name, interval, trips); - - set_current_state(TASK_INTERRUPTIBLE); - schedule_timeout(interval); - if (interval < HZ) - interval *= 2; - - goto again; - } - - if (time_before(cfs_time_current(), ps->ps_next_retry)) { - /* someone failed recently */ - spin_unlock(&ps->ps_lock); - return NULL; - } - - ps->ps_increasing = 1; - spin_unlock(&ps->ps_lock); - - CDEBUG(D_NET, "%s pool exhausted, allocate new pool\n", ps->ps_name); - time_before = cfs_time_current(); - rc = ps->ps_pool_create(ps, ps->ps_pool_size, &pool); - CDEBUG(D_NET, "ps_pool_create took %lu HZ to complete", - cfs_time_current() - time_before); - - spin_lock(&ps->ps_lock); - ps->ps_increasing = 0; - if (!rc) { - list_add_tail(&pool->po_list, &ps->ps_pool_list); - } else { - ps->ps_next_retry = cfs_time_shift(IBLND_POOL_RETRY); - CERROR("Can't allocate new %s pool because out of memory\n", - ps->ps_name); - } - spin_unlock(&ps->ps_lock); - - goto again; -} - -static void kiblnd_destroy_tx_pool(struct kib_pool *pool) -{ - struct kib_tx_pool *tpo = container_of(pool, struct kib_tx_pool, tpo_pool); - int i; - - LASSERT(!pool->po_allocated); - - if (tpo->tpo_tx_pages) { - kiblnd_unmap_tx_pool(tpo); - kiblnd_free_pages(tpo->tpo_tx_pages); - } - - if (!tpo->tpo_tx_descs) - goto out; - - for (i = 0; i < pool->po_size; i++) { - struct kib_tx *tx = &tpo->tpo_tx_descs[i]; - - list_del(&tx->tx_list); - kfree(tx->tx_pages); - kfree(tx->tx_frags); - kfree(tx->tx_wrq); - kfree(tx->tx_sge); - kfree(tx->tx_rd); - } - - kfree(tpo->tpo_tx_descs); -out: - kiblnd_fini_pool(pool); - kfree(tpo); -} - -static int kiblnd_tx_pool_size(int ncpts) -{ - int ntx = *kiblnd_tunables.kib_ntx / ncpts; - - return max(IBLND_TX_POOL, ntx); -} - -static int kiblnd_create_tx_pool(struct kib_poolset *ps, int size, - struct kib_pool **pp_po) -{ - int i; - int npg; - struct kib_pool *pool; - struct kib_tx_pool *tpo; - - tpo = kzalloc_cpt(sizeof(*tpo), GFP_NOFS, ps->ps_cpt); - if (!tpo) { - CERROR("Failed to allocate TX 
pool\n"); - return -ENOMEM; - } - - pool = &tpo->tpo_pool; - kiblnd_init_pool(ps, pool, size); - tpo->tpo_tx_descs = NULL; - tpo->tpo_tx_pages = NULL; - - npg = DIV_ROUND_UP(size * IBLND_MSG_SIZE, PAGE_SIZE); - if (kiblnd_alloc_pages(&tpo->tpo_tx_pages, ps->ps_cpt, npg)) { - CERROR("Can't allocate tx pages: %d\n", npg); - kfree(tpo); - return -ENOMEM; - } - - tpo->tpo_tx_descs = kzalloc_cpt(size * sizeof(struct kib_tx), - GFP_NOFS, ps->ps_cpt); - if (!tpo->tpo_tx_descs) { - CERROR("Can't allocate %d tx descriptors\n", size); - ps->ps_pool_destroy(pool); - return -ENOMEM; - } - - memset(tpo->tpo_tx_descs, 0, size * sizeof(struct kib_tx)); - - for (i = 0; i < size; i++) { - struct kib_tx *tx = &tpo->tpo_tx_descs[i]; - - tx->tx_pool = tpo; - if (ps->ps_net->ibn_fmr_ps) { - tx->tx_pages = kzalloc_cpt(LNET_MAX_IOV * sizeof(*tx->tx_pages), - GFP_NOFS, ps->ps_cpt); - if (!tx->tx_pages) - break; - } - - tx->tx_frags = kzalloc_cpt((1 + IBLND_MAX_RDMA_FRAGS) * - sizeof(*tx->tx_frags), - GFP_NOFS, ps->ps_cpt); - if (!tx->tx_frags) - break; - - sg_init_table(tx->tx_frags, IBLND_MAX_RDMA_FRAGS + 1); - - tx->tx_wrq = kzalloc_cpt((1 + IBLND_MAX_RDMA_FRAGS) * - sizeof(*tx->tx_wrq), - GFP_NOFS, ps->ps_cpt); - if (!tx->tx_wrq) - break; - - tx->tx_sge = kzalloc_cpt((1 + IBLND_MAX_RDMA_FRAGS) * - sizeof(*tx->tx_sge), - GFP_NOFS, ps->ps_cpt); - if (!tx->tx_sge) - break; - - tx->tx_rd = kzalloc_cpt(offsetof(struct kib_rdma_desc, - rd_frags[IBLND_MAX_RDMA_FRAGS]), - GFP_NOFS, ps->ps_cpt); - if (!tx->tx_rd) - break; - } - - if (i == size) { - kiblnd_map_tx_pool(tpo); - *pp_po = pool; - return 0; - } - - ps->ps_pool_destroy(pool); - return -ENOMEM; -} - -static void kiblnd_tx_init(struct kib_pool *pool, struct list_head *node) -{ - struct kib_tx_poolset *tps = container_of(pool->po_owner, - struct kib_tx_poolset, - tps_poolset); - struct kib_tx *tx = list_entry(node, struct kib_tx, tx_list); - - tx->tx_cookie = tps->tps_next_tx_cookie++; -} - -static void kiblnd_net_fini_pools(struct 
kib_net *net) -{ - int i; - - cfs_cpt_for_each(i, lnet_cpt_table()) { - struct kib_tx_poolset *tps; - struct kib_fmr_poolset *fps; - - if (net->ibn_tx_ps) { - tps = net->ibn_tx_ps[i]; - kiblnd_fini_poolset(&tps->tps_poolset); - } - - if (net->ibn_fmr_ps) { - fps = net->ibn_fmr_ps[i]; - kiblnd_fini_fmr_poolset(fps); - } - } - - if (net->ibn_tx_ps) { - cfs_percpt_free(net->ibn_tx_ps); - net->ibn_tx_ps = NULL; - } - - if (net->ibn_fmr_ps) { - cfs_percpt_free(net->ibn_fmr_ps); - net->ibn_fmr_ps = NULL; - } -} - -static int kiblnd_net_init_pools(struct kib_net *net, struct lnet_ni *ni, - __u32 *cpts, int ncpts) -{ - struct lnet_ioctl_config_o2iblnd_tunables *tunables; - int cpt; - int rc; - int i; - - tunables = &ni->ni_lnd_tunables->lt_tun_u.lt_o2ib; - - if (tunables->lnd_fmr_pool_size < *kiblnd_tunables.kib_ntx / 4) { - CERROR("Can't set fmr pool size (%d) < ntx / 4(%d)\n", - tunables->lnd_fmr_pool_size, - *kiblnd_tunables.kib_ntx / 4); - rc = -EINVAL; - goto failed; - } - - /* - * TX pool must be created later than FMR, see LU-2268 - * for details - */ - LASSERT(!net->ibn_tx_ps); - - /* - * premapping can fail if ibd_nmr > 1, so we always create - * FMR pool and map-on-demand if premapping failed - * - * cfs_precpt_alloc is creating an array of struct kib_fmr_poolset - * The number of struct kib_fmr_poolsets create is equal to the - * number of CPTs that exist, i.e net->ibn_fmr_ps[cpt]. - */ - net->ibn_fmr_ps = cfs_percpt_alloc(lnet_cpt_table(), - sizeof(struct kib_fmr_poolset)); - if (!net->ibn_fmr_ps) { - CERROR("Failed to allocate FMR pool array\n"); - rc = -ENOMEM; - goto failed; - } - - for (i = 0; i < ncpts; i++) { - cpt = !cpts ? 
i : cpts[i]; - rc = kiblnd_init_fmr_poolset(net->ibn_fmr_ps[cpt], cpt, ncpts, - net, tunables); - if (rc) { - CERROR("Can't initialize FMR pool for CPT %d: %d\n", - cpt, rc); - goto failed; - } - } - - if (i > 0) - LASSERT(i == ncpts); - - /* - * cfs_precpt_alloc is creating an array of struct kib_tx_poolset - * The number of struct kib_tx_poolsets create is equal to the - * number of CPTs that exist, i.e net->ibn_tx_ps[cpt]. - */ - net->ibn_tx_ps = cfs_percpt_alloc(lnet_cpt_table(), - sizeof(struct kib_tx_poolset)); - if (!net->ibn_tx_ps) { - CERROR("Failed to allocate tx pool array\n"); - rc = -ENOMEM; - goto failed; - } - - for (i = 0; i < ncpts; i++) { - cpt = !cpts ? i : cpts[i]; - rc = kiblnd_init_poolset(&net->ibn_tx_ps[cpt]->tps_poolset, - cpt, net, "TX", - kiblnd_tx_pool_size(ncpts), - kiblnd_create_tx_pool, - kiblnd_destroy_tx_pool, - kiblnd_tx_init, NULL); - if (rc) { - CERROR("Can't initialize TX pool for CPT %d: %d\n", - cpt, rc); - goto failed; - } - } - - return 0; - failed: - kiblnd_net_fini_pools(net); - LASSERT(rc); - return rc; -} - -static int kiblnd_hdev_get_attr(struct kib_hca_dev *hdev) -{ - /* - * It's safe to assume a HCA can handle a page size - * matching that of the native system - */ - hdev->ibh_page_shift = PAGE_SHIFT; - hdev->ibh_page_size = 1 << PAGE_SHIFT; - hdev->ibh_page_mask = ~((__u64)hdev->ibh_page_size - 1); - - hdev->ibh_mr_size = hdev->ibh_ibdev->attrs.max_mr_size; - if (hdev->ibh_mr_size == ~0ULL) { - hdev->ibh_mr_shift = 64; - return 0; - } - - CERROR("Invalid mr size: %#llx\n", hdev->ibh_mr_size); - return -EINVAL; -} - -void kiblnd_hdev_destroy(struct kib_hca_dev *hdev) -{ - if (hdev->ibh_pd) - ib_dealloc_pd(hdev->ibh_pd); - - if (hdev->ibh_cmid) - rdma_destroy_id(hdev->ibh_cmid); - - kfree(hdev); -} - -/* DUMMY */ -static int kiblnd_dummy_callback(struct rdma_cm_id *cmid, - struct rdma_cm_event *event) -{ - return 0; -} - -static int kiblnd_dev_need_failover(struct kib_dev *dev) -{ - struct rdma_cm_id *cmid; - struct 
sockaddr_in srcaddr; - struct sockaddr_in dstaddr; - int rc; - - if (!dev->ibd_hdev || /* initializing */ - !dev->ibd_hdev->ibh_cmid || /* listener is dead */ - *kiblnd_tunables.kib_dev_failover > 1) /* debugging */ - return 1; - - /* - * XXX: it's UGLY, but I don't have better way to find - * ib-bonding HCA failover because: - * - * a. no reliable CM event for HCA failover... - * b. no OFED API to get ib_device for current net_device... - * - * We have only two choices at this point: - * - * a. rdma_bind_addr(), it will conflict with listener cmid - * b. rdma_resolve_addr() to zero addr - */ - cmid = kiblnd_rdma_create_id(kiblnd_dummy_callback, dev, RDMA_PS_TCP, - IB_QPT_RC); - if (IS_ERR(cmid)) { - rc = PTR_ERR(cmid); - CERROR("Failed to create cmid for failover: %d\n", rc); - return rc; - } - - memset(&srcaddr, 0, sizeof(srcaddr)); - srcaddr.sin_family = AF_INET; - srcaddr.sin_addr.s_addr = htonl(dev->ibd_ifip); - - memset(&dstaddr, 0, sizeof(dstaddr)); - dstaddr.sin_family = AF_INET; - rc = rdma_resolve_addr(cmid, (struct sockaddr *)&srcaddr, - (struct sockaddr *)&dstaddr, 1); - if (rc || !cmid->device) { - CERROR("Failed to bind %s:%pI4h to device(%p): %d\n", - dev->ibd_ifname, &dev->ibd_ifip, - cmid->device, rc); - rdma_destroy_id(cmid); - return rc; - } - - rc = dev->ibd_hdev->ibh_ibdev != cmid->device; /* true for failover */ - rdma_destroy_id(cmid); - - return rc; -} - -int kiblnd_dev_failover(struct kib_dev *dev) -{ - LIST_HEAD(zombie_tpo); - LIST_HEAD(zombie_ppo); - LIST_HEAD(zombie_fpo); - struct rdma_cm_id *cmid = NULL; - struct kib_hca_dev *hdev = NULL; - struct ib_pd *pd; - struct kib_net *net; - struct sockaddr_in addr; - unsigned long flags; - int rc = 0; - int i; - - LASSERT(*kiblnd_tunables.kib_dev_failover > 1 || - dev->ibd_can_failover || !dev->ibd_hdev); - - rc = kiblnd_dev_need_failover(dev); - if (rc <= 0) - goto out; - - if (dev->ibd_hdev && - dev->ibd_hdev->ibh_cmid) { - /* - * XXX it's not good to close old listener at here, - * because 
we can fail to create new listener. - * But we have to close it now, otherwise rdma_bind_addr - * will return EADDRINUSE... How crap! - */ - write_lock_irqsave(&kiblnd_data.kib_global_lock, flags); - - cmid = dev->ibd_hdev->ibh_cmid; - /* - * make next schedule of kiblnd_dev_need_failover() - * return 1 for me - */ - dev->ibd_hdev->ibh_cmid = NULL; - write_unlock_irqrestore(&kiblnd_data.kib_global_lock, flags); - - rdma_destroy_id(cmid); - } - - cmid = kiblnd_rdma_create_id(kiblnd_cm_callback, dev, RDMA_PS_TCP, - IB_QPT_RC); - if (IS_ERR(cmid)) { - rc = PTR_ERR(cmid); - CERROR("Failed to create cmid for failover: %d\n", rc); - goto out; - } - - memset(&addr, 0, sizeof(addr)); - addr.sin_family = AF_INET; - addr.sin_addr.s_addr = htonl(dev->ibd_ifip); - addr.sin_port = htons(*kiblnd_tunables.kib_service); - - /* Bind to failover device or port */ - rc = rdma_bind_addr(cmid, (struct sockaddr *)&addr); - if (rc || !cmid->device) { - CERROR("Failed to bind %s:%pI4h to device(%p): %d\n", - dev->ibd_ifname, &dev->ibd_ifip, - cmid->device, rc); - rdma_destroy_id(cmid); - goto out; - } - - hdev = kzalloc(sizeof(*hdev), GFP_NOFS); - if (!hdev) { - CERROR("Failed to allocate kib_hca_dev\n"); - rdma_destroy_id(cmid); - rc = -ENOMEM; - goto out; - } - - atomic_set(&hdev->ibh_ref, 1); - hdev->ibh_dev = dev; - hdev->ibh_cmid = cmid; - hdev->ibh_ibdev = cmid->device; - - pd = ib_alloc_pd(cmid->device, 0); - if (IS_ERR(pd)) { - rc = PTR_ERR(pd); - CERROR("Can't allocate PD: %d\n", rc); - goto out; - } - - hdev->ibh_pd = pd; - - rc = rdma_listen(cmid, 0); - if (rc) { - CERROR("Can't start new listener: %d\n", rc); - goto out; - } - - rc = kiblnd_hdev_get_attr(hdev); - if (rc) { - CERROR("Can't get device attributes: %d\n", rc); - goto out; - } - - write_lock_irqsave(&kiblnd_data.kib_global_lock, flags); - - swap(dev->ibd_hdev, hdev); /* take over the refcount */ - - list_for_each_entry(net, &dev->ibd_nets, ibn_list) { - cfs_cpt_for_each(i, lnet_cpt_table()) { - 
kiblnd_fail_poolset(&net->ibn_tx_ps[i]->tps_poolset, - &zombie_tpo); - - if (net->ibn_fmr_ps) - kiblnd_fail_fmr_poolset(net->ibn_fmr_ps[i], - &zombie_fpo); - } - } - - write_unlock_irqrestore(&kiblnd_data.kib_global_lock, flags); - out: - if (!list_empty(&zombie_tpo)) - kiblnd_destroy_pool_list(&zombie_tpo); - if (!list_empty(&zombie_ppo)) - kiblnd_destroy_pool_list(&zombie_ppo); - if (!list_empty(&zombie_fpo)) - kiblnd_destroy_fmr_pool_list(&zombie_fpo); - if (hdev) - kiblnd_hdev_decref(hdev); - - if (rc) - dev->ibd_failed_failover++; - else - dev->ibd_failed_failover = 0; - - return rc; -} - -void kiblnd_destroy_dev(struct kib_dev *dev) -{ - LASSERT(!dev->ibd_nnets); - LASSERT(list_empty(&dev->ibd_nets)); - - list_del(&dev->ibd_fail_list); - list_del(&dev->ibd_list); - - if (dev->ibd_hdev) - kiblnd_hdev_decref(dev->ibd_hdev); - - kfree(dev); -} - -static struct kib_dev *kiblnd_create_dev(char *ifname) -{ - struct net_device *netdev; - struct kib_dev *dev; - __u32 netmask; - __u32 ip; - int up; - int rc; - - rc = lnet_ipif_query(ifname, &up, &ip, &netmask); - if (rc) { - CERROR("Can't query IPoIB interface %s: %d\n", - ifname, rc); - return NULL; - } - - if (!up) { - CERROR("Can't query IPoIB interface %s: it's down\n", ifname); - return NULL; - } - - dev = kzalloc(sizeof(*dev), GFP_NOFS); - if (!dev) - return NULL; - - netdev = dev_get_by_name(&init_net, ifname); - if (!netdev) { - dev->ibd_can_failover = 0; - } else { - dev->ibd_can_failover = !!(netdev->flags & IFF_MASTER); - dev_put(netdev); - } - - INIT_LIST_HEAD(&dev->ibd_nets); - INIT_LIST_HEAD(&dev->ibd_list); /* not yet in kib_devs */ - INIT_LIST_HEAD(&dev->ibd_fail_list); - dev->ibd_ifip = ip; - strcpy(&dev->ibd_ifname[0], ifname); - - /* initialize the device */ - rc = kiblnd_dev_failover(dev); - if (rc) { - CERROR("Can't initialize device: %d\n", rc); - kfree(dev); - return NULL; - } - - list_add_tail(&dev->ibd_list, &kiblnd_data.kib_devs); - return dev; -} - -static void kiblnd_base_shutdown(void) -{ 
- struct kib_sched_info *sched; - int i; - - LASSERT(list_empty(&kiblnd_data.kib_devs)); - - switch (kiblnd_data.kib_init) { - default: - LBUG(); - - case IBLND_INIT_ALL: - case IBLND_INIT_DATA: - LASSERT(kiblnd_data.kib_peers); - for (i = 0; i < kiblnd_data.kib_peer_hash_size; i++) - LASSERT(list_empty(&kiblnd_data.kib_peers[i])); - LASSERT(list_empty(&kiblnd_data.kib_connd_zombies)); - LASSERT(list_empty(&kiblnd_data.kib_connd_conns)); - LASSERT(list_empty(&kiblnd_data.kib_reconn_list)); - LASSERT(list_empty(&kiblnd_data.kib_reconn_wait)); - - /* flag threads to terminate; wake and wait for them to die */ - kiblnd_data.kib_shutdown = 1; - - /* - * NB: we really want to stop scheduler threads net by net - * instead of the whole module, this should be improved - * with dynamic configuration LNet - */ - cfs_percpt_for_each(sched, i, kiblnd_data.kib_scheds) - wake_up_all(&sched->ibs_waitq); - - wake_up_all(&kiblnd_data.kib_connd_waitq); - wake_up_all(&kiblnd_data.kib_failover_waitq); - - i = 2; - while (atomic_read(&kiblnd_data.kib_nthreads)) { - i++; - /* power of 2 ? */ - CDEBUG(((i & (-i)) == i) ? 
D_WARNING : D_NET, - "Waiting for %d threads to terminate\n", - atomic_read(&kiblnd_data.kib_nthreads)); - set_current_state(TASK_UNINTERRUPTIBLE); - schedule_timeout(HZ); - } - - /* fall through */ - - case IBLND_INIT_NOTHING: - break; - } - - kvfree(kiblnd_data.kib_peers); - - if (kiblnd_data.kib_scheds) - cfs_percpt_free(kiblnd_data.kib_scheds); - - kiblnd_data.kib_init = IBLND_INIT_NOTHING; - module_put(THIS_MODULE); -} - -static void kiblnd_shutdown(struct lnet_ni *ni) -{ - struct kib_net *net = ni->ni_data; - rwlock_t *g_lock = &kiblnd_data.kib_global_lock; - int i; - unsigned long flags; - - LASSERT(kiblnd_data.kib_init == IBLND_INIT_ALL); - - if (!net) - goto out; - - write_lock_irqsave(g_lock, flags); - net->ibn_shutdown = 1; - write_unlock_irqrestore(g_lock, flags); - - switch (net->ibn_init) { - default: - LBUG(); - - case IBLND_INIT_ALL: - /* nuke all existing peers within this net */ - kiblnd_del_peer(ni, LNET_NID_ANY); - - /* Wait for all peer state to clean up */ - i = 2; - while (atomic_read(&net->ibn_npeers)) { - i++; - CDEBUG(((i & (-i)) == i) ? D_WARNING : D_NET, /* 2**n? 
*/ - "%s: waiting for %d peers to disconnect\n", - libcfs_nid2str(ni->ni_nid), - atomic_read(&net->ibn_npeers)); - set_current_state(TASK_UNINTERRUPTIBLE); - schedule_timeout(HZ); - } - - kiblnd_net_fini_pools(net); - - write_lock_irqsave(g_lock, flags); - LASSERT(net->ibn_dev->ibd_nnets > 0); - net->ibn_dev->ibd_nnets--; - list_del(&net->ibn_list); - write_unlock_irqrestore(g_lock, flags); - - /* fall through */ - - case IBLND_INIT_NOTHING: - LASSERT(!atomic_read(&net->ibn_nconns)); - - if (net->ibn_dev && !net->ibn_dev->ibd_nnets) - kiblnd_destroy_dev(net->ibn_dev); - - break; - } - - net->ibn_init = IBLND_INIT_NOTHING; - ni->ni_data = NULL; - - kfree(net); - -out: - if (list_empty(&kiblnd_data.kib_devs)) - kiblnd_base_shutdown(); -} - -static int kiblnd_base_startup(void) -{ - struct kib_sched_info *sched; - int rc; - int i; - - LASSERT(kiblnd_data.kib_init == IBLND_INIT_NOTHING); - - try_module_get(THIS_MODULE); - /* zero pointers, flags etc */ - memset(&kiblnd_data, 0, sizeof(kiblnd_data)); - - rwlock_init(&kiblnd_data.kib_global_lock); - - INIT_LIST_HEAD(&kiblnd_data.kib_devs); - INIT_LIST_HEAD(&kiblnd_data.kib_failed_devs); - - kiblnd_data.kib_peer_hash_size = IBLND_PEER_HASH_SIZE; - kiblnd_data.kib_peers = kvmalloc_array(kiblnd_data.kib_peer_hash_size, - sizeof(struct list_head), - GFP_KERNEL); - if (!kiblnd_data.kib_peers) - goto failed; - for (i = 0; i < kiblnd_data.kib_peer_hash_size; i++) - INIT_LIST_HEAD(&kiblnd_data.kib_peers[i]); - - spin_lock_init(&kiblnd_data.kib_connd_lock); - INIT_LIST_HEAD(&kiblnd_data.kib_connd_conns); - INIT_LIST_HEAD(&kiblnd_data.kib_connd_zombies); - INIT_LIST_HEAD(&kiblnd_data.kib_reconn_list); - INIT_LIST_HEAD(&kiblnd_data.kib_reconn_wait); - - init_waitqueue_head(&kiblnd_data.kib_connd_waitq); - init_waitqueue_head(&kiblnd_data.kib_failover_waitq); - - kiblnd_data.kib_scheds = cfs_percpt_alloc(lnet_cpt_table(), - sizeof(*sched)); - if (!kiblnd_data.kib_scheds) - goto failed; - - cfs_percpt_for_each(sched, i, 
kiblnd_data.kib_scheds) { - int nthrs; - - spin_lock_init(&sched->ibs_lock); - INIT_LIST_HEAD(&sched->ibs_conns); - init_waitqueue_head(&sched->ibs_waitq); - - nthrs = cfs_cpt_weight(lnet_cpt_table(), i); - if (*kiblnd_tunables.kib_nscheds > 0) { - nthrs = min(nthrs, *kiblnd_tunables.kib_nscheds); - } else { - /* - * max to half of CPUs, another half is reserved for - * upper layer modules - */ - nthrs = min(max(IBLND_N_SCHED, nthrs >> 1), nthrs); - } - - sched->ibs_nthreads_max = nthrs; - sched->ibs_cpt = i; - } - - kiblnd_data.kib_error_qpa.qp_state = IB_QPS_ERR; - - /* lists/ptrs/locks initialised */ - kiblnd_data.kib_init = IBLND_INIT_DATA; - /*****************************************************/ - - rc = kiblnd_thread_start(kiblnd_connd, NULL, "kiblnd_connd"); - if (rc) { - CERROR("Can't spawn o2iblnd connd: %d\n", rc); - goto failed; - } - - if (*kiblnd_tunables.kib_dev_failover) - rc = kiblnd_thread_start(kiblnd_failover_thread, NULL, - "kiblnd_failover"); - - if (rc) { - CERROR("Can't spawn o2iblnd failover thread: %d\n", rc); - goto failed; - } - - /* flag everything initialised */ - kiblnd_data.kib_init = IBLND_INIT_ALL; - /*****************************************************/ - - return 0; - - failed: - kiblnd_base_shutdown(); - return -ENETDOWN; -} - -static int kiblnd_start_schedulers(struct kib_sched_info *sched) -{ - int rc = 0; - int nthrs; - int i; - - if (!sched->ibs_nthreads) { - if (*kiblnd_tunables.kib_nscheds > 0) { - nthrs = sched->ibs_nthreads_max; - } else { - nthrs = cfs_cpt_weight(lnet_cpt_table(), - sched->ibs_cpt); - nthrs = min(max(IBLND_N_SCHED, nthrs >> 1), nthrs); - nthrs = min(IBLND_N_SCHED_HIGH, nthrs); - } - } else { - LASSERT(sched->ibs_nthreads <= sched->ibs_nthreads_max); - /* increase one thread if there is new interface */ - nthrs = sched->ibs_nthreads < sched->ibs_nthreads_max; - } - - for (i = 0; i < nthrs; i++) { - long id; - char name[20]; - - id = KIB_THREAD_ID(sched->ibs_cpt, sched->ibs_nthreads + i); - 
snprintf(name, sizeof(name), "kiblnd_sd_%02ld_%02ld", - KIB_THREAD_CPT(id), KIB_THREAD_TID(id)); - rc = kiblnd_thread_start(kiblnd_scheduler, (void *)id, name); - if (!rc) - continue; - - CERROR("Can't spawn thread %d for scheduler[%d]: %d\n", - sched->ibs_cpt, sched->ibs_nthreads + i, rc); - break; - } - - sched->ibs_nthreads += i; - return rc; -} - -static int kiblnd_dev_start_threads(struct kib_dev *dev, int newdev, __u32 *cpts, - int ncpts) -{ - int cpt; - int rc; - int i; - - for (i = 0; i < ncpts; i++) { - struct kib_sched_info *sched; - - cpt = !cpts ? i : cpts[i]; - sched = kiblnd_data.kib_scheds[cpt]; - - if (!newdev && sched->ibs_nthreads > 0) - continue; - - rc = kiblnd_start_schedulers(kiblnd_data.kib_scheds[cpt]); - if (rc) { - CERROR("Failed to start scheduler threads for %s\n", - dev->ibd_ifname); - return rc; - } - } - return 0; -} - -static struct kib_dev *kiblnd_dev_search(char *ifname) -{ - struct kib_dev *alias = NULL; - struct kib_dev *dev; - char *colon; - char *colon2; - - colon = strchr(ifname, ':'); - list_for_each_entry(dev, &kiblnd_data.kib_devs, ibd_list) { - if (!strcmp(&dev->ibd_ifname[0], ifname)) - return dev; - - if (alias) - continue; - - colon2 = strchr(dev->ibd_ifname, ':'); - if (colon) - *colon = 0; - if (colon2) - *colon2 = 0; - - if (!strcmp(&dev->ibd_ifname[0], ifname)) - alias = dev; - - if (colon) - *colon = ':'; - if (colon2) - *colon2 = ':'; - } - return alias; -} - -static int kiblnd_startup(struct lnet_ni *ni) -{ - char *ifname; - struct kib_dev *ibdev = NULL; - struct kib_net *net; - struct timespec64 tv; - unsigned long flags; - int rc; - int newdev; - - LASSERT(ni->ni_lnd == &the_o2iblnd); - - if (kiblnd_data.kib_init == IBLND_INIT_NOTHING) { - rc = kiblnd_base_startup(); - if (rc) - return rc; - } - - net = kzalloc(sizeof(*net), GFP_NOFS); - ni->ni_data = net; - if (!net) - goto net_failed; - - ktime_get_real_ts64(&tv); - net->ibn_incarnation = tv.tv_sec * USEC_PER_SEC + - tv.tv_nsec / NSEC_PER_USEC; - - rc = 
kiblnd_tunables_setup(ni); - if (rc) - goto net_failed; - - if (ni->ni_interfaces[0]) { - /* Use the IPoIB interface specified in 'networks=' */ - - BUILD_BUG_ON(LNET_MAX_INTERFACES <= 1); - if (ni->ni_interfaces[1]) { - CERROR("Multiple interfaces not supported\n"); - goto failed; - } - - ifname = ni->ni_interfaces[0]; - } else { - ifname = *kiblnd_tunables.kib_default_ipif; - } - - if (strlen(ifname) >= sizeof(ibdev->ibd_ifname)) { - CERROR("IPoIB interface name too long: %s\n", ifname); - goto failed; - } - - ibdev = kiblnd_dev_search(ifname); - - newdev = !ibdev; - /* hmm...create kib_dev even for alias */ - if (!ibdev || strcmp(&ibdev->ibd_ifname[0], ifname)) - ibdev = kiblnd_create_dev(ifname); - - if (!ibdev) - goto failed; - - net->ibn_dev = ibdev; - ni->ni_nid = LNET_MKNID(LNET_NIDNET(ni->ni_nid), ibdev->ibd_ifip); - - rc = kiblnd_dev_start_threads(ibdev, newdev, - ni->ni_cpts, ni->ni_ncpts); - if (rc) - goto failed; - - rc = kiblnd_net_init_pools(net, ni, ni->ni_cpts, ni->ni_ncpts); - if (rc) { - CERROR("Failed to initialize NI pools: %d\n", rc); - goto failed; - } - - write_lock_irqsave(&kiblnd_data.kib_global_lock, flags); - ibdev->ibd_nnets++; - list_add_tail(&net->ibn_list, &ibdev->ibd_nets); - write_unlock_irqrestore(&kiblnd_data.kib_global_lock, flags); - - net->ibn_init = IBLND_INIT_ALL; - - return 0; - -failed: - if (!net->ibn_dev && ibdev) - kiblnd_destroy_dev(ibdev); - -net_failed: - kiblnd_shutdown(ni); - - CDEBUG(D_NET, "%s failed\n", __func__); - return -ENETDOWN; -} - -static struct lnet_lnd the_o2iblnd = { - .lnd_type = O2IBLND, - .lnd_startup = kiblnd_startup, - .lnd_shutdown = kiblnd_shutdown, - .lnd_ctl = kiblnd_ctl, - .lnd_query = kiblnd_query, - .lnd_send = kiblnd_send, - .lnd_recv = kiblnd_recv, -}; - -static void __exit ko2iblnd_exit(void) -{ - lnet_unregister_lnd(&the_o2iblnd); -} - -static int __init ko2iblnd_init(void) -{ - BUILD_BUG_ON(sizeof(struct kib_msg) > IBLND_MSG_SIZE); - BUILD_BUG_ON(offsetof(struct kib_msg, - 
ibm_u.get.ibgm_rd.rd_frags[IBLND_MAX_RDMA_FRAGS]) - > IBLND_MSG_SIZE); - BUILD_BUG_ON(offsetof(struct kib_msg, - ibm_u.putack.ibpam_rd.rd_frags[IBLND_MAX_RDMA_FRAGS]) - > IBLND_MSG_SIZE); - - kiblnd_tunables_init(); - - lnet_register_lnd(&the_o2iblnd); - - return 0; -} - -MODULE_AUTHOR("OpenSFS, Inc. <http://www.lustre.org/>"); -MODULE_DESCRIPTION("OpenIB gen2 LNet Network Driver"); -MODULE_VERSION("2.7.0"); -MODULE_LICENSE("GPL"); - -module_init(ko2iblnd_init); -module_exit(ko2iblnd_exit); diff --git a/drivers/staging/lustre/lnet/klnds/o2iblnd/o2iblnd.h b/drivers/staging/lustre/lnet/klnds/o2iblnd/o2iblnd.h deleted file mode 100644 index b18911d09e9a..000000000000 --- a/drivers/staging/lustre/lnet/klnds/o2iblnd/o2iblnd.h +++ /dev/null @@ -1,1038 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -/* - * GPL HEADER START - * - * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 only, - * as published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, but - * WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * General Public License version 2 for more details (a copy is included - * in the LICENSE file that accompanied this code). - * - * You should have received a copy of the GNU General Public License - * version 2 along with this program; If not, see - * http://www.gnu.org/licenses/gpl-2.0.html - * - * GPL HEADER END - */ -/* - * Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved. - * Use is subject to license terms. - * - * Copyright (c) 2011, 2015, Intel Corporation. - */ -/* - * This file is part of Lustre, http://www.lustre.org/ - * Lustre is a trademark of Sun Microsystems, Inc. 
- * - * lnet/klnds/o2iblnd/o2iblnd.h - * - * Author: Eric Barton <eric@bartonsoftware.com> - */ - -#include <linux/module.h> -#include <linux/kernel.h> -#include <linux/mm.h> -#include <linux/string.h> -#include <linux/stat.h> -#include <linux/errno.h> -#include <linux/unistd.h> -#include <linux/uio.h> -#include <linux/uaccess.h> - -#include <linux/io.h> - -#include <linux/fs.h> -#include <linux/file.h> -#include <linux/list.h> -#include <linux/kmod.h> -#include <linux/sysctl.h> -#include <linux/pci.h> - -#include <net/sock.h> -#include <linux/in.h> - -#include <rdma/rdma_cm.h> -#include <rdma/ib_cm.h> -#include <rdma/ib_verbs.h> -#include <rdma/ib_fmr_pool.h> - -#define DEBUG_SUBSYSTEM S_LND - -#include <linux/libcfs/libcfs.h> -#include <linux/lnet/lib-lnet.h> - -#define IBLND_PEER_HASH_SIZE 101 /* # peer lists */ -/* # scheduler loops before reschedule */ -#define IBLND_RESCHED 100 - -#define IBLND_N_SCHED 2 -#define IBLND_N_SCHED_HIGH 4 - -struct kib_tunables { - int *kib_dev_failover; /* HCA failover */ - unsigned int *kib_service; /* IB service number */ - int *kib_min_reconnect_interval; /* first failed connection retry... */ - int *kib_max_reconnect_interval; /* exponentially increasing to this */ - int *kib_cksum; /* checksum struct kib_msg? 
*/ - int *kib_timeout; /* comms timeout (seconds) */ - int *kib_keepalive; /* keepalive timeout (seconds) */ - int *kib_ntx; /* # tx descs */ - char **kib_default_ipif; /* default IPoIB interface */ - int *kib_retry_count; - int *kib_rnr_retry_count; - int *kib_ib_mtu; /* IB MTU */ - int *kib_require_priv_port; /* accept only privileged ports */ - int *kib_use_priv_port; /* use privileged port for active connect */ - int *kib_nscheds; /* # threads on each CPT */ -}; - -extern struct kib_tunables kiblnd_tunables; - -#define IBLND_MSG_QUEUE_SIZE_V1 8 /* V1 only : # messages/RDMAs in-flight */ -#define IBLND_CREDIT_HIGHWATER_V1 7 /* V1 only : when eagerly to return credits */ - -#define IBLND_CREDITS_DEFAULT 8 /* default # of peer credits */ -#define IBLND_CREDITS_MAX ((typeof(((struct kib_msg *)0)->ibm_credits)) - 1) /* Max # of peer credits */ - -/* when eagerly to return credits */ -#define IBLND_CREDITS_HIGHWATER(t, v) ((v) == IBLND_MSG_VERSION_1 ? \ - IBLND_CREDIT_HIGHWATER_V1 : \ - t->lnd_peercredits_hiw) - -#define kiblnd_rdma_create_id(cb, dev, ps, qpt) rdma_create_id(current->nsproxy->net_ns, \ - cb, dev, \ - ps, qpt) - -/* 2 OOB shall suffice for 1 keepalive and 1 returning credits */ -#define IBLND_OOB_CAPABLE(v) ((v) != IBLND_MSG_VERSION_1) -#define IBLND_OOB_MSGS(v) (IBLND_OOB_CAPABLE(v) ? 2 : 0) - -#define IBLND_FRAG_SHIFT (PAGE_SHIFT - 12) /* frag size on wire is in 4K units */ -#define IBLND_MSG_SIZE (4 << 10) /* max size of queued messages (inc hdr) */ -#define IBLND_MAX_RDMA_FRAGS (LNET_MAX_PAYLOAD >> 12)/* max # of fragments supported in 4K size */ - -/************************/ -/* derived constants... 
*/ -/* Pools (shared by connections on each CPT) */ -/* These pools can grow at runtime, so don't need give a very large value */ -#define IBLND_TX_POOL 256 -#define IBLND_FMR_POOL 256 -#define IBLND_FMR_POOL_FLUSH 192 - -#define IBLND_RX_MSGS(c) \ - ((c->ibc_queue_depth) * 2 + IBLND_OOB_MSGS(c->ibc_version)) -#define IBLND_RX_MSG_BYTES(c) (IBLND_RX_MSGS(c) * IBLND_MSG_SIZE) -#define IBLND_RX_MSG_PAGES(c) \ - ((IBLND_RX_MSG_BYTES(c) + PAGE_SIZE - 1) / PAGE_SIZE) - -/* WRs and CQEs (per connection) */ -#define IBLND_RECV_WRS(c) IBLND_RX_MSGS(c) -#define IBLND_SEND_WRS(c) \ - (((c->ibc_max_frags + 1) << IBLND_FRAG_SHIFT) * \ - kiblnd_concurrent_sends(c->ibc_version, c->ibc_peer->ibp_ni)) -#define IBLND_CQ_ENTRIES(c) (IBLND_RECV_WRS(c) + IBLND_SEND_WRS(c)) - -struct kib_hca_dev; - -/* o2iblnd can run over aliased interface */ -#ifdef IFALIASZ -#define KIB_IFNAME_SIZE IFALIASZ -#else -#define KIB_IFNAME_SIZE 256 -#endif - -struct kib_dev { - struct list_head ibd_list; /* chain on kib_devs */ - struct list_head ibd_fail_list; /* chain on kib_failed_devs */ - __u32 ibd_ifip; /* IPoIB interface IP */ - - /* IPoIB interface name */ - char ibd_ifname[KIB_IFNAME_SIZE]; - int ibd_nnets; /* # nets extant */ - - unsigned long ibd_next_failover; - int ibd_failed_failover; /* # failover failures */ - unsigned int ibd_failover; /* failover in progress */ - unsigned int ibd_can_failover; /* IPoIB interface is a bonding master */ - struct list_head ibd_nets; - struct kib_hca_dev *ibd_hdev; -}; - -struct kib_hca_dev { - struct rdma_cm_id *ibh_cmid; /* listener cmid */ - struct ib_device *ibh_ibdev; /* IB device */ - int ibh_page_shift; /* page shift of current HCA */ - int ibh_page_size; /* page size of current HCA */ - __u64 ibh_page_mask; /* page mask of current HCA */ - int ibh_mr_shift; /* bits shift of max MR size */ - __u64 ibh_mr_size; /* size of MR */ - struct ib_pd *ibh_pd; /* PD */ - struct kib_dev *ibh_dev; /* owner */ - atomic_t ibh_ref; /* refcount */ -}; - -/** # of 
seconds to keep pool alive */ -#define IBLND_POOL_DEADLINE 300 -/** # of seconds to retry if allocation failed */ -#define IBLND_POOL_RETRY 1 - -struct kib_pages { - int ibp_npages; /* # pages */ - struct page *ibp_pages[0]; /* page array */ -}; - -struct kib_pool; -struct kib_poolset; - -typedef int (*kib_ps_pool_create_t)(struct kib_poolset *ps, - int inc, struct kib_pool **pp_po); -typedef void (*kib_ps_pool_destroy_t)(struct kib_pool *po); -typedef void (*kib_ps_node_init_t)(struct kib_pool *po, struct list_head *node); -typedef void (*kib_ps_node_fini_t)(struct kib_pool *po, struct list_head *node); - -struct kib_net; - -#define IBLND_POOL_NAME_LEN 32 - -struct kib_poolset { - spinlock_t ps_lock; /* serialize */ - struct kib_net *ps_net; /* network it belongs to */ - char ps_name[IBLND_POOL_NAME_LEN]; /* pool set name */ - struct list_head ps_pool_list; /* list of pools */ - struct list_head ps_failed_pool_list;/* failed pool list */ - unsigned long ps_next_retry; /* time stamp for retry if */ - /* failed to allocate */ - int ps_increasing; /* is allocating new pool */ - int ps_pool_size; /* new pool size */ - int ps_cpt; /* CPT id */ - - kib_ps_pool_create_t ps_pool_create; /* create a new pool */ - kib_ps_pool_destroy_t ps_pool_destroy; /* destroy a pool */ - kib_ps_node_init_t ps_node_init; /* initialize new allocated node */ - kib_ps_node_fini_t ps_node_fini; /* finalize node */ -}; - -struct kib_pool { - struct list_head po_list; /* chain on pool list */ - struct list_head po_free_list; /* pre-allocated node */ - struct kib_poolset *po_owner; /* pool_set of this pool */ - unsigned long po_deadline; /* deadline of this pool */ - int po_allocated; /* # of elements in use */ - int po_failed; /* pool is created on failed HCA */ - int po_size; /* # of pre-allocated elements */ -}; - -struct kib_tx_poolset { - struct kib_poolset tps_poolset; /* pool-set */ - __u64 tps_next_tx_cookie; /* cookie of TX */ -}; - -struct kib_tx_pool { - struct kib_pool tpo_pool; /* 
pool */ - struct kib_hca_dev *tpo_hdev; /* device for this pool */ - struct kib_tx *tpo_tx_descs; /* all the tx descriptors */ - struct kib_pages *tpo_tx_pages; /* premapped tx msg pages */ -}; - -struct kib_fmr_poolset { - spinlock_t fps_lock; /* serialize */ - struct kib_net *fps_net; /* IB network */ - struct list_head fps_pool_list; /* FMR pool list */ - struct list_head fps_failed_pool_list;/* FMR pool list */ - __u64 fps_version; /* validity stamp */ - int fps_cpt; /* CPT id */ - int fps_pool_size; - int fps_flush_trigger; - int fps_cache; - int fps_increasing; /* is allocating new pool */ - unsigned long fps_next_retry; /* time stamp for retry if*/ - /* failed to allocate */ -}; - -struct kib_fast_reg_descriptor { /* For fast registration */ - struct list_head frd_list; - struct ib_send_wr frd_inv_wr; - struct ib_reg_wr frd_fastreg_wr; - struct ib_mr *frd_mr; - bool frd_valid; -}; - -struct kib_fmr_pool { - struct list_head fpo_list; /* chain on pool list */ - struct kib_hca_dev *fpo_hdev; /* device for this pool */ - struct kib_fmr_poolset *fpo_owner; /* owner of this pool */ - union { - struct { - struct ib_fmr_pool *fpo_fmr_pool; /* IB FMR pool */ - } fmr; - struct { /* For fast registration */ - struct list_head fpo_pool_list; - int fpo_pool_size; - } fast_reg; - }; - unsigned long fpo_deadline; /* deadline of this pool */ - int fpo_failed; /* fmr pool is failed */ - int fpo_map_count; /* # of mapped FMR */ - int fpo_is_fmr; -}; - -struct kib_fmr { - struct kib_fmr_pool *fmr_pool; /* pool of FMR */ - struct ib_pool_fmr *fmr_pfmr; /* IB pool fmr */ - struct kib_fast_reg_descriptor *fmr_frd; - u32 fmr_key; -}; - -struct kib_net { - struct list_head ibn_list; /* chain on struct kib_dev::ibd_nets */ - __u64 ibn_incarnation;/* my epoch */ - int ibn_init; /* initialisation state */ - int ibn_shutdown; /* shutting down? 
*/ - - atomic_t ibn_npeers; /* # peers extant */ - atomic_t ibn_nconns; /* # connections extant */ - - struct kib_tx_poolset **ibn_tx_ps; /* tx pool-set */ - struct kib_fmr_poolset **ibn_fmr_ps; /* fmr pool-set */ - - struct kib_dev *ibn_dev; /* underlying IB device */ -}; - -#define KIB_THREAD_SHIFT 16 -#define KIB_THREAD_ID(cpt, tid) ((cpt) << KIB_THREAD_SHIFT | (tid)) -#define KIB_THREAD_CPT(id) ((id) >> KIB_THREAD_SHIFT) -#define KIB_THREAD_TID(id) ((id) & ((1UL << KIB_THREAD_SHIFT) - 1)) - -struct kib_sched_info { - spinlock_t ibs_lock; /* serialise */ - wait_queue_head_t ibs_waitq; /* schedulers sleep here */ - struct list_head ibs_conns; /* conns to check for rx completions */ - int ibs_nthreads; /* number of scheduler threads */ - int ibs_nthreads_max; /* max allowed scheduler threads */ - int ibs_cpt; /* CPT id */ -}; - -struct kib_data { - int kib_init; /* initialisation state */ - int kib_shutdown; /* shut down? */ - struct list_head kib_devs; /* IB devices extant */ - struct list_head kib_failed_devs; /* list head of failed devices */ - wait_queue_head_t kib_failover_waitq; /* schedulers sleep here */ - atomic_t kib_nthreads; /* # live threads */ - rwlock_t kib_global_lock; /* stabilize net/dev/peer/conn ops */ - struct list_head *kib_peers; /* hash table of all my known peers */ - int kib_peer_hash_size; /* size of kib_peers */ - void *kib_connd; /* the connd task (serialisation assertions) */ - struct list_head kib_connd_conns; /* connections to setup/teardown */ - struct list_head kib_connd_zombies; /* connections with zero refcount */ - /* connections to reconnect */ - struct list_head kib_reconn_list; - /* peers wait for reconnection */ - struct list_head kib_reconn_wait; - /** - * The second that peers are pulled out from \a kib_reconn_wait - * for reconnection. 
- */ - time64_t kib_reconn_sec; - - wait_queue_head_t kib_connd_waitq; /* connection daemon sleeps here */ - spinlock_t kib_connd_lock; /* serialise */ - struct ib_qp_attr kib_error_qpa; /* QP->ERROR */ - struct kib_sched_info **kib_scheds; /* percpt data for schedulers */ -}; - -#define IBLND_INIT_NOTHING 0 -#define IBLND_INIT_DATA 1 -#define IBLND_INIT_ALL 2 - -/************************************************************************ - * IB Wire message format. - * These are sent in sender's byte order (i.e. receiver flips). - */ - -struct kib_connparams { - __u16 ibcp_queue_depth; - __u16 ibcp_max_frags; - __u32 ibcp_max_msg_size; -} WIRE_ATTR; - -struct kib_immediate_msg { - struct lnet_hdr ibim_hdr; /* portals header */ - char ibim_payload[0]; /* piggy-backed payload */ -} WIRE_ATTR; - -struct kib_rdma_frag { - __u32 rf_nob; /* # bytes this frag */ - __u64 rf_addr; /* CAVEAT EMPTOR: misaligned!! */ -} WIRE_ATTR; - -struct kib_rdma_desc { - __u32 rd_key; /* local/remote key */ - __u32 rd_nfrags; /* # fragments */ - struct kib_rdma_frag rd_frags[0]; /* buffer frags */ -} WIRE_ATTR; - -struct kib_putreq_msg { - struct lnet_hdr ibprm_hdr; /* portals header */ - __u64 ibprm_cookie; /* opaque completion cookie */ -} WIRE_ATTR; - -struct kib_putack_msg { - __u64 ibpam_src_cookie; /* reflected completion cookie */ - __u64 ibpam_dst_cookie; /* opaque completion cookie */ - struct kib_rdma_desc ibpam_rd; /* sender's sink buffer */ -} WIRE_ATTR; - -struct kib_get_msg { - struct lnet_hdr ibgm_hdr; /* portals header */ - __u64 ibgm_cookie; /* opaque completion cookie */ - struct kib_rdma_desc ibgm_rd; /* rdma descriptor */ -} WIRE_ATTR; - -struct kib_completion_msg { - __u64 ibcm_cookie; /* opaque completion cookie */ - __s32 ibcm_status; /* < 0 failure: >= 0 length */ -} WIRE_ATTR; - -struct kib_msg { - /* First 2 fields fixed FOR ALL TIME */ - __u32 ibm_magic; /* I'm an ibnal message */ - __u16 ibm_version; /* this is my version number */ - - __u8 ibm_type; /* msg type 
*/ - __u8 ibm_credits; /* returned credits */ - __u32 ibm_nob; /* # bytes in whole message */ - __u32 ibm_cksum; /* checksum (0 == no checksum) */ - __u64 ibm_srcnid; /* sender's NID */ - __u64 ibm_srcstamp; /* sender's incarnation */ - __u64 ibm_dstnid; /* destination's NID */ - __u64 ibm_dststamp; /* destination's incarnation */ - - union { - struct kib_connparams connparams; - struct kib_immediate_msg immediate; - struct kib_putreq_msg putreq; - struct kib_putack_msg putack; - struct kib_get_msg get; - struct kib_completion_msg completion; - } WIRE_ATTR ibm_u; -} WIRE_ATTR; - -#define IBLND_MSG_MAGIC LNET_PROTO_IB_MAGIC /* unique magic */ - -#define IBLND_MSG_VERSION_1 0x11 -#define IBLND_MSG_VERSION_2 0x12 -#define IBLND_MSG_VERSION IBLND_MSG_VERSION_2 - -#define IBLND_MSG_CONNREQ 0xc0 /* connection request */ -#define IBLND_MSG_CONNACK 0xc1 /* connection acknowledge */ -#define IBLND_MSG_NOOP 0xd0 /* nothing (just credits) */ -#define IBLND_MSG_IMMEDIATE 0xd1 /* immediate */ -#define IBLND_MSG_PUT_REQ 0xd2 /* putreq (src->sink) */ -#define IBLND_MSG_PUT_NAK 0xd3 /* completion (sink->src) */ -#define IBLND_MSG_PUT_ACK 0xd4 /* putack (sink->src) */ -#define IBLND_MSG_PUT_DONE 0xd5 /* completion (src->sink) */ -#define IBLND_MSG_GET_REQ 0xd6 /* getreq (sink->src) */ -#define IBLND_MSG_GET_DONE 0xd7 /* completion (src->sink: all OK) */ - -struct kib_rej { - __u32 ibr_magic; /* sender's magic */ - __u16 ibr_version; /* sender's version */ - __u8 ibr_why; /* reject reason */ - __u8 ibr_padding; /* padding */ - __u64 ibr_incarnation; /* incarnation of peer */ - struct kib_connparams ibr_cp; /* connection parameters */ -} WIRE_ATTR; - -/* connection rejection reasons */ -#define IBLND_REJECT_CONN_RACE 1 /* You lost connection race */ -#define IBLND_REJECT_NO_RESOURCES 2 /* Out of memory/conns etc */ -#define IBLND_REJECT_FATAL 3 /* Anything else */ -#define IBLND_REJECT_CONN_UNCOMPAT 4 /* incompatible version peer */ -#define IBLND_REJECT_CONN_STALE 5 /* stale peer */ 
-/* peer's rdma frags doesn't match mine */ -#define IBLND_REJECT_RDMA_FRAGS 6 -/* peer's msg queue size doesn't match mine */ -#define IBLND_REJECT_MSG_QUEUE_SIZE 7 - -/***********************************************************************/ - -struct kib_rx { /* receive message */ - struct list_head rx_list; /* queue for attention */ - struct kib_conn *rx_conn; /* owning conn */ - int rx_nob; /* # bytes received (-1 while posted) */ - enum ib_wc_status rx_status; /* completion status */ - struct kib_msg *rx_msg; /* message buffer (host vaddr) */ - __u64 rx_msgaddr; /* message buffer (I/O addr) */ - DECLARE_PCI_UNMAP_ADDR(rx_msgunmap); /* for dma_unmap_single() */ - struct ib_recv_wr rx_wrq; /* receive work item... */ - struct ib_sge rx_sge; /* ...and its memory */ -}; - -#define IBLND_POSTRX_DONT_POST 0 /* don't post */ -#define IBLND_POSTRX_NO_CREDIT 1 /* post: no credits */ -#define IBLND_POSTRX_PEER_CREDIT 2 /* post: give peer back 1 credit */ -#define IBLND_POSTRX_RSRVD_CREDIT 3 /* post: give self back 1 reserved credit */ - -struct kib_tx { /* transmit message */ - struct list_head tx_list; /* queue on idle_txs ibc_tx_queue etc. */ - struct kib_tx_pool *tx_pool; /* pool I'm from */ - struct kib_conn *tx_conn; /* owning conn */ - short tx_sending; /* # tx callbacks outstanding */ - short tx_queued; /* queued for sending */ - short tx_waiting; /* waiting for peer */ - int tx_status; /* LNET completion status */ - unsigned long tx_deadline; /* completion deadline */ - __u64 tx_cookie; /* completion cookie */ - struct lnet_msg *tx_lntmsg[2]; /* lnet msgs to finalize on completion */ - struct kib_msg *tx_msg; /* message buffer (host vaddr) */ - __u64 tx_msgaddr; /* message buffer (I/O addr) */ - DECLARE_PCI_UNMAP_ADDR(tx_msgunmap); /* for dma_unmap_single() */ - int tx_nwrq; /* # send work items */ - struct ib_rdma_wr *tx_wrq; /* send work items... 
*/ - struct ib_sge *tx_sge; /* ...and their memory */ - struct kib_rdma_desc *tx_rd; /* rdma descriptor */ - int tx_nfrags; /* # entries in... */ - struct scatterlist *tx_frags; /* dma_map_sg descriptor */ - __u64 *tx_pages; /* rdma phys page addrs */ - struct kib_fmr fmr; /* FMR */ - int tx_dmadir; /* dma direction */ -}; - -struct kib_connvars { - struct kib_msg cv_msg; /* connection-in-progress variables */ -}; - -struct kib_conn { - struct kib_sched_info *ibc_sched; /* scheduler information */ - struct kib_peer *ibc_peer; /* owning peer */ - struct kib_hca_dev *ibc_hdev; /* HCA bound on */ - struct list_head ibc_list; /* stash on peer's conn list */ - struct list_head ibc_sched_list; /* schedule for attention */ - __u16 ibc_version; /* version of connection */ - /* reconnect later */ - __u16 ibc_reconnect:1; - __u64 ibc_incarnation; /* which instance of the peer */ - atomic_t ibc_refcount; /* # users */ - int ibc_state; /* what's happening */ - int ibc_nsends_posted; /* # uncompleted sends */ - int ibc_noops_posted; /* # uncompleted NOOPs */ - int ibc_credits; /* # credits I have */ - int ibc_outstanding_credits; /* # credits to return */ - int ibc_reserved_credits; /* # ACK/DONE msg credits */ - int ibc_comms_error; /* set on comms error */ - /* connections queue depth */ - __u16 ibc_queue_depth; - /* connections max frags */ - __u16 ibc_max_frags; - unsigned int ibc_nrx:16; /* receive buffers owned */ - unsigned int ibc_scheduled:1; /* scheduled for attention */ - unsigned int ibc_ready:1; /* CQ callback fired */ - unsigned long ibc_last_send; /* time of last send */ - struct list_head ibc_connd_list; /* link chain for */ - /* kiblnd_check_conns only */ - struct list_head ibc_early_rxs; /* rxs completed before ESTABLISHED */ - struct list_head ibc_tx_noops; /* IBLND_MSG_NOOPs for */ - /* IBLND_MSG_VERSION_1 */ - struct list_head ibc_tx_queue; /* sends that need a credit */ - struct list_head ibc_tx_queue_nocred; /* sends that don't need a */ - /* credit */ - 
struct list_head ibc_tx_queue_rsrvd; /* sends that need to */ - /* reserve an ACK/DONE msg */ - struct list_head ibc_active_txs; /* active tx awaiting completion */ - spinlock_t ibc_lock; /* serialise */ - struct kib_rx *ibc_rxs; /* the rx descs */ - struct kib_pages *ibc_rx_pages; /* premapped rx msg pages */ - - struct rdma_cm_id *ibc_cmid; /* CM id */ - struct ib_cq *ibc_cq; /* completion queue */ - - struct kib_connvars *ibc_connvars; /* in-progress connection state */ -}; - -#define IBLND_CONN_INIT 0 /* being initialised */ -#define IBLND_CONN_ACTIVE_CONNECT 1 /* active sending req */ -#define IBLND_CONN_PASSIVE_WAIT 2 /* passive waiting for rtu */ -#define IBLND_CONN_ESTABLISHED 3 /* connection established */ -#define IBLND_CONN_CLOSING 4 /* being closed */ -#define IBLND_CONN_DISCONNECTED 5 /* disconnected */ - -struct kib_peer { - struct list_head ibp_list; /* stash on global peer list */ - lnet_nid_t ibp_nid; /* who's on the other end(s) */ - struct lnet_ni *ibp_ni; /* LNet interface */ - struct list_head ibp_conns; /* all active connections */ - struct list_head ibp_tx_queue; /* msgs waiting for a conn */ - __u64 ibp_incarnation; /* incarnation of peer */ - /* when (in jiffies) I was last alive */ - unsigned long ibp_last_alive; - /* # users */ - atomic_t ibp_refcount; - /* version of peer */ - __u16 ibp_version; - /* current passive connection attempts */ - unsigned short ibp_accepting; - /* current active connection attempts */ - unsigned short ibp_connecting; - /* reconnect this peer later */ - unsigned short ibp_reconnecting:1; - /* counter of how many times we triggered a conn race */ - unsigned char ibp_races; - /* # consecutive reconnection attempts to this peer */ - unsigned int ibp_reconnected; - /* errno on closing this peer */ - int ibp_error; - /* max map_on_demand */ - __u16 ibp_max_frags; - /* max_peer_credits */ - __u16 ibp_queue_depth; -}; - -extern struct kib_data kiblnd_data; - -void kiblnd_hdev_destroy(struct kib_hca_dev *hdev); - -int 
kiblnd_msg_queue_size(int version, struct lnet_ni *ni); - -/* max # of fragments configured by user */ -static inline int -kiblnd_cfg_rdma_frags(struct lnet_ni *ni) -{ - struct lnet_ioctl_config_o2iblnd_tunables *tunables; - int mod; - - tunables = &ni->ni_lnd_tunables->lt_tun_u.lt_o2ib; - mod = tunables->lnd_map_on_demand; - return mod ? mod : IBLND_MAX_RDMA_FRAGS >> IBLND_FRAG_SHIFT; -} - -static inline int -kiblnd_rdma_frags(int version, struct lnet_ni *ni) -{ - return version == IBLND_MSG_VERSION_1 ? - (IBLND_MAX_RDMA_FRAGS >> IBLND_FRAG_SHIFT) : - kiblnd_cfg_rdma_frags(ni); -} - -static inline int -kiblnd_concurrent_sends(int version, struct lnet_ni *ni) -{ - struct lnet_ioctl_config_o2iblnd_tunables *tunables; - int concurrent_sends; - - tunables = &ni->ni_lnd_tunables->lt_tun_u.lt_o2ib; - concurrent_sends = tunables->lnd_concurrent_sends; - - if (version == IBLND_MSG_VERSION_1) { - if (concurrent_sends > IBLND_MSG_QUEUE_SIZE_V1 * 2) - return IBLND_MSG_QUEUE_SIZE_V1 * 2; - - if (concurrent_sends < IBLND_MSG_QUEUE_SIZE_V1 / 2) - return IBLND_MSG_QUEUE_SIZE_V1 / 2; - } - - return concurrent_sends; -} - -static inline void -kiblnd_hdev_addref_locked(struct kib_hca_dev *hdev) -{ - LASSERT(atomic_read(&hdev->ibh_ref) > 0); - atomic_inc(&hdev->ibh_ref); -} - -static inline void -kiblnd_hdev_decref(struct kib_hca_dev *hdev) -{ - LASSERT(atomic_read(&hdev->ibh_ref) > 0); - if (atomic_dec_and_test(&hdev->ibh_ref)) - kiblnd_hdev_destroy(hdev); -} - -static inline int -kiblnd_dev_can_failover(struct kib_dev *dev) -{ - if (!list_empty(&dev->ibd_fail_list)) /* already scheduled */ - return 0; - - if (!*kiblnd_tunables.kib_dev_failover) /* disabled */ - return 0; - - if (*kiblnd_tunables.kib_dev_failover > 1) /* force failover */ - return 1; - - return dev->ibd_can_failover; -} - -#define kiblnd_conn_addref(conn) \ -do { \ - CDEBUG(D_NET, "conn[%p] (%d)++\n", \ - (conn), atomic_read(&(conn)->ibc_refcount)); \ - atomic_inc(&(conn)->ibc_refcount); \ -} while (0) - -#define 
kiblnd_conn_decref(conn) \ -do { \ - unsigned long flags; \ - \ - CDEBUG(D_NET, "conn[%p] (%d)--\n", \ - (conn), atomic_read(&(conn)->ibc_refcount)); \ - LASSERT_ATOMIC_POS(&(conn)->ibc_refcount); \ - if (atomic_dec_and_test(&(conn)->ibc_refcount)) { \ - spin_lock_irqsave(&kiblnd_data.kib_connd_lock, flags); \ - list_add_tail(&(conn)->ibc_list, \ - &kiblnd_data.kib_connd_zombies); \ - wake_up(&kiblnd_data.kib_connd_waitq); \ - spin_unlock_irqrestore(&kiblnd_data.kib_connd_lock, flags);\ - } \ -} while (0) - -#define kiblnd_peer_addref(peer) \ -do { \ - CDEBUG(D_NET, "peer[%p] -> %s (%d)++\n", \ - (peer), libcfs_nid2str((peer)->ibp_nid), \ - atomic_read(&(peer)->ibp_refcount)); \ - atomic_inc(&(peer)->ibp_refcount); \ -} while (0) - -#define kiblnd_peer_decref(peer) \ -do { \ - CDEBUG(D_NET, "peer[%p] -> %s (%d)--\n", \ - (peer), libcfs_nid2str((peer)->ibp_nid), \ - atomic_read(&(peer)->ibp_refcount)); \ - LASSERT_ATOMIC_POS(&(peer)->ibp_refcount); \ - if (atomic_dec_and_test(&(peer)->ibp_refcount)) \ - kiblnd_destroy_peer(peer); \ -} while (0) - -static inline bool -kiblnd_peer_connecting(struct kib_peer *peer) -{ - return peer->ibp_connecting || - peer->ibp_reconnecting || - peer->ibp_accepting; -} - -static inline bool -kiblnd_peer_idle(struct kib_peer *peer) -{ - return !kiblnd_peer_connecting(peer) && list_empty(&peer->ibp_conns); -} - -static inline struct list_head * -kiblnd_nid2peerlist(lnet_nid_t nid) -{ - unsigned int hash = - ((unsigned int)nid) % kiblnd_data.kib_peer_hash_size; - - return &kiblnd_data.kib_peers[hash]; -} - -static inline int -kiblnd_peer_active(struct kib_peer *peer) -{ - /* Am I in the peer hash table? 
*/ - return !list_empty(&peer->ibp_list); -} - -static inline struct kib_conn * -kiblnd_get_conn_locked(struct kib_peer *peer) -{ - LASSERT(!list_empty(&peer->ibp_conns)); - - /* just return the first connection */ - return list_entry(peer->ibp_conns.next, struct kib_conn, ibc_list); -} - -static inline int -kiblnd_send_keepalive(struct kib_conn *conn) -{ - return (*kiblnd_tunables.kib_keepalive > 0) && - cfs_time_after(jiffies, conn->ibc_last_send + - msecs_to_jiffies(*kiblnd_tunables.kib_keepalive * - MSEC_PER_SEC)); -} - -static inline int -kiblnd_need_noop(struct kib_conn *conn) -{ - struct lnet_ioctl_config_o2iblnd_tunables *tunables; - struct lnet_ni *ni = conn->ibc_peer->ibp_ni; - - LASSERT(conn->ibc_state >= IBLND_CONN_ESTABLISHED); - tunables = &ni->ni_lnd_tunables->lt_tun_u.lt_o2ib; - - if (conn->ibc_outstanding_credits < - IBLND_CREDITS_HIGHWATER(tunables, conn->ibc_version) && - !kiblnd_send_keepalive(conn)) - return 0; /* No need to send NOOP */ - - if (IBLND_OOB_CAPABLE(conn->ibc_version)) { - if (!list_empty(&conn->ibc_tx_queue_nocred)) - return 0; /* NOOP can be piggybacked */ - - /* No tx to piggyback NOOP onto or no credit to send a tx */ - return (list_empty(&conn->ibc_tx_queue) || - !conn->ibc_credits); - } - - if (!list_empty(&conn->ibc_tx_noops) || /* NOOP already queued */ - !list_empty(&conn->ibc_tx_queue_nocred) || /* piggyback NOOP */ - !conn->ibc_credits) /* no credit */ - return 0; - - if (conn->ibc_credits == 1 && /* last credit reserved for */ - !conn->ibc_outstanding_credits) /* giving back credits */ - return 0; - - /* No tx to piggyback NOOP onto or no credit to send a tx */ - return (list_empty(&conn->ibc_tx_queue) || conn->ibc_credits == 1); -} - -static inline void -kiblnd_abort_receives(struct kib_conn *conn) -{ - ib_modify_qp(conn->ibc_cmid->qp, - &kiblnd_data.kib_error_qpa, IB_QP_STATE); -} - -static inline const char * -kiblnd_queue2str(struct kib_conn *conn, struct list_head *q) -{ - if (q == &conn->ibc_tx_queue) - return 
"tx_queue"; - - if (q == &conn->ibc_tx_queue_rsrvd) - return "tx_queue_rsrvd"; - - if (q == &conn->ibc_tx_queue_nocred) - return "tx_queue_nocred"; - - if (q == &conn->ibc_active_txs) - return "active_txs"; - - LBUG(); - return NULL; -} - -/* CAVEAT EMPTOR: We rely on descriptor alignment to allow us to use the */ -/* lowest bits of the work request id to stash the work item type. */ - -#define IBLND_WID_INVAL 0 -#define IBLND_WID_TX 1 -#define IBLND_WID_RX 2 -#define IBLND_WID_RDMA 3 -#define IBLND_WID_MR 4 -#define IBLND_WID_MASK 7UL - -static inline __u64 -kiblnd_ptr2wreqid(void *ptr, int type) -{ - unsigned long lptr = (unsigned long)ptr; - - LASSERT(!(lptr & IBLND_WID_MASK)); - LASSERT(!(type & ~IBLND_WID_MASK)); - return (__u64)(lptr | type); -} - -static inline void * -kiblnd_wreqid2ptr(__u64 wreqid) -{ - return (void *)(((unsigned long)wreqid) & ~IBLND_WID_MASK); -} - -static inline int -kiblnd_wreqid2type(__u64 wreqid) -{ - return wreqid & IBLND_WID_MASK; -} - -static inline void -kiblnd_set_conn_state(struct kib_conn *conn, int state) -{ - conn->ibc_state = state; - mb(); -} - -static inline void -kiblnd_init_msg(struct kib_msg *msg, int type, int body_nob) -{ - msg->ibm_type = type; - msg->ibm_nob = offsetof(struct kib_msg, ibm_u) + body_nob; -} - -static inline int -kiblnd_rd_size(struct kib_rdma_desc *rd) -{ - int i; - int size; - - for (i = size = 0; i < rd->rd_nfrags; i++) - size += rd->rd_frags[i].rf_nob; - - return size; -} - -static inline __u64 -kiblnd_rd_frag_addr(struct kib_rdma_desc *rd, int index) -{ - return rd->rd_frags[index].rf_addr; -} - -static inline __u32 -kiblnd_rd_frag_size(struct kib_rdma_desc *rd, int index) -{ - return rd->rd_frags[index].rf_nob; -} - -static inline __u32 -kiblnd_rd_frag_key(struct kib_rdma_desc *rd, int index) -{ - return rd->rd_key; -} - -static inline int -kiblnd_rd_consume_frag(struct kib_rdma_desc *rd, int index, __u32 nob) -{ - if (nob < rd->rd_frags[index].rf_nob) { - rd->rd_frags[index].rf_addr += nob; - 
rd->rd_frags[index].rf_nob -= nob; - } else { - index++; - } - - return index; -} - -static inline int -kiblnd_rd_msg_size(struct kib_rdma_desc *rd, int msgtype, int n) -{ - LASSERT(msgtype == IBLND_MSG_GET_REQ || - msgtype == IBLND_MSG_PUT_ACK); - - return msgtype == IBLND_MSG_GET_REQ ? - offsetof(struct kib_get_msg, ibgm_rd.rd_frags[n]) : - offsetof(struct kib_putack_msg, ibpam_rd.rd_frags[n]); -} - -static inline __u64 -kiblnd_dma_mapping_error(struct ib_device *dev, u64 dma_addr) -{ - return ib_dma_mapping_error(dev, dma_addr); -} - -static inline __u64 kiblnd_dma_map_single(struct ib_device *dev, - void *msg, size_t size, - enum dma_data_direction direction) -{ - return ib_dma_map_single(dev, msg, size, direction); -} - -static inline void kiblnd_dma_unmap_single(struct ib_device *dev, - __u64 addr, size_t size, - enum dma_data_direction direction) -{ - ib_dma_unmap_single(dev, addr, size, direction); -} - -#define KIBLND_UNMAP_ADDR_SET(p, m, a) do {} while (0) -#define KIBLND_UNMAP_ADDR(p, m, a) (a) - -static inline int kiblnd_dma_map_sg(struct ib_device *dev, - struct scatterlist *sg, int nents, - enum dma_data_direction direction) -{ - return ib_dma_map_sg(dev, sg, nents, direction); -} - -static inline void kiblnd_dma_unmap_sg(struct ib_device *dev, - struct scatterlist *sg, int nents, - enum dma_data_direction direction) -{ - ib_dma_unmap_sg(dev, sg, nents, direction); -} - -static inline __u64 kiblnd_sg_dma_address(struct ib_device *dev, - struct scatterlist *sg) -{ - return ib_sg_dma_address(dev, sg); -} - -static inline unsigned int kiblnd_sg_dma_len(struct ib_device *dev, - struct scatterlist *sg) -{ - return ib_sg_dma_len(dev, sg); -} - -/* XXX We use KIBLND_CONN_PARAM(e) as writable buffer, it's not strictly */ -/* right because OFED1.2 defines it as const, to use it we have to add */ -/* (void *) cast to overcome "const" */ - -#define KIBLND_CONN_PARAM(e) ((e)->param.conn.private_data) -#define KIBLND_CONN_PARAM_LEN(e) 
((e)->param.conn.private_data_len) - -void kiblnd_map_rx_descs(struct kib_conn *conn); -void kiblnd_unmap_rx_descs(struct kib_conn *conn); -void kiblnd_pool_free_node(struct kib_pool *pool, struct list_head *node); -struct list_head *kiblnd_pool_alloc_node(struct kib_poolset *ps); - -int kiblnd_fmr_pool_map(struct kib_fmr_poolset *fps, struct kib_tx *tx, - struct kib_rdma_desc *rd, __u32 nob, __u64 iov, - struct kib_fmr *fmr); -void kiblnd_fmr_pool_unmap(struct kib_fmr *fmr, int status); - -int kiblnd_tunables_setup(struct lnet_ni *ni); -void kiblnd_tunables_init(void); - -int kiblnd_connd(void *arg); -int kiblnd_scheduler(void *arg); -int kiblnd_thread_start(int (*fn)(void *arg), void *arg, char *name); -int kiblnd_failover_thread(void *arg); - -int kiblnd_alloc_pages(struct kib_pages **pp, int cpt, int npages); - -int kiblnd_cm_callback(struct rdma_cm_id *cmid, - struct rdma_cm_event *event); -int kiblnd_translate_mtu(int value); - -int kiblnd_dev_failover(struct kib_dev *dev); -int kiblnd_create_peer(struct lnet_ni *ni, struct kib_peer **peerp, - lnet_nid_t nid); -void kiblnd_destroy_peer(struct kib_peer *peer); -bool kiblnd_reconnect_peer(struct kib_peer *peer); -void kiblnd_destroy_dev(struct kib_dev *dev); -void kiblnd_unlink_peer_locked(struct kib_peer *peer); -struct kib_peer *kiblnd_find_peer_locked(lnet_nid_t nid); -int kiblnd_close_stale_conns_locked(struct kib_peer *peer, - int version, __u64 incarnation); -int kiblnd_close_peer_conns_locked(struct kib_peer *peer, int why); - -struct kib_conn *kiblnd_create_conn(struct kib_peer *peer, - struct rdma_cm_id *cmid, - int state, int version); -void kiblnd_destroy_conn(struct kib_conn *conn); -void kiblnd_close_conn(struct kib_conn *conn, int error); -void kiblnd_close_conn_locked(struct kib_conn *conn, int error); - -void kiblnd_launch_tx(struct lnet_ni *ni, struct kib_tx *tx, lnet_nid_t nid); -void kiblnd_txlist_done(struct lnet_ni *ni, struct list_head *txlist, - int status); - -void kiblnd_qp_event(struct 
ib_event *event, void *arg); -void kiblnd_cq_event(struct ib_event *event, void *arg); -void kiblnd_cq_completion(struct ib_cq *cq, void *arg); - -void kiblnd_pack_msg(struct lnet_ni *ni, struct kib_msg *msg, int version, - int credits, lnet_nid_t dstnid, __u64 dststamp); -int kiblnd_unpack_msg(struct kib_msg *msg, int nob); -int kiblnd_post_rx(struct kib_rx *rx, int credit); - -int kiblnd_send(struct lnet_ni *ni, void *private, struct lnet_msg *lntmsg); -int kiblnd_recv(struct lnet_ni *ni, void *private, struct lnet_msg *lntmsg, - int delayed, struct iov_iter *to, unsigned int rlen); diff --git a/drivers/staging/lustre/lnet/klnds/o2iblnd/o2iblnd_cb.c b/drivers/staging/lustre/lnet/klnds/o2iblnd/o2iblnd_cb.c deleted file mode 100644 index 6690a6cd4e34..000000000000 --- a/drivers/staging/lustre/lnet/klnds/o2iblnd/o2iblnd_cb.c +++ /dev/null @@ -1,3751 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -/* - * GPL HEADER START - * - * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 only, - * as published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, but - * WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * General Public License version 2 for more details (a copy is included - * in the LICENSE file that accompanied this code). - * - * You should have received a copy of the GNU General Public License - * version 2 along with this program; If not, see - * http://www.gnu.org/licenses/gpl-2.0.html - * - * GPL HEADER END - */ -/* - * Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved. - * Use is subject to license terms. - * - * Copyright (c) 2012, 2015, Intel Corporation. 
- */ -/* - * This file is part of Lustre, http://www.lustre.org/ - * Lustre is a trademark of Sun Microsystems, Inc. - * - * lnet/klnds/o2iblnd/o2iblnd_cb.c - * - * Author: Eric Barton <eric@bartonsoftware.com> - */ - -#include "o2iblnd.h" - -#define MAX_CONN_RACES_BEFORE_ABORT 20 - -static void kiblnd_peer_alive(struct kib_peer *peer); -static void kiblnd_peer_connect_failed(struct kib_peer *peer, int active, int error); -static void kiblnd_init_tx_msg(struct lnet_ni *ni, struct kib_tx *tx, - int type, int body_nob); -static int kiblnd_init_rdma(struct kib_conn *conn, struct kib_tx *tx, int type, - int resid, struct kib_rdma_desc *dstrd, - __u64 dstcookie); -static void kiblnd_queue_tx_locked(struct kib_tx *tx, struct kib_conn *conn); -static void kiblnd_queue_tx(struct kib_tx *tx, struct kib_conn *conn); -static void kiblnd_unmap_tx(struct lnet_ni *ni, struct kib_tx *tx); -static void kiblnd_check_sends_locked(struct kib_conn *conn); - -static void -kiblnd_tx_done(struct lnet_ni *ni, struct kib_tx *tx) -{ - struct lnet_msg *lntmsg[2]; - struct kib_net *net = ni->ni_data; - int rc; - int i; - - LASSERT(net); - LASSERT(!in_interrupt()); - LASSERT(!tx->tx_queued); /* mustn't be queued for sending */ - LASSERT(!tx->tx_sending); /* mustn't be awaiting sent callback */ - LASSERT(!tx->tx_waiting); /* mustn't be awaiting peer response */ - LASSERT(tx->tx_pool); - - kiblnd_unmap_tx(ni, tx); - - /* tx may have up to 2 lnet msgs to finalise */ - lntmsg[0] = tx->tx_lntmsg[0]; tx->tx_lntmsg[0] = NULL; - lntmsg[1] = tx->tx_lntmsg[1]; tx->tx_lntmsg[1] = NULL; - rc = tx->tx_status; - - if (tx->tx_conn) { - LASSERT(ni == tx->tx_conn->ibc_peer->ibp_ni); - - kiblnd_conn_decref(tx->tx_conn); - tx->tx_conn = NULL; - } - - tx->tx_nwrq = 0; - tx->tx_status = 0; - - kiblnd_pool_free_node(&tx->tx_pool->tpo_pool, &tx->tx_list); - - /* delay finalize until my descs have been freed */ - for (i = 0; i < 2; i++) { - if (!lntmsg[i]) - continue; - - lnet_finalize(ni, lntmsg[i], rc); - } -} - 
-void -kiblnd_txlist_done(struct lnet_ni *ni, struct list_head *txlist, int status) -{ - struct kib_tx *tx; - - while (!list_empty(txlist)) { - tx = list_entry(txlist->next, struct kib_tx, tx_list); - - list_del(&tx->tx_list); - /* complete now */ - tx->tx_waiting = 0; - tx->tx_status = status; - kiblnd_tx_done(ni, tx); - } -} - -static struct kib_tx * -kiblnd_get_idle_tx(struct lnet_ni *ni, lnet_nid_t target) -{ - struct kib_net *net = (struct kib_net *)ni->ni_data; - struct list_head *node; - struct kib_tx *tx; - struct kib_tx_poolset *tps; - - tps = net->ibn_tx_ps[lnet_cpt_of_nid(target)]; - node = kiblnd_pool_alloc_node(&tps->tps_poolset); - if (!node) - return NULL; - tx = list_entry(node, struct kib_tx, tx_list); - - LASSERT(!tx->tx_nwrq); - LASSERT(!tx->tx_queued); - LASSERT(!tx->tx_sending); - LASSERT(!tx->tx_waiting); - LASSERT(!tx->tx_status); - LASSERT(!tx->tx_conn); - LASSERT(!tx->tx_lntmsg[0]); - LASSERT(!tx->tx_lntmsg[1]); - LASSERT(!tx->tx_nfrags); - - return tx; -} - -static void -kiblnd_drop_rx(struct kib_rx *rx) -{ - struct kib_conn *conn = rx->rx_conn; - struct kib_sched_info *sched = conn->ibc_sched; - unsigned long flags; - - spin_lock_irqsave(&sched->ibs_lock, flags); - LASSERT(conn->ibc_nrx > 0); - conn->ibc_nrx--; - spin_unlock_irqrestore(&sched->ibs_lock, flags); - - kiblnd_conn_decref(conn); -} - -int -kiblnd_post_rx(struct kib_rx *rx, int credit) -{ - struct kib_conn *conn = rx->rx_conn; - struct kib_net *net = conn->ibc_peer->ibp_ni->ni_data; - struct ib_recv_wr *bad_wrq = NULL; - int rc; - - LASSERT(net); - LASSERT(!in_interrupt()); - LASSERT(credit == IBLND_POSTRX_NO_CREDIT || - credit == IBLND_POSTRX_PEER_CREDIT || - credit == IBLND_POSTRX_RSRVD_CREDIT); - - rx->rx_sge.lkey = conn->ibc_hdev->ibh_pd->local_dma_lkey; - rx->rx_sge.addr = rx->rx_msgaddr; - rx->rx_sge.length = IBLND_MSG_SIZE; - - rx->rx_wrq.next = NULL; - rx->rx_wrq.sg_list = &rx->rx_sge; - rx->rx_wrq.num_sge = 1; - rx->rx_wrq.wr_id = kiblnd_ptr2wreqid(rx, IBLND_WID_RX); - 
- LASSERT(conn->ibc_state >= IBLND_CONN_INIT); - LASSERT(rx->rx_nob >= 0); /* not posted */ - - if (conn->ibc_state > IBLND_CONN_ESTABLISHED) { - kiblnd_drop_rx(rx); /* No more posts for this rx */ - return 0; - } - - rx->rx_nob = -1; /* flag posted */ - - /* NB: need an extra reference after ib_post_recv because we don't - * own this rx (and rx::rx_conn) anymore, LU-5678. - */ - kiblnd_conn_addref(conn); - rc = ib_post_recv(conn->ibc_cmid->qp, &rx->rx_wrq, &bad_wrq); - if (unlikely(rc)) { - CERROR("Can't post rx for %s: %d, bad_wrq: %p\n", - libcfs_nid2str(conn->ibc_peer->ibp_nid), rc, bad_wrq); - rx->rx_nob = 0; - } - - if (conn->ibc_state < IBLND_CONN_ESTABLISHED) /* Initial post */ - goto out; - - if (unlikely(rc)) { - kiblnd_close_conn(conn, rc); - kiblnd_drop_rx(rx); /* No more posts for this rx */ - goto out; - } - - if (credit == IBLND_POSTRX_NO_CREDIT) - goto out; - - spin_lock(&conn->ibc_lock); - if (credit == IBLND_POSTRX_PEER_CREDIT) - conn->ibc_outstanding_credits++; - else - conn->ibc_reserved_credits++; - kiblnd_check_sends_locked(conn); - spin_unlock(&conn->ibc_lock); - -out: - kiblnd_conn_decref(conn); - return rc; -} - -static struct kib_tx * -kiblnd_find_waiting_tx_locked(struct kib_conn *conn, int txtype, __u64 cookie) -{ - struct list_head *tmp; - - list_for_each(tmp, &conn->ibc_active_txs) { - struct kib_tx *tx = list_entry(tmp, struct kib_tx, tx_list); - - LASSERT(!tx->tx_queued); - LASSERT(tx->tx_sending || tx->tx_waiting); - - if (tx->tx_cookie != cookie) - continue; - - if (tx->tx_waiting && - tx->tx_msg->ibm_type == txtype) - return tx; - - CWARN("Bad completion: %swaiting, type %x (wanted %x)\n", - tx->tx_waiting ? 
"" : "NOT ", - tx->tx_msg->ibm_type, txtype); - } - return NULL; -} - -static void -kiblnd_handle_completion(struct kib_conn *conn, int txtype, int status, __u64 cookie) -{ - struct kib_tx *tx; - struct lnet_ni *ni = conn->ibc_peer->ibp_ni; - int idle; - - spin_lock(&conn->ibc_lock); - - tx = kiblnd_find_waiting_tx_locked(conn, txtype, cookie); - if (!tx) { - spin_unlock(&conn->ibc_lock); - - CWARN("Unmatched completion type %x cookie %#llx from %s\n", - txtype, cookie, libcfs_nid2str(conn->ibc_peer->ibp_nid)); - kiblnd_close_conn(conn, -EPROTO); - return; - } - - if (!tx->tx_status) { /* success so far */ - if (status < 0) /* failed? */ - tx->tx_status = status; - else if (txtype == IBLND_MSG_GET_REQ) - lnet_set_reply_msg_len(ni, tx->tx_lntmsg[1], status); - } - - tx->tx_waiting = 0; - - idle = !tx->tx_queued && !tx->tx_sending; - if (idle) - list_del(&tx->tx_list); - - spin_unlock(&conn->ibc_lock); - - if (idle) - kiblnd_tx_done(ni, tx); -} - -static void -kiblnd_send_completion(struct kib_conn *conn, int type, int status, __u64 cookie) -{ - struct lnet_ni *ni = conn->ibc_peer->ibp_ni; - struct kib_tx *tx = kiblnd_get_idle_tx(ni, conn->ibc_peer->ibp_nid); - - if (!tx) { - CERROR("Can't get tx for completion %x for %s\n", - type, libcfs_nid2str(conn->ibc_peer->ibp_nid)); - return; - } - - tx->tx_msg->ibm_u.completion.ibcm_status = status; - tx->tx_msg->ibm_u.completion.ibcm_cookie = cookie; - kiblnd_init_tx_msg(ni, tx, type, sizeof(struct kib_completion_msg)); - - kiblnd_queue_tx(tx, conn); -} - -static void -kiblnd_handle_rx(struct kib_rx *rx) -{ - struct kib_msg *msg = rx->rx_msg; - struct kib_conn *conn = rx->rx_conn; - struct lnet_ni *ni = conn->ibc_peer->ibp_ni; - int credits = msg->ibm_credits; - struct kib_tx *tx; - int rc = 0; - int rc2; - int post_credit; - - LASSERT(conn->ibc_state >= IBLND_CONN_ESTABLISHED); - - CDEBUG(D_NET, "Received %x[%d] from %s\n", - msg->ibm_type, credits, - libcfs_nid2str(conn->ibc_peer->ibp_nid)); - - if (credits) { - /* Have I 
received credits that will let me send? */ - spin_lock(&conn->ibc_lock); - - if (conn->ibc_credits + credits > - conn->ibc_queue_depth) { - rc2 = conn->ibc_credits; - spin_unlock(&conn->ibc_lock); - - CERROR("Bad credits from %s: %d + %d > %d\n", - libcfs_nid2str(conn->ibc_peer->ibp_nid), - rc2, credits, conn->ibc_queue_depth); - - kiblnd_close_conn(conn, -EPROTO); - kiblnd_post_rx(rx, IBLND_POSTRX_NO_CREDIT); - return; - } - - conn->ibc_credits += credits; - - /* This ensures the credit taken by NOOP can be returned */ - if (msg->ibm_type == IBLND_MSG_NOOP && - !IBLND_OOB_CAPABLE(conn->ibc_version)) /* v1 only */ - conn->ibc_outstanding_credits++; - - kiblnd_check_sends_locked(conn); - spin_unlock(&conn->ibc_lock); - } - - switch (msg->ibm_type) { - default: - CERROR("Bad IBLND message type %x from %s\n", - msg->ibm_type, libcfs_nid2str(conn->ibc_peer->ibp_nid)); - post_credit = IBLND_POSTRX_NO_CREDIT; - rc = -EPROTO; - break; - - case IBLND_MSG_NOOP: - if (IBLND_OOB_CAPABLE(conn->ibc_version)) { - post_credit = IBLND_POSTRX_NO_CREDIT; - break; - } - - if (credits) /* credit already posted */ - post_credit = IBLND_POSTRX_NO_CREDIT; - else /* a keepalive NOOP */ - post_credit = IBLND_POSTRX_PEER_CREDIT; - break; - - case IBLND_MSG_IMMEDIATE: - post_credit = IBLND_POSTRX_DONT_POST; - rc = lnet_parse(ni, &msg->ibm_u.immediate.ibim_hdr, - msg->ibm_srcnid, rx, 0); - if (rc < 0) /* repost on error */ - post_credit = IBLND_POSTRX_PEER_CREDIT; - break; - - case IBLND_MSG_PUT_REQ: - post_credit = IBLND_POSTRX_DONT_POST; - rc = lnet_parse(ni, &msg->ibm_u.putreq.ibprm_hdr, - msg->ibm_srcnid, rx, 1); - if (rc < 0) /* repost on error */ - post_credit = IBLND_POSTRX_PEER_CREDIT; - break; - - case IBLND_MSG_PUT_NAK: - CWARN("PUT_NACK from %s\n", - libcfs_nid2str(conn->ibc_peer->ibp_nid)); - post_credit = IBLND_POSTRX_RSRVD_CREDIT; - kiblnd_handle_completion(conn, IBLND_MSG_PUT_REQ, - msg->ibm_u.completion.ibcm_status, - msg->ibm_u.completion.ibcm_cookie); - break; - - case 
IBLND_MSG_PUT_ACK: - post_credit = IBLND_POSTRX_RSRVD_CREDIT; - - spin_lock(&conn->ibc_lock); - tx = kiblnd_find_waiting_tx_locked(conn, IBLND_MSG_PUT_REQ, - msg->ibm_u.putack.ibpam_src_cookie); - if (tx) - list_del(&tx->tx_list); - spin_unlock(&conn->ibc_lock); - - if (!tx) { - CERROR("Unmatched PUT_ACK from %s\n", - libcfs_nid2str(conn->ibc_peer->ibp_nid)); - rc = -EPROTO; - break; - } - - LASSERT(tx->tx_waiting); - /* - * CAVEAT EMPTOR: I could be racing with tx_complete, but... - * (a) I can overwrite tx_msg since my peer has received it! - * (b) tx_waiting set tells tx_complete() it's not done. - */ - tx->tx_nwrq = 0; /* overwrite PUT_REQ */ - - rc2 = kiblnd_init_rdma(conn, tx, IBLND_MSG_PUT_DONE, - kiblnd_rd_size(&msg->ibm_u.putack.ibpam_rd), - &msg->ibm_u.putack.ibpam_rd, - msg->ibm_u.putack.ibpam_dst_cookie); - if (rc2 < 0) - CERROR("Can't setup rdma for PUT to %s: %d\n", - libcfs_nid2str(conn->ibc_peer->ibp_nid), rc2); - - spin_lock(&conn->ibc_lock); - tx->tx_waiting = 0; /* clear waiting and queue atomically */ - kiblnd_queue_tx_locked(tx, conn); - spin_unlock(&conn->ibc_lock); - break; - - case IBLND_MSG_PUT_DONE: - post_credit = IBLND_POSTRX_PEER_CREDIT; - kiblnd_handle_completion(conn, IBLND_MSG_PUT_ACK, - msg->ibm_u.completion.ibcm_status, - msg->ibm_u.completion.ibcm_cookie); - break; - - case IBLND_MSG_GET_REQ: - post_credit = IBLND_POSTRX_DONT_POST; - rc = lnet_parse(ni, &msg->ibm_u.get.ibgm_hdr, - msg->ibm_srcnid, rx, 1); - if (rc < 0) /* repost on error */ - post_credit = IBLND_POSTRX_PEER_CREDIT; - break; - - case IBLND_MSG_GET_DONE: - post_credit = IBLND_POSTRX_RSRVD_CREDIT; - kiblnd_handle_completion(conn, IBLND_MSG_GET_REQ, - msg->ibm_u.completion.ibcm_status, - msg->ibm_u.completion.ibcm_cookie); - break; - } - - if (rc < 0) /* protocol error */ - kiblnd_close_conn(conn, rc); - - if (post_credit != IBLND_POSTRX_DONT_POST) - kiblnd_post_rx(rx, post_credit); -} - -static void -kiblnd_rx_complete(struct kib_rx *rx, int status, int nob) -{ - 
struct kib_msg *msg = rx->rx_msg; - struct kib_conn *conn = rx->rx_conn; - struct lnet_ni *ni = conn->ibc_peer->ibp_ni; - struct kib_net *net = ni->ni_data; - int rc; - int err = -EIO; - - LASSERT(net); - LASSERT(rx->rx_nob < 0); /* was posted */ - rx->rx_nob = 0; /* isn't now */ - - if (conn->ibc_state > IBLND_CONN_ESTABLISHED) - goto ignore; - - if (status != IB_WC_SUCCESS) { - CNETERR("Rx from %s failed: %d\n", - libcfs_nid2str(conn->ibc_peer->ibp_nid), status); - goto failed; - } - - LASSERT(nob >= 0); - rx->rx_nob = nob; - - rc = kiblnd_unpack_msg(msg, rx->rx_nob); - if (rc) { - CERROR("Error %d unpacking rx from %s\n", - rc, libcfs_nid2str(conn->ibc_peer->ibp_nid)); - goto failed; - } - - if (msg->ibm_srcnid != conn->ibc_peer->ibp_nid || - msg->ibm_dstnid != ni->ni_nid || - msg->ibm_srcstamp != conn->ibc_incarnation || - msg->ibm_dststamp != net->ibn_incarnation) { - CERROR("Stale rx from %s\n", - libcfs_nid2str(conn->ibc_peer->ibp_nid)); - err = -ESTALE; - goto failed; - } - - /* set time last known alive */ - kiblnd_peer_alive(conn->ibc_peer); - - /* racing with connection establishment/teardown! */ - - if (conn->ibc_state < IBLND_CONN_ESTABLISHED) { - rwlock_t *g_lock = &kiblnd_data.kib_global_lock; - unsigned long flags; - - write_lock_irqsave(g_lock, flags); - /* must check holding global lock to eliminate race */ - if (conn->ibc_state < IBLND_CONN_ESTABLISHED) { - list_add_tail(&rx->rx_list, &conn->ibc_early_rxs); - write_unlock_irqrestore(g_lock, flags); - return; - } - write_unlock_irqrestore(g_lock, flags); - } - kiblnd_handle_rx(rx); - return; - - failed: - CDEBUG(D_NET, "rx %p conn %p\n", rx, conn); - kiblnd_close_conn(conn, err); - ignore: - kiblnd_drop_rx(rx); /* Don't re-post rx. 
*/ -} - -static struct page * -kiblnd_kvaddr_to_page(unsigned long vaddr) -{ - struct page *page; - - if (is_vmalloc_addr((void *)vaddr)) { - page = vmalloc_to_page((void *)vaddr); - LASSERT(page); - return page; - } -#ifdef CONFIG_HIGHMEM - if (vaddr >= PKMAP_BASE && - vaddr < (PKMAP_BASE + LAST_PKMAP * PAGE_SIZE)) { - /* No highmem pages only used for bulk (kiov) I/O */ - CERROR("find page for address in highmem\n"); - LBUG(); - } -#endif - page = virt_to_page(vaddr); - LASSERT(page); - return page; -} - -static int -kiblnd_fmr_map_tx(struct kib_net *net, struct kib_tx *tx, struct kib_rdma_desc *rd, __u32 nob) -{ - struct kib_hca_dev *hdev; - struct kib_fmr_poolset *fps; - int cpt; - int rc; - - LASSERT(tx->tx_pool); - LASSERT(tx->tx_pool->tpo_pool.po_owner); - - hdev = tx->tx_pool->tpo_hdev; - cpt = tx->tx_pool->tpo_pool.po_owner->ps_cpt; - - fps = net->ibn_fmr_ps[cpt]; - rc = kiblnd_fmr_pool_map(fps, tx, rd, nob, 0, &tx->fmr); - if (rc) { - CERROR("Can't map %u bytes: %d\n", nob, rc); - return rc; - } - - /* - * If rd is not tx_rd, it's going to get sent to a peer, who will need - * the rkey - */ - rd->rd_key = tx->fmr.fmr_key; - rd->rd_frags[0].rf_addr &= ~hdev->ibh_page_mask; - rd->rd_frags[0].rf_nob = nob; - rd->rd_nfrags = 1; - - return 0; -} - -static void kiblnd_unmap_tx(struct lnet_ni *ni, struct kib_tx *tx) -{ - struct kib_net *net = ni->ni_data; - - LASSERT(net); - - if (net->ibn_fmr_ps) - kiblnd_fmr_pool_unmap(&tx->fmr, tx->tx_status); - - if (tx->tx_nfrags) { - kiblnd_dma_unmap_sg(tx->tx_pool->tpo_hdev->ibh_ibdev, - tx->tx_frags, tx->tx_nfrags, tx->tx_dmadir); - tx->tx_nfrags = 0; - } -} - -static int kiblnd_map_tx(struct lnet_ni *ni, struct kib_tx *tx, - struct kib_rdma_desc *rd, int nfrags) -{ - struct kib_net *net = ni->ni_data; - struct kib_hca_dev *hdev = net->ibn_dev->ibd_hdev; - __u32 nob; - int i; - - /* - * If rd is not tx_rd, it's going to get sent to a peer and I'm the - * RDMA sink - */ - tx->tx_dmadir = (rd != tx->tx_rd) ? 
DMA_FROM_DEVICE : DMA_TO_DEVICE; - tx->tx_nfrags = nfrags; - - rd->rd_nfrags = kiblnd_dma_map_sg(hdev->ibh_ibdev, tx->tx_frags, - tx->tx_nfrags, tx->tx_dmadir); - - for (i = 0, nob = 0; i < rd->rd_nfrags; i++) { - rd->rd_frags[i].rf_nob = kiblnd_sg_dma_len( - hdev->ibh_ibdev, &tx->tx_frags[i]); - rd->rd_frags[i].rf_addr = kiblnd_sg_dma_address( - hdev->ibh_ibdev, &tx->tx_frags[i]); - nob += rd->rd_frags[i].rf_nob; - } - - if (net->ibn_fmr_ps) - return kiblnd_fmr_map_tx(net, tx, rd, nob); - - return -EINVAL; -} - -static int -kiblnd_setup_rd_iov(struct lnet_ni *ni, struct kib_tx *tx, - struct kib_rdma_desc *rd, unsigned int niov, - const struct kvec *iov, int offset, int nob) -{ - struct kib_net *net = ni->ni_data; - struct page *page; - struct scatterlist *sg; - unsigned long vaddr; - int fragnob; - int page_offset; - - LASSERT(nob > 0); - LASSERT(niov > 0); - LASSERT(net); - - while (offset >= iov->iov_len) { - offset -= iov->iov_len; - niov--; - iov++; - LASSERT(niov > 0); - } - - sg = tx->tx_frags; - do { - LASSERT(niov > 0); - - vaddr = ((unsigned long)iov->iov_base) + offset; - page_offset = vaddr & (PAGE_SIZE - 1); - page = kiblnd_kvaddr_to_page(vaddr); - if (!page) { - CERROR("Can't find page\n"); - return -EFAULT; - } - - fragnob = min((int)(iov->iov_len - offset), nob); - fragnob = min(fragnob, (int)PAGE_SIZE - page_offset); - - sg_set_page(sg, page, fragnob, page_offset); - sg = sg_next(sg); - if (!sg) { - CERROR("lacking enough sg entries to map tx\n"); - return -EFAULT; - } - - if (offset + fragnob < iov->iov_len) { - offset += fragnob; - } else { - offset = 0; - iov++; - niov--; - } - nob -= fragnob; - } while (nob > 0); - - return kiblnd_map_tx(ni, tx, rd, sg - tx->tx_frags); -} - -static int -kiblnd_setup_rd_kiov(struct lnet_ni *ni, struct kib_tx *tx, - struct kib_rdma_desc *rd, int nkiov, - const struct bio_vec *kiov, int offset, int nob) -{ - struct kib_net *net = ni->ni_data; - struct scatterlist *sg; - int fragnob; - - CDEBUG(D_NET, "niov %d 
offset %d nob %d\n", nkiov, offset, nob); - - LASSERT(nob > 0); - LASSERT(nkiov > 0); - LASSERT(net); - - while (offset >= kiov->bv_len) { - offset -= kiov->bv_len; - nkiov--; - kiov++; - LASSERT(nkiov > 0); - } - - sg = tx->tx_frags; - do { - LASSERT(nkiov > 0); - - fragnob = min((int)(kiov->bv_len - offset), nob); - - sg_set_page(sg, kiov->bv_page, fragnob, - kiov->bv_offset + offset); - sg = sg_next(sg); - if (!sg) { - CERROR("lacking enough sg entries to map tx\n"); - return -EFAULT; - } - - offset = 0; - kiov++; - nkiov--; - nob -= fragnob; - } while (nob > 0); - - return kiblnd_map_tx(ni, tx, rd, sg - tx->tx_frags); -} - -static int -kiblnd_post_tx_locked(struct kib_conn *conn, struct kib_tx *tx, int credit) - __must_hold(&conn->ibc_lock) -{ - struct kib_msg *msg = tx->tx_msg; - struct kib_peer *peer = conn->ibc_peer; - struct lnet_ni *ni = peer->ibp_ni; - int ver = conn->ibc_version; - int rc; - int done; - - LASSERT(tx->tx_queued); - /* We rely on this for QP sizing */ - LASSERT(tx->tx_nwrq > 0); - - LASSERT(!credit || credit == 1); - LASSERT(conn->ibc_outstanding_credits >= 0); - LASSERT(conn->ibc_outstanding_credits <= conn->ibc_queue_depth); - LASSERT(conn->ibc_credits >= 0); - LASSERT(conn->ibc_credits <= conn->ibc_queue_depth); - - if (conn->ibc_nsends_posted == kiblnd_concurrent_sends(ver, ni)) { - /* tx completions outstanding... 
*/ - CDEBUG(D_NET, "%s: posted enough\n", - libcfs_nid2str(peer->ibp_nid)); - return -EAGAIN; - } - - if (credit && !conn->ibc_credits) { /* no credits */ - CDEBUG(D_NET, "%s: no credits\n", - libcfs_nid2str(peer->ibp_nid)); - return -EAGAIN; - } - - if (credit && !IBLND_OOB_CAPABLE(ver) && - conn->ibc_credits == 1 && /* last credit reserved */ - msg->ibm_type != IBLND_MSG_NOOP) { /* for NOOP */ - CDEBUG(D_NET, "%s: not using last credit\n", - libcfs_nid2str(peer->ibp_nid)); - return -EAGAIN; - } - - /* NB don't drop ibc_lock before bumping tx_sending */ - list_del(&tx->tx_list); - tx->tx_queued = 0; - - if (msg->ibm_type == IBLND_MSG_NOOP && - (!kiblnd_need_noop(conn) || /* redundant NOOP */ - (IBLND_OOB_CAPABLE(ver) && /* posted enough NOOP */ - conn->ibc_noops_posted == IBLND_OOB_MSGS(ver)))) { - /* - * OK to drop when posted enough NOOPs, since - * kiblnd_check_sends_locked will queue NOOP again when - * posted NOOPs complete - */ - spin_unlock(&conn->ibc_lock); - kiblnd_tx_done(peer->ibp_ni, tx); - spin_lock(&conn->ibc_lock); - CDEBUG(D_NET, "%s(%d): redundant or enough NOOP\n", - libcfs_nid2str(peer->ibp_nid), - conn->ibc_noops_posted); - return 0; - } - - kiblnd_pack_msg(peer->ibp_ni, msg, ver, conn->ibc_outstanding_credits, - peer->ibp_nid, conn->ibc_incarnation); - - conn->ibc_credits -= credit; - conn->ibc_outstanding_credits = 0; - conn->ibc_nsends_posted++; - if (msg->ibm_type == IBLND_MSG_NOOP) - conn->ibc_noops_posted++; - - /* - * CAVEAT EMPTOR! This tx could be the PUT_DONE of an RDMA - * PUT. If so, it was first queued here as a PUT_REQ, sent and - * stashed on ibc_active_txs, matched by an incoming PUT_ACK, - * and then re-queued here. It's (just) possible that - * tx_sending is non-zero if we've not done the tx_complete() - * from the first send; hence the ++ rather than = below. - */ - tx->tx_sending++; - list_add(&tx->tx_list, &conn->ibc_active_txs); - - /* I'm still holding ibc_lock! 
*/ - if (conn->ibc_state != IBLND_CONN_ESTABLISHED) { - rc = -ECONNABORTED; - } else if (tx->tx_pool->tpo_pool.po_failed || - conn->ibc_hdev != tx->tx_pool->tpo_hdev) { - /* close_conn will launch failover */ - rc = -ENETDOWN; - } else { - struct kib_fast_reg_descriptor *frd = tx->fmr.fmr_frd; - struct ib_send_wr *bad = &tx->tx_wrq[tx->tx_nwrq - 1].wr; - struct ib_send_wr *wrq = &tx->tx_wrq[0].wr; - - if (frd) { - if (!frd->frd_valid) { - wrq = &frd->frd_inv_wr; - wrq->next = &frd->frd_fastreg_wr.wr; - } else { - wrq = &frd->frd_fastreg_wr.wr; - } - frd->frd_fastreg_wr.wr.next = &tx->tx_wrq[0].wr; - } - - LASSERTF(bad->wr_id == kiblnd_ptr2wreqid(tx, IBLND_WID_TX), - "bad wr_id %llx, opc %d, flags %d, peer: %s\n", - bad->wr_id, bad->opcode, bad->send_flags, - libcfs_nid2str(conn->ibc_peer->ibp_nid)); - bad = NULL; - rc = ib_post_send(conn->ibc_cmid->qp, wrq, &bad); - } - - conn->ibc_last_send = jiffies; - - if (!rc) - return 0; - - /* - * NB credits are transferred in the actual - * message, which can only be the last work item - */ - conn->ibc_credits += credit; - conn->ibc_outstanding_credits += msg->ibm_credits; - conn->ibc_nsends_posted--; - if (msg->ibm_type == IBLND_MSG_NOOP) - conn->ibc_noops_posted--; - - tx->tx_status = rc; - tx->tx_waiting = 0; - tx->tx_sending--; - - done = !tx->tx_sending; - if (done) - list_del(&tx->tx_list); - - spin_unlock(&conn->ibc_lock); - - if (conn->ibc_state == IBLND_CONN_ESTABLISHED) - CERROR("Error %d posting transmit to %s\n", - rc, libcfs_nid2str(peer->ibp_nid)); - else - CDEBUG(D_NET, "Error %d posting transmit to %s\n", - rc, libcfs_nid2str(peer->ibp_nid)); - - kiblnd_close_conn(conn, rc); - - if (done) - kiblnd_tx_done(peer->ibp_ni, tx); - - spin_lock(&conn->ibc_lock); - - return -EIO; -} - -static void -kiblnd_check_sends_locked(struct kib_conn *conn) -{ - int ver = conn->ibc_version; - struct lnet_ni *ni = conn->ibc_peer->ibp_ni; - struct kib_tx *tx; - - /* Don't send anything until after the connection is established 
*/ - if (conn->ibc_state < IBLND_CONN_ESTABLISHED) { - CDEBUG(D_NET, "%s too soon\n", - libcfs_nid2str(conn->ibc_peer->ibp_nid)); - return; - } - - LASSERT(conn->ibc_nsends_posted <= kiblnd_concurrent_sends(ver, ni)); - LASSERT(!IBLND_OOB_CAPABLE(ver) || - conn->ibc_noops_posted <= IBLND_OOB_MSGS(ver)); - LASSERT(conn->ibc_reserved_credits >= 0); - - while (conn->ibc_reserved_credits > 0 && - !list_empty(&conn->ibc_tx_queue_rsrvd)) { - tx = list_entry(conn->ibc_tx_queue_rsrvd.next, - struct kib_tx, tx_list); - list_del(&tx->tx_list); - list_add_tail(&tx->tx_list, &conn->ibc_tx_queue); - conn->ibc_reserved_credits--; - } - - if (kiblnd_need_noop(conn)) { - spin_unlock(&conn->ibc_lock); - - tx = kiblnd_get_idle_tx(ni, conn->ibc_peer->ibp_nid); - if (tx) - kiblnd_init_tx_msg(ni, tx, IBLND_MSG_NOOP, 0); - - spin_lock(&conn->ibc_lock); - if (tx) - kiblnd_queue_tx_locked(tx, conn); - } - - for (;;) { - int credit; - - if (!list_empty(&conn->ibc_tx_queue_nocred)) { - credit = 0; - tx = list_entry(conn->ibc_tx_queue_nocred.next, - struct kib_tx, tx_list); - } else if (!list_empty(&conn->ibc_tx_noops)) { - LASSERT(!IBLND_OOB_CAPABLE(ver)); - credit = 1; - tx = list_entry(conn->ibc_tx_noops.next, - struct kib_tx, tx_list); - } else if (!list_empty(&conn->ibc_tx_queue)) { - credit = 1; - tx = list_entry(conn->ibc_tx_queue.next, - struct kib_tx, tx_list); - } else { - break; - } - - if (kiblnd_post_tx_locked(conn, tx, credit)) - break; - } -} - -static void -kiblnd_tx_complete(struct kib_tx *tx, int status) -{ - int failed = (status != IB_WC_SUCCESS); - struct kib_conn *conn = tx->tx_conn; - int idle; - - LASSERT(tx->tx_sending > 0); - - if (failed) { - if (conn->ibc_state == IBLND_CONN_ESTABLISHED) - CNETERR("Tx -> %s cookie %#llx sending %d waiting %d: failed %d\n", - libcfs_nid2str(conn->ibc_peer->ibp_nid), - tx->tx_cookie, tx->tx_sending, tx->tx_waiting, - status); - - kiblnd_close_conn(conn, -EIO); - } else { - kiblnd_peer_alive(conn->ibc_peer); - } - - 
spin_lock(&conn->ibc_lock); - - /* - * I could be racing with rdma completion. Whoever makes 'tx' idle - * gets to free it, which also drops its ref on 'conn'. - */ - tx->tx_sending--; - conn->ibc_nsends_posted--; - if (tx->tx_msg->ibm_type == IBLND_MSG_NOOP) - conn->ibc_noops_posted--; - - if (failed) { - tx->tx_waiting = 0; /* don't wait for peer */ - tx->tx_status = -EIO; - } - - idle = !tx->tx_sending && /* This is the final callback */ - !tx->tx_waiting && /* Not waiting for peer */ - !tx->tx_queued; /* Not re-queued (PUT_DONE) */ - if (idle) - list_del(&tx->tx_list); - - kiblnd_check_sends_locked(conn); - spin_unlock(&conn->ibc_lock); - - if (idle) - kiblnd_tx_done(conn->ibc_peer->ibp_ni, tx); -} - -static void -kiblnd_init_tx_msg(struct lnet_ni *ni, struct kib_tx *tx, int type, - int body_nob) -{ - struct kib_hca_dev *hdev = tx->tx_pool->tpo_hdev; - struct ib_sge *sge = &tx->tx_sge[tx->tx_nwrq]; - struct ib_rdma_wr *wrq = &tx->tx_wrq[tx->tx_nwrq]; - int nob = offsetof(struct kib_msg, ibm_u) + body_nob; - - LASSERT(tx->tx_nwrq >= 0); - LASSERT(tx->tx_nwrq < IBLND_MAX_RDMA_FRAGS + 1); - LASSERT(nob <= IBLND_MSG_SIZE); - - kiblnd_init_msg(tx->tx_msg, type, body_nob); - - sge->lkey = hdev->ibh_pd->local_dma_lkey; - sge->addr = tx->tx_msgaddr; - sge->length = nob; - - memset(wrq, 0, sizeof(*wrq)); - - wrq->wr.next = NULL; - wrq->wr.wr_id = kiblnd_ptr2wreqid(tx, IBLND_WID_TX); - wrq->wr.sg_list = sge; - wrq->wr.num_sge = 1; - wrq->wr.opcode = IB_WR_SEND; - wrq->wr.send_flags = IB_SEND_SIGNALED; - - tx->tx_nwrq++; -} - -static int -kiblnd_init_rdma(struct kib_conn *conn, struct kib_tx *tx, int type, - int resid, struct kib_rdma_desc *dstrd, __u64 dstcookie) -{ - struct kib_msg *ibmsg = tx->tx_msg; - struct kib_rdma_desc *srcrd = tx->tx_rd; - struct ib_sge *sge = &tx->tx_sge[0]; - struct ib_rdma_wr *wrq, *next; - int rc = resid; - int srcidx = 0; - int dstidx = 0; - int wrknob; - - LASSERT(!in_interrupt()); - LASSERT(!tx->tx_nwrq); - LASSERT(type == 
IBLND_MSG_GET_DONE || - type == IBLND_MSG_PUT_DONE); - - if (kiblnd_rd_size(srcrd) > conn->ibc_max_frags << PAGE_SHIFT) { - CERROR("RDMA is too large for peer %s (%d), src size: %d dst size: %d\n", - libcfs_nid2str(conn->ibc_peer->ibp_nid), - conn->ibc_max_frags << PAGE_SHIFT, - kiblnd_rd_size(srcrd), kiblnd_rd_size(dstrd)); - rc = -EMSGSIZE; - goto too_big; - } - - while (resid > 0) { - if (srcidx >= srcrd->rd_nfrags) { - CERROR("Src buffer exhausted: %d frags\n", srcidx); - rc = -EPROTO; - break; - } - - if (dstidx == dstrd->rd_nfrags) { - CERROR("Dst buffer exhausted: %d frags\n", dstidx); - rc = -EPROTO; - break; - } - - if (tx->tx_nwrq >= IBLND_MAX_RDMA_FRAGS) { - CERROR("RDMA has too many fragments for peer %s (%d), src idx/frags: %d/%d dst idx/frags: %d/%d\n", - libcfs_nid2str(conn->ibc_peer->ibp_nid), - IBLND_MAX_RDMA_FRAGS, - srcidx, srcrd->rd_nfrags, - dstidx, dstrd->rd_nfrags); - rc = -EMSGSIZE; - break; - } - - wrknob = min3(kiblnd_rd_frag_size(srcrd, srcidx), - kiblnd_rd_frag_size(dstrd, dstidx), - (__u32)resid); - - sge = &tx->tx_sge[tx->tx_nwrq]; - sge->addr = kiblnd_rd_frag_addr(srcrd, srcidx); - sge->lkey = kiblnd_rd_frag_key(srcrd, srcidx); - sge->length = wrknob; - - wrq = &tx->tx_wrq[tx->tx_nwrq]; - next = wrq + 1; - - wrq->wr.next = &next->wr; - wrq->wr.wr_id = kiblnd_ptr2wreqid(tx, IBLND_WID_RDMA); - wrq->wr.sg_list = sge; - wrq->wr.num_sge = 1; - wrq->wr.opcode = IB_WR_RDMA_WRITE; - wrq->wr.send_flags = 0; - - wrq->remote_addr = kiblnd_rd_frag_addr(dstrd, dstidx); - wrq->rkey = kiblnd_rd_frag_key(dstrd, dstidx); - - srcidx = kiblnd_rd_consume_frag(srcrd, srcidx, wrknob); - dstidx = kiblnd_rd_consume_frag(dstrd, dstidx, wrknob); - - resid -= wrknob; - - tx->tx_nwrq++; - wrq++; - sge++; - } -too_big: - if (rc < 0) /* no RDMA if completing with failure */ - tx->tx_nwrq = 0; - - ibmsg->ibm_u.completion.ibcm_status = rc; - ibmsg->ibm_u.completion.ibcm_cookie = dstcookie; - kiblnd_init_tx_msg(conn->ibc_peer->ibp_ni, tx, - type, sizeof(struct 
kib_completion_msg)); - - return rc; -} - -static void -kiblnd_queue_tx_locked(struct kib_tx *tx, struct kib_conn *conn) -{ - struct list_head *q; - - LASSERT(tx->tx_nwrq > 0); /* work items set up */ - LASSERT(!tx->tx_queued); /* not queued for sending already */ - LASSERT(conn->ibc_state >= IBLND_CONN_ESTABLISHED); - - tx->tx_queued = 1; - tx->tx_deadline = jiffies + - msecs_to_jiffies(*kiblnd_tunables.kib_timeout * - MSEC_PER_SEC); - - if (!tx->tx_conn) { - kiblnd_conn_addref(conn); - tx->tx_conn = conn; - LASSERT(tx->tx_msg->ibm_type != IBLND_MSG_PUT_DONE); - } else { - /* PUT_DONE first attached to conn as a PUT_REQ */ - LASSERT(tx->tx_conn == conn); - LASSERT(tx->tx_msg->ibm_type == IBLND_MSG_PUT_DONE); - } - - switch (tx->tx_msg->ibm_type) { - default: - LBUG(); - - case IBLND_MSG_PUT_REQ: - case IBLND_MSG_GET_REQ: - q = &conn->ibc_tx_queue_rsrvd; - break; - - case IBLND_MSG_PUT_NAK: - case IBLND_MSG_PUT_ACK: - case IBLND_MSG_PUT_DONE: - case IBLND_MSG_GET_DONE: - q = &conn->ibc_tx_queue_nocred; - break; - - case IBLND_MSG_NOOP: - if (IBLND_OOB_CAPABLE(conn->ibc_version)) - q = &conn->ibc_tx_queue_nocred; - else - q = &conn->ibc_tx_noops; - break; - - case IBLND_MSG_IMMEDIATE: - q = &conn->ibc_tx_queue; - break; - } - - list_add_tail(&tx->tx_list, q); -} - -static void -kiblnd_queue_tx(struct kib_tx *tx, struct kib_conn *conn) -{ - spin_lock(&conn->ibc_lock); - kiblnd_queue_tx_locked(tx, conn); - kiblnd_check_sends_locked(conn); - spin_unlock(&conn->ibc_lock); -} - -static int kiblnd_resolve_addr(struct rdma_cm_id *cmid, - struct sockaddr_in *srcaddr, - struct sockaddr_in *dstaddr, - int timeout_ms) -{ - unsigned short port; - int rc; - - /* allow the port to be reused */ - rc = rdma_set_reuseaddr(cmid, 1); - if (rc) { - CERROR("Unable to set reuse on cmid: %d\n", rc); - return rc; - } - - /* look for a free privileged port */ - for (port = PROT_SOCK - 1; port > 0; port--) { - srcaddr->sin_port = htons(port); - rc = rdma_resolve_addr(cmid, - (struct sockaddr 
*)srcaddr, - (struct sockaddr *)dstaddr, - timeout_ms); - if (!rc) { - CDEBUG(D_NET, "bound to port %hu\n", port); - return 0; - } else if (rc == -EADDRINUSE || rc == -EADDRNOTAVAIL) { - CDEBUG(D_NET, "bind to port %hu failed: %d\n", - port, rc); - } else { - return rc; - } - } - - CERROR("Failed to bind to a free privileged port\n"); - return rc; -} - -static void -kiblnd_connect_peer(struct kib_peer *peer) -{ - struct rdma_cm_id *cmid; - struct kib_dev *dev; - struct kib_net *net = peer->ibp_ni->ni_data; - struct sockaddr_in srcaddr; - struct sockaddr_in dstaddr; - int rc; - - LASSERT(net); - LASSERT(peer->ibp_connecting > 0); - LASSERT(!peer->ibp_reconnecting); - - cmid = kiblnd_rdma_create_id(kiblnd_cm_callback, peer, RDMA_PS_TCP, - IB_QPT_RC); - - if (IS_ERR(cmid)) { - CERROR("Can't create CMID for %s: %ld\n", - libcfs_nid2str(peer->ibp_nid), PTR_ERR(cmid)); - rc = PTR_ERR(cmid); - goto failed; - } - - dev = net->ibn_dev; - memset(&srcaddr, 0, sizeof(srcaddr)); - srcaddr.sin_family = AF_INET; - srcaddr.sin_addr.s_addr = htonl(dev->ibd_ifip); - - memset(&dstaddr, 0, sizeof(dstaddr)); - dstaddr.sin_family = AF_INET; - dstaddr.sin_port = htons(*kiblnd_tunables.kib_service); - dstaddr.sin_addr.s_addr = htonl(LNET_NIDADDR(peer->ibp_nid)); - - kiblnd_peer_addref(peer); /* cmid's ref */ - - if (*kiblnd_tunables.kib_use_priv_port) { - rc = kiblnd_resolve_addr(cmid, &srcaddr, &dstaddr, - *kiblnd_tunables.kib_timeout * 1000); - } else { - rc = rdma_resolve_addr(cmid, - (struct sockaddr *)&srcaddr, - (struct sockaddr *)&dstaddr, - *kiblnd_tunables.kib_timeout * 1000); - } - if (rc) { - /* Can't initiate address resolution: */ - CERROR("Can't resolve addr for %s: %d\n", - libcfs_nid2str(peer->ibp_nid), rc); - goto failed2; - } - - LASSERT(cmid->device); - CDEBUG(D_NET, "%s: connection bound to %s:%pI4h:%s\n", - libcfs_nid2str(peer->ibp_nid), dev->ibd_ifname, - &dev->ibd_ifip, cmid->device->name); - - return; - - failed2: - kiblnd_peer_connect_failed(peer, 1, rc); - 
kiblnd_peer_decref(peer); /* cmid's ref */ - rdma_destroy_id(cmid); - return; - failed: - kiblnd_peer_connect_failed(peer, 1, rc); -} - -bool -kiblnd_reconnect_peer(struct kib_peer *peer) -{ - rwlock_t *glock = &kiblnd_data.kib_global_lock; - char *reason = NULL; - struct list_head txs; - unsigned long flags; - - INIT_LIST_HEAD(&txs); - - write_lock_irqsave(glock, flags); - if (!peer->ibp_reconnecting) { - if (peer->ibp_accepting) - reason = "accepting"; - else if (peer->ibp_connecting) - reason = "connecting"; - else if (!list_empty(&peer->ibp_conns)) - reason = "connected"; - else /* connected then closed */ - reason = "closed"; - - goto no_reconnect; - } - - LASSERT(!peer->ibp_accepting && !peer->ibp_connecting && - list_empty(&peer->ibp_conns)); - peer->ibp_reconnecting = 0; - - if (!kiblnd_peer_active(peer)) { - list_splice_init(&peer->ibp_tx_queue, &txs); - reason = "unlinked"; - goto no_reconnect; - } - - peer->ibp_connecting++; - peer->ibp_reconnected++; - write_unlock_irqrestore(glock, flags); - - kiblnd_connect_peer(peer); - return true; - -no_reconnect: - write_unlock_irqrestore(glock, flags); - - CWARN("Abort reconnection of %s: %s\n", - libcfs_nid2str(peer->ibp_nid), reason); - kiblnd_txlist_done(peer->ibp_ni, &txs, -ECONNABORTED); - return false; -} - -void -kiblnd_launch_tx(struct lnet_ni *ni, struct kib_tx *tx, lnet_nid_t nid) -{ - struct kib_peer *peer; - struct kib_peer *peer2; - struct kib_conn *conn; - rwlock_t *g_lock = &kiblnd_data.kib_global_lock; - unsigned long flags; - int rc; - - /* - * If I get here, I've committed to send, so I complete the tx with - * failure on any problems - */ - LASSERT(!tx || !tx->tx_conn); /* only set when assigned a conn */ - LASSERT(!tx || tx->tx_nwrq > 0); /* work items have been set up */ - - /* - * First time, just use a read lock since I expect to find my peer - * connected - */ - read_lock_irqsave(g_lock, flags); - - peer = kiblnd_find_peer_locked(nid); - if (peer && !list_empty(&peer->ibp_conns)) { - /* 
Found a peer with an established connection */ - conn = kiblnd_get_conn_locked(peer); - kiblnd_conn_addref(conn); /* 1 ref for me... */ - - read_unlock_irqrestore(g_lock, flags); - - if (tx) - kiblnd_queue_tx(tx, conn); - kiblnd_conn_decref(conn); /* ...to here */ - return; - } - - read_unlock(g_lock); - /* Re-try with a write lock */ - write_lock(g_lock); - - peer = kiblnd_find_peer_locked(nid); - if (peer) { - if (list_empty(&peer->ibp_conns)) { - /* found a peer, but it's still connecting... */ - LASSERT(kiblnd_peer_connecting(peer)); - if (tx) - list_add_tail(&tx->tx_list, - &peer->ibp_tx_queue); - write_unlock_irqrestore(g_lock, flags); - } else { - conn = kiblnd_get_conn_locked(peer); - kiblnd_conn_addref(conn); /* 1 ref for me... */ - - write_unlock_irqrestore(g_lock, flags); - - if (tx) - kiblnd_queue_tx(tx, conn); - kiblnd_conn_decref(conn); /* ...to here */ - } - return; - } - - write_unlock_irqrestore(g_lock, flags); - - /* Allocate a peer ready to add to the peer table and retry */ - rc = kiblnd_create_peer(ni, &peer, nid); - if (rc) { - CERROR("Can't create peer %s\n", libcfs_nid2str(nid)); - if (tx) { - tx->tx_status = -EHOSTUNREACH; - tx->tx_waiting = 0; - kiblnd_tx_done(ni, tx); - } - return; - } - - write_lock_irqsave(g_lock, flags); - - peer2 = kiblnd_find_peer_locked(nid); - if (peer2) { - if (list_empty(&peer2->ibp_conns)) { - /* found a peer, but it's still connecting... */ - LASSERT(kiblnd_peer_connecting(peer2)); - if (tx) - list_add_tail(&tx->tx_list, - &peer2->ibp_tx_queue); - write_unlock_irqrestore(g_lock, flags); - } else { - conn = kiblnd_get_conn_locked(peer2); - kiblnd_conn_addref(conn); /* 1 ref for me... 
*/ - - write_unlock_irqrestore(g_lock, flags); - - if (tx) - kiblnd_queue_tx(tx, conn); - kiblnd_conn_decref(conn); /* ...to here */ - } - - kiblnd_peer_decref(peer); - return; - } - - /* Brand new peer */ - LASSERT(!peer->ibp_connecting); - peer->ibp_connecting = 1; - - /* always called with a ref on ni, which prevents ni being shutdown */ - LASSERT(!((struct kib_net *)ni->ni_data)->ibn_shutdown); - - if (tx) - list_add_tail(&tx->tx_list, &peer->ibp_tx_queue); - - kiblnd_peer_addref(peer); - list_add_tail(&peer->ibp_list, kiblnd_nid2peerlist(nid)); - - write_unlock_irqrestore(g_lock, flags); - - kiblnd_connect_peer(peer); - kiblnd_peer_decref(peer); -} - -int -kiblnd_send(struct lnet_ni *ni, void *private, struct lnet_msg *lntmsg) -{ - struct lnet_hdr *hdr = &lntmsg->msg_hdr; - int type = lntmsg->msg_type; - struct lnet_process_id target = lntmsg->msg_target; - int target_is_router = lntmsg->msg_target_is_router; - int routing = lntmsg->msg_routing; - unsigned int payload_niov = lntmsg->msg_niov; - struct kvec *payload_iov = lntmsg->msg_iov; - struct bio_vec *payload_kiov = lntmsg->msg_kiov; - unsigned int payload_offset = lntmsg->msg_offset; - unsigned int payload_nob = lntmsg->msg_len; - struct iov_iter from; - struct kib_msg *ibmsg; - struct kib_rdma_desc *rd; - struct kib_tx *tx; - int nob; - int rc; - - /* NB 'private' is different depending on what we're sending.... 
*/ - - CDEBUG(D_NET, "sending %d bytes in %d frags to %s\n", - payload_nob, payload_niov, libcfs_id2str(target)); - - LASSERT(!payload_nob || payload_niov > 0); - LASSERT(payload_niov <= LNET_MAX_IOV); - - /* Thread context */ - LASSERT(!in_interrupt()); - /* payload is either all vaddrs or all pages */ - LASSERT(!(payload_kiov && payload_iov)); - - if (payload_kiov) - iov_iter_bvec(&from, ITER_BVEC | WRITE, - payload_kiov, payload_niov, - payload_nob + payload_offset); - else - iov_iter_kvec(&from, ITER_KVEC | WRITE, - payload_iov, payload_niov, - payload_nob + payload_offset); - - iov_iter_advance(&from, payload_offset); - - switch (type) { - default: - LBUG(); - return -EIO; - - case LNET_MSG_ACK: - LASSERT(!payload_nob); - break; - - case LNET_MSG_GET: - if (routing || target_is_router) - break; /* send IMMEDIATE */ - - /* is the REPLY message too small for RDMA? */ - nob = offsetof(struct kib_msg, ibm_u.immediate.ibim_payload[lntmsg->msg_md->md_length]); - if (nob <= IBLND_MSG_SIZE) - break; /* send IMMEDIATE */ - - tx = kiblnd_get_idle_tx(ni, target.nid); - if (!tx) { - CERROR("Can't allocate txd for GET to %s\n", - libcfs_nid2str(target.nid)); - return -ENOMEM; - } - - ibmsg = tx->tx_msg; - rd = &ibmsg->ibm_u.get.ibgm_rd; - if (!(lntmsg->msg_md->md_options & LNET_MD_KIOV)) - rc = kiblnd_setup_rd_iov(ni, tx, rd, - lntmsg->msg_md->md_niov, - lntmsg->msg_md->md_iov.iov, - 0, lntmsg->msg_md->md_length); - else - rc = kiblnd_setup_rd_kiov(ni, tx, rd, - lntmsg->msg_md->md_niov, - lntmsg->msg_md->md_iov.kiov, - 0, lntmsg->msg_md->md_length); - if (rc) { - CERROR("Can't setup GET sink for %s: %d\n", - libcfs_nid2str(target.nid), rc); - kiblnd_tx_done(ni, tx); - return -EIO; - } - - nob = offsetof(struct kib_get_msg, ibgm_rd.rd_frags[rd->rd_nfrags]); - ibmsg->ibm_u.get.ibgm_cookie = tx->tx_cookie; - ibmsg->ibm_u.get.ibgm_hdr = *hdr; - - kiblnd_init_tx_msg(ni, tx, IBLND_MSG_GET_REQ, nob); - - tx->tx_lntmsg[1] = lnet_create_reply_msg(ni, lntmsg); - if 
(!tx->tx_lntmsg[1]) { - CERROR("Can't create reply for GET -> %s\n", - libcfs_nid2str(target.nid)); - kiblnd_tx_done(ni, tx); - return -EIO; - } - - tx->tx_lntmsg[0] = lntmsg; /* finalise lntmsg[0,1] on completion */ - tx->tx_waiting = 1; /* waiting for GET_DONE */ - kiblnd_launch_tx(ni, tx, target.nid); - return 0; - - case LNET_MSG_REPLY: - case LNET_MSG_PUT: - /* Is the payload small enough not to need RDMA? */ - nob = offsetof(struct kib_msg, ibm_u.immediate.ibim_payload[payload_nob]); - if (nob <= IBLND_MSG_SIZE) - break; /* send IMMEDIATE */ - - tx = kiblnd_get_idle_tx(ni, target.nid); - if (!tx) { - CERROR("Can't allocate %s txd for %s\n", - type == LNET_MSG_PUT ? "PUT" : "REPLY", - libcfs_nid2str(target.nid)); - return -ENOMEM; - } - - if (!payload_kiov) - rc = kiblnd_setup_rd_iov(ni, tx, tx->tx_rd, - payload_niov, payload_iov, - payload_offset, payload_nob); - else - rc = kiblnd_setup_rd_kiov(ni, tx, tx->tx_rd, - payload_niov, payload_kiov, - payload_offset, payload_nob); - if (rc) { - CERROR("Can't setup PUT src for %s: %d\n", - libcfs_nid2str(target.nid), rc); - kiblnd_tx_done(ni, tx); - return -EIO; - } - - ibmsg = tx->tx_msg; - ibmsg->ibm_u.putreq.ibprm_hdr = *hdr; - ibmsg->ibm_u.putreq.ibprm_cookie = tx->tx_cookie; - kiblnd_init_tx_msg(ni, tx, IBLND_MSG_PUT_REQ, sizeof(struct kib_putreq_msg)); - - tx->tx_lntmsg[0] = lntmsg; /* finalise lntmsg on completion */ - tx->tx_waiting = 1; /* waiting for PUT_{ACK,NAK} */ - kiblnd_launch_tx(ni, tx, target.nid); - return 0; - } - - /* send IMMEDIATE */ - - LASSERT(offsetof(struct kib_msg, ibm_u.immediate.ibim_payload[payload_nob]) - <= IBLND_MSG_SIZE); - - tx = kiblnd_get_idle_tx(ni, target.nid); - if (!tx) { - CERROR("Can't send %d to %s: tx descs exhausted\n", - type, libcfs_nid2str(target.nid)); - return -ENOMEM; - } - - ibmsg = tx->tx_msg; - ibmsg->ibm_u.immediate.ibim_hdr = *hdr; - - rc = copy_from_iter(&ibmsg->ibm_u.immediate.ibim_payload, payload_nob, - &from); - if (rc != payload_nob) { - 
kiblnd_pool_free_node(&tx->tx_pool->tpo_pool, &tx->tx_list); - return -EFAULT; - } - - nob = offsetof(struct kib_immediate_msg, ibim_payload[payload_nob]); - kiblnd_init_tx_msg(ni, tx, IBLND_MSG_IMMEDIATE, nob); - - tx->tx_lntmsg[0] = lntmsg; /* finalise lntmsg on completion */ - kiblnd_launch_tx(ni, tx, target.nid); - return 0; -} - -static void -kiblnd_reply(struct lnet_ni *ni, struct kib_rx *rx, struct lnet_msg *lntmsg) -{ - struct lnet_process_id target = lntmsg->msg_target; - unsigned int niov = lntmsg->msg_niov; - struct kvec *iov = lntmsg->msg_iov; - struct bio_vec *kiov = lntmsg->msg_kiov; - unsigned int offset = lntmsg->msg_offset; - unsigned int nob = lntmsg->msg_len; - struct kib_tx *tx; - int rc; - - tx = kiblnd_get_idle_tx(ni, rx->rx_conn->ibc_peer->ibp_nid); - if (!tx) { - CERROR("Can't get tx for REPLY to %s\n", - libcfs_nid2str(target.nid)); - goto failed_0; - } - - if (!nob) - rc = 0; - else if (!kiov) - rc = kiblnd_setup_rd_iov(ni, tx, tx->tx_rd, - niov, iov, offset, nob); - else - rc = kiblnd_setup_rd_kiov(ni, tx, tx->tx_rd, - niov, kiov, offset, nob); - - if (rc) { - CERROR("Can't setup GET src for %s: %d\n", - libcfs_nid2str(target.nid), rc); - goto failed_1; - } - - rc = kiblnd_init_rdma(rx->rx_conn, tx, - IBLND_MSG_GET_DONE, nob, - &rx->rx_msg->ibm_u.get.ibgm_rd, - rx->rx_msg->ibm_u.get.ibgm_cookie); - if (rc < 0) { - CERROR("Can't setup rdma for GET from %s: %d\n", - libcfs_nid2str(target.nid), rc); - goto failed_1; - } - - if (!nob) { - /* No RDMA: local completion may happen now! 
*/ - lnet_finalize(ni, lntmsg, 0); - } else { - /* RDMA: lnet_finalize(lntmsg) when it completes */ - tx->tx_lntmsg[0] = lntmsg; - } - - kiblnd_queue_tx(tx, rx->rx_conn); - return; - - failed_1: - kiblnd_tx_done(ni, tx); - failed_0: - lnet_finalize(ni, lntmsg, -EIO); -} - -int -kiblnd_recv(struct lnet_ni *ni, void *private, struct lnet_msg *lntmsg, - int delayed, struct iov_iter *to, unsigned int rlen) -{ - struct kib_rx *rx = private; - struct kib_msg *rxmsg = rx->rx_msg; - struct kib_conn *conn = rx->rx_conn; - struct kib_tx *tx; - int nob; - int post_credit = IBLND_POSTRX_PEER_CREDIT; - int rc = 0; - - LASSERT(iov_iter_count(to) <= rlen); - LASSERT(!in_interrupt()); - /* Either all pages or all vaddrs */ - - switch (rxmsg->ibm_type) { - default: - LBUG(); - - case IBLND_MSG_IMMEDIATE: - nob = offsetof(struct kib_msg, ibm_u.immediate.ibim_payload[rlen]); - if (nob > rx->rx_nob) { - CERROR("Immediate message from %s too big: %d(%d)\n", - libcfs_nid2str(rxmsg->ibm_u.immediate.ibim_hdr.src_nid), - nob, rx->rx_nob); - rc = -EPROTO; - break; - } - - rc = copy_to_iter(&rxmsg->ibm_u.immediate.ibim_payload, rlen, - to); - if (rc != rlen) { - rc = -EFAULT; - break; - } - - rc = 0; - lnet_finalize(ni, lntmsg, 0); - break; - - case IBLND_MSG_PUT_REQ: { - struct kib_msg *txmsg; - struct kib_rdma_desc *rd; - - if (!iov_iter_count(to)) { - lnet_finalize(ni, lntmsg, 0); - kiblnd_send_completion(rx->rx_conn, IBLND_MSG_PUT_NAK, 0, - rxmsg->ibm_u.putreq.ibprm_cookie); - break; - } - - tx = kiblnd_get_idle_tx(ni, conn->ibc_peer->ibp_nid); - if (!tx) { - CERROR("Can't allocate tx for %s\n", - libcfs_nid2str(conn->ibc_peer->ibp_nid)); - /* Not replying will break the connection */ - rc = -ENOMEM; - break; - } - - txmsg = tx->tx_msg; - rd = &txmsg->ibm_u.putack.ibpam_rd; - if (!(to->type & ITER_BVEC)) - rc = kiblnd_setup_rd_iov(ni, tx, rd, - to->nr_segs, to->kvec, - to->iov_offset, - iov_iter_count(to)); - else - rc = kiblnd_setup_rd_kiov(ni, tx, rd, - to->nr_segs, to->bvec, - 
to->iov_offset, - iov_iter_count(to)); - if (rc) { - CERROR("Can't setup PUT sink for %s: %d\n", - libcfs_nid2str(conn->ibc_peer->ibp_nid), rc); - kiblnd_tx_done(ni, tx); - /* tell peer it's over */ - kiblnd_send_completion(rx->rx_conn, IBLND_MSG_PUT_NAK, rc, - rxmsg->ibm_u.putreq.ibprm_cookie); - break; - } - - nob = offsetof(struct kib_putack_msg, ibpam_rd.rd_frags[rd->rd_nfrags]); - txmsg->ibm_u.putack.ibpam_src_cookie = rxmsg->ibm_u.putreq.ibprm_cookie; - txmsg->ibm_u.putack.ibpam_dst_cookie = tx->tx_cookie; - - kiblnd_init_tx_msg(ni, tx, IBLND_MSG_PUT_ACK, nob); - - tx->tx_lntmsg[0] = lntmsg; /* finalise lntmsg on completion */ - tx->tx_waiting = 1; /* waiting for PUT_DONE */ - kiblnd_queue_tx(tx, conn); - - /* reposted buffer reserved for PUT_DONE */ - post_credit = IBLND_POSTRX_NO_CREDIT; - break; - } - - case IBLND_MSG_GET_REQ: - if (lntmsg) { - /* Optimized GET; RDMA lntmsg's payload */ - kiblnd_reply(ni, rx, lntmsg); - } else { - /* GET didn't match anything */ - kiblnd_send_completion(rx->rx_conn, IBLND_MSG_GET_DONE, - -ENODATA, - rxmsg->ibm_u.get.ibgm_cookie); - } - break; - } - - kiblnd_post_rx(rx, post_credit); - return rc; -} - -int -kiblnd_thread_start(int (*fn)(void *arg), void *arg, char *name) -{ - struct task_struct *task = kthread_run(fn, arg, "%s", name); - - if (IS_ERR(task)) - return PTR_ERR(task); - - atomic_inc(&kiblnd_data.kib_nthreads); - return 0; -} - -static void -kiblnd_thread_fini(void) -{ - atomic_dec(&kiblnd_data.kib_nthreads); -} - -static void -kiblnd_peer_alive(struct kib_peer *peer) -{ - /* This is racy, but everyone's only writing cfs_time_current() */ - peer->ibp_last_alive = cfs_time_current(); - mb(); -} - -static void -kiblnd_peer_notify(struct kib_peer *peer) -{ - int error = 0; - unsigned long last_alive = 0; - unsigned long flags; - - read_lock_irqsave(&kiblnd_data.kib_global_lock, flags); - - if (kiblnd_peer_idle(peer) && peer->ibp_error) { - error = peer->ibp_error; - peer->ibp_error = 0; - - last_alive = 
peer->ibp_last_alive; - } - - read_unlock_irqrestore(&kiblnd_data.kib_global_lock, flags); - - if (error) - lnet_notify(peer->ibp_ni, - peer->ibp_nid, 0, last_alive); -} - -void -kiblnd_close_conn_locked(struct kib_conn *conn, int error) -{ - /* - * This just does the immediate housekeeping. 'error' is zero for a - * normal shutdown which can happen only after the connection has been - * established. If the connection is established, schedule the - * connection to be finished off by the connd. Otherwise the connd is - * already dealing with it (either to set it up or tear it down). - * Caller holds kib_global_lock exclusively in irq context - */ - struct kib_peer *peer = conn->ibc_peer; - struct kib_dev *dev; - unsigned long flags; - - LASSERT(error || conn->ibc_state >= IBLND_CONN_ESTABLISHED); - - if (error && !conn->ibc_comms_error) - conn->ibc_comms_error = error; - - if (conn->ibc_state != IBLND_CONN_ESTABLISHED) - return; /* already being handled */ - - if (!error && - list_empty(&conn->ibc_tx_noops) && - list_empty(&conn->ibc_tx_queue) && - list_empty(&conn->ibc_tx_queue_rsrvd) && - list_empty(&conn->ibc_tx_queue_nocred) && - list_empty(&conn->ibc_active_txs)) { - CDEBUG(D_NET, "closing conn to %s\n", - libcfs_nid2str(peer->ibp_nid)); - } else { - CNETERR("Closing conn to %s: error %d%s%s%s%s%s\n", - libcfs_nid2str(peer->ibp_nid), error, - list_empty(&conn->ibc_tx_queue) ? "" : "(sending)", - list_empty(&conn->ibc_tx_noops) ? "" : "(sending_noops)", - list_empty(&conn->ibc_tx_queue_rsrvd) ? "" : "(sending_rsrvd)", - list_empty(&conn->ibc_tx_queue_nocred) ? "" : "(sending_nocred)", - list_empty(&conn->ibc_active_txs) ? 
"" : "(waiting)"); - } - - dev = ((struct kib_net *)peer->ibp_ni->ni_data)->ibn_dev; - list_del(&conn->ibc_list); - /* connd (see below) takes over ibc_list's ref */ - - if (list_empty(&peer->ibp_conns) && /* no more conns */ - kiblnd_peer_active(peer)) { /* still in peer table */ - kiblnd_unlink_peer_locked(peer); - - /* set/clear error on last conn */ - peer->ibp_error = conn->ibc_comms_error; - } - - kiblnd_set_conn_state(conn, IBLND_CONN_CLOSING); - - if (error && - kiblnd_dev_can_failover(dev)) { - list_add_tail(&dev->ibd_fail_list, - &kiblnd_data.kib_failed_devs); - wake_up(&kiblnd_data.kib_failover_waitq); - } - - spin_lock_irqsave(&kiblnd_data.kib_connd_lock, flags); - - list_add_tail(&conn->ibc_list, &kiblnd_data.kib_connd_conns); - wake_up(&kiblnd_data.kib_connd_waitq); - - spin_unlock_irqrestore(&kiblnd_data.kib_connd_lock, flags); -} - -void -kiblnd_close_conn(struct kib_conn *conn, int error) -{ - unsigned long flags; - - write_lock_irqsave(&kiblnd_data.kib_global_lock, flags); - - kiblnd_close_conn_locked(conn, error); - - write_unlock_irqrestore(&kiblnd_data.kib_global_lock, flags); -} - -static void -kiblnd_handle_early_rxs(struct kib_conn *conn) -{ - unsigned long flags; - struct kib_rx *rx; - struct kib_rx *tmp; - - LASSERT(!in_interrupt()); - LASSERT(conn->ibc_state >= IBLND_CONN_ESTABLISHED); - - write_lock_irqsave(&kiblnd_data.kib_global_lock, flags); - list_for_each_entry_safe(rx, tmp, &conn->ibc_early_rxs, rx_list) { - list_del(&rx->rx_list); - write_unlock_irqrestore(&kiblnd_data.kib_global_lock, flags); - - kiblnd_handle_rx(rx); - - write_lock_irqsave(&kiblnd_data.kib_global_lock, flags); - } - write_unlock_irqrestore(&kiblnd_data.kib_global_lock, flags); -} - -static void -kiblnd_abort_txs(struct kib_conn *conn, struct list_head *txs) -{ - LIST_HEAD(zombies); - struct list_head *tmp; - struct list_head *nxt; - struct kib_tx *tx; - - spin_lock(&conn->ibc_lock); - - list_for_each_safe(tmp, nxt, txs) { - tx = list_entry(tmp, struct kib_tx, 
tx_list); - - if (txs == &conn->ibc_active_txs) { - LASSERT(!tx->tx_queued); - LASSERT(tx->tx_waiting || tx->tx_sending); - } else { - LASSERT(tx->tx_queued); - } - - tx->tx_status = -ECONNABORTED; - tx->tx_waiting = 0; - - if (!tx->tx_sending) { - tx->tx_queued = 0; - list_del(&tx->tx_list); - list_add(&tx->tx_list, &zombies); - } - } - - spin_unlock(&conn->ibc_lock); - - kiblnd_txlist_done(conn->ibc_peer->ibp_ni, &zombies, -ECONNABORTED); -} - -static void -kiblnd_finalise_conn(struct kib_conn *conn) -{ - LASSERT(!in_interrupt()); - LASSERT(conn->ibc_state > IBLND_CONN_INIT); - - kiblnd_set_conn_state(conn, IBLND_CONN_DISCONNECTED); - - /* - * abort_receives moves QP state to IB_QPS_ERR. This is only required - * for connections that didn't get as far as being connected, because - * rdma_disconnect() does this for free. - */ - kiblnd_abort_receives(conn); - - /* - * Complete all tx descs not waiting for sends to complete. - * NB we should be safe from RDMA now that the QP has changed state - */ - kiblnd_abort_txs(conn, &conn->ibc_tx_noops); - kiblnd_abort_txs(conn, &conn->ibc_tx_queue); - kiblnd_abort_txs(conn, &conn->ibc_tx_queue_rsrvd); - kiblnd_abort_txs(conn, &conn->ibc_tx_queue_nocred); - kiblnd_abort_txs(conn, &conn->ibc_active_txs); - - kiblnd_handle_early_rxs(conn); -} - -static void -kiblnd_peer_connect_failed(struct kib_peer *peer, int active, int error) -{ - LIST_HEAD(zombies); - unsigned long flags; - - LASSERT(error); - LASSERT(!in_interrupt()); - - write_lock_irqsave(&kiblnd_data.kib_global_lock, flags); - - if (active) { - LASSERT(peer->ibp_connecting > 0); - peer->ibp_connecting--; - } else { - LASSERT(peer->ibp_accepting > 0); - peer->ibp_accepting--; - } - - if (kiblnd_peer_connecting(peer)) { - /* another connection attempt under way... 
*/ - write_unlock_irqrestore(&kiblnd_data.kib_global_lock, - flags); - return; - } - - peer->ibp_reconnected = 0; - if (list_empty(&peer->ibp_conns)) { - /* Take peer's blocked transmits to complete with error */ - list_add(&zombies, &peer->ibp_tx_queue); - list_del_init(&peer->ibp_tx_queue); - - if (kiblnd_peer_active(peer)) - kiblnd_unlink_peer_locked(peer); - - peer->ibp_error = error; - } else { - /* Can't have blocked transmits if there are connections */ - LASSERT(list_empty(&peer->ibp_tx_queue)); - } - - write_unlock_irqrestore(&kiblnd_data.kib_global_lock, flags); - - kiblnd_peer_notify(peer); - - if (list_empty(&zombies)) - return; - - CNETERR("Deleting messages for %s: connection failed\n", - libcfs_nid2str(peer->ibp_nid)); - - kiblnd_txlist_done(peer->ibp_ni, &zombies, -EHOSTUNREACH); -} - -static void -kiblnd_connreq_done(struct kib_conn *conn, int status) -{ - struct kib_peer *peer = conn->ibc_peer; - struct kib_tx *tx; - struct kib_tx *tmp; - struct list_head txs; - unsigned long flags; - int active; - - active = (conn->ibc_state == IBLND_CONN_ACTIVE_CONNECT); - - CDEBUG(D_NET, "%s: active(%d), version(%x), status(%d)\n", - libcfs_nid2str(peer->ibp_nid), active, - conn->ibc_version, status); - - LASSERT(!in_interrupt()); - LASSERT((conn->ibc_state == IBLND_CONN_ACTIVE_CONNECT && - peer->ibp_connecting > 0) || - (conn->ibc_state == IBLND_CONN_PASSIVE_WAIT && - peer->ibp_accepting > 0)); - - kfree(conn->ibc_connvars); - conn->ibc_connvars = NULL; - - if (status) { - /* failed to establish connection */ - kiblnd_peer_connect_failed(peer, active, status); - kiblnd_finalise_conn(conn); - return; - } - - /* connection established */ - write_lock_irqsave(&kiblnd_data.kib_global_lock, flags); - - conn->ibc_last_send = jiffies; - kiblnd_set_conn_state(conn, IBLND_CONN_ESTABLISHED); - kiblnd_peer_alive(peer); - - /* - * Add conn to peer's list and nuke any dangling conns from a different - * peer instance... 
- */ - kiblnd_conn_addref(conn); /* +1 ref for ibc_list */ - list_add(&conn->ibc_list, &peer->ibp_conns); - peer->ibp_reconnected = 0; - if (active) - peer->ibp_connecting--; - else - peer->ibp_accepting--; - - if (!peer->ibp_version) { - peer->ibp_version = conn->ibc_version; - peer->ibp_incarnation = conn->ibc_incarnation; - } - - if (peer->ibp_version != conn->ibc_version || - peer->ibp_incarnation != conn->ibc_incarnation) { - kiblnd_close_stale_conns_locked(peer, conn->ibc_version, - conn->ibc_incarnation); - peer->ibp_version = conn->ibc_version; - peer->ibp_incarnation = conn->ibc_incarnation; - } - - /* grab pending txs while I have the lock */ - list_add(&txs, &peer->ibp_tx_queue); - list_del_init(&peer->ibp_tx_queue); - - if (!kiblnd_peer_active(peer) || /* peer has been deleted */ - conn->ibc_comms_error) { /* error has happened already */ - struct lnet_ni *ni = peer->ibp_ni; - - /* start to shut down connection */ - kiblnd_close_conn_locked(conn, -ECONNABORTED); - write_unlock_irqrestore(&kiblnd_data.kib_global_lock, flags); - - kiblnd_txlist_done(ni, &txs, -ECONNABORTED); - - return; - } - - /* - * +1 ref for myself, this connection is visible to other threads - * now, refcount of peer:ibp_conns can be released by connection - * close from either a different thread, or the calling of - * kiblnd_check_sends_locked() below. See bz21911 for details. 
- */ - kiblnd_conn_addref(conn); - write_unlock_irqrestore(&kiblnd_data.kib_global_lock, flags); - - /* Schedule blocked txs */ - spin_lock(&conn->ibc_lock); - list_for_each_entry_safe(tx, tmp, &txs, tx_list) { - list_del(&tx->tx_list); - - kiblnd_queue_tx_locked(tx, conn); - } - kiblnd_check_sends_locked(conn); - spin_unlock(&conn->ibc_lock); - - /* schedule blocked rxs */ - kiblnd_handle_early_rxs(conn); - - kiblnd_conn_decref(conn); -} - -static void -kiblnd_reject(struct rdma_cm_id *cmid, struct kib_rej *rej) -{ - int rc; - - rc = rdma_reject(cmid, rej, sizeof(*rej)); - - if (rc) - CWARN("Error %d sending reject\n", rc); -} - -static int -kiblnd_passive_connect(struct rdma_cm_id *cmid, void *priv, int priv_nob) -{ - rwlock_t *g_lock = &kiblnd_data.kib_global_lock; - struct kib_msg *reqmsg = priv; - struct kib_msg *ackmsg; - struct kib_dev *ibdev; - struct kib_peer *peer; - struct kib_peer *peer2; - struct kib_conn *conn; - struct lnet_ni *ni = NULL; - struct kib_net *net = NULL; - lnet_nid_t nid; - struct rdma_conn_param cp; - struct kib_rej rej; - int version = IBLND_MSG_VERSION; - unsigned long flags; - int max_frags; - int rc; - struct sockaddr_in *peer_addr; - - LASSERT(!in_interrupt()); - - /* cmid inherits 'context' from the corresponding listener id */ - ibdev = (struct kib_dev *)cmid->context; - LASSERT(ibdev); - - memset(&rej, 0, sizeof(rej)); - rej.ibr_magic = IBLND_MSG_MAGIC; - rej.ibr_why = IBLND_REJECT_FATAL; - rej.ibr_cp.ibcp_max_msg_size = IBLND_MSG_SIZE; - - peer_addr = (struct sockaddr_in *)&cmid->route.addr.dst_addr; - if (*kiblnd_tunables.kib_require_priv_port && - ntohs(peer_addr->sin_port) >= PROT_SOCK) { - __u32 ip = ntohl(peer_addr->sin_addr.s_addr); - - CERROR("Peer's port (%pI4h:%hu) is not privileged\n", - &ip, ntohs(peer_addr->sin_port)); - goto failed; - } - - if (priv_nob < offsetof(struct kib_msg, ibm_type)) { - CERROR("Short connection request\n"); - goto failed; - } - - /* - * Future protocol version compatibility support! 
If the - * o2iblnd-specific protocol changes, or when LNET unifies - * protocols over all LNDs, the initial connection will - * negotiate a protocol version. I trap this here to avoid - * console errors; the reject tells the peer which protocol I - * speak. - */ - if (reqmsg->ibm_magic == LNET_PROTO_MAGIC || - reqmsg->ibm_magic == __swab32(LNET_PROTO_MAGIC)) - goto failed; - if (reqmsg->ibm_magic == IBLND_MSG_MAGIC && - reqmsg->ibm_version != IBLND_MSG_VERSION && - reqmsg->ibm_version != IBLND_MSG_VERSION_1) - goto failed; - if (reqmsg->ibm_magic == __swab32(IBLND_MSG_MAGIC) && - reqmsg->ibm_version != __swab16(IBLND_MSG_VERSION) && - reqmsg->ibm_version != __swab16(IBLND_MSG_VERSION_1)) - goto failed; - - rc = kiblnd_unpack_msg(reqmsg, priv_nob); - if (rc) { - CERROR("Can't parse connection request: %d\n", rc); - goto failed; - } - - nid = reqmsg->ibm_srcnid; - ni = lnet_net2ni(LNET_NIDNET(reqmsg->ibm_dstnid)); - - if (ni) { - net = (struct kib_net *)ni->ni_data; - rej.ibr_incarnation = net->ibn_incarnation; - } - - if (!ni || /* no matching net */ - ni->ni_nid != reqmsg->ibm_dstnid || /* right NET, wrong NID! */ - net->ibn_dev != ibdev) { /* wrong device */ - CERROR("Can't accept conn from %s on %s (%s:%d:%pI4h): bad dst nid %s\n", - libcfs_nid2str(nid), - !ni ? 
"NA" : libcfs_nid2str(ni->ni_nid), - ibdev->ibd_ifname, ibdev->ibd_nnets, - &ibdev->ibd_ifip, - libcfs_nid2str(reqmsg->ibm_dstnid)); - - goto failed; - } - - /* check time stamp as soon as possible */ - if (reqmsg->ibm_dststamp && - reqmsg->ibm_dststamp != net->ibn_incarnation) { - CWARN("Stale connection request\n"); - rej.ibr_why = IBLND_REJECT_CONN_STALE; - goto failed; - } - - /* I can accept peer's version */ - version = reqmsg->ibm_version; - - if (reqmsg->ibm_type != IBLND_MSG_CONNREQ) { - CERROR("Unexpected connreq msg type: %x from %s\n", - reqmsg->ibm_type, libcfs_nid2str(nid)); - goto failed; - } - - if (reqmsg->ibm_u.connparams.ibcp_queue_depth > - kiblnd_msg_queue_size(version, ni)) { - CERROR("Can't accept conn from %s, queue depth too large: %d (<=%d wanted)\n", - libcfs_nid2str(nid), - reqmsg->ibm_u.connparams.ibcp_queue_depth, - kiblnd_msg_queue_size(version, ni)); - - if (version == IBLND_MSG_VERSION) - rej.ibr_why = IBLND_REJECT_MSG_QUEUE_SIZE; - - goto failed; - } - - max_frags = reqmsg->ibm_u.connparams.ibcp_max_frags >> IBLND_FRAG_SHIFT; - if (max_frags > kiblnd_rdma_frags(version, ni)) { - CWARN("Can't accept conn from %s (version %x): max message size %d is too large (%d wanted)\n", - libcfs_nid2str(nid), version, max_frags, - kiblnd_rdma_frags(version, ni)); - - if (version >= IBLND_MSG_VERSION) - rej.ibr_why = IBLND_REJECT_RDMA_FRAGS; - - goto failed; - } else if (max_frags < kiblnd_rdma_frags(version, ni) && - !net->ibn_fmr_ps) { - CWARN("Can't accept conn from %s (version %x): max message size %d incompatible without FMR pool (%d wanted)\n", - libcfs_nid2str(nid), version, max_frags, - kiblnd_rdma_frags(version, ni)); - - if (version == IBLND_MSG_VERSION) - rej.ibr_why = IBLND_REJECT_RDMA_FRAGS; - - goto failed; - } - - if (reqmsg->ibm_u.connparams.ibcp_max_msg_size > IBLND_MSG_SIZE) { - CERROR("Can't accept %s: message size %d too big (%d max)\n", - libcfs_nid2str(nid), - reqmsg->ibm_u.connparams.ibcp_max_msg_size, - IBLND_MSG_SIZE); - 
goto failed; - } - - /* assume 'nid' is a new peer; create */ - rc = kiblnd_create_peer(ni, &peer, nid); - if (rc) { - CERROR("Can't create peer for %s\n", libcfs_nid2str(nid)); - rej.ibr_why = IBLND_REJECT_NO_RESOURCES; - goto failed; - } - - /* We have validated the peer's parameters so use those */ - peer->ibp_max_frags = max_frags; - peer->ibp_queue_depth = reqmsg->ibm_u.connparams.ibcp_queue_depth; - - write_lock_irqsave(g_lock, flags); - - peer2 = kiblnd_find_peer_locked(nid); - if (peer2) { - if (!peer2->ibp_version) { - peer2->ibp_version = version; - peer2->ibp_incarnation = reqmsg->ibm_srcstamp; - } - - /* not the guy I've talked with */ - if (peer2->ibp_incarnation != reqmsg->ibm_srcstamp || - peer2->ibp_version != version) { - kiblnd_close_peer_conns_locked(peer2, -ESTALE); - - if (kiblnd_peer_active(peer2)) { - peer2->ibp_incarnation = reqmsg->ibm_srcstamp; - peer2->ibp_version = version; - } - write_unlock_irqrestore(g_lock, flags); - - CWARN("Conn stale %s version %x/%x incarnation %llu/%llu\n", - libcfs_nid2str(nid), peer2->ibp_version, version, - peer2->ibp_incarnation, reqmsg->ibm_srcstamp); - - kiblnd_peer_decref(peer); - rej.ibr_why = IBLND_REJECT_CONN_STALE; - goto failed; - } - - /* - * Tie-break connection race in favour of the higher NID. - * If we keep running into a race condition multiple times, - * we have to assume that the connection attempt with the - * higher NID is stuck in a connecting state and will never - * recover. As such, we pass through this if-block and let - * the lower NID connection win so we can move forward. 
- */ - if (peer2->ibp_connecting && - nid < ni->ni_nid && peer2->ibp_races < - MAX_CONN_RACES_BEFORE_ABORT) { - peer2->ibp_races++; - write_unlock_irqrestore(g_lock, flags); - - CDEBUG(D_NET, "Conn race %s\n", - libcfs_nid2str(peer2->ibp_nid)); - - kiblnd_peer_decref(peer); - rej.ibr_why = IBLND_REJECT_CONN_RACE; - goto failed; - } - if (peer2->ibp_races >= MAX_CONN_RACES_BEFORE_ABORT) - CNETERR("Conn race %s: unresolved after %d attempts, letting lower NID win\n", - libcfs_nid2str(peer2->ibp_nid), - MAX_CONN_RACES_BEFORE_ABORT); - /** - * passive connection is allowed even this peer is waiting for - * reconnection. - */ - peer2->ibp_reconnecting = 0; - peer2->ibp_races = 0; - peer2->ibp_accepting++; - kiblnd_peer_addref(peer2); - - /** - * Race with kiblnd_launch_tx (active connect) to create peer - * so copy validated parameters since we now know what the - * peer's limits are - */ - peer2->ibp_max_frags = peer->ibp_max_frags; - peer2->ibp_queue_depth = peer->ibp_queue_depth; - - write_unlock_irqrestore(g_lock, flags); - kiblnd_peer_decref(peer); - peer = peer2; - } else { - /* Brand new peer */ - LASSERT(!peer->ibp_accepting); - LASSERT(!peer->ibp_version && - !peer->ibp_incarnation); - - peer->ibp_accepting = 1; - peer->ibp_version = version; - peer->ibp_incarnation = reqmsg->ibm_srcstamp; - - /* I have a ref on ni that prevents it being shutdown */ - LASSERT(!net->ibn_shutdown); - - kiblnd_peer_addref(peer); - list_add_tail(&peer->ibp_list, kiblnd_nid2peerlist(nid)); - - write_unlock_irqrestore(g_lock, flags); - } - - conn = kiblnd_create_conn(peer, cmid, IBLND_CONN_PASSIVE_WAIT, - version); - if (!conn) { - kiblnd_peer_connect_failed(peer, 0, -ENOMEM); - kiblnd_peer_decref(peer); - rej.ibr_why = IBLND_REJECT_NO_RESOURCES; - goto failed; - } - - /* - * conn now "owns" cmid, so I return success from here on to ensure the - * CM callback doesn't destroy cmid. 
- */ - conn->ibc_incarnation = reqmsg->ibm_srcstamp; - conn->ibc_credits = conn->ibc_queue_depth; - conn->ibc_reserved_credits = conn->ibc_queue_depth; - LASSERT(conn->ibc_credits + conn->ibc_reserved_credits + - IBLND_OOB_MSGS(version) <= IBLND_RX_MSGS(conn)); - - ackmsg = &conn->ibc_connvars->cv_msg; - memset(ackmsg, 0, sizeof(*ackmsg)); - - kiblnd_init_msg(ackmsg, IBLND_MSG_CONNACK, - sizeof(ackmsg->ibm_u.connparams)); - ackmsg->ibm_u.connparams.ibcp_queue_depth = conn->ibc_queue_depth; - ackmsg->ibm_u.connparams.ibcp_max_frags = conn->ibc_max_frags << IBLND_FRAG_SHIFT; - ackmsg->ibm_u.connparams.ibcp_max_msg_size = IBLND_MSG_SIZE; - - kiblnd_pack_msg(ni, ackmsg, version, 0, nid, reqmsg->ibm_srcstamp); - - memset(&cp, 0, sizeof(cp)); - cp.private_data = ackmsg; - cp.private_data_len = ackmsg->ibm_nob; - cp.responder_resources = 0; /* No atomic ops or RDMA reads */ - cp.initiator_depth = 0; - cp.flow_control = 1; - cp.retry_count = *kiblnd_tunables.kib_retry_count; - cp.rnr_retry_count = *kiblnd_tunables.kib_rnr_retry_count; - - CDEBUG(D_NET, "Accept %s\n", libcfs_nid2str(nid)); - - rc = rdma_accept(cmid, &cp); - if (rc) { - CERROR("Can't accept %s: %d\n", libcfs_nid2str(nid), rc); - rej.ibr_version = version; - rej.ibr_why = IBLND_REJECT_FATAL; - - kiblnd_reject(cmid, &rej); - kiblnd_connreq_done(conn, rc); - kiblnd_conn_decref(conn); - } - - lnet_ni_decref(ni); - return 0; - - failed: - if (ni) { - rej.ibr_cp.ibcp_queue_depth = kiblnd_msg_queue_size(version, ni); - rej.ibr_cp.ibcp_max_frags = kiblnd_rdma_frags(version, ni); - lnet_ni_decref(ni); - } - - rej.ibr_version = version; - kiblnd_reject(cmid, &rej); - - return -ECONNREFUSED; -} - -static void -kiblnd_check_reconnect(struct kib_conn *conn, int version, - __u64 incarnation, int why, struct kib_connparams *cp) -{ - rwlock_t *glock = &kiblnd_data.kib_global_lock; - struct kib_peer *peer = conn->ibc_peer; - char *reason; - int msg_size = IBLND_MSG_SIZE; - int frag_num = -1; - int queue_dep = -1; - bool 
reconnect; - unsigned long flags; - - LASSERT(conn->ibc_state == IBLND_CONN_ACTIVE_CONNECT); - LASSERT(peer->ibp_connecting > 0); /* 'conn' at least */ - LASSERT(!peer->ibp_reconnecting); - - if (cp) { - msg_size = cp->ibcp_max_msg_size; - frag_num = cp->ibcp_max_frags << IBLND_FRAG_SHIFT; - queue_dep = cp->ibcp_queue_depth; - } - - write_lock_irqsave(glock, flags); - /** - * retry connection if it's still needed and no other connection - * attempts (active or passive) are in progress - * NB: reconnect is still needed even when ibp_tx_queue is - * empty if ibp_version != version because reconnect may be - * initiated by kiblnd_query() - */ - reconnect = (!list_empty(&peer->ibp_tx_queue) || - peer->ibp_version != version) && - peer->ibp_connecting == 1 && - !peer->ibp_accepting; - if (!reconnect) { - reason = "no need"; - goto out; - } - - switch (why) { - default: - reason = "Unknown"; - break; - - case IBLND_REJECT_RDMA_FRAGS: { - struct lnet_ioctl_config_lnd_tunables *tunables; - - if (!cp) { - reason = "can't negotiate max frags"; - goto out; - } - tunables = peer->ibp_ni->ni_lnd_tunables; - if (!tunables->lt_tun_u.lt_o2ib.lnd_map_on_demand) { - reason = "map_on_demand must be enabled"; - goto out; - } - if (conn->ibc_max_frags <= frag_num) { - reason = "unsupported max frags"; - goto out; - } - - peer->ibp_max_frags = frag_num; - reason = "rdma fragments"; - break; - } - case IBLND_REJECT_MSG_QUEUE_SIZE: - if (!cp) { - reason = "can't negotiate queue depth"; - goto out; - } - if (conn->ibc_queue_depth <= queue_dep) { - reason = "unsupported queue depth"; - goto out; - } - - peer->ibp_queue_depth = queue_dep; - reason = "queue depth"; - break; - - case IBLND_REJECT_CONN_STALE: - reason = "stale"; - break; - - case IBLND_REJECT_CONN_RACE: - reason = "conn race"; - break; - - case IBLND_REJECT_CONN_UNCOMPAT: - reason = "version negotiation"; - break; - } - - conn->ibc_reconnect = 1; - peer->ibp_reconnecting = 1; - peer->ibp_version = version; - if (incarnation) - 
peer->ibp_incarnation = incarnation; -out: - write_unlock_irqrestore(glock, flags); - - CNETERR("%s: %s (%s), %x, %x, msg_size: %d, queue_depth: %d/%d, max_frags: %d/%d\n", - libcfs_nid2str(peer->ibp_nid), - reconnect ? "reconnect" : "don't reconnect", - reason, IBLND_MSG_VERSION, version, msg_size, - conn->ibc_queue_depth, queue_dep, - conn->ibc_max_frags, frag_num); - /** - * if conn::ibc_reconnect is TRUE, connd will reconnect to the peer - * while destroying the zombie - */ -} - -static void -kiblnd_rejected(struct kib_conn *conn, int reason, void *priv, int priv_nob) -{ - struct kib_peer *peer = conn->ibc_peer; - - LASSERT(!in_interrupt()); - LASSERT(conn->ibc_state == IBLND_CONN_ACTIVE_CONNECT); - - switch (reason) { - case IB_CM_REJ_STALE_CONN: - kiblnd_check_reconnect(conn, IBLND_MSG_VERSION, 0, - IBLND_REJECT_CONN_STALE, NULL); - break; - - case IB_CM_REJ_INVALID_SERVICE_ID: - CNETERR("%s rejected: no listener at %d\n", - libcfs_nid2str(peer->ibp_nid), - *kiblnd_tunables.kib_service); - break; - - case IB_CM_REJ_CONSUMER_DEFINED: - if (priv_nob >= offsetof(struct kib_rej, ibr_padding)) { - struct kib_rej *rej = priv; - struct kib_connparams *cp = NULL; - int flip = 0; - __u64 incarnation = -1; - - /* NB. default incarnation is -1 because: - * a) V1 will ignore dst incarnation in connreq. - * b) V2 will provide incarnation while rejecting me, - * -1 will be overwrote. - * - * if I try to connect to a V1 peer with V2 protocol, - * it rejected me then upgrade to V2, I have no idea - * about the upgrading and try to reconnect with V1, - * in this case upgraded V2 can find out I'm trying to - * talk to the old guy and reject me(incarnation is -1). 
- */ - - if (rej->ibr_magic == __swab32(IBLND_MSG_MAGIC) || - rej->ibr_magic == __swab32(LNET_PROTO_MAGIC)) { - __swab32s(&rej->ibr_magic); - __swab16s(&rej->ibr_version); - flip = 1; - } - - if (priv_nob >= sizeof(struct kib_rej) && - rej->ibr_version > IBLND_MSG_VERSION_1) { - /* - * priv_nob is always 148 in current version - * of OFED, so we still need to check version. - * (define of IB_CM_REJ_PRIVATE_DATA_SIZE) - */ - cp = &rej->ibr_cp; - - if (flip) { - __swab64s(&rej->ibr_incarnation); - __swab16s(&cp->ibcp_queue_depth); - __swab16s(&cp->ibcp_max_frags); - __swab32s(&cp->ibcp_max_msg_size); - } - - incarnation = rej->ibr_incarnation; - } - - if (rej->ibr_magic != IBLND_MSG_MAGIC && - rej->ibr_magic != LNET_PROTO_MAGIC) { - CERROR("%s rejected: consumer defined fatal error\n", - libcfs_nid2str(peer->ibp_nid)); - break; - } - - if (rej->ibr_version != IBLND_MSG_VERSION && - rej->ibr_version != IBLND_MSG_VERSION_1) { - CERROR("%s rejected: o2iblnd version %x error\n", - libcfs_nid2str(peer->ibp_nid), - rej->ibr_version); - break; - } - - if (rej->ibr_why == IBLND_REJECT_FATAL && - rej->ibr_version == IBLND_MSG_VERSION_1) { - CDEBUG(D_NET, "rejected by old version peer %s: %x\n", - libcfs_nid2str(peer->ibp_nid), rej->ibr_version); - - if (conn->ibc_version != IBLND_MSG_VERSION_1) - rej->ibr_why = IBLND_REJECT_CONN_UNCOMPAT; - } - - switch (rej->ibr_why) { - case IBLND_REJECT_CONN_RACE: - case IBLND_REJECT_CONN_STALE: - case IBLND_REJECT_CONN_UNCOMPAT: - case IBLND_REJECT_MSG_QUEUE_SIZE: - case IBLND_REJECT_RDMA_FRAGS: - kiblnd_check_reconnect(conn, rej->ibr_version, - incarnation, - rej->ibr_why, cp); - break; - - case IBLND_REJECT_NO_RESOURCES: - CERROR("%s rejected: o2iblnd no resources\n", - libcfs_nid2str(peer->ibp_nid)); - break; - - case IBLND_REJECT_FATAL: - CERROR("%s rejected: o2iblnd fatal error\n", - libcfs_nid2str(peer->ibp_nid)); - break; - - default: - CERROR("%s rejected: o2iblnd reason %d\n", - libcfs_nid2str(peer->ibp_nid), - rej->ibr_why); - 
break; - } - break; - } - /* fall through */ - default: - CNETERR("%s rejected: reason %d, size %d\n", - libcfs_nid2str(peer->ibp_nid), reason, priv_nob); - break; - } - - kiblnd_connreq_done(conn, -ECONNREFUSED); -} - -static void -kiblnd_check_connreply(struct kib_conn *conn, void *priv, int priv_nob) -{ - struct kib_peer *peer = conn->ibc_peer; - struct lnet_ni *ni = peer->ibp_ni; - struct kib_net *net = ni->ni_data; - struct kib_msg *msg = priv; - int ver = conn->ibc_version; - int rc = kiblnd_unpack_msg(msg, priv_nob); - unsigned long flags; - - LASSERT(net); - - if (rc) { - CERROR("Can't unpack connack from %s: %d\n", - libcfs_nid2str(peer->ibp_nid), rc); - goto failed; - } - - if (msg->ibm_type != IBLND_MSG_CONNACK) { - CERROR("Unexpected message %d from %s\n", - msg->ibm_type, libcfs_nid2str(peer->ibp_nid)); - rc = -EPROTO; - goto failed; - } - - if (ver != msg->ibm_version) { - CERROR("%s replied version %x is different with requested version %x\n", - libcfs_nid2str(peer->ibp_nid), msg->ibm_version, ver); - rc = -EPROTO; - goto failed; - } - - if (msg->ibm_u.connparams.ibcp_queue_depth > - conn->ibc_queue_depth) { - CERROR("%s has incompatible queue depth %d (<=%d wanted)\n", - libcfs_nid2str(peer->ibp_nid), - msg->ibm_u.connparams.ibcp_queue_depth, - conn->ibc_queue_depth); - rc = -EPROTO; - goto failed; - } - - if ((msg->ibm_u.connparams.ibcp_max_frags >> IBLND_FRAG_SHIFT) > - conn->ibc_max_frags) { - CERROR("%s has incompatible max_frags %d (<=%d wanted)\n", - libcfs_nid2str(peer->ibp_nid), - msg->ibm_u.connparams.ibcp_max_frags >> IBLND_FRAG_SHIFT, - conn->ibc_max_frags); - rc = -EPROTO; - goto failed; - } - - if (msg->ibm_u.connparams.ibcp_max_msg_size > IBLND_MSG_SIZE) { - CERROR("%s max message size %d too big (%d max)\n", - libcfs_nid2str(peer->ibp_nid), - msg->ibm_u.connparams.ibcp_max_msg_size, - IBLND_MSG_SIZE); - rc = -EPROTO; - goto failed; - } - - read_lock_irqsave(&kiblnd_data.kib_global_lock, flags); - if (msg->ibm_dstnid == ni->ni_nid && - 
msg->ibm_dststamp == net->ibn_incarnation) - rc = 0; - else - rc = -ESTALE; - read_unlock_irqrestore(&kiblnd_data.kib_global_lock, flags); - - if (rc) { - CERROR("Bad connection reply from %s, rc = %d, version: %x max_frags: %d\n", - libcfs_nid2str(peer->ibp_nid), rc, - msg->ibm_version, msg->ibm_u.connparams.ibcp_max_frags); - goto failed; - } - - conn->ibc_incarnation = msg->ibm_srcstamp; - conn->ibc_credits = msg->ibm_u.connparams.ibcp_queue_depth; - conn->ibc_reserved_credits = msg->ibm_u.connparams.ibcp_queue_depth; - conn->ibc_queue_depth = msg->ibm_u.connparams.ibcp_queue_depth; - conn->ibc_max_frags = msg->ibm_u.connparams.ibcp_max_frags >> IBLND_FRAG_SHIFT; - LASSERT(conn->ibc_credits + conn->ibc_reserved_credits + - IBLND_OOB_MSGS(ver) <= IBLND_RX_MSGS(conn)); - - kiblnd_connreq_done(conn, 0); - return; - - failed: - /* - * NB My QP has already established itself, so I handle anything going - * wrong here by setting ibc_comms_error. - * kiblnd_connreq_done(0) moves the conn state to ESTABLISHED, but then - * immediately tears it down. - */ - LASSERT(rc); - conn->ibc_comms_error = rc; - kiblnd_connreq_done(conn, 0); -} - -static int -kiblnd_active_connect(struct rdma_cm_id *cmid) -{ - struct kib_peer *peer = (struct kib_peer *)cmid->context; - struct kib_conn *conn; - struct kib_msg *msg; - struct rdma_conn_param cp; - int version; - __u64 incarnation; - unsigned long flags; - int rc; - - read_lock_irqsave(&kiblnd_data.kib_global_lock, flags); - - incarnation = peer->ibp_incarnation; - version = !peer->ibp_version ? IBLND_MSG_VERSION : - peer->ibp_version; - - read_unlock_irqrestore(&kiblnd_data.kib_global_lock, flags); - - conn = kiblnd_create_conn(peer, cmid, IBLND_CONN_ACTIVE_CONNECT, - version); - if (!conn) { - kiblnd_peer_connect_failed(peer, 1, -ENOMEM); - kiblnd_peer_decref(peer); /* lose cmid's ref */ - return -ENOMEM; - } - - /* - * conn "owns" cmid now, so I return success from here on to ensure the - * CM callback doesn't destroy cmid. 
conn also takes over cmid's ref - * on peer - */ - msg = &conn->ibc_connvars->cv_msg; - - memset(msg, 0, sizeof(*msg)); - kiblnd_init_msg(msg, IBLND_MSG_CONNREQ, sizeof(msg->ibm_u.connparams)); - msg->ibm_u.connparams.ibcp_queue_depth = conn->ibc_queue_depth; - msg->ibm_u.connparams.ibcp_max_frags = conn->ibc_max_frags << IBLND_FRAG_SHIFT; - msg->ibm_u.connparams.ibcp_max_msg_size = IBLND_MSG_SIZE; - - kiblnd_pack_msg(peer->ibp_ni, msg, version, - 0, peer->ibp_nid, incarnation); - - memset(&cp, 0, sizeof(cp)); - cp.private_data = msg; - cp.private_data_len = msg->ibm_nob; - cp.responder_resources = 0; /* No atomic ops or RDMA reads */ - cp.initiator_depth = 0; - cp.flow_control = 1; - cp.retry_count = *kiblnd_tunables.kib_retry_count; - cp.rnr_retry_count = *kiblnd_tunables.kib_rnr_retry_count; - - LASSERT(cmid->context == (void *)conn); - LASSERT(conn->ibc_cmid == cmid); - - rc = rdma_connect(cmid, &cp); - if (rc) { - CERROR("Can't connect to %s: %d\n", - libcfs_nid2str(peer->ibp_nid), rc); - kiblnd_connreq_done(conn, rc); - kiblnd_conn_decref(conn); - } - - return 0; -} - -int -kiblnd_cm_callback(struct rdma_cm_id *cmid, struct rdma_cm_event *event) -{ - struct kib_peer *peer; - struct kib_conn *conn; - int rc; - - switch (event->event) { - default: - CERROR("Unexpected event: %d, status: %d\n", - event->event, event->status); - LBUG(); - - case RDMA_CM_EVENT_CONNECT_REQUEST: - /* destroy cmid on failure */ - rc = kiblnd_passive_connect(cmid, - (void *)KIBLND_CONN_PARAM(event), - KIBLND_CONN_PARAM_LEN(event)); - CDEBUG(D_NET, "connreq: %d\n", rc); - return rc; - - case RDMA_CM_EVENT_ADDR_ERROR: - peer = (struct kib_peer *)cmid->context; - CNETERR("%s: ADDR ERROR %d\n", - libcfs_nid2str(peer->ibp_nid), event->status); - kiblnd_peer_connect_failed(peer, 1, -EHOSTUNREACH); - kiblnd_peer_decref(peer); - return -EHOSTUNREACH; /* rc destroys cmid */ - - case RDMA_CM_EVENT_ADDR_RESOLVED: - peer = (struct kib_peer *)cmid->context; - - CDEBUG(D_NET, "%s Addr resolved: 
%d\n", - libcfs_nid2str(peer->ibp_nid), event->status); - - if (event->status) { - CNETERR("Can't resolve address for %s: %d\n", - libcfs_nid2str(peer->ibp_nid), event->status); - rc = event->status; - } else { - rc = rdma_resolve_route( - cmid, *kiblnd_tunables.kib_timeout * 1000); - if (!rc) - return 0; - /* Can't initiate route resolution */ - CERROR("Can't resolve route for %s: %d\n", - libcfs_nid2str(peer->ibp_nid), rc); - } - kiblnd_peer_connect_failed(peer, 1, rc); - kiblnd_peer_decref(peer); - return rc; /* rc destroys cmid */ - - case RDMA_CM_EVENT_ROUTE_ERROR: - peer = (struct kib_peer *)cmid->context; - CNETERR("%s: ROUTE ERROR %d\n", - libcfs_nid2str(peer->ibp_nid), event->status); - kiblnd_peer_connect_failed(peer, 1, -EHOSTUNREACH); - kiblnd_peer_decref(peer); - return -EHOSTUNREACH; /* rc destroys cmid */ - - case RDMA_CM_EVENT_ROUTE_RESOLVED: - peer = (struct kib_peer *)cmid->context; - CDEBUG(D_NET, "%s Route resolved: %d\n", - libcfs_nid2str(peer->ibp_nid), event->status); - - if (!event->status) - return kiblnd_active_connect(cmid); - - CNETERR("Can't resolve route for %s: %d\n", - libcfs_nid2str(peer->ibp_nid), event->status); - kiblnd_peer_connect_failed(peer, 1, event->status); - kiblnd_peer_decref(peer); - return event->status; /* rc destroys cmid */ - - case RDMA_CM_EVENT_UNREACHABLE: - conn = (struct kib_conn *)cmid->context; - LASSERT(conn->ibc_state == IBLND_CONN_ACTIVE_CONNECT || - conn->ibc_state == IBLND_CONN_PASSIVE_WAIT); - CNETERR("%s: UNREACHABLE %d\n", - libcfs_nid2str(conn->ibc_peer->ibp_nid), event->status); - kiblnd_connreq_done(conn, -ENETDOWN); - kiblnd_conn_decref(conn); - return 0; - - case RDMA_CM_EVENT_CONNECT_ERROR: - conn = (struct kib_conn *)cmid->context; - LASSERT(conn->ibc_state == IBLND_CONN_ACTIVE_CONNECT || - conn->ibc_state == IBLND_CONN_PASSIVE_WAIT); - CNETERR("%s: CONNECT ERROR %d\n", - libcfs_nid2str(conn->ibc_peer->ibp_nid), event->status); - kiblnd_connreq_done(conn, -ENOTCONN); - kiblnd_conn_decref(conn); 
- return 0; - - case RDMA_CM_EVENT_REJECTED: - conn = (struct kib_conn *)cmid->context; - switch (conn->ibc_state) { - default: - LBUG(); - - case IBLND_CONN_PASSIVE_WAIT: - CERROR("%s: REJECTED %d\n", - libcfs_nid2str(conn->ibc_peer->ibp_nid), - event->status); - kiblnd_connreq_done(conn, -ECONNRESET); - break; - - case IBLND_CONN_ACTIVE_CONNECT: - kiblnd_rejected(conn, event->status, - (void *)KIBLND_CONN_PARAM(event), - KIBLND_CONN_PARAM_LEN(event)); - break; - } - kiblnd_conn_decref(conn); - return 0; - - case RDMA_CM_EVENT_ESTABLISHED: - conn = (struct kib_conn *)cmid->context; - switch (conn->ibc_state) { - default: - LBUG(); - - case IBLND_CONN_PASSIVE_WAIT: - CDEBUG(D_NET, "ESTABLISHED (passive): %s\n", - libcfs_nid2str(conn->ibc_peer->ibp_nid)); - kiblnd_connreq_done(conn, 0); - break; - - case IBLND_CONN_ACTIVE_CONNECT: - CDEBUG(D_NET, "ESTABLISHED(active): %s\n", - libcfs_nid2str(conn->ibc_peer->ibp_nid)); - kiblnd_check_connreply(conn, - (void *)KIBLND_CONN_PARAM(event), - KIBLND_CONN_PARAM_LEN(event)); - break; - } - /* net keeps its ref on conn! 
*/ - return 0; - - case RDMA_CM_EVENT_TIMEWAIT_EXIT: - CDEBUG(D_NET, "Ignore TIMEWAIT_EXIT event\n"); - return 0; - case RDMA_CM_EVENT_DISCONNECTED: - conn = (struct kib_conn *)cmid->context; - if (conn->ibc_state < IBLND_CONN_ESTABLISHED) { - CERROR("%s DISCONNECTED\n", - libcfs_nid2str(conn->ibc_peer->ibp_nid)); - kiblnd_connreq_done(conn, -ECONNRESET); - } else { - kiblnd_close_conn(conn, 0); - } - kiblnd_conn_decref(conn); - cmid->context = NULL; - return 0; - - case RDMA_CM_EVENT_DEVICE_REMOVAL: - LCONSOLE_ERROR_MSG(0x131, - "Received notification of device removal\n" - "Please shutdown LNET to allow this to proceed\n"); - /* - * Can't remove network from underneath LNET for now, so I have - * to ignore this - */ - return 0; - - case RDMA_CM_EVENT_ADDR_CHANGE: - LCONSOLE_INFO("Physical link changed (eg hca/port)\n"); - return 0; - } -} - -static int -kiblnd_check_txs_locked(struct kib_conn *conn, struct list_head *txs) -{ - struct kib_tx *tx; - struct list_head *ttmp; - - list_for_each(ttmp, txs) { - tx = list_entry(ttmp, struct kib_tx, tx_list); - - if (txs != &conn->ibc_active_txs) { - LASSERT(tx->tx_queued); - } else { - LASSERT(!tx->tx_queued); - LASSERT(tx->tx_waiting || tx->tx_sending); - } - - if (cfs_time_aftereq(jiffies, tx->tx_deadline)) { - CERROR("Timed out tx: %s, %lu seconds\n", - kiblnd_queue2str(conn, txs), - cfs_duration_sec(jiffies - tx->tx_deadline)); - return 1; - } - } - - return 0; -} - -static int -kiblnd_conn_timed_out_locked(struct kib_conn *conn) -{ - return kiblnd_check_txs_locked(conn, &conn->ibc_tx_queue) || - kiblnd_check_txs_locked(conn, &conn->ibc_tx_noops) || - kiblnd_check_txs_locked(conn, &conn->ibc_tx_queue_rsrvd) || - kiblnd_check_txs_locked(conn, &conn->ibc_tx_queue_nocred) || - kiblnd_check_txs_locked(conn, &conn->ibc_active_txs); -} - -static void -kiblnd_check_conns(int idx) -{ - LIST_HEAD(closes); - LIST_HEAD(checksends); - struct list_head *peers = &kiblnd_data.kib_peers[idx]; - struct list_head *ptmp; - struct 
kib_peer *peer; - struct kib_conn *conn; - struct kib_conn *temp; - struct kib_conn *tmp; - struct list_head *ctmp; - unsigned long flags; - - /* - * NB. We expect to have a look at all the peers and not find any - * RDMAs to time out, so we just use a shared lock while we - * take a look... - */ - read_lock_irqsave(&kiblnd_data.kib_global_lock, flags); - - list_for_each(ptmp, peers) { - peer = list_entry(ptmp, struct kib_peer, ibp_list); - - list_for_each(ctmp, &peer->ibp_conns) { - int timedout; - int sendnoop; - - conn = list_entry(ctmp, struct kib_conn, ibc_list); - - LASSERT(conn->ibc_state == IBLND_CONN_ESTABLISHED); - - spin_lock(&conn->ibc_lock); - - sendnoop = kiblnd_need_noop(conn); - timedout = kiblnd_conn_timed_out_locked(conn); - if (!sendnoop && !timedout) { - spin_unlock(&conn->ibc_lock); - continue; - } - - if (timedout) { - CERROR("Timed out RDMA with %s (%lu): c: %u, oc: %u, rc: %u\n", - libcfs_nid2str(peer->ibp_nid), - cfs_duration_sec(cfs_time_current() - - peer->ibp_last_alive), - conn->ibc_credits, - conn->ibc_outstanding_credits, - conn->ibc_reserved_credits); - list_add(&conn->ibc_connd_list, &closes); - } else { - list_add(&conn->ibc_connd_list, &checksends); - } - /* +ref for 'closes' or 'checksends' */ - kiblnd_conn_addref(conn); - - spin_unlock(&conn->ibc_lock); - } - } - - read_unlock_irqrestore(&kiblnd_data.kib_global_lock, flags); - - /* - * Handle timeout by closing the whole - * connection. We can only be sure RDMA activity - * has ceased once the QP has been modified. - */ - list_for_each_entry_safe(conn, tmp, &closes, ibc_connd_list) { - list_del(&conn->ibc_connd_list); - kiblnd_close_conn(conn, -ETIMEDOUT); - kiblnd_conn_decref(conn); - } - - /* - * In case we have enough credits to return via a - * NOOP, but there were no non-blocking tx descs - * free to do it last time... 
- */ - list_for_each_entry_safe(conn, temp, &checksends, ibc_connd_list) { - list_del(&conn->ibc_connd_list); - - spin_lock(&conn->ibc_lock); - kiblnd_check_sends_locked(conn); - spin_unlock(&conn->ibc_lock); - - kiblnd_conn_decref(conn); - } -} - -static void -kiblnd_disconnect_conn(struct kib_conn *conn) -{ - LASSERT(!in_interrupt()); - LASSERT(current == kiblnd_data.kib_connd); - LASSERT(conn->ibc_state == IBLND_CONN_CLOSING); - - rdma_disconnect(conn->ibc_cmid); - kiblnd_finalise_conn(conn); - - kiblnd_peer_notify(conn->ibc_peer); -} - -/** - * High-water for reconnection to the same peer, reconnection attempt should - * be delayed after trying more than KIB_RECONN_HIGH_RACE. - */ -#define KIB_RECONN_HIGH_RACE 10 -/** - * Allow connd to take a break and handle other things after consecutive - * reconnection attempts. - */ -#define KIB_RECONN_BREAK 100 - -int -kiblnd_connd(void *arg) -{ - spinlock_t *lock = &kiblnd_data.kib_connd_lock; - wait_queue_entry_t wait; - unsigned long flags; - struct kib_conn *conn; - int timeout; - int i; - int dropped_lock; - int peer_index = 0; - unsigned long deadline = jiffies; - - init_waitqueue_entry(&wait, current); - kiblnd_data.kib_connd = current; - - spin_lock_irqsave(lock, flags); - - while (!kiblnd_data.kib_shutdown) { - int reconn = 0; - - dropped_lock = 0; - - if (!list_empty(&kiblnd_data.kib_connd_zombies)) { - struct kib_peer *peer = NULL; - - conn = list_entry(kiblnd_data.kib_connd_zombies.next, - struct kib_conn, ibc_list); - list_del(&conn->ibc_list); - if (conn->ibc_reconnect) { - peer = conn->ibc_peer; - kiblnd_peer_addref(peer); - } - - spin_unlock_irqrestore(lock, flags); - dropped_lock = 1; - - kiblnd_destroy_conn(conn); - - spin_lock_irqsave(lock, flags); - if (!peer) { - kfree(conn); - continue; - } - - conn->ibc_peer = peer; - if (peer->ibp_reconnected < KIB_RECONN_HIGH_RACE) - list_add_tail(&conn->ibc_list, - &kiblnd_data.kib_reconn_list); - else - list_add_tail(&conn->ibc_list, - 
&kiblnd_data.kib_reconn_wait); - } - - if (!list_empty(&kiblnd_data.kib_connd_conns)) { - conn = list_entry(kiblnd_data.kib_connd_conns.next, - struct kib_conn, ibc_list); - list_del(&conn->ibc_list); - - spin_unlock_irqrestore(lock, flags); - dropped_lock = 1; - - kiblnd_disconnect_conn(conn); - kiblnd_conn_decref(conn); - - spin_lock_irqsave(lock, flags); - } - - while (reconn < KIB_RECONN_BREAK) { - if (kiblnd_data.kib_reconn_sec != - ktime_get_real_seconds()) { - kiblnd_data.kib_reconn_sec = ktime_get_real_seconds(); - list_splice_init(&kiblnd_data.kib_reconn_wait, - &kiblnd_data.kib_reconn_list); - } - - if (list_empty(&kiblnd_data.kib_reconn_list)) - break; - - conn = list_entry(kiblnd_data.kib_reconn_list.next, - struct kib_conn, ibc_list); - list_del(&conn->ibc_list); - - spin_unlock_irqrestore(lock, flags); - dropped_lock = 1; - - reconn += kiblnd_reconnect_peer(conn->ibc_peer); - kiblnd_peer_decref(conn->ibc_peer); - kfree(conn); - - spin_lock_irqsave(lock, flags); - } - - /* careful with the jiffy wrap... */ - timeout = (int)(deadline - jiffies); - if (timeout <= 0) { - const int n = 4; - const int p = 1; - int chunk = kiblnd_data.kib_peer_hash_size; - - spin_unlock_irqrestore(lock, flags); - dropped_lock = 1; - - /* - * Time to check for RDMA timeouts on a few more - * peers: I do checks every 'p' seconds on a - * proportion of the peer table and I need to check - * every connection 'n' times within a timeout - * interval, to ensure I detect a timeout on any - * connection within (n+1)/n times the timeout - * interval. 
- */ - if (*kiblnd_tunables.kib_timeout > n * p) - chunk = (chunk * n * p) / - *kiblnd_tunables.kib_timeout; - if (!chunk) - chunk = 1; - - for (i = 0; i < chunk; i++) { - kiblnd_check_conns(peer_index); - peer_index = (peer_index + 1) % - kiblnd_data.kib_peer_hash_size; - } - - deadline += msecs_to_jiffies(p * MSEC_PER_SEC); - spin_lock_irqsave(lock, flags); - } - - if (dropped_lock) - continue; - - /* Nothing to do for 'timeout' */ - set_current_state(TASK_INTERRUPTIBLE); - add_wait_queue(&kiblnd_data.kib_connd_waitq, &wait); - spin_unlock_irqrestore(lock, flags); - - schedule_timeout(timeout); - - remove_wait_queue(&kiblnd_data.kib_connd_waitq, &wait); - spin_lock_irqsave(lock, flags); - } - - spin_unlock_irqrestore(lock, flags); - - kiblnd_thread_fini(); - return 0; -} - -void -kiblnd_qp_event(struct ib_event *event, void *arg) -{ - struct kib_conn *conn = arg; - - switch (event->event) { - case IB_EVENT_COMM_EST: - CDEBUG(D_NET, "%s established\n", - libcfs_nid2str(conn->ibc_peer->ibp_nid)); - /* - * We received a packet but connection isn't established - * probably handshake packet was lost, so free to - * force make connection established - */ - rdma_notify(conn->ibc_cmid, IB_EVENT_COMM_EST); - return; - - default: - CERROR("%s: Async QP event type %d\n", - libcfs_nid2str(conn->ibc_peer->ibp_nid), event->event); - return; - } -} - -static void -kiblnd_complete(struct ib_wc *wc) -{ - switch (kiblnd_wreqid2type(wc->wr_id)) { - default: - LBUG(); - - case IBLND_WID_MR: - if (wc->status != IB_WC_SUCCESS && - wc->status != IB_WC_WR_FLUSH_ERR) - CNETERR("FastReg failed: %d\n", wc->status); - break; - - case IBLND_WID_RDMA: - /* - * We only get RDMA completion notification if it fails. All - * subsequent work items, including the final SEND will fail - * too. 
However we can't print out any more info about the - * failing RDMA because 'tx' might be back on the idle list or - * even reused already if we didn't manage to post all our work - * items - */ - CNETERR("RDMA (tx: %p) failed: %d\n", - kiblnd_wreqid2ptr(wc->wr_id), wc->status); - return; - - case IBLND_WID_TX: - kiblnd_tx_complete(kiblnd_wreqid2ptr(wc->wr_id), wc->status); - return; - - case IBLND_WID_RX: - kiblnd_rx_complete(kiblnd_wreqid2ptr(wc->wr_id), wc->status, - wc->byte_len); - return; - } -} - -void -kiblnd_cq_completion(struct ib_cq *cq, void *arg) -{ - /* - * NB I'm not allowed to schedule this conn once its refcount has - * reached 0. Since fundamentally I'm racing with scheduler threads - * consuming my CQ I could be called after all completions have - * occurred. But in this case, !ibc_nrx && !ibc_nsends_posted - * and this CQ is about to be destroyed so I NOOP. - */ - struct kib_conn *conn = arg; - struct kib_sched_info *sched = conn->ibc_sched; - unsigned long flags; - - LASSERT(cq == conn->ibc_cq); - - spin_lock_irqsave(&sched->ibs_lock, flags); - - conn->ibc_ready = 1; - - if (!conn->ibc_scheduled && - (conn->ibc_nrx > 0 || - conn->ibc_nsends_posted > 0)) { - kiblnd_conn_addref(conn); /* +1 ref for sched_conns */ - conn->ibc_scheduled = 1; - list_add_tail(&conn->ibc_sched_list, &sched->ibs_conns); - - if (waitqueue_active(&sched->ibs_waitq)) - wake_up(&sched->ibs_waitq); - } - - spin_unlock_irqrestore(&sched->ibs_lock, flags); -} - -void -kiblnd_cq_event(struct ib_event *event, void *arg) -{ - struct kib_conn *conn = arg; - - CERROR("%s: async CQ event type %d\n", - libcfs_nid2str(conn->ibc_peer->ibp_nid), event->event); -} - -int -kiblnd_scheduler(void *arg) -{ - long id = (long)arg; - struct kib_sched_info *sched; - struct kib_conn *conn; - wait_queue_entry_t wait; - unsigned long flags; - struct ib_wc wc; - int did_something; - int busy_loops = 0; - int rc; - - init_waitqueue_entry(&wait, current); - - sched = 
kiblnd_data.kib_scheds[KIB_THREAD_CPT(id)]; - - rc = cfs_cpt_bind(lnet_cpt_table(), sched->ibs_cpt); - if (rc) { - CWARN("Unable to bind on CPU partition %d, please verify whether all CPUs are healthy and reload modules if necessary, otherwise your system might under risk of low performance\n", - sched->ibs_cpt); - } - - spin_lock_irqsave(&sched->ibs_lock, flags); - - while (!kiblnd_data.kib_shutdown) { - if (busy_loops++ >= IBLND_RESCHED) { - spin_unlock_irqrestore(&sched->ibs_lock, flags); - - cond_resched(); - busy_loops = 0; - - spin_lock_irqsave(&sched->ibs_lock, flags); - } - - did_something = 0; - - if (!list_empty(&sched->ibs_conns)) { - conn = list_entry(sched->ibs_conns.next, struct kib_conn, - ibc_sched_list); - /* take over kib_sched_conns' ref on conn... */ - LASSERT(conn->ibc_scheduled); - list_del(&conn->ibc_sched_list); - conn->ibc_ready = 0; - - spin_unlock_irqrestore(&sched->ibs_lock, flags); - - wc.wr_id = IBLND_WID_INVAL; - - rc = ib_poll_cq(conn->ibc_cq, 1, &wc); - if (!rc) { - rc = ib_req_notify_cq(conn->ibc_cq, - IB_CQ_NEXT_COMP); - if (rc < 0) { - CWARN("%s: ib_req_notify_cq failed: %d, closing connection\n", - libcfs_nid2str(conn->ibc_peer->ibp_nid), rc); - kiblnd_close_conn(conn, -EIO); - kiblnd_conn_decref(conn); - spin_lock_irqsave(&sched->ibs_lock, - flags); - continue; - } - - rc = ib_poll_cq(conn->ibc_cq, 1, &wc); - } - - if (unlikely(rc > 0 && wc.wr_id == IBLND_WID_INVAL)) { - LCONSOLE_ERROR("ib_poll_cq (rc: %d) returned invalid wr_id, opcode %d, status: %d, vendor_err: %d, conn: %s status: %d\nplease upgrade firmware and OFED or contact vendor.\n", - rc, wc.opcode, wc.status, - wc.vendor_err, - libcfs_nid2str(conn->ibc_peer->ibp_nid), - conn->ibc_state); - rc = -EINVAL; - } - - if (rc < 0) { - CWARN("%s: ib_poll_cq failed: %d, closing connection\n", - libcfs_nid2str(conn->ibc_peer->ibp_nid), - rc); - kiblnd_close_conn(conn, -EIO); - kiblnd_conn_decref(conn); - spin_lock_irqsave(&sched->ibs_lock, flags); - continue; - } - - 
spin_lock_irqsave(&sched->ibs_lock, flags); - - if (rc || conn->ibc_ready) { - /* - * There may be another completion waiting; get - * another scheduler to check while I handle - * this one... - */ - /* +1 ref for sched_conns */ - kiblnd_conn_addref(conn); - list_add_tail(&conn->ibc_sched_list, - &sched->ibs_conns); - if (waitqueue_active(&sched->ibs_waitq)) - wake_up(&sched->ibs_waitq); - } else { - conn->ibc_scheduled = 0; - } - - if (rc) { - spin_unlock_irqrestore(&sched->ibs_lock, flags); - kiblnd_complete(&wc); - - spin_lock_irqsave(&sched->ibs_lock, flags); - } - - kiblnd_conn_decref(conn); /* ...drop my ref from above */ - did_something = 1; - } - - if (did_something) - continue; - - set_current_state(TASK_INTERRUPTIBLE); - add_wait_queue_exclusive(&sched->ibs_waitq, &wait); - spin_unlock_irqrestore(&sched->ibs_lock, flags); - - schedule(); - busy_loops = 0; - - remove_wait_queue(&sched->ibs_waitq, &wait); - spin_lock_irqsave(&sched->ibs_lock, flags); - } - - spin_unlock_irqrestore(&sched->ibs_lock, flags); - - kiblnd_thread_fini(); - return 0; -} - -int -kiblnd_failover_thread(void *arg) -{ - rwlock_t *glock = &kiblnd_data.kib_global_lock; - struct kib_dev *dev; - wait_queue_entry_t wait; - unsigned long flags; - int rc; - - LASSERT(*kiblnd_tunables.kib_dev_failover); - - init_waitqueue_entry(&wait, current); - write_lock_irqsave(glock, flags); - - while (!kiblnd_data.kib_shutdown) { - int do_failover = 0; - int long_sleep; - - list_for_each_entry(dev, &kiblnd_data.kib_failed_devs, - ibd_fail_list) { - if (time_before(cfs_time_current(), - dev->ibd_next_failover)) - continue; - do_failover = 1; - break; - } - - if (do_failover) { - list_del_init(&dev->ibd_fail_list); - dev->ibd_failover = 1; - write_unlock_irqrestore(glock, flags); - - rc = kiblnd_dev_failover(dev); - - write_lock_irqsave(glock, flags); - - LASSERT(dev->ibd_failover); - dev->ibd_failover = 0; - if (rc >= 0) { /* Device is OK or failover succeed */ - dev->ibd_next_failover = 
cfs_time_shift(3); - continue; - } - - /* failed to failover, retry later */ - dev->ibd_next_failover = - cfs_time_shift(min(dev->ibd_failed_failover, 10)); - if (kiblnd_dev_can_failover(dev)) { - list_add_tail(&dev->ibd_fail_list, - &kiblnd_data.kib_failed_devs); - } - - continue; - } - - /* long sleep if no more pending failover */ - long_sleep = list_empty(&kiblnd_data.kib_failed_devs); - - set_current_state(TASK_INTERRUPTIBLE); - add_wait_queue(&kiblnd_data.kib_failover_waitq, &wait); - write_unlock_irqrestore(glock, flags); - - rc = schedule_timeout(long_sleep ? 10 * HZ : - HZ); - remove_wait_queue(&kiblnd_data.kib_failover_waitq, &wait); - write_lock_irqsave(glock, flags); - - if (!long_sleep || rc) - continue; - - /* - * have a long sleep, routine check all active devices, - * we need checking like this because if there is not active - * connection on the dev and no SEND from local, we may listen - * on wrong HCA for ever while there is a bonding failover - */ - list_for_each_entry(dev, &kiblnd_data.kib_devs, ibd_list) { - if (kiblnd_dev_can_failover(dev)) { - list_add_tail(&dev->ibd_fail_list, - &kiblnd_data.kib_failed_devs); - } - } - } - - write_unlock_irqrestore(glock, flags); - - kiblnd_thread_fini(); - return 0; -} diff --git a/drivers/staging/lustre/lnet/klnds/o2iblnd/o2iblnd_modparams.c b/drivers/staging/lustre/lnet/klnds/o2iblnd/o2iblnd_modparams.c deleted file mode 100644 index b9235400bf1d..000000000000 --- a/drivers/staging/lustre/lnet/klnds/o2iblnd/o2iblnd_modparams.c +++ /dev/null @@ -1,287 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -/* - * GPL HEADER START - * - * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 only, - * as published by the Free Software Foundation. 
- * - * This program is distributed in the hope that it will be useful, but - * WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * General Public License version 2 for more details (a copy is included - * in the LICENSE file that accompanied this code). - * - * You should have received a copy of the GNU General Public License - * version 2 along with this program; If not, see - * http://www.gnu.org/licenses/gpl-2.0.html - * - * GPL HEADER END - */ -/* - * Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved. - * Use is subject to license terms. - * - * Copyright (c) 2012, Intel Corporation. - */ -/* - * This file is part of Lustre, http://www.lustre.org/ - * Lustre is a trademark of Sun Microsystems, Inc. - * - * lnet/klnds/o2iblnd/o2iblnd_modparams.c - * - * Author: Eric Barton <eric@bartonsoftware.com> - */ - -#include "o2iblnd.h" - -static int service = 987; -module_param(service, int, 0444); -MODULE_PARM_DESC(service, "service number (within RDMA_PS_TCP)"); - -static int cksum; -module_param(cksum, int, 0644); -MODULE_PARM_DESC(cksum, "set non-zero to enable message (not RDMA) checksums"); - -static int timeout = 50; -module_param(timeout, int, 0644); -MODULE_PARM_DESC(timeout, "timeout (seconds)"); - -/* - * Number of threads in each scheduler pool which is percpt, - * we will estimate reasonable value based on CPUs if it's set to zero. 
- */ -static int nscheds; -module_param(nscheds, int, 0444); -MODULE_PARM_DESC(nscheds, "number of threads in each scheduler pool"); - -/* NB: this value is shared by all CPTs, it can grow at runtime */ -static int ntx = 512; -module_param(ntx, int, 0444); -MODULE_PARM_DESC(ntx, "# of message descriptors allocated for each pool"); - -/* NB: this value is shared by all CPTs */ -static int credits = 256; -module_param(credits, int, 0444); -MODULE_PARM_DESC(credits, "# concurrent sends"); - -static int peer_credits = 8; -module_param(peer_credits, int, 0444); -MODULE_PARM_DESC(peer_credits, "# concurrent sends to 1 peer"); - -static int peer_credits_hiw; -module_param(peer_credits_hiw, int, 0444); -MODULE_PARM_DESC(peer_credits_hiw, "when eagerly to return credits"); - -static int peer_buffer_credits; -module_param(peer_buffer_credits, int, 0444); -MODULE_PARM_DESC(peer_buffer_credits, "# per-peer router buffer credits"); - -static int peer_timeout = 180; -module_param(peer_timeout, int, 0444); -MODULE_PARM_DESC(peer_timeout, "Seconds without aliveness news to declare peer dead (<=0 to disable)"); - -static char *ipif_name = "ib0"; -module_param(ipif_name, charp, 0444); -MODULE_PARM_DESC(ipif_name, "IPoIB interface name"); - -static int retry_count = 5; -module_param(retry_count, int, 0644); -MODULE_PARM_DESC(retry_count, "Retransmissions when no ACK received"); - -static int rnr_retry_count = 6; -module_param(rnr_retry_count, int, 0644); -MODULE_PARM_DESC(rnr_retry_count, "RNR retransmissions"); - -static int keepalive = 100; -module_param(keepalive, int, 0644); -MODULE_PARM_DESC(keepalive, "Idle time in seconds before sending a keepalive"); - -static int ib_mtu; -module_param(ib_mtu, int, 0444); -MODULE_PARM_DESC(ib_mtu, "IB MTU 256/512/1024/2048/4096"); - -static int concurrent_sends; -module_param(concurrent_sends, int, 0444); -MODULE_PARM_DESC(concurrent_sends, "send work-queue sizing"); - -#define IBLND_DEFAULT_MAP_ON_DEMAND IBLND_MAX_RDMA_FRAGS -static int 
map_on_demand = IBLND_DEFAULT_MAP_ON_DEMAND; -module_param(map_on_demand, int, 0444); -MODULE_PARM_DESC(map_on_demand, "map on demand"); - -/* NB: this value is shared by all CPTs, it can grow at runtime */ -static int fmr_pool_size = 512; -module_param(fmr_pool_size, int, 0444); -MODULE_PARM_DESC(fmr_pool_size, "size of fmr pool on each CPT (>= ntx / 4)"); - -/* NB: this value is shared by all CPTs, it can grow at runtime */ -static int fmr_flush_trigger = 384; -module_param(fmr_flush_trigger, int, 0444); -MODULE_PARM_DESC(fmr_flush_trigger, "# dirty FMRs that triggers pool flush"); - -static int fmr_cache = 1; -module_param(fmr_cache, int, 0444); -MODULE_PARM_DESC(fmr_cache, "non-zero to enable FMR caching"); - -/* - * 0: disable failover - * 1: enable failover if necessary - * 2: force to failover (for debug) - */ -static int dev_failover; -module_param(dev_failover, int, 0444); -MODULE_PARM_DESC(dev_failover, "HCA failover for bonding (0 off, 1 on, other values reserved)"); - -static int require_privileged_port; -module_param(require_privileged_port, int, 0644); -MODULE_PARM_DESC(require_privileged_port, "require privileged port when accepting connection"); - -static int use_privileged_port = 1; -module_param(use_privileged_port, int, 0644); -MODULE_PARM_DESC(use_privileged_port, "use privileged port when initiating connection"); - -struct kib_tunables kiblnd_tunables = { - .kib_dev_failover = &dev_failover, - .kib_service = &service, - .kib_cksum = &cksum, - .kib_timeout = &timeout, - .kib_keepalive = &keepalive, - .kib_ntx = &ntx, - .kib_default_ipif = &ipif_name, - .kib_retry_count = &retry_count, - .kib_rnr_retry_count = &rnr_retry_count, - .kib_ib_mtu = &ib_mtu, - .kib_require_priv_port = &require_privileged_port, - .kib_use_priv_port = &use_privileged_port, - .kib_nscheds = &nscheds -}; - -static struct lnet_ioctl_config_o2iblnd_tunables default_tunables; - -/* # messages/RDMAs in-flight */ -int kiblnd_msg_queue_size(int version, struct lnet_ni *ni) -{ - 
if (version == IBLND_MSG_VERSION_1) - return IBLND_MSG_QUEUE_SIZE_V1; - else if (ni) - return ni->ni_peertxcredits; - else - return peer_credits; -} - -int kiblnd_tunables_setup(struct lnet_ni *ni) -{ - struct lnet_ioctl_config_o2iblnd_tunables *tunables; - - /* - * if there was no tunables specified, setup the tunables to be - * defaulted - */ - if (!ni->ni_lnd_tunables) { - ni->ni_lnd_tunables = kzalloc(sizeof(*ni->ni_lnd_tunables), - GFP_NOFS); - if (!ni->ni_lnd_tunables) - return -ENOMEM; - - memcpy(&ni->ni_lnd_tunables->lt_tun_u.lt_o2ib, - &default_tunables, sizeof(*tunables)); - } - tunables = &ni->ni_lnd_tunables->lt_tun_u.lt_o2ib; - - /* Current API version */ - tunables->lnd_version = 0; - - if (kiblnd_translate_mtu(*kiblnd_tunables.kib_ib_mtu) < 0) { - CERROR("Invalid ib_mtu %d, expected 256/512/1024/2048/4096\n", - *kiblnd_tunables.kib_ib_mtu); - return -EINVAL; - } - - if (!ni->ni_peertimeout) - ni->ni_peertimeout = peer_timeout; - - if (!ni->ni_maxtxcredits) - ni->ni_maxtxcredits = credits; - - if (!ni->ni_peertxcredits) - ni->ni_peertxcredits = peer_credits; - - if (!ni->ni_peerrtrcredits) - ni->ni_peerrtrcredits = peer_buffer_credits; - - if (ni->ni_peertxcredits < IBLND_CREDITS_DEFAULT) - ni->ni_peertxcredits = IBLND_CREDITS_DEFAULT; - - if (ni->ni_peertxcredits > IBLND_CREDITS_MAX) - ni->ni_peertxcredits = IBLND_CREDITS_MAX; - - if (ni->ni_peertxcredits > credits) - ni->ni_peertxcredits = credits; - - if (!tunables->lnd_peercredits_hiw) - tunables->lnd_peercredits_hiw = peer_credits_hiw; - - if (tunables->lnd_peercredits_hiw < ni->ni_peertxcredits / 2) - tunables->lnd_peercredits_hiw = ni->ni_peertxcredits / 2; - - if (tunables->lnd_peercredits_hiw >= ni->ni_peertxcredits) - tunables->lnd_peercredits_hiw = ni->ni_peertxcredits - 1; - - if (tunables->lnd_map_on_demand <= 0 || - tunables->lnd_map_on_demand > IBLND_MAX_RDMA_FRAGS) { - /* Use the default */ - CWARN("Invalid map_on_demand (%d), expects 1 - %d. 
Using default of %d\n", - tunables->lnd_map_on_demand, - IBLND_MAX_RDMA_FRAGS, IBLND_DEFAULT_MAP_ON_DEMAND); - tunables->lnd_map_on_demand = IBLND_DEFAULT_MAP_ON_DEMAND; - } - - if (tunables->lnd_map_on_demand == 1) { - /* don't make sense to create map if only one fragment */ - tunables->lnd_map_on_demand = 2; - } - - if (!tunables->lnd_concurrent_sends) { - if (tunables->lnd_map_on_demand > 0 && - tunables->lnd_map_on_demand <= IBLND_MAX_RDMA_FRAGS / 8) { - tunables->lnd_concurrent_sends = - ni->ni_peertxcredits * 2; - } else { - tunables->lnd_concurrent_sends = ni->ni_peertxcredits; - } - } - - if (tunables->lnd_concurrent_sends > ni->ni_peertxcredits * 2) - tunables->lnd_concurrent_sends = ni->ni_peertxcredits * 2; - - if (tunables->lnd_concurrent_sends < ni->ni_peertxcredits / 2) - tunables->lnd_concurrent_sends = ni->ni_peertxcredits / 2; - - if (tunables->lnd_concurrent_sends < ni->ni_peertxcredits) { - CWARN("Concurrent sends %d is lower than message queue size: %d, performance may drop slightly.\n", - tunables->lnd_concurrent_sends, ni->ni_peertxcredits); - } - - if (!tunables->lnd_fmr_pool_size) - tunables->lnd_fmr_pool_size = fmr_pool_size; - if (!tunables->lnd_fmr_flush_trigger) - tunables->lnd_fmr_flush_trigger = fmr_flush_trigger; - if (!tunables->lnd_fmr_cache) - tunables->lnd_fmr_cache = fmr_cache; - - return 0; -} - -void kiblnd_tunables_init(void) -{ - default_tunables.lnd_version = 0; - default_tunables.lnd_peercredits_hiw = peer_credits_hiw, - default_tunables.lnd_map_on_demand = map_on_demand; - default_tunables.lnd_concurrent_sends = concurrent_sends; - default_tunables.lnd_fmr_pool_size = fmr_pool_size; - default_tunables.lnd_fmr_flush_trigger = fmr_flush_trigger; - default_tunables.lnd_fmr_cache = fmr_cache; -} diff --git a/drivers/staging/lustre/lnet/klnds/socklnd/Makefile b/drivers/staging/lustre/lnet/klnds/socklnd/Makefile deleted file mode 100644 index a7da1abfc804..000000000000 --- a/drivers/staging/lustre/lnet/klnds/socklnd/Makefile 
+++ /dev/null @@ -1,6 +0,0 @@ -subdir-ccflags-y += -I$(srctree)/drivers/staging/lustre/include -subdir-ccflags-y += -I$(srctree)/drivers/staging/lustre/lustre/include - -obj-$(CONFIG_LNET) += ksocklnd.o - -ksocklnd-y := socklnd.o socklnd_cb.o socklnd_proto.o socklnd_modparams.o socklnd_lib.o diff --git a/drivers/staging/lustre/lnet/klnds/socklnd/socklnd.c b/drivers/staging/lustre/lnet/klnds/socklnd/socklnd.c deleted file mode 100644 index 7086678e1c3e..000000000000 --- a/drivers/staging/lustre/lnet/klnds/socklnd/socklnd.c +++ /dev/null @@ -1,2918 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -/* - * GPL HEADER START - * - * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 only, - * as published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, but - * WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * General Public License version 2 for more details (a copy is included - * in the LICENSE file that accompanied this code). - * - * You should have received a copy of the GNU General Public License - * version 2 along with this program; If not, see - * http://www.gnu.org/licenses/gpl-2.0.html - * - * GPL HEADER END - */ -/* - * Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved. - * Use is subject to license terms. - * - * Copyright (c) 2011, 2015, Intel Corporation. - */ -/* - * This file is part of Lustre, http://www.lustre.org/ - * Lustre is a trademark of Sun Microsystems, Inc. - * - * lnet/klnds/socklnd/socklnd.c - * - * Author: Zach Brown <zab@zabbo.net> - * Author: Peter J. 
Braam <braam@clusterfs.com> - * Author: Phil Schwan <phil@clusterfs.com> - * Author: Eric Barton <eric@bartonsoftware.com> - */ - -#include "socklnd.h" - -static struct lnet_lnd the_ksocklnd; -struct ksock_nal_data ksocknal_data; - -static struct ksock_interface * -ksocknal_ip2iface(struct lnet_ni *ni, __u32 ip) -{ - struct ksock_net *net = ni->ni_data; - int i; - struct ksock_interface *iface; - - for (i = 0; i < net->ksnn_ninterfaces; i++) { - LASSERT(i < LNET_MAX_INTERFACES); - iface = &net->ksnn_interfaces[i]; - - if (iface->ksni_ipaddr == ip) - return iface; - } - - return NULL; -} - -static struct ksock_route * -ksocknal_create_route(__u32 ipaddr, int port) -{ - struct ksock_route *route; - - route = kzalloc(sizeof(*route), GFP_NOFS); - if (!route) - return NULL; - - atomic_set(&route->ksnr_refcount, 1); - route->ksnr_peer = NULL; - route->ksnr_retry_interval = 0; /* OK to connect at any time */ - route->ksnr_ipaddr = ipaddr; - route->ksnr_port = port; - route->ksnr_scheduled = 0; - route->ksnr_connecting = 0; - route->ksnr_connected = 0; - route->ksnr_deleted = 0; - route->ksnr_conn_count = 0; - route->ksnr_share_count = 0; - - return route; -} - -void -ksocknal_destroy_route(struct ksock_route *route) -{ - LASSERT(!atomic_read(&route->ksnr_refcount)); - - if (route->ksnr_peer) - ksocknal_peer_decref(route->ksnr_peer); - - kfree(route); -} - -static int -ksocknal_create_peer(struct ksock_peer **peerp, struct lnet_ni *ni, - struct lnet_process_id id) -{ - int cpt = lnet_cpt_of_nid(id.nid); - struct ksock_net *net = ni->ni_data; - struct ksock_peer *peer; - - LASSERT(id.nid != LNET_NID_ANY); - LASSERT(id.pid != LNET_PID_ANY); - LASSERT(!in_interrupt()); - - peer = kzalloc_cpt(sizeof(*peer), GFP_NOFS, cpt); - if (!peer) - return -ENOMEM; - - peer->ksnp_ni = ni; - peer->ksnp_id = id; - atomic_set(&peer->ksnp_refcount, 1); /* 1 ref for caller */ - peer->ksnp_closing = 0; - peer->ksnp_accepting = 0; - peer->ksnp_proto = NULL; - peer->ksnp_last_alive = 0; - 
peer->ksnp_zc_next_cookie = SOCKNAL_KEEPALIVE_PING + 1; - - INIT_LIST_HEAD(&peer->ksnp_conns); - INIT_LIST_HEAD(&peer->ksnp_routes); - INIT_LIST_HEAD(&peer->ksnp_tx_queue); - INIT_LIST_HEAD(&peer->ksnp_zc_req_list); - spin_lock_init(&peer->ksnp_lock); - - spin_lock_bh(&net->ksnn_lock); - - if (net->ksnn_shutdown) { - spin_unlock_bh(&net->ksnn_lock); - - kfree(peer); - CERROR("Can't create peer: network shutdown\n"); - return -ESHUTDOWN; - } - - net->ksnn_npeers++; - - spin_unlock_bh(&net->ksnn_lock); - - *peerp = peer; - return 0; -} - -void -ksocknal_destroy_peer(struct ksock_peer *peer) -{ - struct ksock_net *net = peer->ksnp_ni->ni_data; - - CDEBUG(D_NET, "peer %s %p deleted\n", - libcfs_id2str(peer->ksnp_id), peer); - - LASSERT(!atomic_read(&peer->ksnp_refcount)); - LASSERT(!peer->ksnp_accepting); - LASSERT(list_empty(&peer->ksnp_conns)); - LASSERT(list_empty(&peer->ksnp_routes)); - LASSERT(list_empty(&peer->ksnp_tx_queue)); - LASSERT(list_empty(&peer->ksnp_zc_req_list)); - - kfree(peer); - - /* - * NB a peer's connections and routes keep a reference on their peer - * until they are destroyed, so we can be assured that _all_ state to - * do with this peer has been cleaned up when its refcount drops to - * zero. 
- */ - spin_lock_bh(&net->ksnn_lock); - net->ksnn_npeers--; - spin_unlock_bh(&net->ksnn_lock); -} - -struct ksock_peer * -ksocknal_find_peer_locked(struct lnet_ni *ni, struct lnet_process_id id) -{ - struct list_head *peer_list = ksocknal_nid2peerlist(id.nid); - struct ksock_peer *peer; - - list_for_each_entry(peer, peer_list, ksnp_list) { - LASSERT(!peer->ksnp_closing); - - if (peer->ksnp_ni != ni) - continue; - - if (peer->ksnp_id.nid != id.nid || - peer->ksnp_id.pid != id.pid) - continue; - - CDEBUG(D_NET, "got peer [%p] -> %s (%d)\n", - peer, libcfs_id2str(id), - atomic_read(&peer->ksnp_refcount)); - return peer; - } - return NULL; -} - -struct ksock_peer * -ksocknal_find_peer(struct lnet_ni *ni, struct lnet_process_id id) -{ - struct ksock_peer *peer; - - read_lock(&ksocknal_data.ksnd_global_lock); - peer = ksocknal_find_peer_locked(ni, id); - if (peer) /* +1 ref for caller? */ - ksocknal_peer_addref(peer); - read_unlock(&ksocknal_data.ksnd_global_lock); - - return peer; -} - -static void -ksocknal_unlink_peer_locked(struct ksock_peer *peer) -{ - int i; - __u32 ip; - struct ksock_interface *iface; - - for (i = 0; i < peer->ksnp_n_passive_ips; i++) { - LASSERT(i < LNET_MAX_INTERFACES); - ip = peer->ksnp_passive_ips[i]; - - iface = ksocknal_ip2iface(peer->ksnp_ni, ip); - /* - * All IPs in peer->ksnp_passive_ips[] come from the - * interface list, therefore the call must succeed. 
- */ - LASSERT(iface); - - CDEBUG(D_NET, "peer=%p iface=%p ksni_nroutes=%d\n", - peer, iface, iface->ksni_nroutes); - iface->ksni_npeers--; - } - - LASSERT(list_empty(&peer->ksnp_conns)); - LASSERT(list_empty(&peer->ksnp_routes)); - LASSERT(!peer->ksnp_closing); - peer->ksnp_closing = 1; - list_del(&peer->ksnp_list); - /* lose peerlist's ref */ - ksocknal_peer_decref(peer); -} - -static int -ksocknal_get_peer_info(struct lnet_ni *ni, int index, - struct lnet_process_id *id, __u32 *myip, __u32 *peer_ip, - int *port, int *conn_count, int *share_count) -{ - struct ksock_peer *peer; - struct list_head *ptmp; - struct ksock_route *route; - struct list_head *rtmp; - int i; - int j; - int rc = -ENOENT; - - read_lock(&ksocknal_data.ksnd_global_lock); - - for (i = 0; i < ksocknal_data.ksnd_peer_hash_size; i++) { - list_for_each(ptmp, &ksocknal_data.ksnd_peers[i]) { - peer = list_entry(ptmp, struct ksock_peer, ksnp_list); - - if (peer->ksnp_ni != ni) - continue; - - if (!peer->ksnp_n_passive_ips && - list_empty(&peer->ksnp_routes)) { - if (index-- > 0) - continue; - - *id = peer->ksnp_id; - *myip = 0; - *peer_ip = 0; - *port = 0; - *conn_count = 0; - *share_count = 0; - rc = 0; - goto out; - } - - for (j = 0; j < peer->ksnp_n_passive_ips; j++) { - if (index-- > 0) - continue; - - *id = peer->ksnp_id; - *myip = peer->ksnp_passive_ips[j]; - *peer_ip = 0; - *port = 0; - *conn_count = 0; - *share_count = 0; - rc = 0; - goto out; - } - - list_for_each(rtmp, &peer->ksnp_routes) { - if (index-- > 0) - continue; - - route = list_entry(rtmp, struct ksock_route, - ksnr_list); - - *id = peer->ksnp_id; - *myip = route->ksnr_myipaddr; - *peer_ip = route->ksnr_ipaddr; - *port = route->ksnr_port; - *conn_count = route->ksnr_conn_count; - *share_count = route->ksnr_share_count; - rc = 0; - goto out; - } - } - } - out: - read_unlock(&ksocknal_data.ksnd_global_lock); - return rc; -} - -static void -ksocknal_associate_route_conn_locked(struct ksock_route *route, - struct ksock_conn *conn) -{ - 
struct ksock_peer *peer = route->ksnr_peer; - int type = conn->ksnc_type; - struct ksock_interface *iface; - - conn->ksnc_route = route; - ksocknal_route_addref(route); - - if (route->ksnr_myipaddr != conn->ksnc_myipaddr) { - if (!route->ksnr_myipaddr) { - /* route wasn't bound locally yet (the initial route) */ - CDEBUG(D_NET, "Binding %s %pI4h to %pI4h\n", - libcfs_id2str(peer->ksnp_id), - &route->ksnr_ipaddr, - &conn->ksnc_myipaddr); - } else { - CDEBUG(D_NET, "Rebinding %s %pI4h from %pI4h to %pI4h\n", - libcfs_id2str(peer->ksnp_id), - &route->ksnr_ipaddr, - &route->ksnr_myipaddr, - &conn->ksnc_myipaddr); - - iface = ksocknal_ip2iface(route->ksnr_peer->ksnp_ni, - route->ksnr_myipaddr); - if (iface) - iface->ksni_nroutes--; - } - route->ksnr_myipaddr = conn->ksnc_myipaddr; - iface = ksocknal_ip2iface(route->ksnr_peer->ksnp_ni, - route->ksnr_myipaddr); - if (iface) - iface->ksni_nroutes++; - } - - route->ksnr_connected |= (1 << type); - route->ksnr_conn_count++; - - /* - * Successful connection => further attempts can - * proceed immediately - */ - route->ksnr_retry_interval = 0; -} - -static void -ksocknal_add_route_locked(struct ksock_peer *peer, struct ksock_route *route) -{ - struct list_head *tmp; - struct ksock_conn *conn; - struct ksock_route *route2; - - LASSERT(!peer->ksnp_closing); - LASSERT(!route->ksnr_peer); - LASSERT(!route->ksnr_scheduled); - LASSERT(!route->ksnr_connecting); - LASSERT(!route->ksnr_connected); - - /* LASSERT(unique) */ - list_for_each(tmp, &peer->ksnp_routes) { - route2 = list_entry(tmp, struct ksock_route, ksnr_list); - - if (route2->ksnr_ipaddr == route->ksnr_ipaddr) { - CERROR("Duplicate route %s %pI4h\n", - libcfs_id2str(peer->ksnp_id), - &route->ksnr_ipaddr); - LBUG(); - } - } - - route->ksnr_peer = peer; - ksocknal_peer_addref(peer); - /* peer's routelist takes over my ref on 'route' */ - list_add_tail(&route->ksnr_list, &peer->ksnp_routes); - - list_for_each(tmp, &peer->ksnp_conns) { - conn = list_entry(tmp, struct 
ksock_conn, ksnc_list); - - if (conn->ksnc_ipaddr != route->ksnr_ipaddr) - continue; - - ksocknal_associate_route_conn_locked(route, conn); - /* keep going (typed routes) */ - } -} - -static void -ksocknal_del_route_locked(struct ksock_route *route) -{ - struct ksock_peer *peer = route->ksnr_peer; - struct ksock_interface *iface; - struct ksock_conn *conn; - struct list_head *ctmp; - struct list_head *cnxt; - - LASSERT(!route->ksnr_deleted); - - /* Close associated conns */ - list_for_each_safe(ctmp, cnxt, &peer->ksnp_conns) { - conn = list_entry(ctmp, struct ksock_conn, ksnc_list); - - if (conn->ksnc_route != route) - continue; - - ksocknal_close_conn_locked(conn, 0); - } - - if (route->ksnr_myipaddr) { - iface = ksocknal_ip2iface(route->ksnr_peer->ksnp_ni, - route->ksnr_myipaddr); - if (iface) - iface->ksni_nroutes--; - } - - route->ksnr_deleted = 1; - list_del(&route->ksnr_list); - ksocknal_route_decref(route); /* drop peer's ref */ - - if (list_empty(&peer->ksnp_routes) && - list_empty(&peer->ksnp_conns)) { - /* - * I've just removed the last route to a peer with no active - * connections - */ - ksocknal_unlink_peer_locked(peer); - } -} - -int -ksocknal_add_peer(struct lnet_ni *ni, struct lnet_process_id id, __u32 ipaddr, - int port) -{ - struct ksock_peer *peer; - struct ksock_peer *peer2; - struct ksock_route *route; - struct ksock_route *route2; - int rc; - - if (id.nid == LNET_NID_ANY || - id.pid == LNET_PID_ANY) - return -EINVAL; - - /* Have a brand new peer ready... 
*/ - rc = ksocknal_create_peer(&peer, ni, id); - if (rc) - return rc; - - route = ksocknal_create_route(ipaddr, port); - if (!route) { - ksocknal_peer_decref(peer); - return -ENOMEM; - } - - write_lock_bh(&ksocknal_data.ksnd_global_lock); - - /* always called with a ref on ni, so shutdown can't have started */ - LASSERT(!((struct ksock_net *)ni->ni_data)->ksnn_shutdown); - - peer2 = ksocknal_find_peer_locked(ni, id); - if (peer2) { - ksocknal_peer_decref(peer); - peer = peer2; - } else { - /* peer table takes my ref on peer */ - list_add_tail(&peer->ksnp_list, - ksocknal_nid2peerlist(id.nid)); - } - - list_for_each_entry(route2, &peer->ksnp_routes, ksnr_list) { - if (route2->ksnr_ipaddr == ipaddr) { - /* Route already exists, use the old one */ - ksocknal_route_decref(route); - route2->ksnr_share_count++; - goto out; - } - } - /* Route doesn't already exist, add the new one */ - ksocknal_add_route_locked(peer, route); - route->ksnr_share_count++; -out: - write_unlock_bh(&ksocknal_data.ksnd_global_lock); - - return 0; -} - -static void -ksocknal_del_peer_locked(struct ksock_peer *peer, __u32 ip) -{ - struct ksock_conn *conn; - struct ksock_route *route; - struct list_head *tmp; - struct list_head *nxt; - int nshared; - - LASSERT(!peer->ksnp_closing); - - /* Extra ref prevents peer disappearing until I'm done with it */ - ksocknal_peer_addref(peer); - - list_for_each_safe(tmp, nxt, &peer->ksnp_routes) { - route = list_entry(tmp, struct ksock_route, ksnr_list); - - /* no match */ - if (!(!ip || route->ksnr_ipaddr == ip)) - continue; - - route->ksnr_share_count = 0; - /* This deletes associated conns too */ - ksocknal_del_route_locked(route); - } - - nshared = 0; - list_for_each_safe(tmp, nxt, &peer->ksnp_routes) { - route = list_entry(tmp, struct ksock_route, ksnr_list); - nshared += route->ksnr_share_count; - } - - if (!nshared) { - /* - * remove everything else if there are no explicit entries - * left - */ - list_for_each_safe(tmp, nxt, &peer->ksnp_routes) { - 
route = list_entry(tmp, struct ksock_route, ksnr_list); - - /* we should only be removing auto-entries */ - LASSERT(!route->ksnr_share_count); - ksocknal_del_route_locked(route); - } - - list_for_each_safe(tmp, nxt, &peer->ksnp_conns) { - conn = list_entry(tmp, struct ksock_conn, ksnc_list); - - ksocknal_close_conn_locked(conn, 0); - } - } - - ksocknal_peer_decref(peer); - /* NB peer unlinks itself when last conn/route is removed */ -} - -static int -ksocknal_del_peer(struct lnet_ni *ni, struct lnet_process_id id, __u32 ip) -{ - LIST_HEAD(zombies); - struct list_head *ptmp; - struct list_head *pnxt; - struct ksock_peer *peer; - int lo; - int hi; - int i; - int rc = -ENOENT; - - write_lock_bh(&ksocknal_data.ksnd_global_lock); - - if (id.nid != LNET_NID_ANY) { - lo = (int)(ksocknal_nid2peerlist(id.nid) - ksocknal_data.ksnd_peers); - hi = (int)(ksocknal_nid2peerlist(id.nid) - ksocknal_data.ksnd_peers); - } else { - lo = 0; - hi = ksocknal_data.ksnd_peer_hash_size - 1; - } - - for (i = lo; i <= hi; i++) { - list_for_each_safe(ptmp, pnxt, &ksocknal_data.ksnd_peers[i]) { - peer = list_entry(ptmp, struct ksock_peer, ksnp_list); - - if (peer->ksnp_ni != ni) - continue; - - if (!((id.nid == LNET_NID_ANY || peer->ksnp_id.nid == id.nid) && - (id.pid == LNET_PID_ANY || peer->ksnp_id.pid == id.pid))) - continue; - - ksocknal_peer_addref(peer); /* a ref for me... */ - - ksocknal_del_peer_locked(peer, ip); - - if (peer->ksnp_closing && - !list_empty(&peer->ksnp_tx_queue)) { - LASSERT(list_empty(&peer->ksnp_conns)); - LASSERT(list_empty(&peer->ksnp_routes)); - - list_splice_init(&peer->ksnp_tx_queue, - &zombies); - } - - ksocknal_peer_decref(peer); /* ...till here */ - - rc = 0; /* matched! 
*/ - } - } - - write_unlock_bh(&ksocknal_data.ksnd_global_lock); - - ksocknal_txlist_done(ni, &zombies, 1); - - return rc; -} - -static struct ksock_conn * -ksocknal_get_conn_by_idx(struct lnet_ni *ni, int index) -{ - struct ksock_peer *peer; - struct list_head *ptmp; - struct ksock_conn *conn; - struct list_head *ctmp; - int i; - - read_lock(&ksocknal_data.ksnd_global_lock); - - for (i = 0; i < ksocknal_data.ksnd_peer_hash_size; i++) { - list_for_each(ptmp, &ksocknal_data.ksnd_peers[i]) { - peer = list_entry(ptmp, struct ksock_peer, ksnp_list); - - LASSERT(!peer->ksnp_closing); - - if (peer->ksnp_ni != ni) - continue; - - list_for_each(ctmp, &peer->ksnp_conns) { - if (index-- > 0) - continue; - - conn = list_entry(ctmp, struct ksock_conn, - ksnc_list); - ksocknal_conn_addref(conn); - read_unlock(&ksocknal_data.ksnd_global_lock); - return conn; - } - } - } - - read_unlock(&ksocknal_data.ksnd_global_lock); - return NULL; -} - -static struct ksock_sched * -ksocknal_choose_scheduler_locked(unsigned int cpt) -{ - struct ksock_sched_info *info = ksocknal_data.ksnd_sched_info[cpt]; - struct ksock_sched *sched; - int i; - - LASSERT(info->ksi_nthreads > 0); - - sched = &info->ksi_scheds[0]; - /* - * NB: it's safe so far, but info->ksi_nthreads could be changed - * at runtime when we have dynamic LNet configuration, then we - * need to take care of this. - */ - for (i = 1; i < info->ksi_nthreads; i++) { - if (sched->kss_nconns > info->ksi_scheds[i].kss_nconns) - sched = &info->ksi_scheds[i]; - } - - return sched; -} - -static int -ksocknal_local_ipvec(struct lnet_ni *ni, __u32 *ipaddrs) -{ - struct ksock_net *net = ni->ni_data; - int i; - int nip; - - read_lock(&ksocknal_data.ksnd_global_lock); - - nip = net->ksnn_ninterfaces; - LASSERT(nip <= LNET_MAX_INTERFACES); - - /* - * Only offer interfaces for additional connections if I have - * more than one. 
- */ - if (nip < 2) { - read_unlock(&ksocknal_data.ksnd_global_lock); - return 0; - } - - for (i = 0; i < nip; i++) { - ipaddrs[i] = net->ksnn_interfaces[i].ksni_ipaddr; - LASSERT(ipaddrs[i]); - } - - read_unlock(&ksocknal_data.ksnd_global_lock); - return nip; -} - -static int -ksocknal_match_peerip(struct ksock_interface *iface, __u32 *ips, int nips) -{ - int best_netmatch = 0; - int best_xor = 0; - int best = -1; - int this_xor; - int this_netmatch; - int i; - - for (i = 0; i < nips; i++) { - if (!ips[i]) - continue; - - this_xor = ips[i] ^ iface->ksni_ipaddr; - this_netmatch = !(this_xor & iface->ksni_netmask) ? 1 : 0; - - if (!(best < 0 || - best_netmatch < this_netmatch || - (best_netmatch == this_netmatch && - best_xor > this_xor))) - continue; - - best = i; - best_netmatch = this_netmatch; - best_xor = this_xor; - } - - LASSERT(best >= 0); - return best; -} - -static int -ksocknal_select_ips(struct ksock_peer *peer, __u32 *peerips, int n_peerips) -{ - rwlock_t *global_lock = &ksocknal_data.ksnd_global_lock; - struct ksock_net *net = peer->ksnp_ni->ni_data; - struct ksock_interface *iface; - struct ksock_interface *best_iface; - int n_ips; - int i; - int j; - int k; - __u32 ip; - __u32 xor; - int this_netmatch; - int best_netmatch; - int best_npeers; - - /* - * CAVEAT EMPTOR: We do all our interface matching with an - * exclusive hold of global lock at IRQ priority. We're only - * expecting to be dealing with small numbers of interfaces, so the - * O(n**3)-ness shouldn't matter - */ - /* - * Also note that I'm not going to return more than n_peerips - * interfaces, even if I have more myself - */ - write_lock_bh(global_lock); - - LASSERT(n_peerips <= LNET_MAX_INTERFACES); - LASSERT(net->ksnn_ninterfaces <= LNET_MAX_INTERFACES); - - /* - * Only match interfaces for additional connections - * if I have > 1 interface - */ - n_ips = (net->ksnn_ninterfaces < 2) ? 
0 : - min(n_peerips, net->ksnn_ninterfaces); - - for (i = 0; peer->ksnp_n_passive_ips < n_ips; i++) { - /* ^ yes really... */ - - /* - * If we have any new interfaces, first tick off all the - * peer IPs that match old interfaces, then choose new - * interfaces to match the remaining peer IPS. - * We don't forget interfaces we've stopped using; we might - * start using them again... - */ - if (i < peer->ksnp_n_passive_ips) { - /* Old interface. */ - ip = peer->ksnp_passive_ips[i]; - best_iface = ksocknal_ip2iface(peer->ksnp_ni, ip); - - /* peer passive ips are kept up to date */ - LASSERT(best_iface); - } else { - /* choose a new interface */ - LASSERT(i == peer->ksnp_n_passive_ips); - - best_iface = NULL; - best_netmatch = 0; - best_npeers = 0; - - for (j = 0; j < net->ksnn_ninterfaces; j++) { - iface = &net->ksnn_interfaces[j]; - ip = iface->ksni_ipaddr; - - for (k = 0; k < peer->ksnp_n_passive_ips; k++) - if (peer->ksnp_passive_ips[k] == ip) - break; - - if (k < peer->ksnp_n_passive_ips) /* using it already */ - continue; - - k = ksocknal_match_peerip(iface, peerips, - n_peerips); - xor = ip ^ peerips[k]; - this_netmatch = !(xor & iface->ksni_netmask) ? 
1 : 0; - - if (!(!best_iface || - best_netmatch < this_netmatch || - (best_netmatch == this_netmatch && - best_npeers > iface->ksni_npeers))) - continue; - - best_iface = iface; - best_netmatch = this_netmatch; - best_npeers = iface->ksni_npeers; - } - - LASSERT(best_iface); - - best_iface->ksni_npeers++; - ip = best_iface->ksni_ipaddr; - peer->ksnp_passive_ips[i] = ip; - peer->ksnp_n_passive_ips = i + 1; - } - - /* mark the best matching peer IP used */ - j = ksocknal_match_peerip(best_iface, peerips, n_peerips); - peerips[j] = 0; - } - - /* Overwrite input peer IP addresses */ - memcpy(peerips, peer->ksnp_passive_ips, n_ips * sizeof(*peerips)); - - write_unlock_bh(global_lock); - - return n_ips; -} - -static void -ksocknal_create_routes(struct ksock_peer *peer, int port, - __u32 *peer_ipaddrs, int npeer_ipaddrs) -{ - struct ksock_route *newroute = NULL; - rwlock_t *global_lock = &ksocknal_data.ksnd_global_lock; - struct lnet_ni *ni = peer->ksnp_ni; - struct ksock_net *net = ni->ni_data; - struct list_head *rtmp; - struct ksock_route *route; - struct ksock_interface *iface; - struct ksock_interface *best_iface; - int best_netmatch; - int this_netmatch; - int best_nroutes; - int i; - int j; - - /* - * CAVEAT EMPTOR: We do all our interface matching with an - * exclusive hold of global lock at IRQ priority. 
We're only - * expecting to be dealing with small numbers of interfaces, so the - * O(n**3)-ness here shouldn't matter - */ - write_lock_bh(global_lock); - - if (net->ksnn_ninterfaces < 2) { - /* - * Only create additional connections - * if I have > 1 interface - */ - write_unlock_bh(global_lock); - return; - } - - LASSERT(npeer_ipaddrs <= LNET_MAX_INTERFACES); - - for (i = 0; i < npeer_ipaddrs; i++) { - if (newroute) { - newroute->ksnr_ipaddr = peer_ipaddrs[i]; - } else { - write_unlock_bh(global_lock); - - newroute = ksocknal_create_route(peer_ipaddrs[i], port); - if (!newroute) - return; - - write_lock_bh(global_lock); - } - - if (peer->ksnp_closing) { - /* peer got closed under me */ - break; - } - - /* Already got a route? */ - route = NULL; - list_for_each(rtmp, &peer->ksnp_routes) { - route = list_entry(rtmp, struct ksock_route, ksnr_list); - - if (route->ksnr_ipaddr == newroute->ksnr_ipaddr) - break; - - route = NULL; - } - if (route) - continue; - - best_iface = NULL; - best_nroutes = 0; - best_netmatch = 0; - - LASSERT(net->ksnn_ninterfaces <= LNET_MAX_INTERFACES); - - /* Select interface to connect from */ - for (j = 0; j < net->ksnn_ninterfaces; j++) { - iface = &net->ksnn_interfaces[j]; - - /* Using this interface already? */ - list_for_each(rtmp, &peer->ksnp_routes) { - route = list_entry(rtmp, struct ksock_route, - ksnr_list); - - if (route->ksnr_myipaddr == iface->ksni_ipaddr) - break; - - route = NULL; - } - if (route) - continue; - - this_netmatch = (!((iface->ksni_ipaddr ^ - newroute->ksnr_ipaddr) & - iface->ksni_netmask)) ? 
1 : 0; - - if (!(!best_iface || - best_netmatch < this_netmatch || - (best_netmatch == this_netmatch && - best_nroutes > iface->ksni_nroutes))) - continue; - - best_iface = iface; - best_netmatch = this_netmatch; - best_nroutes = iface->ksni_nroutes; - } - - if (!best_iface) - continue; - - newroute->ksnr_myipaddr = best_iface->ksni_ipaddr; - best_iface->ksni_nroutes++; - - ksocknal_add_route_locked(peer, newroute); - newroute = NULL; - } - - write_unlock_bh(global_lock); - if (newroute) - ksocknal_route_decref(newroute); -} - -int -ksocknal_accept(struct lnet_ni *ni, struct socket *sock) -{ - struct ksock_connreq *cr; - int rc; - __u32 peer_ip; - int peer_port; - - rc = lnet_sock_getaddr(sock, 1, &peer_ip, &peer_port); - LASSERT(!rc); /* we succeeded before */ - - cr = kzalloc(sizeof(*cr), GFP_NOFS); - if (!cr) { - LCONSOLE_ERROR_MSG(0x12f, "Dropping connection request from %pI4h: memory exhausted\n", - &peer_ip); - return -ENOMEM; - } - - lnet_ni_addref(ni); - cr->ksncr_ni = ni; - cr->ksncr_sock = sock; - - spin_lock_bh(&ksocknal_data.ksnd_connd_lock); - - list_add_tail(&cr->ksncr_list, &ksocknal_data.ksnd_connd_connreqs); - wake_up(&ksocknal_data.ksnd_connd_waitq); - - spin_unlock_bh(&ksocknal_data.ksnd_connd_lock); - return 0; -} - -static int -ksocknal_connecting(struct ksock_peer *peer, __u32 ipaddr) -{ - struct ksock_route *route; - - list_for_each_entry(route, &peer->ksnp_routes, ksnr_list) { - if (route->ksnr_ipaddr == ipaddr) - return route->ksnr_connecting; - } - return 0; -} - -int -ksocknal_create_conn(struct lnet_ni *ni, struct ksock_route *route, - struct socket *sock, int type) -{ - rwlock_t *global_lock = &ksocknal_data.ksnd_global_lock; - LIST_HEAD(zombies); - struct lnet_process_id peerid; - struct list_head *tmp; - __u64 incarnation; - struct ksock_conn *conn; - struct ksock_conn *conn2; - struct ksock_peer *peer = NULL; - struct ksock_peer *peer2; - struct ksock_sched *sched; - struct ksock_hello_msg *hello; - int cpt; - struct ksock_tx *tx; - 
struct ksock_tx *txtmp; - int rc; - int active; - char *warn = NULL; - - active = !!route; - - LASSERT(active == (type != SOCKLND_CONN_NONE)); - - conn = kzalloc(sizeof(*conn), GFP_NOFS); - if (!conn) { - rc = -ENOMEM; - goto failed_0; - } - - conn->ksnc_peer = NULL; - conn->ksnc_route = NULL; - conn->ksnc_sock = sock; - /* - * 2 ref, 1 for conn, another extra ref prevents socket - * being closed before establishment of connection - */ - atomic_set(&conn->ksnc_sock_refcount, 2); - conn->ksnc_type = type; - ksocknal_lib_save_callback(sock, conn); - atomic_set(&conn->ksnc_conn_refcount, 1); /* 1 ref for me */ - - conn->ksnc_rx_ready = 0; - conn->ksnc_rx_scheduled = 0; - - INIT_LIST_HEAD(&conn->ksnc_tx_queue); - conn->ksnc_tx_ready = 0; - conn->ksnc_tx_scheduled = 0; - conn->ksnc_tx_carrier = NULL; - atomic_set(&conn->ksnc_tx_nob, 0); - - hello = kvzalloc(offsetof(struct ksock_hello_msg, - kshm_ips[LNET_MAX_INTERFACES]), - GFP_KERNEL); - if (!hello) { - rc = -ENOMEM; - goto failed_1; - } - - /* stash conn's local and remote addrs */ - rc = ksocknal_lib_get_conn_addrs(conn); - if (rc) - goto failed_1; - - /* - * Find out/confirm peer's NID and connection type and get the - * vector of interfaces she's willing to let me connect to. 
- * Passive connections use the listener timeout since the peer sends - * eagerly - */ - if (active) { - peer = route->ksnr_peer; - LASSERT(ni == peer->ksnp_ni); - - /* Active connection sends HELLO eagerly */ - hello->kshm_nips = ksocknal_local_ipvec(ni, hello->kshm_ips); - peerid = peer->ksnp_id; - - write_lock_bh(global_lock); - conn->ksnc_proto = peer->ksnp_proto; - write_unlock_bh(global_lock); - - if (!conn->ksnc_proto) { - conn->ksnc_proto = &ksocknal_protocol_v3x; -#if SOCKNAL_VERSION_DEBUG - if (*ksocknal_tunables.ksnd_protocol == 2) - conn->ksnc_proto = &ksocknal_protocol_v2x; - else if (*ksocknal_tunables.ksnd_protocol == 1) - conn->ksnc_proto = &ksocknal_protocol_v1x; -#endif - } - - rc = ksocknal_send_hello(ni, conn, peerid.nid, hello); - if (rc) - goto failed_1; - } else { - peerid.nid = LNET_NID_ANY; - peerid.pid = LNET_PID_ANY; - - /* Passive, get protocol from peer */ - conn->ksnc_proto = NULL; - } - - rc = ksocknal_recv_hello(ni, conn, hello, &peerid, &incarnation); - if (rc < 0) - goto failed_1; - - LASSERT(!rc || active); - LASSERT(conn->ksnc_proto); - LASSERT(peerid.nid != LNET_NID_ANY); - - cpt = lnet_cpt_of_nid(peerid.nid); - - if (active) { - ksocknal_peer_addref(peer); - write_lock_bh(global_lock); - } else { - rc = ksocknal_create_peer(&peer, ni, peerid); - if (rc) - goto failed_1; - - write_lock_bh(global_lock); - - /* called with a ref on ni, so shutdown can't have started */ - LASSERT(!((struct ksock_net *)ni->ni_data)->ksnn_shutdown); - - peer2 = ksocknal_find_peer_locked(ni, peerid); - if (!peer2) { - /* - * NB this puts an "empty" peer in the peer - * table (which takes my ref) - */ - list_add_tail(&peer->ksnp_list, - ksocknal_nid2peerlist(peerid.nid)); - } else { - ksocknal_peer_decref(peer); - peer = peer2; - } - - /* +1 ref for me */ - ksocknal_peer_addref(peer); - peer->ksnp_accepting++; - - /* - * Am I already connecting to this guy? Resolve in - * favour of higher NID... 
- */ - if (peerid.nid < ni->ni_nid && - ksocknal_connecting(peer, conn->ksnc_ipaddr)) { - rc = EALREADY; - warn = "connection race resolution"; - goto failed_2; - } - } - - if (peer->ksnp_closing || - (active && route->ksnr_deleted)) { - /* peer/route got closed under me */ - rc = -ESTALE; - warn = "peer/route removed"; - goto failed_2; - } - - if (!peer->ksnp_proto) { - /* - * Never connected before. - * NB recv_hello may have returned EPROTO to signal my peer - * wants a different protocol than the one I asked for. - */ - LASSERT(list_empty(&peer->ksnp_conns)); - - peer->ksnp_proto = conn->ksnc_proto; - peer->ksnp_incarnation = incarnation; - } - - if (peer->ksnp_proto != conn->ksnc_proto || - peer->ksnp_incarnation != incarnation) { - /* Peer rebooted or I've got the wrong protocol version */ - ksocknal_close_peer_conns_locked(peer, 0, 0); - - peer->ksnp_proto = NULL; - rc = ESTALE; - warn = peer->ksnp_incarnation != incarnation ? - "peer rebooted" : - "wrong proto version"; - goto failed_2; - } - - switch (rc) { - default: - LBUG(); - case 0: - break; - case EALREADY: - warn = "lost conn race"; - goto failed_2; - case EPROTO: - warn = "retry with different protocol version"; - goto failed_2; - } - - /* - * Refuse to duplicate an existing connection, unless this is a - * loopback connection - */ - if (conn->ksnc_ipaddr != conn->ksnc_myipaddr) { - list_for_each(tmp, &peer->ksnp_conns) { - conn2 = list_entry(tmp, struct ksock_conn, ksnc_list); - - if (conn2->ksnc_ipaddr != conn->ksnc_ipaddr || - conn2->ksnc_myipaddr != conn->ksnc_myipaddr || - conn2->ksnc_type != conn->ksnc_type) - continue; - - /* - * Reply on a passive connection attempt so the peer - * realises we're connected. - */ - LASSERT(!rc); - if (!active) - rc = EALREADY; - - warn = "duplicate"; - goto failed_2; - } - } - - /* - * If the connection created by this route didn't bind to the IP - * address the route connected to, the connection/route matching - * code below probably isn't going to work. 
- */ - if (active && - route->ksnr_ipaddr != conn->ksnc_ipaddr) { - CERROR("Route %s %pI4h connected to %pI4h\n", - libcfs_id2str(peer->ksnp_id), - &route->ksnr_ipaddr, - &conn->ksnc_ipaddr); - } - - /* - * Search for a route corresponding to the new connection and - * create an association. This allows incoming connections created - * by routes in my peer to match my own route entries so I don't - * continually create duplicate routes. - */ - list_for_each(tmp, &peer->ksnp_routes) { - route = list_entry(tmp, struct ksock_route, ksnr_list); - - if (route->ksnr_ipaddr != conn->ksnc_ipaddr) - continue; - - ksocknal_associate_route_conn_locked(route, conn); - break; - } - - conn->ksnc_peer = peer; /* conn takes my ref on peer */ - peer->ksnp_last_alive = cfs_time_current(); - peer->ksnp_send_keepalive = 0; - peer->ksnp_error = 0; - - sched = ksocknal_choose_scheduler_locked(cpt); - sched->kss_nconns++; - conn->ksnc_scheduler = sched; - - conn->ksnc_tx_last_post = cfs_time_current(); - /* Set the deadline for the outgoing HELLO to drain */ - conn->ksnc_tx_bufnob = sock->sk->sk_wmem_queued; - conn->ksnc_tx_deadline = cfs_time_shift(*ksocknal_tunables.ksnd_timeout); - mb(); /* order with adding to peer's conn list */ - - list_add(&conn->ksnc_list, &peer->ksnp_conns); - ksocknal_conn_addref(conn); - - ksocknal_new_packet(conn, 0); - - conn->ksnc_zc_capable = ksocknal_lib_zc_capable(conn); - - /* Take packets blocking for this connection. */ - list_for_each_entry_safe(tx, txtmp, &peer->ksnp_tx_queue, tx_list) { - int match = conn->ksnc_proto->pro_match_tx(conn, tx, - tx->tx_nonblk); - - if (match == SOCKNAL_MATCH_NO) - continue; - - list_del(&tx->tx_list); - ksocknal_queue_tx_locked(tx, conn); - } - - write_unlock_bh(global_lock); - - /* - * We've now got a new connection. Any errors from here on are just - * like "normal" comms errors and we close the connection normally. 
- * NB (a) we still have to send the reply HELLO for passive - * connections, - * (b) normal I/O on the conn is blocked until I setup and call the - * socket callbacks. - */ - CDEBUG(D_NET, "New conn %s p %d.x %pI4h -> %pI4h/%d incarnation:%lld sched[%d:%d]\n", - libcfs_id2str(peerid), conn->ksnc_proto->pro_version, - &conn->ksnc_myipaddr, &conn->ksnc_ipaddr, - conn->ksnc_port, incarnation, cpt, - (int)(sched - &sched->kss_info->ksi_scheds[0])); - - if (active) { - /* additional routes after interface exchange? */ - ksocknal_create_routes(peer, conn->ksnc_port, - hello->kshm_ips, hello->kshm_nips); - } else { - hello->kshm_nips = ksocknal_select_ips(peer, hello->kshm_ips, - hello->kshm_nips); - rc = ksocknal_send_hello(ni, conn, peerid.nid, hello); - } - - kvfree(hello); - - /* - * setup the socket AFTER I've received hello (it disables - * SO_LINGER). I might call back to the acceptor who may want - * to send a protocol version response and then close the - * socket; this ensures the socket only tears down after the - * response has been sent. - */ - if (!rc) - rc = ksocknal_lib_setup_sock(sock); - - write_lock_bh(global_lock); - - /* NB my callbacks block while I hold ksnd_global_lock */ - ksocknal_lib_set_callback(sock, conn); - - if (!active) - peer->ksnp_accepting--; - - write_unlock_bh(global_lock); - - if (rc) { - write_lock_bh(global_lock); - if (!conn->ksnc_closing) { - /* could be closed by another thread */ - ksocknal_close_conn_locked(conn, rc); - } - write_unlock_bh(global_lock); - } else if (!ksocknal_connsock_addref(conn)) { - /* Allow I/O to proceed. 
*/ - ksocknal_read_callback(conn); - ksocknal_write_callback(conn); - ksocknal_connsock_decref(conn); - } - - ksocknal_connsock_decref(conn); - ksocknal_conn_decref(conn); - return rc; - - failed_2: - if (!peer->ksnp_closing && - list_empty(&peer->ksnp_conns) && - list_empty(&peer->ksnp_routes)) { - list_add(&zombies, &peer->ksnp_tx_queue); - list_del_init(&peer->ksnp_tx_queue); - ksocknal_unlink_peer_locked(peer); - } - - write_unlock_bh(global_lock); - - if (warn) { - if (rc < 0) - CERROR("Not creating conn %s type %d: %s\n", - libcfs_id2str(peerid), conn->ksnc_type, warn); - else - CDEBUG(D_NET, "Not creating conn %s type %d: %s\n", - libcfs_id2str(peerid), conn->ksnc_type, warn); - } - - if (!active) { - if (rc > 0) { - /* - * Request retry by replying with CONN_NONE - * ksnc_proto has been set already - */ - conn->ksnc_type = SOCKLND_CONN_NONE; - hello->kshm_nips = 0; - ksocknal_send_hello(ni, conn, peerid.nid, hello); - } - - write_lock_bh(global_lock); - peer->ksnp_accepting--; - write_unlock_bh(global_lock); - } - - ksocknal_txlist_done(ni, &zombies, 1); - ksocknal_peer_decref(peer); - -failed_1: - kvfree(hello); - - kfree(conn); - -failed_0: - sock_release(sock); - return rc; -} - -void -ksocknal_close_conn_locked(struct ksock_conn *conn, int error) -{ - /* - * This just does the immmediate housekeeping, and queues the - * connection for the reaper to terminate. - * Caller holds ksnd_global_lock exclusively in irq context - */ - struct ksock_peer *peer = conn->ksnc_peer; - struct ksock_route *route; - struct ksock_conn *conn2; - struct list_head *tmp; - - LASSERT(!peer->ksnp_error); - LASSERT(!conn->ksnc_closing); - conn->ksnc_closing = 1; - - /* ksnd_deathrow_conns takes over peer's ref */ - list_del(&conn->ksnc_list); - - route = conn->ksnc_route; - if (route) { - /* dissociate conn from route... 
*/ - LASSERT(!route->ksnr_deleted); - LASSERT(route->ksnr_connected & (1 << conn->ksnc_type)); - - conn2 = NULL; - list_for_each(tmp, &peer->ksnp_conns) { - conn2 = list_entry(tmp, struct ksock_conn, ksnc_list); - - if (conn2->ksnc_route == route && - conn2->ksnc_type == conn->ksnc_type) - break; - - conn2 = NULL; - } - if (!conn2) - route->ksnr_connected &= ~(1 << conn->ksnc_type); - - conn->ksnc_route = NULL; - - ksocknal_route_decref(route); /* drop conn's ref on route */ - } - - if (list_empty(&peer->ksnp_conns)) { - /* No more connections to this peer */ - - if (!list_empty(&peer->ksnp_tx_queue)) { - struct ksock_tx *tx; - - LASSERT(conn->ksnc_proto == &ksocknal_protocol_v3x); - - /* - * throw them to the last connection..., - * these TXs will be send to /dev/null by scheduler - */ - list_for_each_entry(tx, &peer->ksnp_tx_queue, - tx_list) - ksocknal_tx_prep(conn, tx); - - spin_lock_bh(&conn->ksnc_scheduler->kss_lock); - list_splice_init(&peer->ksnp_tx_queue, - &conn->ksnc_tx_queue); - spin_unlock_bh(&conn->ksnc_scheduler->kss_lock); - } - - peer->ksnp_proto = NULL; /* renegotiate protocol version */ - peer->ksnp_error = error; /* stash last conn close reason */ - - if (list_empty(&peer->ksnp_routes)) { - /* - * I've just closed last conn belonging to a - * peer with no routes to it - */ - ksocknal_unlink_peer_locked(peer); - } - } - - spin_lock_bh(&ksocknal_data.ksnd_reaper_lock); - - list_add_tail(&conn->ksnc_list, - &ksocknal_data.ksnd_deathrow_conns); - wake_up(&ksocknal_data.ksnd_reaper_waitq); - - spin_unlock_bh(&ksocknal_data.ksnd_reaper_lock); -} - -void -ksocknal_peer_failed(struct ksock_peer *peer) -{ - int notify = 0; - unsigned long last_alive = 0; - - /* - * There has been a connection failure or comms error; but I'll only - * tell LNET I think the peer is dead if it's to another kernel and - * there are no connections or connection attempts in existence. 
- */ - read_lock(&ksocknal_data.ksnd_global_lock); - - if (!(peer->ksnp_id.pid & LNET_PID_USERFLAG) && - list_empty(&peer->ksnp_conns) && - !peer->ksnp_accepting && - !ksocknal_find_connecting_route_locked(peer)) { - notify = 1; - last_alive = peer->ksnp_last_alive; - } - - read_unlock(&ksocknal_data.ksnd_global_lock); - - if (notify) - lnet_notify(peer->ksnp_ni, peer->ksnp_id.nid, 0, - last_alive); -} - -void -ksocknal_finalize_zcreq(struct ksock_conn *conn) -{ - struct ksock_peer *peer = conn->ksnc_peer; - struct ksock_tx *tx; - struct ksock_tx *temp; - struct ksock_tx *tmp; - LIST_HEAD(zlist); - - /* - * NB safe to finalize TXs because closing of socket will - * abort all buffered data - */ - LASSERT(!conn->ksnc_sock); - - spin_lock(&peer->ksnp_lock); - - list_for_each_entry_safe(tx, tmp, &peer->ksnp_zc_req_list, tx_zc_list) { - if (tx->tx_conn != conn) - continue; - - LASSERT(tx->tx_msg.ksm_zc_cookies[0]); - - tx->tx_msg.ksm_zc_cookies[0] = 0; - tx->tx_zc_aborted = 1; /* mark it as not-acked */ - list_del(&tx->tx_zc_list); - list_add(&tx->tx_zc_list, &zlist); - } - - spin_unlock(&peer->ksnp_lock); - - list_for_each_entry_safe(tx, temp, &zlist, tx_zc_list) { - list_del(&tx->tx_zc_list); - ksocknal_tx_decref(tx); - } -} - -void -ksocknal_terminate_conn(struct ksock_conn *conn) -{ - /* - * This gets called by the reaper (guaranteed thread context) to - * disengage the socket from its callbacks and close it. - * ksnc_refcount will eventually hit zero, and then the reaper will - * destroy it. 
- */ - struct ksock_peer *peer = conn->ksnc_peer; - struct ksock_sched *sched = conn->ksnc_scheduler; - int failed = 0; - - LASSERT(conn->ksnc_closing); - - /* wake up the scheduler to "send" all remaining packets to /dev/null */ - spin_lock_bh(&sched->kss_lock); - - /* a closing conn is always ready to tx */ - conn->ksnc_tx_ready = 1; - - if (!conn->ksnc_tx_scheduled && - !list_empty(&conn->ksnc_tx_queue)) { - list_add_tail(&conn->ksnc_tx_list, - &sched->kss_tx_conns); - conn->ksnc_tx_scheduled = 1; - /* extra ref for scheduler */ - ksocknal_conn_addref(conn); - - wake_up(&sched->kss_waitq); - } - - spin_unlock_bh(&sched->kss_lock); - - /* serialise with callbacks */ - write_lock_bh(&ksocknal_data.ksnd_global_lock); - - ksocknal_lib_reset_callback(conn->ksnc_sock, conn); - - /* - * OK, so this conn may not be completely disengaged from its - * scheduler yet, but it _has_ committed to terminate... - */ - conn->ksnc_scheduler->kss_nconns--; - - if (peer->ksnp_error) { - /* peer's last conn closed in error */ - LASSERT(list_empty(&peer->ksnp_conns)); - failed = 1; - peer->ksnp_error = 0; /* avoid multiple notifications */ - } - - write_unlock_bh(&ksocknal_data.ksnd_global_lock); - - if (failed) - ksocknal_peer_failed(peer); - - /* - * The socket is closed on the final put; either here, or in - * ksocknal_{send,recv}msg(). Since we set up the linger2 option - * when the connection was established, this will close the socket - * immediately, aborting anything buffered in it. Any hung - * zero-copy transmits will therefore complete in finite time. 
- */ - ksocknal_connsock_decref(conn); -} - -void -ksocknal_queue_zombie_conn(struct ksock_conn *conn) -{ - /* Queue the conn for the reaper to destroy */ - - LASSERT(!atomic_read(&conn->ksnc_conn_refcount)); - spin_lock_bh(&ksocknal_data.ksnd_reaper_lock); - - list_add_tail(&conn->ksnc_list, &ksocknal_data.ksnd_zombie_conns); - wake_up(&ksocknal_data.ksnd_reaper_waitq); - - spin_unlock_bh(&ksocknal_data.ksnd_reaper_lock); -} - -void -ksocknal_destroy_conn(struct ksock_conn *conn) -{ - unsigned long last_rcv; - - /* Final coup-de-grace of the reaper */ - CDEBUG(D_NET, "connection %p\n", conn); - - LASSERT(!atomic_read(&conn->ksnc_conn_refcount)); - LASSERT(!atomic_read(&conn->ksnc_sock_refcount)); - LASSERT(!conn->ksnc_sock); - LASSERT(!conn->ksnc_route); - LASSERT(!conn->ksnc_tx_scheduled); - LASSERT(!conn->ksnc_rx_scheduled); - LASSERT(list_empty(&conn->ksnc_tx_queue)); - - /* complete current receive if any */ - switch (conn->ksnc_rx_state) { - case SOCKNAL_RX_LNET_PAYLOAD: - last_rcv = conn->ksnc_rx_deadline - - *ksocknal_tunables.ksnd_timeout * HZ; - CERROR("Completing partial receive from %s[%d], ip %pI4h:%d, with error, wanted: %zd, left: %d, last alive is %ld secs ago\n", - libcfs_id2str(conn->ksnc_peer->ksnp_id), conn->ksnc_type, - &conn->ksnc_ipaddr, conn->ksnc_port, - iov_iter_count(&conn->ksnc_rx_to), conn->ksnc_rx_nob_left, - cfs_duration_sec(cfs_time_sub(cfs_time_current(), - last_rcv))); - lnet_finalize(conn->ksnc_peer->ksnp_ni, - conn->ksnc_cookie, -EIO); - break; - case SOCKNAL_RX_LNET_HEADER: - if (conn->ksnc_rx_started) - CERROR("Incomplete receive of lnet header from %s, ip %pI4h:%d, with error, protocol: %d.x.\n", - libcfs_id2str(conn->ksnc_peer->ksnp_id), - &conn->ksnc_ipaddr, conn->ksnc_port, - conn->ksnc_proto->pro_version); - break; - case SOCKNAL_RX_KSM_HEADER: - if (conn->ksnc_rx_started) - CERROR("Incomplete receive of ksock message from %s, ip %pI4h:%d, with error, protocol: %d.x.\n", - libcfs_id2str(conn->ksnc_peer->ksnp_id), - 
&conn->ksnc_ipaddr, conn->ksnc_port, - conn->ksnc_proto->pro_version); - break; - case SOCKNAL_RX_SLOP: - if (conn->ksnc_rx_started) - CERROR("Incomplete receive of slops from %s, ip %pI4h:%d, with error\n", - libcfs_id2str(conn->ksnc_peer->ksnp_id), - &conn->ksnc_ipaddr, conn->ksnc_port); - break; - default: - LBUG(); - break; - } - - ksocknal_peer_decref(conn->ksnc_peer); - - kfree(conn); -} - -int -ksocknal_close_peer_conns_locked(struct ksock_peer *peer, __u32 ipaddr, int why) -{ - struct ksock_conn *conn; - struct list_head *ctmp; - struct list_head *cnxt; - int count = 0; - - list_for_each_safe(ctmp, cnxt, &peer->ksnp_conns) { - conn = list_entry(ctmp, struct ksock_conn, ksnc_list); - - if (!ipaddr || conn->ksnc_ipaddr == ipaddr) { - count++; - ksocknal_close_conn_locked(conn, why); - } - } - - return count; -} - -int -ksocknal_close_conn_and_siblings(struct ksock_conn *conn, int why) -{ - struct ksock_peer *peer = conn->ksnc_peer; - __u32 ipaddr = conn->ksnc_ipaddr; - int count; - - write_lock_bh(&ksocknal_data.ksnd_global_lock); - - count = ksocknal_close_peer_conns_locked(peer, ipaddr, why); - - write_unlock_bh(&ksocknal_data.ksnd_global_lock); - - return count; -} - -int -ksocknal_close_matching_conns(struct lnet_process_id id, __u32 ipaddr) -{ - struct ksock_peer *peer; - struct list_head *ptmp; - struct list_head *pnxt; - int lo; - int hi; - int i; - int count = 0; - - write_lock_bh(&ksocknal_data.ksnd_global_lock); - - if (id.nid != LNET_NID_ANY) { - lo = (int)(ksocknal_nid2peerlist(id.nid) - ksocknal_data.ksnd_peers); - hi = (int)(ksocknal_nid2peerlist(id.nid) - ksocknal_data.ksnd_peers); - } else { - lo = 0; - hi = ksocknal_data.ksnd_peer_hash_size - 1; - } - - for (i = lo; i <= hi; i++) { - list_for_each_safe(ptmp, pnxt, - &ksocknal_data.ksnd_peers[i]) { - peer = list_entry(ptmp, struct ksock_peer, ksnp_list); - - if (!((id.nid == LNET_NID_ANY || id.nid == peer->ksnp_id.nid) && - (id.pid == LNET_PID_ANY || id.pid == peer->ksnp_id.pid))) - continue; 
- - count += ksocknal_close_peer_conns_locked(peer, ipaddr, - 0); - } - } - - write_unlock_bh(&ksocknal_data.ksnd_global_lock); - - /* wildcards always succeed */ - if (id.nid == LNET_NID_ANY || id.pid == LNET_PID_ANY || !ipaddr) - return 0; - - if (!count) - return -ENOENT; - else - return 0; -} - -void -ksocknal_notify(struct lnet_ni *ni, lnet_nid_t gw_nid, int alive) -{ - /* - * The router is telling me she's been notified of a change in - * gateway state.... - */ - struct lnet_process_id id = {0}; - - id.nid = gw_nid; - id.pid = LNET_PID_ANY; - - CDEBUG(D_NET, "gw %s %s\n", libcfs_nid2str(gw_nid), - alive ? "up" : "down"); - - if (!alive) { - /* If the gateway crashed, close all open connections... */ - ksocknal_close_matching_conns(id, 0); - return; - } - - /* - * ...otherwise do nothing. We can only establish new connections - * if we have autroutes, and these connect on demand. - */ -} - -void -ksocknal_query(struct lnet_ni *ni, lnet_nid_t nid, unsigned long *when) -{ - int connect = 1; - unsigned long last_alive = 0; - unsigned long now = cfs_time_current(); - struct ksock_peer *peer = NULL; - rwlock_t *glock = &ksocknal_data.ksnd_global_lock; - struct lnet_process_id id = { - .nid = nid, - .pid = LNET_PID_LUSTRE, - }; - - read_lock(glock); - - peer = ksocknal_find_peer_locked(ni, id); - if (peer) { - struct ksock_conn *conn; - int bufnob; - - list_for_each_entry(conn, &peer->ksnp_conns, ksnc_list) { - bufnob = conn->ksnc_sock->sk->sk_wmem_queued; - - if (bufnob < conn->ksnc_tx_bufnob) { - /* something got ACKed */ - conn->ksnc_tx_deadline = - cfs_time_shift(*ksocknal_tunables.ksnd_timeout); - peer->ksnp_last_alive = now; - conn->ksnc_tx_bufnob = bufnob; - } - } - - last_alive = peer->ksnp_last_alive; - if (!ksocknal_find_connectable_route_locked(peer)) - connect = 0; - } - - read_unlock(glock); - - if (last_alive) - *when = last_alive; - - CDEBUG(D_NET, "Peer %s %p, alive %ld secs ago, connect %d\n", - libcfs_nid2str(nid), peer, - last_alive ? 
cfs_duration_sec(now - last_alive) : -1, - connect); - - if (!connect) - return; - - ksocknal_add_peer(ni, id, LNET_NIDADDR(nid), lnet_acceptor_port()); - - write_lock_bh(glock); - - peer = ksocknal_find_peer_locked(ni, id); - if (peer) - ksocknal_launch_all_connections_locked(peer); - - write_unlock_bh(glock); -} - -static void -ksocknal_push_peer(struct ksock_peer *peer) -{ - int index; - int i; - struct list_head *tmp; - struct ksock_conn *conn; - - for (index = 0; ; index++) { - read_lock(&ksocknal_data.ksnd_global_lock); - - i = 0; - conn = NULL; - - list_for_each(tmp, &peer->ksnp_conns) { - if (i++ == index) { - conn = list_entry(tmp, struct ksock_conn, - ksnc_list); - ksocknal_conn_addref(conn); - break; - } - } - - read_unlock(&ksocknal_data.ksnd_global_lock); - - if (!conn) - break; - - ksocknal_lib_push_conn(conn); - ksocknal_conn_decref(conn); - } -} - -static int ksocknal_push(struct lnet_ni *ni, struct lnet_process_id id) -{ - struct list_head *start; - struct list_head *end; - struct list_head *tmp; - int rc = -ENOENT; - unsigned int hsize = ksocknal_data.ksnd_peer_hash_size; - - if (id.nid == LNET_NID_ANY) { - start = &ksocknal_data.ksnd_peers[0]; - end = &ksocknal_data.ksnd_peers[hsize - 1]; - } else { - start = ksocknal_nid2peerlist(id.nid); - end = ksocknal_nid2peerlist(id.nid); - } - - for (tmp = start; tmp <= end; tmp++) { - int peer_off; /* searching offset in peer hash table */ - - for (peer_off = 0; ; peer_off++) { - struct ksock_peer *peer; - int i = 0; - - read_lock(&ksocknal_data.ksnd_global_lock); - list_for_each_entry(peer, tmp, ksnp_list) { - if (!((id.nid == LNET_NID_ANY || - id.nid == peer->ksnp_id.nid) && - (id.pid == LNET_PID_ANY || - id.pid == peer->ksnp_id.pid))) - continue; - - if (i++ == peer_off) { - ksocknal_peer_addref(peer); - break; - } - } - read_unlock(&ksocknal_data.ksnd_global_lock); - - if (!i) /* no match */ - break; - - rc = 0; - ksocknal_push_peer(peer); - ksocknal_peer_decref(peer); - } - } - return rc; -} - 
-static int -ksocknal_add_interface(struct lnet_ni *ni, __u32 ipaddress, __u32 netmask) -{ - struct ksock_net *net = ni->ni_data; - struct ksock_interface *iface; - int rc; - int i; - int j; - struct list_head *ptmp; - struct ksock_peer *peer; - struct list_head *rtmp; - struct ksock_route *route; - - if (!ipaddress || !netmask) - return -EINVAL; - - write_lock_bh(&ksocknal_data.ksnd_global_lock); - - iface = ksocknal_ip2iface(ni, ipaddress); - if (iface) { - /* silently ignore dups */ - rc = 0; - } else if (net->ksnn_ninterfaces == LNET_MAX_INTERFACES) { - rc = -ENOSPC; - } else { - iface = &net->ksnn_interfaces[net->ksnn_ninterfaces++]; - - iface->ksni_ipaddr = ipaddress; - iface->ksni_netmask = netmask; - iface->ksni_nroutes = 0; - iface->ksni_npeers = 0; - - for (i = 0; i < ksocknal_data.ksnd_peer_hash_size; i++) { - list_for_each(ptmp, &ksocknal_data.ksnd_peers[i]) { - peer = list_entry(ptmp, struct ksock_peer, - ksnp_list); - - for (j = 0; j < peer->ksnp_n_passive_ips; j++) - if (peer->ksnp_passive_ips[j] == ipaddress) - iface->ksni_npeers++; - - list_for_each(rtmp, &peer->ksnp_routes) { - route = list_entry(rtmp, struct ksock_route, - ksnr_list); - - if (route->ksnr_myipaddr == ipaddress) - iface->ksni_nroutes++; - } - } - } - - rc = 0; - /* - * NB only new connections will pay attention to the - * new interface! 
- */ - } - - write_unlock_bh(&ksocknal_data.ksnd_global_lock); - - return rc; -} - -static void -ksocknal_peer_del_interface_locked(struct ksock_peer *peer, __u32 ipaddr) -{ - struct list_head *tmp; - struct list_head *nxt; - struct ksock_route *route; - struct ksock_conn *conn; - int i; - int j; - - for (i = 0; i < peer->ksnp_n_passive_ips; i++) - if (peer->ksnp_passive_ips[i] == ipaddr) { - for (j = i + 1; j < peer->ksnp_n_passive_ips; j++) - peer->ksnp_passive_ips[j - 1] = - peer->ksnp_passive_ips[j]; - peer->ksnp_n_passive_ips--; - break; - } - - list_for_each_safe(tmp, nxt, &peer->ksnp_routes) { - route = list_entry(tmp, struct ksock_route, ksnr_list); - - if (route->ksnr_myipaddr != ipaddr) - continue; - - if (route->ksnr_share_count) { - /* Manually created; keep, but unbind */ - route->ksnr_myipaddr = 0; - } else { - ksocknal_del_route_locked(route); - } - } - - list_for_each_safe(tmp, nxt, &peer->ksnp_conns) { - conn = list_entry(tmp, struct ksock_conn, ksnc_list); - - if (conn->ksnc_myipaddr == ipaddr) - ksocknal_close_conn_locked(conn, 0); - } -} - -static int -ksocknal_del_interface(struct lnet_ni *ni, __u32 ipaddress) -{ - struct ksock_net *net = ni->ni_data; - int rc = -ENOENT; - struct list_head *tmp; - struct list_head *nxt; - struct ksock_peer *peer; - __u32 this_ip; - int i; - int j; - - write_lock_bh(&ksocknal_data.ksnd_global_lock); - - for (i = 0; i < net->ksnn_ninterfaces; i++) { - this_ip = net->ksnn_interfaces[i].ksni_ipaddr; - - if (!(!ipaddress || ipaddress == this_ip)) - continue; - - rc = 0; - - for (j = i + 1; j < net->ksnn_ninterfaces; j++) - net->ksnn_interfaces[j - 1] = - net->ksnn_interfaces[j]; - - net->ksnn_ninterfaces--; - - for (j = 0; j < ksocknal_data.ksnd_peer_hash_size; j++) { - list_for_each_safe(tmp, nxt, - &ksocknal_data.ksnd_peers[j]) { - peer = list_entry(tmp, struct ksock_peer, ksnp_list); - - if (peer->ksnp_ni != ni) - continue; - - ksocknal_peer_del_interface_locked(peer, this_ip); - } - } - } - - 
write_unlock_bh(&ksocknal_data.ksnd_global_lock); - - return rc; -} - -int -ksocknal_ctl(struct lnet_ni *ni, unsigned int cmd, void *arg) -{ - struct lnet_process_id id = {0}; - struct libcfs_ioctl_data *data = arg; - int rc; - - switch (cmd) { - case IOC_LIBCFS_GET_INTERFACE: { - struct ksock_net *net = ni->ni_data; - struct ksock_interface *iface; - - read_lock(&ksocknal_data.ksnd_global_lock); - - if (data->ioc_count >= (__u32)net->ksnn_ninterfaces) { - rc = -ENOENT; - } else { - rc = 0; - iface = &net->ksnn_interfaces[data->ioc_count]; - - data->ioc_u32[0] = iface->ksni_ipaddr; - data->ioc_u32[1] = iface->ksni_netmask; - data->ioc_u32[2] = iface->ksni_npeers; - data->ioc_u32[3] = iface->ksni_nroutes; - } - - read_unlock(&ksocknal_data.ksnd_global_lock); - return rc; - } - - case IOC_LIBCFS_ADD_INTERFACE: - return ksocknal_add_interface(ni, - data->ioc_u32[0], /* IP address */ - data->ioc_u32[1]); /* net mask */ - - case IOC_LIBCFS_DEL_INTERFACE: - return ksocknal_del_interface(ni, - data->ioc_u32[0]); /* IP address */ - - case IOC_LIBCFS_GET_PEER: { - __u32 myip = 0; - __u32 ip = 0; - int port = 0; - int conn_count = 0; - int share_count = 0; - - rc = ksocknal_get_peer_info(ni, data->ioc_count, - &id, &myip, &ip, &port, - &conn_count, &share_count); - if (rc) - return rc; - - data->ioc_nid = id.nid; - data->ioc_count = share_count; - data->ioc_u32[0] = ip; - data->ioc_u32[1] = port; - data->ioc_u32[2] = myip; - data->ioc_u32[3] = conn_count; - data->ioc_u32[4] = id.pid; - return 0; - } - - case IOC_LIBCFS_ADD_PEER: - id.nid = data->ioc_nid; - id.pid = LNET_PID_LUSTRE; - return ksocknal_add_peer(ni, id, - data->ioc_u32[0], /* IP */ - data->ioc_u32[1]); /* port */ - - case IOC_LIBCFS_DEL_PEER: - id.nid = data->ioc_nid; - id.pid = LNET_PID_ANY; - return ksocknal_del_peer(ni, id, - data->ioc_u32[0]); /* IP */ - - case IOC_LIBCFS_GET_CONN: { - int txmem; - int rxmem; - int nagle; - struct ksock_conn *conn; - - conn = ksocknal_get_conn_by_idx(ni, data->ioc_count); - 
if (!conn) - return -ENOENT; - - ksocknal_lib_get_conn_tunables(conn, &txmem, &rxmem, &nagle); - - data->ioc_count = txmem; - data->ioc_nid = conn->ksnc_peer->ksnp_id.nid; - data->ioc_flags = nagle; - data->ioc_u32[0] = conn->ksnc_ipaddr; - data->ioc_u32[1] = conn->ksnc_port; - data->ioc_u32[2] = conn->ksnc_myipaddr; - data->ioc_u32[3] = conn->ksnc_type; - data->ioc_u32[4] = conn->ksnc_scheduler->kss_info->ksi_cpt; - data->ioc_u32[5] = rxmem; - data->ioc_u32[6] = conn->ksnc_peer->ksnp_id.pid; - ksocknal_conn_decref(conn); - return 0; - } - - case IOC_LIBCFS_CLOSE_CONNECTION: - id.nid = data->ioc_nid; - id.pid = LNET_PID_ANY; - return ksocknal_close_matching_conns(id, - data->ioc_u32[0]); - - case IOC_LIBCFS_REGISTER_MYNID: - /* Ignore if this is a noop */ - if (data->ioc_nid == ni->ni_nid) - return 0; - - CERROR("obsolete IOC_LIBCFS_REGISTER_MYNID: %s(%s)\n", - libcfs_nid2str(data->ioc_nid), - libcfs_nid2str(ni->ni_nid)); - return -EINVAL; - - case IOC_LIBCFS_PUSH_CONNECTION: - id.nid = data->ioc_nid; - id.pid = LNET_PID_ANY; - return ksocknal_push(ni, id); - - default: - return -EINVAL; - } - /* not reached */ -} - -static void -ksocknal_free_buffers(void) -{ - LASSERT(!atomic_read(&ksocknal_data.ksnd_nactive_txs)); - - if (ksocknal_data.ksnd_sched_info) { - struct ksock_sched_info *info; - int i; - - cfs_percpt_for_each(info, i, ksocknal_data.ksnd_sched_info) - kfree(info->ksi_scheds); - cfs_percpt_free(ksocknal_data.ksnd_sched_info); - } - - kvfree(ksocknal_data.ksnd_peers); - - spin_lock(&ksocknal_data.ksnd_tx_lock); - - if (!list_empty(&ksocknal_data.ksnd_idle_noop_txs)) { - struct list_head zlist; - struct ksock_tx *tx; - struct ksock_tx *temp; - - list_add(&zlist, &ksocknal_data.ksnd_idle_noop_txs); - list_del_init(&ksocknal_data.ksnd_idle_noop_txs); - spin_unlock(&ksocknal_data.ksnd_tx_lock); - - list_for_each_entry_safe(tx, temp, &zlist, tx_list) { - list_del(&tx->tx_list); - kfree(tx); - } - } else { - spin_unlock(&ksocknal_data.ksnd_tx_lock); - } -} - 
-static void -ksocknal_base_shutdown(void) -{ - struct ksock_sched_info *info; - struct ksock_sched *sched; - int i; - int j; - - LASSERT(!ksocknal_data.ksnd_nnets); - - switch (ksocknal_data.ksnd_init) { - default: - LASSERT(0); - /* fall through */ - case SOCKNAL_INIT_ALL: - case SOCKNAL_INIT_DATA: - LASSERT(ksocknal_data.ksnd_peers); - for (i = 0; i < ksocknal_data.ksnd_peer_hash_size; i++) - LASSERT(list_empty(&ksocknal_data.ksnd_peers[i])); - - LASSERT(list_empty(&ksocknal_data.ksnd_nets)); - LASSERT(list_empty(&ksocknal_data.ksnd_enomem_conns)); - LASSERT(list_empty(&ksocknal_data.ksnd_zombie_conns)); - LASSERT(list_empty(&ksocknal_data.ksnd_connd_connreqs)); - LASSERT(list_empty(&ksocknal_data.ksnd_connd_routes)); - - if (ksocknal_data.ksnd_sched_info) { - cfs_percpt_for_each(info, i, - ksocknal_data.ksnd_sched_info) { - if (!info->ksi_scheds) - continue; - - for (j = 0; j < info->ksi_nthreads_max; j++) { - sched = &info->ksi_scheds[j]; - LASSERT(list_empty( - &sched->kss_tx_conns)); - LASSERT(list_empty( - &sched->kss_rx_conns)); - LASSERT(list_empty( - &sched->kss_zombie_noop_txs)); - LASSERT(!sched->kss_nconns); - } - } - } - - /* flag threads to terminate; wake and wait for them to die */ - ksocknal_data.ksnd_shuttingdown = 1; - wake_up_all(&ksocknal_data.ksnd_connd_waitq); - wake_up_all(&ksocknal_data.ksnd_reaper_waitq); - - if (ksocknal_data.ksnd_sched_info) { - cfs_percpt_for_each(info, i, - ksocknal_data.ksnd_sched_info) { - if (!info->ksi_scheds) - continue; - - for (j = 0; j < info->ksi_nthreads_max; j++) { - sched = &info->ksi_scheds[j]; - wake_up_all(&sched->kss_waitq); - } - } - } - - i = 4; - read_lock(&ksocknal_data.ksnd_global_lock); - while (ksocknal_data.ksnd_nthreads) { - i++; - CDEBUG(((i & (-i)) == i) ? D_WARNING : D_NET, /* power of 2? 
*/ - "waiting for %d threads to terminate\n", - ksocknal_data.ksnd_nthreads); - read_unlock(&ksocknal_data.ksnd_global_lock); - set_current_state(TASK_UNINTERRUPTIBLE); - schedule_timeout(HZ); - read_lock(&ksocknal_data.ksnd_global_lock); - } - read_unlock(&ksocknal_data.ksnd_global_lock); - - ksocknal_free_buffers(); - - ksocknal_data.ksnd_init = SOCKNAL_INIT_NOTHING; - break; - } - - module_put(THIS_MODULE); -} - -static __u64 -ksocknal_new_incarnation(void) -{ - /* The incarnation number is the time this module loaded and it - * identifies this particular instance of the socknal. - */ - return ktime_get_ns(); -} - -static int -ksocknal_base_startup(void) -{ - struct ksock_sched_info *info; - int rc; - int i; - - LASSERT(ksocknal_data.ksnd_init == SOCKNAL_INIT_NOTHING); - LASSERT(!ksocknal_data.ksnd_nnets); - - memset(&ksocknal_data, 0, sizeof(ksocknal_data)); /* zero pointers */ - - ksocknal_data.ksnd_peer_hash_size = SOCKNAL_PEER_HASH_SIZE; - ksocknal_data.ksnd_peers = kvmalloc_array(ksocknal_data.ksnd_peer_hash_size, - sizeof(struct list_head), - GFP_KERNEL); - if (!ksocknal_data.ksnd_peers) - return -ENOMEM; - - for (i = 0; i < ksocknal_data.ksnd_peer_hash_size; i++) - INIT_LIST_HEAD(&ksocknal_data.ksnd_peers[i]); - - rwlock_init(&ksocknal_data.ksnd_global_lock); - INIT_LIST_HEAD(&ksocknal_data.ksnd_nets); - - spin_lock_init(&ksocknal_data.ksnd_reaper_lock); - INIT_LIST_HEAD(&ksocknal_data.ksnd_enomem_conns); - INIT_LIST_HEAD(&ksocknal_data.ksnd_zombie_conns); - INIT_LIST_HEAD(&ksocknal_data.ksnd_deathrow_conns); - init_waitqueue_head(&ksocknal_data.ksnd_reaper_waitq); - - spin_lock_init(&ksocknal_data.ksnd_connd_lock); - INIT_LIST_HEAD(&ksocknal_data.ksnd_connd_connreqs); - INIT_LIST_HEAD(&ksocknal_data.ksnd_connd_routes); - init_waitqueue_head(&ksocknal_data.ksnd_connd_waitq); - - spin_lock_init(&ksocknal_data.ksnd_tx_lock); - INIT_LIST_HEAD(&ksocknal_data.ksnd_idle_noop_txs); - - /* NB memset above zeros whole of ksocknal_data */ - - /* flag 
lists/ptrs/locks initialised */ - ksocknal_data.ksnd_init = SOCKNAL_INIT_DATA; - try_module_get(THIS_MODULE); - - ksocknal_data.ksnd_sched_info = cfs_percpt_alloc(lnet_cpt_table(), - sizeof(*info)); - if (!ksocknal_data.ksnd_sched_info) - goto failed; - - cfs_percpt_for_each(info, i, ksocknal_data.ksnd_sched_info) { - struct ksock_sched *sched; - int nthrs; - - nthrs = cfs_cpt_weight(lnet_cpt_table(), i); - if (*ksocknal_tunables.ksnd_nscheds > 0) { - nthrs = min(nthrs, *ksocknal_tunables.ksnd_nscheds); - } else { - /* - * max to half of CPUs, assume another half should be - * reserved for upper layer modules - */ - nthrs = min(max(SOCKNAL_NSCHEDS, nthrs >> 1), nthrs); - } - - info->ksi_nthreads_max = nthrs; - info->ksi_cpt = i; - - info->ksi_scheds = kzalloc_cpt(info->ksi_nthreads_max * sizeof(*sched), - GFP_NOFS, i); - if (!info->ksi_scheds) - goto failed; - - for (; nthrs > 0; nthrs--) { - sched = &info->ksi_scheds[nthrs - 1]; - - sched->kss_info = info; - spin_lock_init(&sched->kss_lock); - INIT_LIST_HEAD(&sched->kss_rx_conns); - INIT_LIST_HEAD(&sched->kss_tx_conns); - INIT_LIST_HEAD(&sched->kss_zombie_noop_txs); - init_waitqueue_head(&sched->kss_waitq); - } - } - - ksocknal_data.ksnd_connd_starting = 0; - ksocknal_data.ksnd_connd_failed_stamp = 0; - ksocknal_data.ksnd_connd_starting_stamp = ktime_get_real_seconds(); - /* - * must have at least 2 connds to remain responsive to accepts while - * connecting - */ - if (*ksocknal_tunables.ksnd_nconnds < SOCKNAL_CONND_RESV + 1) - *ksocknal_tunables.ksnd_nconnds = SOCKNAL_CONND_RESV + 1; - - if (*ksocknal_tunables.ksnd_nconnds_max < - *ksocknal_tunables.ksnd_nconnds) { - ksocknal_tunables.ksnd_nconnds_max = - ksocknal_tunables.ksnd_nconnds; - } - - for (i = 0; i < *ksocknal_tunables.ksnd_nconnds; i++) { - char name[16]; - - spin_lock_bh(&ksocknal_data.ksnd_connd_lock); - ksocknal_data.ksnd_connd_starting++; - spin_unlock_bh(&ksocknal_data.ksnd_connd_lock); - - snprintf(name, sizeof(name), "socknal_cd%02d", i); - rc = 
ksocknal_thread_start(ksocknal_connd, - (void *)((uintptr_t)i), name); - if (rc) { - spin_lock_bh(&ksocknal_data.ksnd_connd_lock); - ksocknal_data.ksnd_connd_starting--; - spin_unlock_bh(&ksocknal_data.ksnd_connd_lock); - CERROR("Can't spawn socknal connd: %d\n", rc); - goto failed; - } - } - - rc = ksocknal_thread_start(ksocknal_reaper, NULL, "socknal_reaper"); - if (rc) { - CERROR("Can't spawn socknal reaper: %d\n", rc); - goto failed; - } - - /* flag everything initialised */ - ksocknal_data.ksnd_init = SOCKNAL_INIT_ALL; - - return 0; - - failed: - ksocknal_base_shutdown(); - return -ENETDOWN; -} - -static void -ksocknal_debug_peerhash(struct lnet_ni *ni) -{ - struct ksock_peer *peer = NULL; - struct list_head *tmp; - int i; - - read_lock(&ksocknal_data.ksnd_global_lock); - - for (i = 0; i < ksocknal_data.ksnd_peer_hash_size; i++) { - list_for_each(tmp, &ksocknal_data.ksnd_peers[i]) { - peer = list_entry(tmp, struct ksock_peer, ksnp_list); - - if (peer->ksnp_ni == ni) - break; - - peer = NULL; - } - } - - if (peer) { - struct ksock_route *route; - struct ksock_conn *conn; - - CWARN("Active peer on shutdown: %s, ref %d, scnt %d, closing %d, accepting %d, err %d, zcookie %llu, txq %d, zc_req %d\n", - libcfs_id2str(peer->ksnp_id), - atomic_read(&peer->ksnp_refcount), - peer->ksnp_sharecount, peer->ksnp_closing, - peer->ksnp_accepting, peer->ksnp_error, - peer->ksnp_zc_next_cookie, - !list_empty(&peer->ksnp_tx_queue), - !list_empty(&peer->ksnp_zc_req_list)); - - list_for_each(tmp, &peer->ksnp_routes) { - route = list_entry(tmp, struct ksock_route, ksnr_list); - CWARN("Route: ref %d, schd %d, conn %d, cnted %d, del %d\n", - atomic_read(&route->ksnr_refcount), - route->ksnr_scheduled, route->ksnr_connecting, - route->ksnr_connected, route->ksnr_deleted); - } - - list_for_each(tmp, &peer->ksnp_conns) { - conn = list_entry(tmp, struct ksock_conn, ksnc_list); - CWARN("Conn: ref %d, sref %d, t %d, c %d\n", - atomic_read(&conn->ksnc_conn_refcount), - 
atomic_read(&conn->ksnc_sock_refcount), - conn->ksnc_type, conn->ksnc_closing); - } - } - - read_unlock(&ksocknal_data.ksnd_global_lock); -} - -void -ksocknal_shutdown(struct lnet_ni *ni) -{ - struct ksock_net *net = ni->ni_data; - int i; - struct lnet_process_id anyid = {0}; - - anyid.nid = LNET_NID_ANY; - anyid.pid = LNET_PID_ANY; - - LASSERT(ksocknal_data.ksnd_init == SOCKNAL_INIT_ALL); - LASSERT(ksocknal_data.ksnd_nnets > 0); - - spin_lock_bh(&net->ksnn_lock); - net->ksnn_shutdown = 1; /* prevent new peers */ - spin_unlock_bh(&net->ksnn_lock); - - /* Delete all peers */ - ksocknal_del_peer(ni, anyid, 0); - - /* Wait for all peer state to clean up */ - i = 2; - spin_lock_bh(&net->ksnn_lock); - while (net->ksnn_npeers) { - spin_unlock_bh(&net->ksnn_lock); - - i++; - CDEBUG(((i & (-i)) == i) ? D_WARNING : D_NET, /* power of 2? */ - "waiting for %d peers to disconnect\n", - net->ksnn_npeers); - set_current_state(TASK_UNINTERRUPTIBLE); - schedule_timeout(HZ); - - ksocknal_debug_peerhash(ni); - - spin_lock_bh(&net->ksnn_lock); - } - spin_unlock_bh(&net->ksnn_lock); - - for (i = 0; i < net->ksnn_ninterfaces; i++) { - LASSERT(!net->ksnn_interfaces[i].ksni_npeers); - LASSERT(!net->ksnn_interfaces[i].ksni_nroutes); - } - - list_del(&net->ksnn_list); - kfree(net); - - ksocknal_data.ksnd_nnets--; - if (!ksocknal_data.ksnd_nnets) - ksocknal_base_shutdown(); -} - -static int -ksocknal_enumerate_interfaces(struct ksock_net *net) -{ - char **names; - int i; - int j; - int rc; - int n; - - n = lnet_ipif_enumerate(&names); - if (n <= 0) { - CERROR("Can't enumerate interfaces: %d\n", n); - return n; - } - - for (i = j = 0; i < n; i++) { - int up; - __u32 ip; - __u32 mask; - - if (!strcmp(names[i], "lo")) /* skip the loopback IF */ - continue; - - rc = lnet_ipif_query(names[i], &up, &ip, &mask); - if (rc) { - CWARN("Can't get interface %s info: %d\n", - names[i], rc); - continue; - } - - if (!up) { - CWARN("Ignoring interface %s (down)\n", - names[i]); - continue; - } - - if (j == 
LNET_MAX_INTERFACES) { - CWARN("Ignoring interface %s (too many interfaces)\n", - names[i]); - continue; - } - - net->ksnn_interfaces[j].ksni_ipaddr = ip; - net->ksnn_interfaces[j].ksni_netmask = mask; - strlcpy(net->ksnn_interfaces[j].ksni_name, - names[i], sizeof(net->ksnn_interfaces[j].ksni_name)); - j++; - } - - lnet_ipif_free_enumeration(names, n); - - if (!j) - CERROR("Can't find any usable interfaces\n"); - - return j; -} - -static int -ksocknal_search_new_ipif(struct ksock_net *net) -{ - int new_ipif = 0; - int i; - - for (i = 0; i < net->ksnn_ninterfaces; i++) { - char *ifnam = &net->ksnn_interfaces[i].ksni_name[0]; - char *colon = strchr(ifnam, ':'); - int found = 0; - struct ksock_net *tmp; - int j; - - if (colon) /* ignore alias device */ - *colon = 0; - - list_for_each_entry(tmp, &ksocknal_data.ksnd_nets, ksnn_list) { - for (j = 0; !found && j < tmp->ksnn_ninterfaces; j++) { - char *ifnam2 = - &tmp->ksnn_interfaces[j].ksni_name[0]; - char *colon2 = strchr(ifnam2, ':'); - - if (colon2) - *colon2 = 0; - - found = !strcmp(ifnam, ifnam2); - if (colon2) - *colon2 = ':'; - } - if (found) - break; - } - - new_ipif += !found; - if (colon) - *colon = ':'; - } - - return new_ipif; -} - -static int -ksocknal_start_schedulers(struct ksock_sched_info *info) -{ - int nthrs; - int rc = 0; - int i; - - if (!info->ksi_nthreads) { - if (*ksocknal_tunables.ksnd_nscheds > 0) { - nthrs = info->ksi_nthreads_max; - } else { - nthrs = cfs_cpt_weight(lnet_cpt_table(), - info->ksi_cpt); - nthrs = min(max(SOCKNAL_NSCHEDS, nthrs >> 1), nthrs); - nthrs = min(SOCKNAL_NSCHEDS_HIGH, nthrs); - } - nthrs = min(nthrs, info->ksi_nthreads_max); - } else { - LASSERT(info->ksi_nthreads <= info->ksi_nthreads_max); - /* increase two threads if there is new interface */ - nthrs = min(2, info->ksi_nthreads_max - info->ksi_nthreads); - } - - for (i = 0; i < nthrs; i++) { - long id; - char name[20]; - struct ksock_sched *sched; - - id = KSOCK_THREAD_ID(info->ksi_cpt, info->ksi_nthreads + i); - 
sched = &info->ksi_scheds[KSOCK_THREAD_SID(id)]; - snprintf(name, sizeof(name), "socknal_sd%02d_%02d", - info->ksi_cpt, (int)(sched - &info->ksi_scheds[0])); - - rc = ksocknal_thread_start(ksocknal_scheduler, - (void *)id, name); - if (!rc) - continue; - - CERROR("Can't spawn thread %d for scheduler[%d]: %d\n", - info->ksi_cpt, info->ksi_nthreads + i, rc); - break; - } - - info->ksi_nthreads += i; - return rc; -} - -static int -ksocknal_net_start_threads(struct ksock_net *net, __u32 *cpts, int ncpts) -{ - int newif = ksocknal_search_new_ipif(net); - int rc; - int i; - - LASSERT(ncpts > 0 && ncpts <= cfs_cpt_number(lnet_cpt_table())); - - for (i = 0; i < ncpts; i++) { - struct ksock_sched_info *info; - int cpt = !cpts ? i : cpts[i]; - - LASSERT(cpt < cfs_cpt_number(lnet_cpt_table())); - info = ksocknal_data.ksnd_sched_info[cpt]; - - if (!newif && info->ksi_nthreads > 0) - continue; - - rc = ksocknal_start_schedulers(info); - if (rc) - return rc; - } - return 0; -} - -int -ksocknal_startup(struct lnet_ni *ni) -{ - struct ksock_net *net; - int rc; - int i; - - LASSERT(ni->ni_lnd == &the_ksocklnd); - - if (ksocknal_data.ksnd_init == SOCKNAL_INIT_NOTHING) { - rc = ksocknal_base_startup(); - if (rc) - return rc; - } - - net = kzalloc(sizeof(*net), GFP_NOFS); - if (!net) - goto fail_0; - - spin_lock_init(&net->ksnn_lock); - net->ksnn_incarnation = ksocknal_new_incarnation(); - ni->ni_data = net; - ni->ni_peertimeout = *ksocknal_tunables.ksnd_peertimeout; - ni->ni_maxtxcredits = *ksocknal_tunables.ksnd_credits; - ni->ni_peertxcredits = *ksocknal_tunables.ksnd_peertxcredits; - ni->ni_peerrtrcredits = *ksocknal_tunables.ksnd_peerrtrcredits; - - if (!ni->ni_interfaces[0]) { - rc = ksocknal_enumerate_interfaces(net); - if (rc <= 0) - goto fail_1; - - net->ksnn_ninterfaces = 1; - } else { - for (i = 0; i < LNET_MAX_INTERFACES; i++) { - int up; - - if (!ni->ni_interfaces[i]) - break; - - rc = lnet_ipif_query(ni->ni_interfaces[i], &up, - &net->ksnn_interfaces[i].ksni_ipaddr, - 
&net->ksnn_interfaces[i].ksni_netmask); - - if (rc) { - CERROR("Can't get interface %s info: %d\n", - ni->ni_interfaces[i], rc); - goto fail_1; - } - - if (!up) { - CERROR("Interface %s is down\n", - ni->ni_interfaces[i]); - goto fail_1; - } - - strlcpy(net->ksnn_interfaces[i].ksni_name, - ni->ni_interfaces[i], - sizeof(net->ksnn_interfaces[i].ksni_name)); - } - net->ksnn_ninterfaces = i; - } - - /* call it before add it to ksocknal_data.ksnd_nets */ - rc = ksocknal_net_start_threads(net, ni->ni_cpts, ni->ni_ncpts); - if (rc) - goto fail_1; - - ni->ni_nid = LNET_MKNID(LNET_NIDNET(ni->ni_nid), - net->ksnn_interfaces[0].ksni_ipaddr); - list_add(&net->ksnn_list, &ksocknal_data.ksnd_nets); - - ksocknal_data.ksnd_nnets++; - - return 0; - - fail_1: - kfree(net); - fail_0: - if (!ksocknal_data.ksnd_nnets) - ksocknal_base_shutdown(); - - return -ENETDOWN; -} - -static void __exit ksocklnd_exit(void) -{ - lnet_unregister_lnd(&the_ksocklnd); -} - -static int __init ksocklnd_init(void) -{ - int rc; - - /* check ksnr_connected/connecting field large enough */ - BUILD_BUG_ON(SOCKLND_CONN_NTYPES > 4); - BUILD_BUG_ON(SOCKLND_CONN_ACK != SOCKLND_CONN_BULK_IN); - - /* initialize the_ksocklnd */ - the_ksocklnd.lnd_type = SOCKLND; - the_ksocklnd.lnd_startup = ksocknal_startup; - the_ksocklnd.lnd_shutdown = ksocknal_shutdown; - the_ksocklnd.lnd_ctl = ksocknal_ctl; - the_ksocklnd.lnd_send = ksocknal_send; - the_ksocklnd.lnd_recv = ksocknal_recv; - the_ksocklnd.lnd_notify = ksocknal_notify; - the_ksocklnd.lnd_query = ksocknal_query; - the_ksocklnd.lnd_accept = ksocknal_accept; - - rc = ksocknal_tunables_init(); - if (rc) - return rc; - - lnet_register_lnd(&the_ksocklnd); - - return 0; -} - -MODULE_AUTHOR("OpenSFS, Inc. 
<http://www.lustre.org/>"); -MODULE_DESCRIPTION("TCP Socket LNet Network Driver"); -MODULE_VERSION("2.7.0"); -MODULE_LICENSE("GPL"); - -module_init(ksocklnd_init); -module_exit(ksocklnd_exit); diff --git a/drivers/staging/lustre/lnet/klnds/socklnd/socklnd.h b/drivers/staging/lustre/lnet/klnds/socklnd/socklnd.h deleted file mode 100644 index 570f54ed57b1..000000000000 --- a/drivers/staging/lustre/lnet/klnds/socklnd/socklnd.h +++ /dev/null @@ -1,705 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -/* - * Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved. - * - * Copyright (c) 2011, 2012, Intel Corporation. - * - * Author: Zach Brown <zab@zabbo.net> - * Author: Peter J. Braam <braam@clusterfs.com> - * Author: Phil Schwan <phil@clusterfs.com> - * Author: Eric Barton <eric@bartonsoftware.com> - * - * This file is part of Lustre, http://www.lustre.org - * - * Portals is free software; you can redistribute it and/or - * modify it under the terms of version 2 of the GNU General Public - * License as published by the Free Software Foundation. - * - * Portals is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. 
- * - */ - -#ifndef _SOCKLND_SOCKLND_H_ -#define _SOCKLND_SOCKLND_H_ - -#define DEBUG_PORTAL_ALLOC -#define DEBUG_SUBSYSTEM S_LND - -#include <linux/crc32.h> -#include <linux/errno.h> -#include <linux/if.h> -#include <linux/init.h> -#include <linux/kernel.h> -#include <linux/kmod.h> -#include <linux/list.h> -#include <linux/mm.h> -#include <linux/module.h> -#include <linux/stat.h> -#include <linux/string.h> -#include <linux/syscalls.h> -#include <linux/sysctl.h> -#include <linux/uio.h> -#include <linux/unistd.h> -#include <asm/irq.h> -#include <net/sock.h> -#include <net/tcp.h> - -#include <linux/libcfs/libcfs.h> -#include <linux/lnet/lib-lnet.h> -#include <linux/lnet/socklnd.h> - -/* assume one thread for each connection type */ -#define SOCKNAL_NSCHEDS 3 -#define SOCKNAL_NSCHEDS_HIGH (SOCKNAL_NSCHEDS << 1) - -#define SOCKNAL_PEER_HASH_SIZE 101 /* # peer lists */ -#define SOCKNAL_RESCHED 100 /* # scheduler loops before reschedule */ -#define SOCKNAL_INSANITY_RECONN 5000 /* connd is trying on reconn infinitely */ -#define SOCKNAL_ENOMEM_RETRY CFS_TICK /* jiffies between retries */ - -#define SOCKNAL_SINGLE_FRAG_TX 0 /* disable multi-fragment sends */ -#define SOCKNAL_SINGLE_FRAG_RX 0 /* disable multi-fragment receives */ - -#define SOCKNAL_VERSION_DEBUG 0 /* enable protocol version debugging */ - -/* - * risk kmap deadlock on multi-frag I/O (backs off to single-frag if disabled). - * no risk if we're not running on a CONFIG_HIGHMEM platform. 
- */ -#ifdef CONFIG_HIGHMEM -# define SOCKNAL_RISK_KMAP_DEADLOCK 0 -#else -# define SOCKNAL_RISK_KMAP_DEADLOCK 1 -#endif - -struct ksock_sched_info; - -struct ksock_sched { /* per scheduler state */ - spinlock_t kss_lock; /* serialise */ - struct list_head kss_rx_conns; /* conn waiting to be read */ - struct list_head kss_tx_conns; /* conn waiting to be written */ - struct list_head kss_zombie_noop_txs; /* zombie noop tx list */ - wait_queue_head_t kss_waitq; /* where scheduler sleeps */ - int kss_nconns; /* # connections assigned to - * this scheduler - */ - struct ksock_sched_info *kss_info; /* owner of it */ -}; - -struct ksock_sched_info { - int ksi_nthreads_max; /* max allowed threads */ - int ksi_nthreads; /* number of threads */ - int ksi_cpt; /* CPT id */ - struct ksock_sched *ksi_scheds; /* array of schedulers */ -}; - -#define KSOCK_CPT_SHIFT 16 -#define KSOCK_THREAD_ID(cpt, sid) (((cpt) << KSOCK_CPT_SHIFT) | (sid)) -#define KSOCK_THREAD_CPT(id) ((id) >> KSOCK_CPT_SHIFT) -#define KSOCK_THREAD_SID(id) ((id) & ((1UL << KSOCK_CPT_SHIFT) - 1)) - -struct ksock_interface { /* in-use interface */ - __u32 ksni_ipaddr; /* interface's IP address */ - __u32 ksni_netmask; /* interface's network mask */ - int ksni_nroutes; /* # routes using (active) */ - int ksni_npeers; /* # peers using (passive) */ - char ksni_name[IFNAMSIZ]; /* interface name */ -}; - -struct ksock_tunables { - int *ksnd_timeout; /* "stuck" socket timeout - * (seconds) - */ - int *ksnd_nscheds; /* # scheduler threads in each - * pool while starting - */ - int *ksnd_nconnds; /* # connection daemons */ - int *ksnd_nconnds_max; /* max # connection daemons */ - int *ksnd_min_reconnectms; /* first connection retry after - * (ms)... - */ - int *ksnd_max_reconnectms; /* ...exponentially increasing to - * this - */ - int *ksnd_eager_ack; /* make TCP ack eagerly? */ - int *ksnd_typed_conns; /* drive sockets by type? 
*/ - int *ksnd_min_bulk; /* smallest "large" message */ - int *ksnd_tx_buffer_size; /* socket tx buffer size */ - int *ksnd_rx_buffer_size; /* socket rx buffer size */ - int *ksnd_nagle; /* enable NAGLE? */ - int *ksnd_round_robin; /* round robin for multiple - * interfaces - */ - int *ksnd_keepalive; /* # secs for sending keepalive - * NOOP - */ - int *ksnd_keepalive_idle; /* # idle secs before 1st probe - */ - int *ksnd_keepalive_count; /* # probes */ - int *ksnd_keepalive_intvl; /* time between probes */ - int *ksnd_credits; /* # concurrent sends */ - int *ksnd_peertxcredits; /* # concurrent sends to 1 peer - */ - int *ksnd_peerrtrcredits; /* # per-peer router buffer - * credits - */ - int *ksnd_peertimeout; /* seconds to consider peer dead - */ - int *ksnd_enable_csum; /* enable check sum */ - int *ksnd_inject_csum_error; /* set non-zero to inject - * checksum error - */ - int *ksnd_nonblk_zcack; /* always send zc-ack on - * non-blocking connection - */ - unsigned int *ksnd_zc_min_payload; /* minimum zero copy payload - * size - */ - int *ksnd_zc_recv; /* enable ZC receive (for - * Chelsio TOE) - */ - int *ksnd_zc_recv_min_nfrags; /* minimum # of fragments to - * enable ZC receive - */ -}; - -struct ksock_net { - __u64 ksnn_incarnation; /* my epoch */ - spinlock_t ksnn_lock; /* serialise */ - struct list_head ksnn_list; /* chain on global list */ - int ksnn_npeers; /* # peers */ - int ksnn_shutdown; /* shutting down? 
*/ - int ksnn_ninterfaces; /* IP interfaces */ - struct ksock_interface ksnn_interfaces[LNET_MAX_INTERFACES]; -}; - -/** connd timeout */ -#define SOCKNAL_CONND_TIMEOUT 120 -/** reserved thread for accepting & creating new connd */ -#define SOCKNAL_CONND_RESV 1 - -struct ksock_nal_data { - int ksnd_init; /* initialisation state - */ - int ksnd_nnets; /* # networks set up */ - struct list_head ksnd_nets; /* list of nets */ - rwlock_t ksnd_global_lock; /* stabilize peer/conn - * ops - */ - struct list_head *ksnd_peers; /* hash table of all my - * known peers - */ - int ksnd_peer_hash_size; /* size of ksnd_peers */ - - int ksnd_nthreads; /* # live threads */ - int ksnd_shuttingdown; /* tell threads to exit - */ - struct ksock_sched_info **ksnd_sched_info; /* schedulers info */ - - atomic_t ksnd_nactive_txs; /* #active txs */ - - struct list_head ksnd_deathrow_conns; /* conns to close: - * reaper_lock - */ - struct list_head ksnd_zombie_conns; /* conns to free: - * reaper_lock - */ - struct list_head ksnd_enomem_conns; /* conns to retry: - * reaper_lock - */ - wait_queue_head_t ksnd_reaper_waitq; /* reaper sleeps here */ - unsigned long ksnd_reaper_waketime; /* when reaper will wake - */ - spinlock_t ksnd_reaper_lock; /* serialise */ - - int ksnd_enomem_tx; /* test ENOMEM sender */ - int ksnd_stall_tx; /* test sluggish sender - */ - int ksnd_stall_rx; /* test sluggish - * receiver - */ - struct list_head ksnd_connd_connreqs; /* incoming connection - * requests - */ - struct list_head ksnd_connd_routes; /* routes waiting to be - * connected - */ - wait_queue_head_t ksnd_connd_waitq; /* connds sleep here */ - int ksnd_connd_connecting; /* # connds connecting - */ - time64_t ksnd_connd_failed_stamp;/* time stamp of the - * last failed - * connecting attempt - */ - time64_t ksnd_connd_starting_stamp;/* time stamp of the - * last starting connd - */ - unsigned int ksnd_connd_starting; /* # starting connd */ - unsigned int ksnd_connd_running; /* # running connd */ - 
spinlock_t ksnd_connd_lock; /* serialise */ - - struct list_head ksnd_idle_noop_txs; /* list head for freed - * noop tx - */ - spinlock_t ksnd_tx_lock; /* serialise, g_lock - * unsafe - */ -}; - -#define SOCKNAL_INIT_NOTHING 0 -#define SOCKNAL_INIT_DATA 1 -#define SOCKNAL_INIT_ALL 2 - -/* - * A packet just assembled for transmission is represented by 1 or more - * struct iovec fragments (the first frag contains the portals header), - * followed by 0 or more struct bio_vec fragments. - * - * On the receive side, initially 1 struct iovec fragment is posted for - * receive (the header). Once the header has been received, the payload is - * received into either struct iovec or struct bio_vec fragments, depending on - * what the header matched or whether the message needs forwarding. - */ -struct ksock_conn; /* forward ref */ -struct ksock_peer; /* forward ref */ -struct ksock_route; /* forward ref */ -struct ksock_proto; /* forward ref */ - -struct ksock_tx { /* transmit packet */ - struct list_head tx_list; /* queue on conn for transmission etc - */ - struct list_head tx_zc_list; /* queue on peer for ZC request */ - atomic_t tx_refcount; /* tx reference count */ - int tx_nob; /* # packet bytes */ - int tx_resid; /* residual bytes */ - int tx_niov; /* # packet iovec frags */ - struct kvec *tx_iov; /* packet iovec frags */ - int tx_nkiov; /* # packet page frags */ - unsigned short tx_zc_aborted; /* aborted ZC request */ - unsigned short tx_zc_capable:1; /* payload is large enough for ZC */ - unsigned short tx_zc_checked:1; /* Have I checked if I should ZC? 
*/ - unsigned short tx_nonblk:1; /* it's a non-blocking ACK */ - struct bio_vec *tx_kiov; /* packet page frags */ - struct ksock_conn *tx_conn; /* owning conn */ - struct lnet_msg *tx_lnetmsg; /* lnet message for lnet_finalize() - */ - unsigned long tx_deadline; /* when (in jiffies) tx times out */ - struct ksock_msg tx_msg; /* socklnd message buffer */ - int tx_desc_size; /* size of this descriptor */ - union { - struct { - struct kvec iov; /* virt hdr */ - struct bio_vec kiov[0]; /* paged payload */ - } paged; - struct { - struct kvec iov[1]; /* virt hdr + payload */ - } virt; - } tx_frags; -}; - -#define KSOCK_NOOP_TX_SIZE (offsetof(struct ksock_tx, tx_frags.paged.kiov[0])) - -/* network zero copy callback descriptor embedded in struct ksock_tx */ - -#define SOCKNAL_RX_KSM_HEADER 1 /* reading ksock message header */ -#define SOCKNAL_RX_LNET_HEADER 2 /* reading lnet message header */ -#define SOCKNAL_RX_PARSE 3 /* Calling lnet_parse() */ -#define SOCKNAL_RX_PARSE_WAIT 4 /* waiting to be told to read the body */ -#define SOCKNAL_RX_LNET_PAYLOAD 5 /* reading lnet payload (to deliver here) */ -#define SOCKNAL_RX_SLOP 6 /* skipping body */ - -struct ksock_conn { - struct ksock_peer *ksnc_peer; /* owning peer */ - struct ksock_route *ksnc_route; /* owning route */ - struct list_head ksnc_list; /* stash on peer's conn list */ - struct socket *ksnc_sock; /* actual socket */ - void *ksnc_saved_data_ready; /* socket's original - * data_ready() callback - */ - void *ksnc_saved_write_space; /* socket's original - * write_space() callback - */ - atomic_t ksnc_conn_refcount;/* conn refcount */ - atomic_t ksnc_sock_refcount;/* sock refcount */ - struct ksock_sched *ksnc_scheduler; /* who schedules this connection - */ - __u32 ksnc_myipaddr; /* my IP */ - __u32 ksnc_ipaddr; /* peer's IP */ - int ksnc_port; /* peer's port */ - signed int ksnc_type:3; /* type of connection, should be - * signed value - */ - unsigned int ksnc_closing:1; /* being shut down */ - unsigned int 
ksnc_flip:1; /* flip or not, only for V2.x */ - unsigned int ksnc_zc_capable:1; /* enable to ZC */ - struct ksock_proto *ksnc_proto; /* protocol for the connection */ - - /* reader */ - struct list_head ksnc_rx_list; /* where I enq waiting input or a - * forwarding descriptor - */ - unsigned long ksnc_rx_deadline; /* when (in jiffies) receive times - * out - */ - __u8 ksnc_rx_started; /* started receiving a message */ - __u8 ksnc_rx_ready; /* data ready to read */ - __u8 ksnc_rx_scheduled; /* being progressed */ - __u8 ksnc_rx_state; /* what is being read */ - int ksnc_rx_nob_left; /* # bytes to next hdr/body */ - struct iov_iter ksnc_rx_to; /* copy destination */ - struct kvec ksnc_rx_iov_space[LNET_MAX_IOV]; /* space for frag descriptors */ - __u32 ksnc_rx_csum; /* partial checksum for incoming - * data - */ - void *ksnc_cookie; /* rx lnet_finalize passthru arg - */ - struct ksock_msg ksnc_msg; /* incoming message buffer: - * V2.x message takes the - * whole struct - * V1.x message is a bare - * struct lnet_hdr, it's stored in - * ksnc_msg.ksm_u.lnetmsg - */ - /* WRITER */ - struct list_head ksnc_tx_list; /* where I enq waiting for output - * space - */ - struct list_head ksnc_tx_queue; /* packets waiting to be sent */ - struct ksock_tx *ksnc_tx_carrier; /* next TX that can carry a LNet - * message or ZC-ACK - */ - unsigned long ksnc_tx_deadline; /* when (in jiffies) tx times out - */ - int ksnc_tx_bufnob; /* send buffer marker */ - atomic_t ksnc_tx_nob; /* # bytes queued */ - int ksnc_tx_ready; /* write space */ - int ksnc_tx_scheduled; /* being progressed */ - unsigned long ksnc_tx_last_post; /* time stamp of the last posted - * TX - */ -}; - -struct ksock_route { - struct list_head ksnr_list; /* chain on peer route list */ - struct list_head ksnr_connd_list; /* chain on ksnr_connd_routes */ - struct ksock_peer *ksnr_peer; /* owning peer */ - atomic_t ksnr_refcount; /* # users */ - unsigned long ksnr_timeout; /* when (in jiffies) reconnection - * can happen 
next - */ - long ksnr_retry_interval; /* how long between retries */ - __u32 ksnr_myipaddr; /* my IP */ - __u32 ksnr_ipaddr; /* IP address to connect to */ - int ksnr_port; /* port to connect to */ - unsigned int ksnr_scheduled:1; /* scheduled for attention */ - unsigned int ksnr_connecting:1; /* connection establishment in - * progress - */ - unsigned int ksnr_connected:4; /* connections established by - * type - */ - unsigned int ksnr_deleted:1; /* been removed from peer? */ - unsigned int ksnr_share_count; /* created explicitly? */ - int ksnr_conn_count; /* # conns established by this - * route - */ -}; - -#define SOCKNAL_KEEPALIVE_PING 1 /* cookie for keepalive ping */ - -struct ksock_peer { - struct list_head ksnp_list; /* stash on global peer list */ - unsigned long ksnp_last_alive; /* when (in jiffies) I was last - * alive - */ - struct lnet_process_id ksnp_id; /* who's on the other end(s) */ - atomic_t ksnp_refcount; /* # users */ - int ksnp_sharecount; /* lconf usage counter */ - int ksnp_closing; /* being closed */ - int ksnp_accepting; /* # passive connections pending - */ - int ksnp_error; /* errno on closing last conn */ - __u64 ksnp_zc_next_cookie; /* ZC completion cookie */ - __u64 ksnp_incarnation; /* latest known peer incarnation - */ - struct ksock_proto *ksnp_proto; /* latest known peer protocol */ - struct list_head ksnp_conns; /* all active connections */ - struct list_head ksnp_routes; /* routes */ - struct list_head ksnp_tx_queue; /* waiting packets */ - spinlock_t ksnp_lock; /* serialize, g_lock unsafe */ - struct list_head ksnp_zc_req_list; /* zero copy requests wait for - * ACK - */ - unsigned long ksnp_send_keepalive; /* time to send keepalive */ - struct lnet_ni *ksnp_ni; /* which network */ - int ksnp_n_passive_ips; /* # of... 
*/ - - /* preferred local interfaces */ - __u32 ksnp_passive_ips[LNET_MAX_INTERFACES]; -}; - -struct ksock_connreq { - struct list_head ksncr_list; /* stash on ksnd_connd_connreqs */ - struct lnet_ni *ksncr_ni; /* chosen NI */ - struct socket *ksncr_sock; /* accepted socket */ -}; - -extern struct ksock_nal_data ksocknal_data; -extern struct ksock_tunables ksocknal_tunables; - -#define SOCKNAL_MATCH_NO 0 /* TX can't match type of connection */ -#define SOCKNAL_MATCH_YES 1 /* TX matches type of connection */ -#define SOCKNAL_MATCH_MAY 2 /* TX can be sent on the connection, but not - * preferred - */ - -struct ksock_proto { - /* version number of protocol */ - int pro_version; - - /* handshake function */ - int (*pro_send_hello)(struct ksock_conn *, struct ksock_hello_msg *); - - /* handshake function */ - int (*pro_recv_hello)(struct ksock_conn *, struct ksock_hello_msg *, int); - - /* message pack */ - void (*pro_pack)(struct ksock_tx *); - - /* message unpack */ - void (*pro_unpack)(struct ksock_msg *); - - /* queue tx on the connection */ - struct ksock_tx *(*pro_queue_tx_msg)(struct ksock_conn *, struct ksock_tx *); - - /* queue ZC ack on the connection */ - int (*pro_queue_tx_zcack)(struct ksock_conn *, struct ksock_tx *, __u64); - - /* handle ZC request */ - int (*pro_handle_zcreq)(struct ksock_conn *, __u64, int); - - /* handle ZC ACK */ - int (*pro_handle_zcack)(struct ksock_conn *, __u64, __u64); - - /* - * msg type matches the connection type: - * return value: - * return MATCH_NO : no - * return MATCH_YES : matching type - * return MATCH_MAY : can be backup - */ - int (*pro_match_tx)(struct ksock_conn *, struct ksock_tx *, int); -}; - -extern struct ksock_proto ksocknal_protocol_v1x; -extern struct ksock_proto ksocknal_protocol_v2x; -extern struct ksock_proto ksocknal_protocol_v3x; - -#define KSOCK_PROTO_V1_MAJOR LNET_PROTO_TCP_VERSION_MAJOR -#define KSOCK_PROTO_V1_MINOR LNET_PROTO_TCP_VERSION_MINOR -#define KSOCK_PROTO_V1 KSOCK_PROTO_V1_MAJOR - -#ifndef 
CPU_MASK_NONE -#define CPU_MASK_NONE 0UL -#endif - -static inline int -ksocknal_route_mask(void) -{ - if (!*ksocknal_tunables.ksnd_typed_conns) - return (1 << SOCKLND_CONN_ANY); - - return ((1 << SOCKLND_CONN_CONTROL) | - (1 << SOCKLND_CONN_BULK_IN) | - (1 << SOCKLND_CONN_BULK_OUT)); -} - -static inline struct list_head * -ksocknal_nid2peerlist(lnet_nid_t nid) -{ - unsigned int hash = ((unsigned int)nid) % ksocknal_data.ksnd_peer_hash_size; - - return &ksocknal_data.ksnd_peers[hash]; -} - -static inline void -ksocknal_conn_addref(struct ksock_conn *conn) -{ - LASSERT(atomic_read(&conn->ksnc_conn_refcount) > 0); - atomic_inc(&conn->ksnc_conn_refcount); -} - -void ksocknal_queue_zombie_conn(struct ksock_conn *conn); -void ksocknal_finalize_zcreq(struct ksock_conn *conn); - -static inline void -ksocknal_conn_decref(struct ksock_conn *conn) -{ - LASSERT(atomic_read(&conn->ksnc_conn_refcount) > 0); - if (atomic_dec_and_test(&conn->ksnc_conn_refcount)) - ksocknal_queue_zombie_conn(conn); -} - -static inline int -ksocknal_connsock_addref(struct ksock_conn *conn) -{ - int rc = -ESHUTDOWN; - - read_lock(&ksocknal_data.ksnd_global_lock); - if (!conn->ksnc_closing) { - LASSERT(atomic_read(&conn->ksnc_sock_refcount) > 0); - atomic_inc(&conn->ksnc_sock_refcount); - rc = 0; - } - read_unlock(&ksocknal_data.ksnd_global_lock); - - return rc; -} - -static inline void -ksocknal_connsock_decref(struct ksock_conn *conn) -{ - LASSERT(atomic_read(&conn->ksnc_sock_refcount) > 0); - if (atomic_dec_and_test(&conn->ksnc_sock_refcount)) { - LASSERT(conn->ksnc_closing); - sock_release(conn->ksnc_sock); - conn->ksnc_sock = NULL; - ksocknal_finalize_zcreq(conn); - } -} - -static inline void -ksocknal_tx_addref(struct ksock_tx *tx) -{ - LASSERT(atomic_read(&tx->tx_refcount) > 0); - atomic_inc(&tx->tx_refcount); -} - -void ksocknal_tx_prep(struct ksock_conn *, struct ksock_tx *tx); -void ksocknal_tx_done(struct lnet_ni *ni, struct ksock_tx *tx); - -static inline void -ksocknal_tx_decref(struct 
ksock_tx *tx) -{ - LASSERT(atomic_read(&tx->tx_refcount) > 0); - if (atomic_dec_and_test(&tx->tx_refcount)) - ksocknal_tx_done(NULL, tx); -} - -static inline void -ksocknal_route_addref(struct ksock_route *route) -{ - LASSERT(atomic_read(&route->ksnr_refcount) > 0); - atomic_inc(&route->ksnr_refcount); -} - -void ksocknal_destroy_route(struct ksock_route *route); - -static inline void -ksocknal_route_decref(struct ksock_route *route) -{ - LASSERT(atomic_read(&route->ksnr_refcount) > 0); - if (atomic_dec_and_test(&route->ksnr_refcount)) - ksocknal_destroy_route(route); -} - -static inline void -ksocknal_peer_addref(struct ksock_peer *peer) -{ - LASSERT(atomic_read(&peer->ksnp_refcount) > 0); - atomic_inc(&peer->ksnp_refcount); -} - -void ksocknal_destroy_peer(struct ksock_peer *peer); - -static inline void -ksocknal_peer_decref(struct ksock_peer *peer) -{ - LASSERT(atomic_read(&peer->ksnp_refcount) > 0); - if (atomic_dec_and_test(&peer->ksnp_refcount)) - ksocknal_destroy_peer(peer); -} - -int ksocknal_startup(struct lnet_ni *ni); -void ksocknal_shutdown(struct lnet_ni *ni); -int ksocknal_ctl(struct lnet_ni *ni, unsigned int cmd, void *arg); -int ksocknal_send(struct lnet_ni *ni, void *private, struct lnet_msg *lntmsg); -int ksocknal_recv(struct lnet_ni *ni, void *private, struct lnet_msg *lntmsg, - int delayed, struct iov_iter *to, unsigned int rlen); -int ksocknal_accept(struct lnet_ni *ni, struct socket *sock); - -int ksocknal_add_peer(struct lnet_ni *ni, struct lnet_process_id id, __u32 ip, - int port); -struct ksock_peer *ksocknal_find_peer_locked(struct lnet_ni *ni, - struct lnet_process_id id); -struct ksock_peer *ksocknal_find_peer(struct lnet_ni *ni, - struct lnet_process_id id); -void ksocknal_peer_failed(struct ksock_peer *peer); -int ksocknal_create_conn(struct lnet_ni *ni, struct ksock_route *route, - struct socket *sock, int type); -void ksocknal_close_conn_locked(struct ksock_conn *conn, int why); -void ksocknal_terminate_conn(struct ksock_conn *conn); 
-void ksocknal_destroy_conn(struct ksock_conn *conn); -int ksocknal_close_peer_conns_locked(struct ksock_peer *peer, - __u32 ipaddr, int why); -int ksocknal_close_conn_and_siblings(struct ksock_conn *conn, int why); -int ksocknal_close_matching_conns(struct lnet_process_id id, __u32 ipaddr); -struct ksock_conn *ksocknal_find_conn_locked(struct ksock_peer *peer, - struct ksock_tx *tx, int nonblk); - -int ksocknal_launch_packet(struct lnet_ni *ni, struct ksock_tx *tx, - struct lnet_process_id id); -struct ksock_tx *ksocknal_alloc_tx(int type, int size); -void ksocknal_free_tx(struct ksock_tx *tx); -struct ksock_tx *ksocknal_alloc_tx_noop(__u64 cookie, int nonblk); -void ksocknal_next_tx_carrier(struct ksock_conn *conn); -void ksocknal_queue_tx_locked(struct ksock_tx *tx, struct ksock_conn *conn); -void ksocknal_txlist_done(struct lnet_ni *ni, struct list_head *txlist, int error); -void ksocknal_notify(struct lnet_ni *ni, lnet_nid_t gw_nid, int alive); -void ksocknal_query(struct lnet_ni *ni, lnet_nid_t nid, unsigned long *when); -int ksocknal_thread_start(int (*fn)(void *arg), void *arg, char *name); -void ksocknal_thread_fini(void); -void ksocknal_launch_all_connections_locked(struct ksock_peer *peer); -struct ksock_route *ksocknal_find_connectable_route_locked(struct ksock_peer *peer); -struct ksock_route *ksocknal_find_connecting_route_locked(struct ksock_peer *peer); -int ksocknal_new_packet(struct ksock_conn *conn, int skip); -int ksocknal_scheduler(void *arg); -int ksocknal_connd(void *arg); -int ksocknal_reaper(void *arg); -int ksocknal_send_hello(struct lnet_ni *ni, struct ksock_conn *conn, - lnet_nid_t peer_nid, struct ksock_hello_msg *hello); -int ksocknal_recv_hello(struct lnet_ni *ni, struct ksock_conn *conn, - struct ksock_hello_msg *hello, - struct lnet_process_id *id, - __u64 *incarnation); -void ksocknal_read_callback(struct ksock_conn *conn); -void ksocknal_write_callback(struct ksock_conn *conn); - -int ksocknal_lib_zc_capable(struct ksock_conn 
*conn); -void ksocknal_lib_save_callback(struct socket *sock, struct ksock_conn *conn); -void ksocknal_lib_set_callback(struct socket *sock, struct ksock_conn *conn); -void ksocknal_lib_reset_callback(struct socket *sock, struct ksock_conn *conn); -void ksocknal_lib_push_conn(struct ksock_conn *conn); -int ksocknal_lib_get_conn_addrs(struct ksock_conn *conn); -int ksocknal_lib_setup_sock(struct socket *so); -int ksocknal_lib_send_iov(struct ksock_conn *conn, struct ksock_tx *tx); -int ksocknal_lib_send_kiov(struct ksock_conn *conn, struct ksock_tx *tx); -void ksocknal_lib_eager_ack(struct ksock_conn *conn); -int ksocknal_lib_recv(struct ksock_conn *conn); -int ksocknal_lib_get_conn_tunables(struct ksock_conn *conn, int *txmem, - int *rxmem, int *nagle); - -void ksocknal_read_callback(struct ksock_conn *conn); -void ksocknal_write_callback(struct ksock_conn *conn); - -int ksocknal_tunables_init(void); - -void ksocknal_lib_csum_tx(struct ksock_tx *tx); - -int ksocknal_lib_memory_pressure(struct ksock_conn *conn); -int ksocknal_lib_bind_thread_to_cpu(int id); - -#endif /* _SOCKLND_SOCKLND_H_ */ diff --git a/drivers/staging/lustre/lnet/klnds/socklnd/socklnd_cb.c b/drivers/staging/lustre/lnet/klnds/socklnd/socklnd_cb.c deleted file mode 100644 index 036fecbcede8..000000000000 --- a/drivers/staging/lustre/lnet/klnds/socklnd/socklnd_cb.c +++ /dev/null @@ -1,2592 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -/* - * Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved. - * - * Copyright (c) 2011, 2012, Intel Corporation. - * - * Author: Zach Brown <zab@zabbo.net> - * Author: Peter J. 
Braam <braam@clusterfs.com> - * Author: Phil Schwan <phil@clusterfs.com> - * Author: Eric Barton <eric@bartonsoftware.com> - * - * This file is part of Portals, http://www.sf.net/projects/sandiaportals/ - * - * Portals is free software; you can redistribute it and/or - * modify it under the terms of version 2 of the GNU General Public - * License as published by the Free Software Foundation. - * - * Portals is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - */ - -#include "socklnd.h" - -struct ksock_tx * -ksocknal_alloc_tx(int type, int size) -{ - struct ksock_tx *tx = NULL; - - if (type == KSOCK_MSG_NOOP) { - LASSERT(size == KSOCK_NOOP_TX_SIZE); - - /* searching for a noop tx in free list */ - spin_lock(&ksocknal_data.ksnd_tx_lock); - - if (!list_empty(&ksocknal_data.ksnd_idle_noop_txs)) { - tx = list_entry(ksocknal_data.ksnd_idle_noop_txs.next, - struct ksock_tx, tx_list); - LASSERT(tx->tx_desc_size == size); - list_del(&tx->tx_list); - } - - spin_unlock(&ksocknal_data.ksnd_tx_lock); - } - - if (!tx) - tx = kzalloc(size, GFP_NOFS); - - if (!tx) - return NULL; - - atomic_set(&tx->tx_refcount, 1); - tx->tx_zc_aborted = 0; - tx->tx_zc_capable = 0; - tx->tx_zc_checked = 0; - tx->tx_desc_size = size; - - atomic_inc(&ksocknal_data.ksnd_nactive_txs); - - return tx; -} - -struct ksock_tx * -ksocknal_alloc_tx_noop(__u64 cookie, int nonblk) -{ - struct ksock_tx *tx; - - tx = ksocknal_alloc_tx(KSOCK_MSG_NOOP, KSOCK_NOOP_TX_SIZE); - if (!tx) { - CERROR("Can't allocate noop tx desc\n"); - return NULL; - } - - tx->tx_conn = NULL; - tx->tx_lnetmsg = NULL; - tx->tx_kiov = NULL; - tx->tx_nkiov = 0; - tx->tx_iov = tx->tx_frags.virt.iov; - tx->tx_niov = 1; - tx->tx_nonblk = nonblk; - - tx->tx_msg.ksm_csum = 0; - tx->tx_msg.ksm_type = KSOCK_MSG_NOOP; - tx->tx_msg.ksm_zc_cookies[0] = 0; - 
tx->tx_msg.ksm_zc_cookies[1] = cookie; - - return tx; -} - -void -ksocknal_free_tx(struct ksock_tx *tx) -{ - atomic_dec(&ksocknal_data.ksnd_nactive_txs); - - if (!tx->tx_lnetmsg && tx->tx_desc_size == KSOCK_NOOP_TX_SIZE) { - /* it's a noop tx */ - spin_lock(&ksocknal_data.ksnd_tx_lock); - - list_add(&tx->tx_list, &ksocknal_data.ksnd_idle_noop_txs); - - spin_unlock(&ksocknal_data.ksnd_tx_lock); - } else { - kfree(tx); - } -} - -static int -ksocknal_send_iov(struct ksock_conn *conn, struct ksock_tx *tx) -{ - struct kvec *iov = tx->tx_iov; - int nob; - int rc; - - LASSERT(tx->tx_niov > 0); - - /* Never touch tx->tx_iov inside ksocknal_lib_send_iov() */ - rc = ksocknal_lib_send_iov(conn, tx); - - if (rc <= 0) /* sent nothing? */ - return rc; - - nob = rc; - LASSERT(nob <= tx->tx_resid); - tx->tx_resid -= nob; - - /* "consume" iov */ - do { - LASSERT(tx->tx_niov > 0); - - if (nob < (int)iov->iov_len) { - iov->iov_base = (void *)((char *)iov->iov_base + nob); - iov->iov_len -= nob; - return rc; - } - - nob -= iov->iov_len; - tx->tx_iov = ++iov; - tx->tx_niov--; - } while (nob); - - return rc; -} - -static int -ksocknal_send_kiov(struct ksock_conn *conn, struct ksock_tx *tx) -{ - struct bio_vec *kiov = tx->tx_kiov; - int nob; - int rc; - - LASSERT(!tx->tx_niov); - LASSERT(tx->tx_nkiov > 0); - - /* Never touch tx->tx_kiov inside ksocknal_lib_send_kiov() */ - rc = ksocknal_lib_send_kiov(conn, tx); - - if (rc <= 0) /* sent nothing? 
*/ - return rc; - - nob = rc; - LASSERT(nob <= tx->tx_resid); - tx->tx_resid -= nob; - - /* "consume" kiov */ - do { - LASSERT(tx->tx_nkiov > 0); - - if (nob < (int)kiov->bv_len) { - kiov->bv_offset += nob; - kiov->bv_len -= nob; - return rc; - } - - nob -= (int)kiov->bv_len; - tx->tx_kiov = ++kiov; - tx->tx_nkiov--; - } while (nob); - - return rc; -} - -static int -ksocknal_transmit(struct ksock_conn *conn, struct ksock_tx *tx) -{ - int rc; - int bufnob; - - if (ksocknal_data.ksnd_stall_tx) { - set_current_state(TASK_UNINTERRUPTIBLE); - schedule_timeout(ksocknal_data.ksnd_stall_tx * HZ); - } - - LASSERT(tx->tx_resid); - - rc = ksocknal_connsock_addref(conn); - if (rc) { - LASSERT(conn->ksnc_closing); - return -ESHUTDOWN; - } - - do { - if (ksocknal_data.ksnd_enomem_tx > 0) { - /* testing... */ - ksocknal_data.ksnd_enomem_tx--; - rc = -EAGAIN; - } else if (tx->tx_niov) { - rc = ksocknal_send_iov(conn, tx); - } else { - rc = ksocknal_send_kiov(conn, tx); - } - - bufnob = conn->ksnc_sock->sk->sk_wmem_queued; - if (rc > 0) /* sent something? */ - conn->ksnc_tx_bufnob += rc; /* account it */ - - if (bufnob < conn->ksnc_tx_bufnob) { - /* - * allocated send buffer bytes < computed; infer - * something got ACKed - */ - conn->ksnc_tx_deadline = - cfs_time_shift(*ksocknal_tunables.ksnd_timeout); - conn->ksnc_peer->ksnp_last_alive = cfs_time_current(); - conn->ksnc_tx_bufnob = bufnob; - mb(); - } - - if (rc <= 0) { /* Didn't write anything? 
*/ - - if (!rc) /* some stacks return 0 instead of -EAGAIN */ - rc = -EAGAIN; - - /* Check if EAGAIN is due to memory pressure */ - if (rc == -EAGAIN && ksocknal_lib_memory_pressure(conn)) - rc = -ENOMEM; - - break; - } - - /* socket's wmem_queued now includes 'rc' bytes */ - atomic_sub(rc, &conn->ksnc_tx_nob); - rc = 0; - - } while (tx->tx_resid); - - ksocknal_connsock_decref(conn); - return rc; -} - -static int -ksocknal_recv_iter(struct ksock_conn *conn) -{ - int nob; - int rc; - - /* - * Never touch conn->ksnc_rx_to or change connection - * status inside ksocknal_lib_recv - */ - rc = ksocknal_lib_recv(conn); - - if (rc <= 0) - return rc; - - /* received something... */ - nob = rc; - - conn->ksnc_peer->ksnp_last_alive = cfs_time_current(); - conn->ksnc_rx_deadline = - cfs_time_shift(*ksocknal_tunables.ksnd_timeout); - mb(); /* order with setting rx_started */ - conn->ksnc_rx_started = 1; - - conn->ksnc_rx_nob_left -= nob; - - iov_iter_advance(&conn->ksnc_rx_to, nob); - if (iov_iter_count(&conn->ksnc_rx_to)) - return -EAGAIN; - - return 1; -} - -static int -ksocknal_receive(struct ksock_conn *conn) -{ - /* - * Return 1 on success, 0 on EOF, < 0 on error. - * Caller checks ksnc_rx_to to determine - * progress/completion. 
- */ - int rc; - - if (ksocknal_data.ksnd_stall_rx) { - set_current_state(TASK_UNINTERRUPTIBLE); - schedule_timeout(ksocknal_data.ksnd_stall_rx * HZ); - } - - rc = ksocknal_connsock_addref(conn); - if (rc) { - LASSERT(conn->ksnc_closing); - return -ESHUTDOWN; - } - - for (;;) { - rc = ksocknal_recv_iter(conn); - if (rc <= 0) { - /* error/EOF or partial receive */ - if (rc == -EAGAIN) { - rc = 1; - } else if (!rc && conn->ksnc_rx_started) { - /* EOF in the middle of a message */ - rc = -EPROTO; - } - break; - } - - /* Completed a fragment */ - - if (!iov_iter_count(&conn->ksnc_rx_to)) { - rc = 1; - break; - } - } - - ksocknal_connsock_decref(conn); - return rc; -} - -void -ksocknal_tx_done(struct lnet_ni *ni, struct ksock_tx *tx) -{ - struct lnet_msg *lnetmsg = tx->tx_lnetmsg; - int rc = (!tx->tx_resid && !tx->tx_zc_aborted) ? 0 : -EIO; - - LASSERT(ni || tx->tx_conn); - - if (tx->tx_conn) - ksocknal_conn_decref(tx->tx_conn); - - if (!ni && tx->tx_conn) - ni = tx->tx_conn->ksnc_peer->ksnp_ni; - - ksocknal_free_tx(tx); - if (lnetmsg) /* KSOCK_MSG_NOOP go without lnetmsg */ - lnet_finalize(ni, lnetmsg, rc); -} - -void -ksocknal_txlist_done(struct lnet_ni *ni, struct list_head *txlist, int error) -{ - struct ksock_tx *tx; - - while (!list_empty(txlist)) { - tx = list_entry(txlist->next, struct ksock_tx, tx_list); - - if (error && tx->tx_lnetmsg) { - CNETERR("Deleting packet type %d len %d %s->%s\n", - le32_to_cpu(tx->tx_lnetmsg->msg_hdr.type), - le32_to_cpu(tx->tx_lnetmsg->msg_hdr.payload_length), - libcfs_nid2str(le64_to_cpu(tx->tx_lnetmsg->msg_hdr.src_nid)), - libcfs_nid2str(le64_to_cpu(tx->tx_lnetmsg->msg_hdr.dest_nid))); - } else if (error) { - CNETERR("Deleting noop packet\n"); - } - - list_del(&tx->tx_list); - - LASSERT(atomic_read(&tx->tx_refcount) == 1); - ksocknal_tx_done(ni, tx); - } -} - -static void -ksocknal_check_zc_req(struct ksock_tx *tx) -{ - struct ksock_conn *conn = tx->tx_conn; - struct ksock_peer *peer = conn->ksnc_peer; - - /* - * Set 
tx_msg.ksm_zc_cookies[0] to a unique non-zero cookie and add tx - * to ksnp_zc_req_list if some fragment of this message should be sent - * zero-copy. Our peer will send an ACK containing this cookie when - * she has received this message to tell us we can signal completion. - * tx_msg.ksm_zc_cookies[0] remains non-zero while tx is on - * ksnp_zc_req_list. - */ - LASSERT(tx->tx_msg.ksm_type != KSOCK_MSG_NOOP); - LASSERT(tx->tx_zc_capable); - - tx->tx_zc_checked = 1; - - if (conn->ksnc_proto == &ksocknal_protocol_v1x || - !conn->ksnc_zc_capable) - return; - - /* - * assign cookie and queue tx to pending list, it will be released when - * a matching ack is received. See ksocknal_handle_zcack() - */ - ksocknal_tx_addref(tx); - - spin_lock(&peer->ksnp_lock); - - /* ZC_REQ is going to be pinned to the peer */ - tx->tx_deadline = - cfs_time_shift(*ksocknal_tunables.ksnd_timeout); - - LASSERT(!tx->tx_msg.ksm_zc_cookies[0]); - - tx->tx_msg.ksm_zc_cookies[0] = peer->ksnp_zc_next_cookie++; - - if (!peer->ksnp_zc_next_cookie) - peer->ksnp_zc_next_cookie = SOCKNAL_KEEPALIVE_PING + 1; - - list_add_tail(&tx->tx_zc_list, &peer->ksnp_zc_req_list); - - spin_unlock(&peer->ksnp_lock); -} - -static void -ksocknal_uncheck_zc_req(struct ksock_tx *tx) -{ - struct ksock_peer *peer = tx->tx_conn->ksnc_peer; - - LASSERT(tx->tx_msg.ksm_type != KSOCK_MSG_NOOP); - LASSERT(tx->tx_zc_capable); - - tx->tx_zc_checked = 0; - - spin_lock(&peer->ksnp_lock); - - if (!tx->tx_msg.ksm_zc_cookies[0]) { - /* Not waiting for an ACK */ - spin_unlock(&peer->ksnp_lock); - return; - } - - tx->tx_msg.ksm_zc_cookies[0] = 0; - list_del(&tx->tx_zc_list); - - spin_unlock(&peer->ksnp_lock); - - ksocknal_tx_decref(tx); -} - -static int -ksocknal_process_transmit(struct ksock_conn *conn, struct ksock_tx *tx) -{ - int rc; - - if (tx->tx_zc_capable && !tx->tx_zc_checked) - ksocknal_check_zc_req(tx); - - rc = ksocknal_transmit(conn, tx); - - CDEBUG(D_NET, "send(%d) %d\n", tx->tx_resid, rc); - - if (!tx->tx_resid) { - /* 
Sent everything OK */ - LASSERT(!rc); - - return 0; - } - - if (rc == -EAGAIN) - return rc; - - if (rc == -ENOMEM) { - static int counter; - - counter++; /* exponential backoff warnings */ - if ((counter & (-counter)) == counter) - CWARN("%u ENOMEM tx %p\n", counter, conn); - - /* Queue on ksnd_enomem_conns for retry after a timeout */ - spin_lock_bh(&ksocknal_data.ksnd_reaper_lock); - - /* enomem list takes over scheduler's ref... */ - LASSERT(conn->ksnc_tx_scheduled); - list_add_tail(&conn->ksnc_tx_list, - &ksocknal_data.ksnd_enomem_conns); - if (!cfs_time_aftereq(cfs_time_add(cfs_time_current(), - SOCKNAL_ENOMEM_RETRY), - ksocknal_data.ksnd_reaper_waketime)) - wake_up(&ksocknal_data.ksnd_reaper_waitq); - - spin_unlock_bh(&ksocknal_data.ksnd_reaper_lock); - return rc; - } - - /* Actual error */ - LASSERT(rc < 0); - - if (!conn->ksnc_closing) { - switch (rc) { - case -ECONNRESET: - LCONSOLE_WARN("Host %pI4h reset our connection while we were sending data; it may have rebooted.\n", - &conn->ksnc_ipaddr); - break; - default: - LCONSOLE_WARN("There was an unexpected network error while writing to %pI4h: %d.\n", - &conn->ksnc_ipaddr, rc); - break; - } - CDEBUG(D_NET, "[%p] Error %d on write to %s ip %pI4h:%d\n", - conn, rc, - libcfs_id2str(conn->ksnc_peer->ksnp_id), - &conn->ksnc_ipaddr, - conn->ksnc_port); - } - - if (tx->tx_zc_checked) - ksocknal_uncheck_zc_req(tx); - - /* it's not an error if conn is being closed */ - ksocknal_close_conn_and_siblings(conn, (conn->ksnc_closing) ? 
0 : rc); - - return rc; -} - -static void -ksocknal_launch_connection_locked(struct ksock_route *route) -{ - /* called holding write lock on ksnd_global_lock */ - - LASSERT(!route->ksnr_scheduled); - LASSERT(!route->ksnr_connecting); - LASSERT(ksocknal_route_mask() & ~route->ksnr_connected); - - route->ksnr_scheduled = 1; /* scheduling conn for connd */ - ksocknal_route_addref(route); /* extra ref for connd */ - - spin_lock_bh(&ksocknal_data.ksnd_connd_lock); - - list_add_tail(&route->ksnr_connd_list, - &ksocknal_data.ksnd_connd_routes); - wake_up(&ksocknal_data.ksnd_connd_waitq); - - spin_unlock_bh(&ksocknal_data.ksnd_connd_lock); -} - -void -ksocknal_launch_all_connections_locked(struct ksock_peer *peer) -{ - struct ksock_route *route; - - /* called holding write lock on ksnd_global_lock */ - for (;;) { - /* launch any/all connections that need it */ - route = ksocknal_find_connectable_route_locked(peer); - if (!route) - return; - - ksocknal_launch_connection_locked(route); - } -} - -struct ksock_conn * -ksocknal_find_conn_locked(struct ksock_peer *peer, struct ksock_tx *tx, - int nonblk) -{ - struct list_head *tmp; - struct ksock_conn *conn; - struct ksock_conn *typed = NULL; - struct ksock_conn *fallback = NULL; - int tnob = 0; - int fnob = 0; - - list_for_each(tmp, &peer->ksnp_conns) { - struct ksock_conn *c; - int nob, rc; - - c = list_entry(tmp, struct ksock_conn, ksnc_list); - nob = atomic_read(&c->ksnc_tx_nob) + - c->ksnc_sock->sk->sk_wmem_queued; - - LASSERT(!c->ksnc_closing); - LASSERT(c->ksnc_proto && - c->ksnc_proto->pro_match_tx); - - rc = c->ksnc_proto->pro_match_tx(c, tx, nonblk); - - switch (rc) { - default: - LBUG(); - case SOCKNAL_MATCH_NO: /* protocol rejected the tx */ - continue; - - case SOCKNAL_MATCH_YES: /* typed connection */ - if (!typed || tnob > nob || - (tnob == nob && *ksocknal_tunables.ksnd_round_robin && - cfs_time_after(typed->ksnc_tx_last_post, c->ksnc_tx_last_post))) { - typed = c; - tnob = nob; - } - break; - - case 
SOCKNAL_MATCH_MAY: /* fallback connection */ - if (!fallback || fnob > nob || - (fnob == nob && *ksocknal_tunables.ksnd_round_robin && - cfs_time_after(fallback->ksnc_tx_last_post, c->ksnc_tx_last_post))) { - fallback = c; - fnob = nob; - } - break; - } - } - - /* prefer the typed selection */ - conn = (typed) ? typed : fallback; - - if (conn) - conn->ksnc_tx_last_post = cfs_time_current(); - - return conn; -} - -void -ksocknal_tx_prep(struct ksock_conn *conn, struct ksock_tx *tx) -{ - conn->ksnc_proto->pro_pack(tx); - - atomic_add(tx->tx_nob, &conn->ksnc_tx_nob); - ksocknal_conn_addref(conn); /* +1 ref for tx */ - tx->tx_conn = conn; -} - -void -ksocknal_queue_tx_locked(struct ksock_tx *tx, struct ksock_conn *conn) -{ - struct ksock_sched *sched = conn->ksnc_scheduler; - struct ksock_msg *msg = &tx->tx_msg; - struct ksock_tx *ztx = NULL; - int bufnob = 0; - - /* - * called holding global lock (read or irq-write) and caller may - * not have dropped this lock between finding conn and calling me, - * so we don't need the {get,put}connsock dance to deref - * ksnc_sock... - */ - LASSERT(!conn->ksnc_closing); - - CDEBUG(D_NET, "Sending to %s ip %pI4h:%d\n", - libcfs_id2str(conn->ksnc_peer->ksnp_id), - &conn->ksnc_ipaddr, conn->ksnc_port); - - ksocknal_tx_prep(conn, tx); - - /* - * Ensure the frags we've been given EXACTLY match the number of - * bytes we want to send. Many TCP/IP stacks disregard any total - * size parameters passed to them and just look at the frags. - * - * We always expect at least 1 mapped fragment containing the - * complete ksocknal message header. - */ - LASSERT(lnet_iov_nob(tx->tx_niov, tx->tx_iov) + - lnet_kiov_nob(tx->tx_nkiov, tx->tx_kiov) == - (unsigned int)tx->tx_nob); - LASSERT(tx->tx_niov >= 1); - LASSERT(tx->tx_resid == tx->tx_nob); - - CDEBUG(D_NET, "Packet %p type %d, nob %d niov %d nkiov %d\n", - tx, (tx->tx_lnetmsg) ? 
tx->tx_lnetmsg->msg_hdr.type : - KSOCK_MSG_NOOP, - tx->tx_nob, tx->tx_niov, tx->tx_nkiov); - - /* - * FIXME: SOCK_WMEM_QUEUED and SOCK_ERROR could block in __DARWIN8__ - * but they're used inside spinlocks a lot. - */ - bufnob = conn->ksnc_sock->sk->sk_wmem_queued; - spin_lock_bh(&sched->kss_lock); - - if (list_empty(&conn->ksnc_tx_queue) && !bufnob) { - /* First packet starts the timeout */ - conn->ksnc_tx_deadline = - cfs_time_shift(*ksocknal_tunables.ksnd_timeout); - if (conn->ksnc_tx_bufnob > 0) /* something got ACKed */ - conn->ksnc_peer->ksnp_last_alive = cfs_time_current(); - conn->ksnc_tx_bufnob = 0; - mb(); /* order with adding to tx_queue */ - } - - if (msg->ksm_type == KSOCK_MSG_NOOP) { - /* - * The packet is noop ZC ACK, try to piggyback the ack_cookie - * on a normal packet so I don't need to send it - */ - LASSERT(msg->ksm_zc_cookies[1]); - LASSERT(conn->ksnc_proto->pro_queue_tx_zcack); - - /* ZC ACK piggybacked on ztx release tx later */ - if (conn->ksnc_proto->pro_queue_tx_zcack(conn, tx, 0)) - ztx = tx; - } else { - /* - * It's a normal packet - can it piggback a noop zc-ack that - * has been queued already? 
- */ - LASSERT(!msg->ksm_zc_cookies[1]); - LASSERT(conn->ksnc_proto->pro_queue_tx_msg); - - ztx = conn->ksnc_proto->pro_queue_tx_msg(conn, tx); - /* ztx will be released later */ - } - - if (ztx) { - atomic_sub(ztx->tx_nob, &conn->ksnc_tx_nob); - list_add_tail(&ztx->tx_list, &sched->kss_zombie_noop_txs); - } - - if (conn->ksnc_tx_ready && /* able to send */ - !conn->ksnc_tx_scheduled) { /* not scheduled to send */ - /* +1 ref for scheduler */ - ksocknal_conn_addref(conn); - list_add_tail(&conn->ksnc_tx_list, &sched->kss_tx_conns); - conn->ksnc_tx_scheduled = 1; - wake_up(&sched->kss_waitq); - } - - spin_unlock_bh(&sched->kss_lock); -} - -struct ksock_route * -ksocknal_find_connectable_route_locked(struct ksock_peer *peer) -{ - unsigned long now = cfs_time_current(); - struct list_head *tmp; - struct ksock_route *route; - - list_for_each(tmp, &peer->ksnp_routes) { - route = list_entry(tmp, struct ksock_route, ksnr_list); - - LASSERT(!route->ksnr_connecting || route->ksnr_scheduled); - - /* connections being established */ - if (route->ksnr_scheduled) - continue; - - /* all route types connected ? 
*/ - if (!(ksocknal_route_mask() & ~route->ksnr_connected)) - continue; - - if (!(!route->ksnr_retry_interval || /* first attempt */ - cfs_time_aftereq(now, route->ksnr_timeout))) { - CDEBUG(D_NET, - "Too soon to retry route %pI4h (cnted %d, interval %ld, %ld secs later)\n", - &route->ksnr_ipaddr, - route->ksnr_connected, - route->ksnr_retry_interval, - cfs_duration_sec(route->ksnr_timeout - now)); - continue; - } - - return route; - } - - return NULL; -} - -struct ksock_route * -ksocknal_find_connecting_route_locked(struct ksock_peer *peer) -{ - struct list_head *tmp; - struct ksock_route *route; - - list_for_each(tmp, &peer->ksnp_routes) { - route = list_entry(tmp, struct ksock_route, ksnr_list); - - LASSERT(!route->ksnr_connecting || route->ksnr_scheduled); - - if (route->ksnr_scheduled) - return route; - } - - return NULL; -} - -int -ksocknal_launch_packet(struct lnet_ni *ni, struct ksock_tx *tx, - struct lnet_process_id id) -{ - struct ksock_peer *peer; - struct ksock_conn *conn; - rwlock_t *g_lock; - int retry; - int rc; - - LASSERT(!tx->tx_conn); - - g_lock = &ksocknal_data.ksnd_global_lock; - - for (retry = 0;; retry = 1) { - read_lock(g_lock); - peer = ksocknal_find_peer_locked(ni, id); - if (peer) { - if (!ksocknal_find_connectable_route_locked(peer)) { - conn = ksocknal_find_conn_locked(peer, tx, tx->tx_nonblk); - if (conn) { - /* - * I've got no routes that need to be - * connecting and I do have an actual - * connection... - */ - ksocknal_queue_tx_locked(tx, conn); - read_unlock(g_lock); - return 0; - } - } - } - - /* I'll need a write lock... 
*/ - read_unlock(g_lock); - - write_lock_bh(g_lock); - - peer = ksocknal_find_peer_locked(ni, id); - if (peer) - break; - - write_unlock_bh(g_lock); - - if (id.pid & LNET_PID_USERFLAG) { - CERROR("Refusing to create a connection to userspace process %s\n", - libcfs_id2str(id)); - return -EHOSTUNREACH; - } - - if (retry) { - CERROR("Can't find peer %s\n", libcfs_id2str(id)); - return -EHOSTUNREACH; - } - - rc = ksocknal_add_peer(ni, id, - LNET_NIDADDR(id.nid), - lnet_acceptor_port()); - if (rc) { - CERROR("Can't add peer %s: %d\n", - libcfs_id2str(id), rc); - return rc; - } - } - - ksocknal_launch_all_connections_locked(peer); - - conn = ksocknal_find_conn_locked(peer, tx, tx->tx_nonblk); - if (conn) { - /* Connection exists; queue message on it */ - ksocknal_queue_tx_locked(tx, conn); - write_unlock_bh(g_lock); - return 0; - } - - if (peer->ksnp_accepting > 0 || - ksocknal_find_connecting_route_locked(peer)) { - /* the message is going to be pinned to the peer */ - tx->tx_deadline = - cfs_time_shift(*ksocknal_tunables.ksnd_timeout); - - /* Queue the message until a connection is established */ - list_add_tail(&tx->tx_list, &peer->ksnp_tx_queue); - write_unlock_bh(g_lock); - return 0; - } - - write_unlock_bh(g_lock); - - /* NB Routes may be ignored if connections to them failed recently */ - CNETERR("No usable routes to %s\n", libcfs_id2str(id)); - return -EHOSTUNREACH; -} - -int -ksocknal_send(struct lnet_ni *ni, void *private, struct lnet_msg *lntmsg) -{ - int mpflag = 1; - int type = lntmsg->msg_type; - struct lnet_process_id target = lntmsg->msg_target; - unsigned int payload_niov = lntmsg->msg_niov; - struct kvec *payload_iov = lntmsg->msg_iov; - struct bio_vec *payload_kiov = lntmsg->msg_kiov; - unsigned int payload_offset = lntmsg->msg_offset; - unsigned int payload_nob = lntmsg->msg_len; - struct ksock_tx *tx; - int desc_size; - int rc; - - /* - * NB 'private' is different depending on what we're sending. - * Just ignore it... 
- */ - CDEBUG(D_NET, "sending %u bytes in %d frags to %s\n", - payload_nob, payload_niov, libcfs_id2str(target)); - - LASSERT(!payload_nob || payload_niov > 0); - LASSERT(payload_niov <= LNET_MAX_IOV); - /* payload is either all vaddrs or all pages */ - LASSERT(!(payload_kiov && payload_iov)); - LASSERT(!in_interrupt()); - - if (payload_iov) - desc_size = offsetof(struct ksock_tx, - tx_frags.virt.iov[1 + payload_niov]); - else - desc_size = offsetof(struct ksock_tx, - tx_frags.paged.kiov[payload_niov]); - - if (lntmsg->msg_vmflush) - mpflag = cfs_memory_pressure_get_and_set(); - tx = ksocknal_alloc_tx(KSOCK_MSG_LNET, desc_size); - if (!tx) { - CERROR("Can't allocate tx desc type %d size %d\n", - type, desc_size); - if (lntmsg->msg_vmflush) - cfs_memory_pressure_restore(mpflag); - return -ENOMEM; - } - - tx->tx_conn = NULL; /* set when assigned a conn */ - tx->tx_lnetmsg = lntmsg; - - if (payload_iov) { - tx->tx_kiov = NULL; - tx->tx_nkiov = 0; - tx->tx_iov = tx->tx_frags.virt.iov; - tx->tx_niov = 1 + - lnet_extract_iov(payload_niov, &tx->tx_iov[1], - payload_niov, payload_iov, - payload_offset, payload_nob); - } else { - tx->tx_niov = 1; - tx->tx_iov = &tx->tx_frags.paged.iov; - tx->tx_kiov = tx->tx_frags.paged.kiov; - tx->tx_nkiov = lnet_extract_kiov(payload_niov, tx->tx_kiov, - payload_niov, payload_kiov, - payload_offset, payload_nob); - - if (payload_nob >= *ksocknal_tunables.ksnd_zc_min_payload) - tx->tx_zc_capable = 1; - } - - tx->tx_msg.ksm_csum = 0; - tx->tx_msg.ksm_type = KSOCK_MSG_LNET; - tx->tx_msg.ksm_zc_cookies[0] = 0; - tx->tx_msg.ksm_zc_cookies[1] = 0; - - /* The first fragment will be set later in pro_pack */ - rc = ksocknal_launch_packet(ni, tx, target); - if (!mpflag) - cfs_memory_pressure_restore(mpflag); - - if (!rc) - return 0; - - ksocknal_free_tx(tx); - return -EIO; -} - -int -ksocknal_thread_start(int (*fn)(void *arg), void *arg, char *name) -{ - struct task_struct *task = kthread_run(fn, arg, "%s", name); - - if (IS_ERR(task)) - return 
PTR_ERR(task); - - write_lock_bh(&ksocknal_data.ksnd_global_lock); - ksocknal_data.ksnd_nthreads++; - write_unlock_bh(&ksocknal_data.ksnd_global_lock); - return 0; -} - -void -ksocknal_thread_fini(void) -{ - write_lock_bh(&ksocknal_data.ksnd_global_lock); - ksocknal_data.ksnd_nthreads--; - write_unlock_bh(&ksocknal_data.ksnd_global_lock); -} - -int -ksocknal_new_packet(struct ksock_conn *conn, int nob_to_skip) -{ - static char ksocknal_slop_buffer[4096]; - struct kvec *kvec = conn->ksnc_rx_iov_space; - - int nob; - unsigned int niov; - int skipped; - - LASSERT(conn->ksnc_proto); - - if (*ksocknal_tunables.ksnd_eager_ack & conn->ksnc_type) { - /* Remind the socket to ack eagerly... */ - ksocknal_lib_eager_ack(conn); - } - - if (!nob_to_skip) { /* right at next packet boundary now */ - conn->ksnc_rx_started = 0; - mb(); /* racing with timeout thread */ - - switch (conn->ksnc_proto->pro_version) { - case KSOCK_PROTO_V2: - case KSOCK_PROTO_V3: - conn->ksnc_rx_state = SOCKNAL_RX_KSM_HEADER; - kvec->iov_base = &conn->ksnc_msg; - kvec->iov_len = offsetof(struct ksock_msg, ksm_u); - conn->ksnc_rx_nob_left = offsetof(struct ksock_msg, ksm_u); - iov_iter_kvec(&conn->ksnc_rx_to, READ|ITER_KVEC, kvec, - 1, offsetof(struct ksock_msg, ksm_u)); - break; - - case KSOCK_PROTO_V1: - /* Receiving bare struct lnet_hdr */ - conn->ksnc_rx_state = SOCKNAL_RX_LNET_HEADER; - kvec->iov_base = &conn->ksnc_msg.ksm_u.lnetmsg; - kvec->iov_len = sizeof(struct lnet_hdr); - conn->ksnc_rx_nob_left = sizeof(struct lnet_hdr); - iov_iter_kvec(&conn->ksnc_rx_to, READ|ITER_KVEC, kvec, - 1, sizeof(struct lnet_hdr)); - break; - - default: - LBUG(); - } - conn->ksnc_rx_csum = ~0; - return 1; - } - - /* - * Set up to skip as much as possible now. 
If there's more left - * (ran out of iov entries) we'll get called again - */ - conn->ksnc_rx_state = SOCKNAL_RX_SLOP; - conn->ksnc_rx_nob_left = nob_to_skip; - skipped = 0; - niov = 0; - - do { - nob = min_t(int, nob_to_skip, sizeof(ksocknal_slop_buffer)); - - kvec[niov].iov_base = ksocknal_slop_buffer; - kvec[niov].iov_len = nob; - niov++; - skipped += nob; - nob_to_skip -= nob; - - } while (nob_to_skip && /* mustn't overflow conn's rx iov */ - niov < sizeof(conn->ksnc_rx_iov_space) / sizeof(struct iovec)); - - iov_iter_kvec(&conn->ksnc_rx_to, READ|ITER_KVEC, kvec, niov, skipped); - return 0; -} - -static int -ksocknal_process_receive(struct ksock_conn *conn) -{ - struct kvec *kvec = conn->ksnc_rx_iov_space; - struct lnet_hdr *lhdr; - struct lnet_process_id *id; - int rc; - - LASSERT(atomic_read(&conn->ksnc_conn_refcount) > 0); - - /* NB: sched lock NOT held */ - /* SOCKNAL_RX_LNET_HEADER is here for backward compatibility */ - LASSERT(conn->ksnc_rx_state == SOCKNAL_RX_KSM_HEADER || - conn->ksnc_rx_state == SOCKNAL_RX_LNET_PAYLOAD || - conn->ksnc_rx_state == SOCKNAL_RX_LNET_HEADER || - conn->ksnc_rx_state == SOCKNAL_RX_SLOP); - again: - if (iov_iter_count(&conn->ksnc_rx_to)) { - rc = ksocknal_receive(conn); - - if (rc <= 0) { - LASSERT(rc != -EAGAIN); - - if (!rc) - CDEBUG(D_NET, "[%p] EOF from %s ip %pI4h:%d\n", - conn, - libcfs_id2str(conn->ksnc_peer->ksnp_id), - &conn->ksnc_ipaddr, - conn->ksnc_port); - else if (!conn->ksnc_closing) - CERROR("[%p] Error %d on read from %s ip %pI4h:%d\n", - conn, rc, - libcfs_id2str(conn->ksnc_peer->ksnp_id), - &conn->ksnc_ipaddr, - conn->ksnc_port); - - /* it's not an error if conn is being closed */ - ksocknal_close_conn_and_siblings(conn, - (conn->ksnc_closing) ? 0 : rc); - return (!rc ? 
-ESHUTDOWN : rc); - } - - if (iov_iter_count(&conn->ksnc_rx_to)) { - /* short read */ - return -EAGAIN; - } - } - switch (conn->ksnc_rx_state) { - case SOCKNAL_RX_KSM_HEADER: - if (conn->ksnc_flip) { - __swab32s(&conn->ksnc_msg.ksm_type); - __swab32s(&conn->ksnc_msg.ksm_csum); - __swab64s(&conn->ksnc_msg.ksm_zc_cookies[0]); - __swab64s(&conn->ksnc_msg.ksm_zc_cookies[1]); - } - - if (conn->ksnc_msg.ksm_type != KSOCK_MSG_NOOP && - conn->ksnc_msg.ksm_type != KSOCK_MSG_LNET) { - CERROR("%s: Unknown message type: %x\n", - libcfs_id2str(conn->ksnc_peer->ksnp_id), - conn->ksnc_msg.ksm_type); - ksocknal_new_packet(conn, 0); - ksocknal_close_conn_and_siblings(conn, -EPROTO); - return -EPROTO; - } - - if (conn->ksnc_msg.ksm_type == KSOCK_MSG_NOOP && - conn->ksnc_msg.ksm_csum && /* has checksum */ - conn->ksnc_msg.ksm_csum != conn->ksnc_rx_csum) { - /* NOOP Checksum error */ - CERROR("%s: Checksum error, wire:0x%08X data:0x%08X\n", - libcfs_id2str(conn->ksnc_peer->ksnp_id), - conn->ksnc_msg.ksm_csum, conn->ksnc_rx_csum); - ksocknal_new_packet(conn, 0); - ksocknal_close_conn_and_siblings(conn, -EPROTO); - return -EIO; - } - - if (conn->ksnc_msg.ksm_zc_cookies[1]) { - __u64 cookie = 0; - - LASSERT(conn->ksnc_proto != &ksocknal_protocol_v1x); - - if (conn->ksnc_msg.ksm_type == KSOCK_MSG_NOOP) - cookie = conn->ksnc_msg.ksm_zc_cookies[0]; - - rc = conn->ksnc_proto->pro_handle_zcack(conn, cookie, - conn->ksnc_msg.ksm_zc_cookies[1]); - - if (rc) { - CERROR("%s: Unknown ZC-ACK cookie: %llu, %llu\n", - libcfs_id2str(conn->ksnc_peer->ksnp_id), - cookie, conn->ksnc_msg.ksm_zc_cookies[1]); - ksocknal_new_packet(conn, 0); - ksocknal_close_conn_and_siblings(conn, -EPROTO); - return rc; - } - } - - if (conn->ksnc_msg.ksm_type == KSOCK_MSG_NOOP) { - ksocknal_new_packet(conn, 0); - return 0; /* NOOP is done and just return */ - } - - conn->ksnc_rx_state = SOCKNAL_RX_LNET_HEADER; - conn->ksnc_rx_nob_left = sizeof(struct ksock_lnet_msg); - - kvec->iov_base = &conn->ksnc_msg.ksm_u.lnetmsg; - 
kvec->iov_len = sizeof(struct ksock_lnet_msg); - - iov_iter_kvec(&conn->ksnc_rx_to, READ|ITER_KVEC, kvec, - 1, sizeof(struct ksock_lnet_msg)); - - goto again; /* read lnet header now */ - - case SOCKNAL_RX_LNET_HEADER: - /* unpack message header */ - conn->ksnc_proto->pro_unpack(&conn->ksnc_msg); - - if (conn->ksnc_peer->ksnp_id.pid & LNET_PID_USERFLAG) { - /* Userspace peer */ - lhdr = &conn->ksnc_msg.ksm_u.lnetmsg.ksnm_hdr; - id = &conn->ksnc_peer->ksnp_id; - - /* Substitute process ID assigned at connection time */ - lhdr->src_pid = cpu_to_le32(id->pid); - lhdr->src_nid = cpu_to_le64(id->nid); - } - - conn->ksnc_rx_state = SOCKNAL_RX_PARSE; - ksocknal_conn_addref(conn); /* ++ref while parsing */ - - rc = lnet_parse(conn->ksnc_peer->ksnp_ni, - &conn->ksnc_msg.ksm_u.lnetmsg.ksnm_hdr, - conn->ksnc_peer->ksnp_id.nid, conn, 0); - if (rc < 0) { - /* I just received garbage: give up on this conn */ - ksocknal_new_packet(conn, 0); - ksocknal_close_conn_and_siblings(conn, rc); - ksocknal_conn_decref(conn); - return -EPROTO; - } - - /* I'm racing with ksocknal_recv() */ - LASSERT(conn->ksnc_rx_state == SOCKNAL_RX_PARSE || - conn->ksnc_rx_state == SOCKNAL_RX_LNET_PAYLOAD); - - if (conn->ksnc_rx_state != SOCKNAL_RX_LNET_PAYLOAD) - return 0; - - /* ksocknal_recv() got called */ - goto again; - - case SOCKNAL_RX_LNET_PAYLOAD: - /* payload all received */ - rc = 0; - - if (!conn->ksnc_rx_nob_left && /* not truncating */ - conn->ksnc_msg.ksm_csum && /* has checksum */ - conn->ksnc_msg.ksm_csum != conn->ksnc_rx_csum) { - CERROR("%s: Checksum error, wire:0x%08X data:0x%08X\n", - libcfs_id2str(conn->ksnc_peer->ksnp_id), - conn->ksnc_msg.ksm_csum, conn->ksnc_rx_csum); - rc = -EIO; - } - - if (!rc && conn->ksnc_msg.ksm_zc_cookies[0]) { - LASSERT(conn->ksnc_proto != &ksocknal_protocol_v1x); - - lhdr = &conn->ksnc_msg.ksm_u.lnetmsg.ksnm_hdr; - id = &conn->ksnc_peer->ksnp_id; - - rc = conn->ksnc_proto->pro_handle_zcreq(conn, - conn->ksnc_msg.ksm_zc_cookies[0], - 
*ksocknal_tunables.ksnd_nonblk_zcack || - le64_to_cpu(lhdr->src_nid) != id->nid); - } - - lnet_finalize(conn->ksnc_peer->ksnp_ni, conn->ksnc_cookie, rc); - - if (rc) { - ksocknal_new_packet(conn, 0); - ksocknal_close_conn_and_siblings(conn, rc); - return -EPROTO; - } - /* Fall through */ - - case SOCKNAL_RX_SLOP: - /* starting new packet? */ - if (ksocknal_new_packet(conn, conn->ksnc_rx_nob_left)) - return 0; /* come back later */ - goto again; /* try to finish reading slop now */ - - default: - break; - } - - /* Not Reached */ - LBUG(); - return -EINVAL; /* keep gcc happy */ -} - -int -ksocknal_recv(struct lnet_ni *ni, void *private, struct lnet_msg *msg, - int delayed, struct iov_iter *to, unsigned int rlen) -{ - struct ksock_conn *conn = private; - struct ksock_sched *sched = conn->ksnc_scheduler; - - LASSERT(iov_iter_count(to) <= rlen); - LASSERT(to->nr_segs <= LNET_MAX_IOV); - - conn->ksnc_cookie = msg; - conn->ksnc_rx_nob_left = rlen; - - conn->ksnc_rx_to = *to; - - LASSERT(conn->ksnc_rx_scheduled); - - spin_lock_bh(&sched->kss_lock); - - switch (conn->ksnc_rx_state) { - case SOCKNAL_RX_PARSE_WAIT: - list_add_tail(&conn->ksnc_rx_list, &sched->kss_rx_conns); - wake_up(&sched->kss_waitq); - LASSERT(conn->ksnc_rx_ready); - break; - - case SOCKNAL_RX_PARSE: - /* scheduler hasn't noticed I'm parsing yet */ - break; - } - - conn->ksnc_rx_state = SOCKNAL_RX_LNET_PAYLOAD; - - spin_unlock_bh(&sched->kss_lock); - ksocknal_conn_decref(conn); - return 0; -} - -static inline int -ksocknal_sched_cansleep(struct ksock_sched *sched) -{ - int rc; - - spin_lock_bh(&sched->kss_lock); - - rc = !ksocknal_data.ksnd_shuttingdown && - list_empty(&sched->kss_rx_conns) && - list_empty(&sched->kss_tx_conns); - - spin_unlock_bh(&sched->kss_lock); - return rc; -} - -int ksocknal_scheduler(void *arg) -{ - struct ksock_sched_info *info; - struct ksock_sched *sched; - struct ksock_conn *conn; - struct ksock_tx *tx; - int rc; - int nloops = 0; - long id = (long)arg; - - info = 
ksocknal_data.ksnd_sched_info[KSOCK_THREAD_CPT(id)]; - sched = &info->ksi_scheds[KSOCK_THREAD_SID(id)]; - - rc = cfs_cpt_bind(lnet_cpt_table(), info->ksi_cpt); - if (rc) { - CWARN("Can't set CPU partition affinity to %d: %d\n", - info->ksi_cpt, rc); - } - - spin_lock_bh(&sched->kss_lock); - - while (!ksocknal_data.ksnd_shuttingdown) { - int did_something = 0; - - /* Ensure I progress everything semi-fairly */ - - if (!list_empty(&sched->kss_rx_conns)) { - conn = list_entry(sched->kss_rx_conns.next, - struct ksock_conn, ksnc_rx_list); - list_del(&conn->ksnc_rx_list); - - LASSERT(conn->ksnc_rx_scheduled); - LASSERT(conn->ksnc_rx_ready); - - /* - * clear rx_ready in case receive isn't complete. - * Do it BEFORE we call process_recv, since - * data_ready can set it any time after we release - * kss_lock. - */ - conn->ksnc_rx_ready = 0; - spin_unlock_bh(&sched->kss_lock); - - rc = ksocknal_process_receive(conn); - - spin_lock_bh(&sched->kss_lock); - - /* I'm the only one that can clear this flag */ - LASSERT(conn->ksnc_rx_scheduled); - - /* Did process_receive get everything it wanted? 
*/ - if (!rc) - conn->ksnc_rx_ready = 1; - - if (conn->ksnc_rx_state == SOCKNAL_RX_PARSE) { - /* - * Conn blocked waiting for ksocknal_recv() - * I change its state (under lock) to signal - * it can be rescheduled - */ - conn->ksnc_rx_state = SOCKNAL_RX_PARSE_WAIT; - } else if (conn->ksnc_rx_ready) { - /* reschedule for rx */ - list_add_tail(&conn->ksnc_rx_list, - &sched->kss_rx_conns); - } else { - conn->ksnc_rx_scheduled = 0; - /* drop my ref */ - ksocknal_conn_decref(conn); - } - - did_something = 1; - } - - if (!list_empty(&sched->kss_tx_conns)) { - LIST_HEAD(zlist); - - if (!list_empty(&sched->kss_zombie_noop_txs)) { - list_add(&zlist, &sched->kss_zombie_noop_txs); - list_del_init(&sched->kss_zombie_noop_txs); - } - - conn = list_entry(sched->kss_tx_conns.next, - struct ksock_conn, ksnc_tx_list); - list_del(&conn->ksnc_tx_list); - - LASSERT(conn->ksnc_tx_scheduled); - LASSERT(conn->ksnc_tx_ready); - LASSERT(!list_empty(&conn->ksnc_tx_queue)); - - tx = list_entry(conn->ksnc_tx_queue.next, - struct ksock_tx, tx_list); - - if (conn->ksnc_tx_carrier == tx) - ksocknal_next_tx_carrier(conn); - - /* dequeue now so empty list => more to send */ - list_del(&tx->tx_list); - - /* - * Clear tx_ready in case send isn't complete. Do - * it BEFORE we call process_transmit, since - * write_space can set it any time after we release - * kss_lock. 
- */ - conn->ksnc_tx_ready = 0; - spin_unlock_bh(&sched->kss_lock); - - if (!list_empty(&zlist)) { - /* - * free zombie noop txs, it's fast because - * noop txs are just put in freelist - */ - ksocknal_txlist_done(NULL, &zlist, 0); - } - - rc = ksocknal_process_transmit(conn, tx); - - if (rc == -ENOMEM || rc == -EAGAIN) { - /* - * Incomplete send: replace tx on HEAD of - * tx_queue - */ - spin_lock_bh(&sched->kss_lock); - list_add(&tx->tx_list, &conn->ksnc_tx_queue); - } else { - /* Complete send; tx -ref */ - ksocknal_tx_decref(tx); - - spin_lock_bh(&sched->kss_lock); - /* assume space for more */ - conn->ksnc_tx_ready = 1; - } - - if (rc == -ENOMEM) { - /* - * Do nothing; after a short timeout, this - * conn will be reposted on kss_tx_conns. - */ - } else if (conn->ksnc_tx_ready && - !list_empty(&conn->ksnc_tx_queue)) { - /* reschedule for tx */ - list_add_tail(&conn->ksnc_tx_list, - &sched->kss_tx_conns); - } else { - conn->ksnc_tx_scheduled = 0; - /* drop my ref */ - ksocknal_conn_decref(conn); - } - - did_something = 1; - } - if (!did_something || /* nothing to do */ - ++nloops == SOCKNAL_RESCHED) { /* hogging CPU? */ - spin_unlock_bh(&sched->kss_lock); - - nloops = 0; - - if (!did_something) { /* wait for something to do */ - rc = wait_event_interruptible_exclusive( - sched->kss_waitq, - !ksocknal_sched_cansleep(sched)); - LASSERT(!rc); - } else { - cond_resched(); - } - - spin_lock_bh(&sched->kss_lock); - } - } - - spin_unlock_bh(&sched->kss_lock); - ksocknal_thread_fini(); - return 0; -} - -/* - * Add connection to kss_rx_conns of scheduler - * and wakeup the scheduler. 
- */ -void ksocknal_read_callback(struct ksock_conn *conn) -{ - struct ksock_sched *sched; - - sched = conn->ksnc_scheduler; - - spin_lock_bh(&sched->kss_lock); - - conn->ksnc_rx_ready = 1; - - if (!conn->ksnc_rx_scheduled) { /* not being progressed */ - list_add_tail(&conn->ksnc_rx_list, &sched->kss_rx_conns); - conn->ksnc_rx_scheduled = 1; - /* extra ref for scheduler */ - ksocknal_conn_addref(conn); - - wake_up(&sched->kss_waitq); - } - spin_unlock_bh(&sched->kss_lock); -} - -/* - * Add connection to kss_tx_conns of scheduler - * and wakeup the scheduler. - */ -void ksocknal_write_callback(struct ksock_conn *conn) -{ - struct ksock_sched *sched; - - sched = conn->ksnc_scheduler; - - spin_lock_bh(&sched->kss_lock); - - conn->ksnc_tx_ready = 1; - - if (!conn->ksnc_tx_scheduled && /* not being progressed */ - !list_empty(&conn->ksnc_tx_queue)) { /* packets to send */ - list_add_tail(&conn->ksnc_tx_list, &sched->kss_tx_conns); - conn->ksnc_tx_scheduled = 1; - /* extra ref for scheduler */ - ksocknal_conn_addref(conn); - - wake_up(&sched->kss_waitq); - } - - spin_unlock_bh(&sched->kss_lock); -} - -static struct ksock_proto * -ksocknal_parse_proto_version(struct ksock_hello_msg *hello) -{ - __u32 version = 0; - - if (hello->kshm_magic == LNET_PROTO_MAGIC) - version = hello->kshm_version; - else if (hello->kshm_magic == __swab32(LNET_PROTO_MAGIC)) - version = __swab32(hello->kshm_version); - - if (version) { -#if SOCKNAL_VERSION_DEBUG - if (*ksocknal_tunables.ksnd_protocol == 1) - return NULL; - - if (*ksocknal_tunables.ksnd_protocol == 2 && - version == KSOCK_PROTO_V3) - return NULL; -#endif - if (version == KSOCK_PROTO_V2) - return &ksocknal_protocol_v2x; - - if (version == KSOCK_PROTO_V3) - return &ksocknal_protocol_v3x; - - return NULL; - } - - if (hello->kshm_magic == le32_to_cpu(LNET_PROTO_TCP_MAGIC)) { - struct lnet_magicversion *hmv = (struct lnet_magicversion *)hello; - - BUILD_BUG_ON(sizeof(struct lnet_magicversion) != - offsetof(struct ksock_hello_msg, 
kshm_src_nid)); - - if (hmv->version_major == cpu_to_le16(KSOCK_PROTO_V1_MAJOR) && - hmv->version_minor == cpu_to_le16(KSOCK_PROTO_V1_MINOR)) - return &ksocknal_protocol_v1x; - } - - return NULL; -} - -int -ksocknal_send_hello(struct lnet_ni *ni, struct ksock_conn *conn, - lnet_nid_t peer_nid, struct ksock_hello_msg *hello) -{ - /* CAVEAT EMPTOR: this byte flips 'ipaddrs' */ - struct ksock_net *net = (struct ksock_net *)ni->ni_data; - - LASSERT(hello->kshm_nips <= LNET_MAX_INTERFACES); - - /* rely on caller to hold a ref on socket so it wouldn't disappear */ - LASSERT(conn->ksnc_proto); - - hello->kshm_src_nid = ni->ni_nid; - hello->kshm_dst_nid = peer_nid; - hello->kshm_src_pid = the_lnet.ln_pid; - - hello->kshm_src_incarnation = net->ksnn_incarnation; - hello->kshm_ctype = conn->ksnc_type; - - return conn->ksnc_proto->pro_send_hello(conn, hello); -} - -static int -ksocknal_invert_type(int type) -{ - switch (type) { - case SOCKLND_CONN_ANY: - case SOCKLND_CONN_CONTROL: - return type; - case SOCKLND_CONN_BULK_IN: - return SOCKLND_CONN_BULK_OUT; - case SOCKLND_CONN_BULK_OUT: - return SOCKLND_CONN_BULK_IN; - default: - return SOCKLND_CONN_NONE; - } -} - -int -ksocknal_recv_hello(struct lnet_ni *ni, struct ksock_conn *conn, - struct ksock_hello_msg *hello, - struct lnet_process_id *peerid, - __u64 *incarnation) -{ - /* Return < 0 fatal error - * 0 success - * EALREADY lost connection race - * EPROTO protocol version mismatch - */ - struct socket *sock = conn->ksnc_sock; - int active = !!conn->ksnc_proto; - int timeout; - int proto_match; - int rc; - struct ksock_proto *proto; - struct lnet_process_id recv_id; - - /* socket type set on active connections - not set on passive */ - LASSERT(!active == !(conn->ksnc_type != SOCKLND_CONN_NONE)); - - timeout = active ? 
*ksocknal_tunables.ksnd_timeout : - lnet_acceptor_timeout(); - - rc = lnet_sock_read(sock, &hello->kshm_magic, - sizeof(hello->kshm_magic), timeout); - if (rc) { - CERROR("Error %d reading HELLO from %pI4h\n", - rc, &conn->ksnc_ipaddr); - LASSERT(rc < 0); - return rc; - } - - if (hello->kshm_magic != LNET_PROTO_MAGIC && - hello->kshm_magic != __swab32(LNET_PROTO_MAGIC) && - hello->kshm_magic != le32_to_cpu(LNET_PROTO_TCP_MAGIC)) { - /* Unexpected magic! */ - CERROR("Bad magic(1) %#08x (%#08x expected) from %pI4h\n", - __cpu_to_le32(hello->kshm_magic), - LNET_PROTO_TCP_MAGIC, - &conn->ksnc_ipaddr); - return -EPROTO; - } - - rc = lnet_sock_read(sock, &hello->kshm_version, - sizeof(hello->kshm_version), timeout); - if (rc) { - CERROR("Error %d reading HELLO from %pI4h\n", - rc, &conn->ksnc_ipaddr); - LASSERT(rc < 0); - return rc; - } - - proto = ksocknal_parse_proto_version(hello); - if (!proto) { - if (!active) { - /* unknown protocol from peer, tell peer my protocol */ - conn->ksnc_proto = &ksocknal_protocol_v3x; -#if SOCKNAL_VERSION_DEBUG - if (*ksocknal_tunables.ksnd_protocol == 2) - conn->ksnc_proto = &ksocknal_protocol_v2x; - else if (*ksocknal_tunables.ksnd_protocol == 1) - conn->ksnc_proto = &ksocknal_protocol_v1x; -#endif - hello->kshm_nips = 0; - ksocknal_send_hello(ni, conn, ni->ni_nid, hello); - } - - CERROR("Unknown protocol version (%d.x expected) from %pI4h\n", - conn->ksnc_proto->pro_version, - &conn->ksnc_ipaddr); - - return -EPROTO; - } - - proto_match = (conn->ksnc_proto == proto); - conn->ksnc_proto = proto; - - /* receive the rest of hello message anyway */ - rc = conn->ksnc_proto->pro_recv_hello(conn, hello, timeout); - if (rc) { - CERROR("Error %d reading or checking hello from from %pI4h\n", - rc, &conn->ksnc_ipaddr); - LASSERT(rc < 0); - return rc; - } - - *incarnation = hello->kshm_src_incarnation; - - if (hello->kshm_src_nid == LNET_NID_ANY) { - CERROR("Expecting a HELLO hdr with a NID, but got LNET_NID_ANY from %pI4h\n", - 
&conn->ksnc_ipaddr); - return -EPROTO; - } - - if (!active && - conn->ksnc_port > LNET_ACCEPTOR_MAX_RESERVED_PORT) { - /* Userspace NAL assigns peer process ID from socket */ - recv_id.pid = conn->ksnc_port | LNET_PID_USERFLAG; - recv_id.nid = LNET_MKNID(LNET_NIDNET(ni->ni_nid), - conn->ksnc_ipaddr); - } else { - recv_id.nid = hello->kshm_src_nid; - recv_id.pid = hello->kshm_src_pid; - } - - if (!active) { - *peerid = recv_id; - - /* peer determines type */ - conn->ksnc_type = ksocknal_invert_type(hello->kshm_ctype); - if (conn->ksnc_type == SOCKLND_CONN_NONE) { - CERROR("Unexpected type %d from %s ip %pI4h\n", - hello->kshm_ctype, libcfs_id2str(*peerid), - &conn->ksnc_ipaddr); - return -EPROTO; - } - - return 0; - } - - if (peerid->pid != recv_id.pid || - peerid->nid != recv_id.nid) { - LCONSOLE_ERROR_MSG(0x130, "Connected successfully to %s on host %pI4h, but they claimed they were %s; please check your Lustre configuration.\n", - libcfs_id2str(*peerid), - &conn->ksnc_ipaddr, - libcfs_id2str(recv_id)); - return -EPROTO; - } - - if (hello->kshm_ctype == SOCKLND_CONN_NONE) { - /* Possible protocol mismatch or I lost the connection race */ - return proto_match ? 
EALREADY : EPROTO; - } - - if (ksocknal_invert_type(hello->kshm_ctype) != conn->ksnc_type) { - CERROR("Mismatched types: me %d, %s ip %pI4h %d\n", - conn->ksnc_type, libcfs_id2str(*peerid), - &conn->ksnc_ipaddr, hello->kshm_ctype); - return -EPROTO; - } - - return 0; -} - -static int -ksocknal_connect(struct ksock_route *route) -{ - LIST_HEAD(zombies); - struct ksock_peer *peer = route->ksnr_peer; - int type; - int wanted; - struct socket *sock; - unsigned long deadline; - int retry_later = 0; - int rc = 0; - - deadline = cfs_time_add(cfs_time_current(), - *ksocknal_tunables.ksnd_timeout * HZ); - - write_lock_bh(&ksocknal_data.ksnd_global_lock); - - LASSERT(route->ksnr_scheduled); - LASSERT(!route->ksnr_connecting); - - route->ksnr_connecting = 1; - - for (;;) { - wanted = ksocknal_route_mask() & ~route->ksnr_connected; - - /* - * stop connecting if peer/route got closed under me, or - * route got connected while queued - */ - if (peer->ksnp_closing || route->ksnr_deleted || - !wanted) { - retry_later = 0; - break; - } - - /* reschedule if peer is connecting to me */ - if (peer->ksnp_accepting > 0) { - CDEBUG(D_NET, - "peer %s(%d) already connecting to me, retry later.\n", - libcfs_nid2str(peer->ksnp_id.nid), - peer->ksnp_accepting); - retry_later = 1; - } - - if (retry_later) /* needs reschedule */ - break; - - if (wanted & BIT(SOCKLND_CONN_ANY)) { - type = SOCKLND_CONN_ANY; - } else if (wanted & BIT(SOCKLND_CONN_CONTROL)) { - type = SOCKLND_CONN_CONTROL; - } else if (wanted & BIT(SOCKLND_CONN_BULK_IN)) { - type = SOCKLND_CONN_BULK_IN; - } else { - LASSERT(wanted & BIT(SOCKLND_CONN_BULK_OUT)); - type = SOCKLND_CONN_BULK_OUT; - } - - write_unlock_bh(&ksocknal_data.ksnd_global_lock); - - if (cfs_time_aftereq(cfs_time_current(), deadline)) { - rc = -ETIMEDOUT; - lnet_connect_console_error(rc, peer->ksnp_id.nid, - route->ksnr_ipaddr, - route->ksnr_port); - goto failed; - } - - rc = lnet_connect(&sock, peer->ksnp_id.nid, - route->ksnr_myipaddr, - route->ksnr_ipaddr, 
route->ksnr_port); - if (rc) - goto failed; - - rc = ksocknal_create_conn(peer->ksnp_ni, route, sock, type); - if (rc < 0) { - lnet_connect_console_error(rc, peer->ksnp_id.nid, - route->ksnr_ipaddr, - route->ksnr_port); - goto failed; - } - - /* - * A +ve RC means I have to retry because I lost the connection - * race or I have to renegotiate protocol version - */ - retry_later = (rc); - if (retry_later) - CDEBUG(D_NET, "peer %s: conn race, retry later.\n", - libcfs_nid2str(peer->ksnp_id.nid)); - - write_lock_bh(&ksocknal_data.ksnd_global_lock); - } - - route->ksnr_scheduled = 0; - route->ksnr_connecting = 0; - - if (retry_later) { - /* - * re-queue for attention; this frees me up to handle - * the peer's incoming connection request - */ - if (rc == EALREADY || - (!rc && peer->ksnp_accepting > 0)) { - /* - * We want to introduce a delay before next - * attempt to connect if we lost conn race, - * but the race is resolved quickly usually, - * so min_reconnectms should be good heuristic - */ - route->ksnr_retry_interval = - *ksocknal_tunables.ksnd_min_reconnectms * HZ / 1000; - route->ksnr_timeout = cfs_time_add(cfs_time_current(), - route->ksnr_retry_interval); - } - - ksocknal_launch_connection_locked(route); - } - - write_unlock_bh(&ksocknal_data.ksnd_global_lock); - return retry_later; - - failed: - write_lock_bh(&ksocknal_data.ksnd_global_lock); - - route->ksnr_scheduled = 0; - route->ksnr_connecting = 0; - - /* This is a retry rather than a new connection */ - route->ksnr_retry_interval *= 2; - route->ksnr_retry_interval = - max(route->ksnr_retry_interval, - (long)*ksocknal_tunables.ksnd_min_reconnectms * HZ / 1000); - route->ksnr_retry_interval = - min(route->ksnr_retry_interval, - (long)*ksocknal_tunables.ksnd_max_reconnectms * HZ / 1000); - - LASSERT(route->ksnr_retry_interval); - route->ksnr_timeout = cfs_time_add(cfs_time_current(), - route->ksnr_retry_interval); - - if (!list_empty(&peer->ksnp_tx_queue) && - !peer->ksnp_accepting && - 
!ksocknal_find_connecting_route_locked(peer)) { - struct ksock_conn *conn; - - /* - * ksnp_tx_queue is queued on a conn on successful - * connection for V1.x and V2.x - */ - if (!list_empty(&peer->ksnp_conns)) { - conn = list_entry(peer->ksnp_conns.next, - struct ksock_conn, ksnc_list); - LASSERT(conn->ksnc_proto == &ksocknal_protocol_v3x); - } - - /* - * take all the blocked packets while I've got the lock and - * complete below... - */ - list_splice_init(&peer->ksnp_tx_queue, &zombies); - } - - write_unlock_bh(&ksocknal_data.ksnd_global_lock); - - ksocknal_peer_failed(peer); - ksocknal_txlist_done(peer->ksnp_ni, &zombies, 1); - return 0; -} - -/* - * check whether we need to create more connds. - * It will try to create new thread if it's necessary, @timeout can - * be updated if failed to create, so caller wouldn't keep try while - * running out of resource. - */ -static int -ksocknal_connd_check_start(time64_t sec, long *timeout) -{ - char name[16]; - int rc; - int total = ksocknal_data.ksnd_connd_starting + - ksocknal_data.ksnd_connd_running; - - if (unlikely(ksocknal_data.ksnd_init < SOCKNAL_INIT_ALL)) { - /* still in initializing */ - return 0; - } - - if (total >= *ksocknal_tunables.ksnd_nconnds_max || - total > ksocknal_data.ksnd_connd_connecting + SOCKNAL_CONND_RESV) { - /* - * can't create more connd, or still have enough - * threads to handle more connecting - */ - return 0; - } - - if (list_empty(&ksocknal_data.ksnd_connd_routes)) { - /* no pending connecting request */ - return 0; - } - - if (sec - ksocknal_data.ksnd_connd_failed_stamp <= 1) { - /* may run out of resource, retry later */ - *timeout = HZ; - return 0; - } - - if (ksocknal_data.ksnd_connd_starting > 0) { - /* serialize starting to avoid flood */ - return 0; - } - - ksocknal_data.ksnd_connd_starting_stamp = sec; - ksocknal_data.ksnd_connd_starting++; - spin_unlock_bh(&ksocknal_data.ksnd_connd_lock); - - /* NB: total is the next id */ - snprintf(name, sizeof(name), "socknal_cd%02d", 
total); - rc = ksocknal_thread_start(ksocknal_connd, NULL, name); - - spin_lock_bh(&ksocknal_data.ksnd_connd_lock); - if (!rc) - return 1; - - /* we tried ... */ - LASSERT(ksocknal_data.ksnd_connd_starting > 0); - ksocknal_data.ksnd_connd_starting--; - ksocknal_data.ksnd_connd_failed_stamp = ktime_get_real_seconds(); - - return 1; -} - -/* - * check whether current thread can exit, it will return 1 if there are too - * many threads and no creating in past 120 seconds. - * Also, this function may update @timeout to make caller come back - * again to recheck these conditions. - */ -static int -ksocknal_connd_check_stop(time64_t sec, long *timeout) -{ - int val; - - if (unlikely(ksocknal_data.ksnd_init < SOCKNAL_INIT_ALL)) { - /* still in initializing */ - return 0; - } - - if (ksocknal_data.ksnd_connd_starting > 0) { - /* in progress of starting new thread */ - return 0; - } - - if (ksocknal_data.ksnd_connd_running <= - *ksocknal_tunables.ksnd_nconnds) { /* can't shrink */ - return 0; - } - - /* created thread in past 120 seconds? */ - val = (int)(ksocknal_data.ksnd_connd_starting_stamp + - SOCKNAL_CONND_TIMEOUT - sec); - - *timeout = (val > 0) ? 
val * HZ : - SOCKNAL_CONND_TIMEOUT * HZ; - if (val > 0) - return 0; - - /* no creating in past 120 seconds */ - - return ksocknal_data.ksnd_connd_running > - ksocknal_data.ksnd_connd_connecting + SOCKNAL_CONND_RESV; -} - -/* - * Go through connd_routes queue looking for a route that we can process - * right now, @timeout_p can be updated if we need to come back later - */ -static struct ksock_route * -ksocknal_connd_get_route_locked(signed long *timeout_p) -{ - struct ksock_route *route; - unsigned long now; - - now = cfs_time_current(); - - /* connd_routes can contain both pending and ordinary routes */ - list_for_each_entry(route, &ksocknal_data.ksnd_connd_routes, - ksnr_connd_list) { - if (!route->ksnr_retry_interval || - cfs_time_aftereq(now, route->ksnr_timeout)) - return route; - - if (*timeout_p == MAX_SCHEDULE_TIMEOUT || - (int)*timeout_p > (int)(route->ksnr_timeout - now)) - *timeout_p = (int)(route->ksnr_timeout - now); - } - - return NULL; -} - -int -ksocknal_connd(void *arg) -{ - spinlock_t *connd_lock = &ksocknal_data.ksnd_connd_lock; - struct ksock_connreq *cr; - wait_queue_entry_t wait; - int nloops = 0; - int cons_retry = 0; - - init_waitqueue_entry(&wait, current); - - spin_lock_bh(connd_lock); - - LASSERT(ksocknal_data.ksnd_connd_starting > 0); - ksocknal_data.ksnd_connd_starting--; - ksocknal_data.ksnd_connd_running++; - - while (!ksocknal_data.ksnd_shuttingdown) { - struct ksock_route *route = NULL; - time64_t sec = ktime_get_real_seconds(); - long timeout = MAX_SCHEDULE_TIMEOUT; - int dropped_lock = 0; - - if (ksocknal_connd_check_stop(sec, &timeout)) { - /* wakeup another one to check stop */ - wake_up(&ksocknal_data.ksnd_connd_waitq); - break; - } - - if (ksocknal_connd_check_start(sec, &timeout)) { - /* created new thread */ - dropped_lock = 1; - } - - if (!list_empty(&ksocknal_data.ksnd_connd_connreqs)) { - /* Connection accepted by the listener */ - cr = list_entry(ksocknal_data.ksnd_connd_connreqs.next, - struct ksock_connreq, 
ksncr_list); - - list_del(&cr->ksncr_list); - spin_unlock_bh(connd_lock); - dropped_lock = 1; - - ksocknal_create_conn(cr->ksncr_ni, NULL, - cr->ksncr_sock, SOCKLND_CONN_NONE); - lnet_ni_decref(cr->ksncr_ni); - kfree(cr); - - spin_lock_bh(connd_lock); - } - - /* - * Only handle an outgoing connection request if there - * is a thread left to handle incoming connections and - * create new connd - */ - if (ksocknal_data.ksnd_connd_connecting + SOCKNAL_CONND_RESV < - ksocknal_data.ksnd_connd_running) { - route = ksocknal_connd_get_route_locked(&timeout); - } - if (route) { - list_del(&route->ksnr_connd_list); - ksocknal_data.ksnd_connd_connecting++; - spin_unlock_bh(connd_lock); - dropped_lock = 1; - - if (ksocknal_connect(route)) { - /* consecutive retry */ - if (cons_retry++ > SOCKNAL_INSANITY_RECONN) { - CWARN("massive consecutive re-connecting to %pI4h\n", - &route->ksnr_ipaddr); - cons_retry = 0; - } - } else { - cons_retry = 0; - } - - ksocknal_route_decref(route); - - spin_lock_bh(connd_lock); - ksocknal_data.ksnd_connd_connecting--; - } - - if (dropped_lock) { - if (++nloops < SOCKNAL_RESCHED) - continue; - spin_unlock_bh(connd_lock); - nloops = 0; - cond_resched(); - spin_lock_bh(connd_lock); - continue; - } - - /* Nothing to do for 'timeout' */ - set_current_state(TASK_INTERRUPTIBLE); - add_wait_queue_exclusive(&ksocknal_data.ksnd_connd_waitq, - &wait); - spin_unlock_bh(connd_lock); - - nloops = 0; - schedule_timeout(timeout); - - remove_wait_queue(&ksocknal_data.ksnd_connd_waitq, &wait); - spin_lock_bh(connd_lock); - } - ksocknal_data.ksnd_connd_running--; - spin_unlock_bh(connd_lock); - - ksocknal_thread_fini(); - return 0; -} - -static struct ksock_conn * -ksocknal_find_timed_out_conn(struct ksock_peer *peer) -{ - /* We're called with a shared lock on ksnd_global_lock */ - struct ksock_conn *conn; - struct list_head *ctmp; - - list_for_each(ctmp, &peer->ksnp_conns) { - int error; - - conn = list_entry(ctmp, struct ksock_conn, ksnc_list); - - /* Don't need 
the {get,put}connsock dance to deref ksnc_sock */ - LASSERT(!conn->ksnc_closing); - - /* - * SOCK_ERROR will reset error code of socket in - * some platform (like Darwin8.x) - */ - error = conn->ksnc_sock->sk->sk_err; - if (error) { - ksocknal_conn_addref(conn); - - switch (error) { - case ECONNRESET: - CNETERR("A connection with %s (%pI4h:%d) was reset; it may have rebooted.\n", - libcfs_id2str(peer->ksnp_id), - &conn->ksnc_ipaddr, - conn->ksnc_port); - break; - case ETIMEDOUT: - CNETERR("A connection with %s (%pI4h:%d) timed out; the network or node may be down.\n", - libcfs_id2str(peer->ksnp_id), - &conn->ksnc_ipaddr, - conn->ksnc_port); - break; - default: - CNETERR("An unexpected network error %d occurred with %s (%pI4h:%d\n", - error, - libcfs_id2str(peer->ksnp_id), - &conn->ksnc_ipaddr, - conn->ksnc_port); - break; - } - - return conn; - } - - if (conn->ksnc_rx_started && - cfs_time_aftereq(cfs_time_current(), - conn->ksnc_rx_deadline)) { - /* Timed out incomplete incoming message */ - ksocknal_conn_addref(conn); - CNETERR("Timeout receiving from %s (%pI4h:%d), state %d wanted %zd left %d\n", - libcfs_id2str(peer->ksnp_id), - &conn->ksnc_ipaddr, - conn->ksnc_port, - conn->ksnc_rx_state, - iov_iter_count(&conn->ksnc_rx_to), - conn->ksnc_rx_nob_left); - return conn; - } - - if ((!list_empty(&conn->ksnc_tx_queue) || - conn->ksnc_sock->sk->sk_wmem_queued) && - cfs_time_aftereq(cfs_time_current(), - conn->ksnc_tx_deadline)) { - /* - * Timed out messages queued for sending or - * buffered in the socket's send buffer - */ - ksocknal_conn_addref(conn); - CNETERR("Timeout sending data to %s (%pI4h:%d) the network or that node may be down.\n", - libcfs_id2str(peer->ksnp_id), - &conn->ksnc_ipaddr, - conn->ksnc_port); - return conn; - } - } - - return NULL; -} - -static inline void -ksocknal_flush_stale_txs(struct ksock_peer *peer) -{ - struct ksock_tx *tx; - struct ksock_tx *tmp; - LIST_HEAD(stale_txs); - - write_lock_bh(&ksocknal_data.ksnd_global_lock); - - 
list_for_each_entry_safe(tx, tmp, &peer->ksnp_tx_queue, tx_list) { - if (!cfs_time_aftereq(cfs_time_current(), - tx->tx_deadline)) - break; - - list_del(&tx->tx_list); - list_add_tail(&tx->tx_list, &stale_txs); - } - - write_unlock_bh(&ksocknal_data.ksnd_global_lock); - - ksocknal_txlist_done(peer->ksnp_ni, &stale_txs, 1); -} - -static int -ksocknal_send_keepalive_locked(struct ksock_peer *peer) - __must_hold(&ksocknal_data.ksnd_global_lock) -{ - struct ksock_sched *sched; - struct ksock_conn *conn; - struct ksock_tx *tx; - - /* last_alive will be updated by create_conn */ - if (list_empty(&peer->ksnp_conns)) - return 0; - - if (peer->ksnp_proto != &ksocknal_protocol_v3x) - return 0; - - if (*ksocknal_tunables.ksnd_keepalive <= 0 || - time_before(cfs_time_current(), - cfs_time_add(peer->ksnp_last_alive, - *ksocknal_tunables.ksnd_keepalive * HZ))) - return 0; - - if (time_before(cfs_time_current(), peer->ksnp_send_keepalive)) - return 0; - - /* - * retry 10 secs later, so we wouldn't put pressure - * on this peer if we failed to send keepalive this time - */ - peer->ksnp_send_keepalive = cfs_time_shift(10); - - conn = ksocknal_find_conn_locked(peer, NULL, 1); - if (conn) { - sched = conn->ksnc_scheduler; - - spin_lock_bh(&sched->kss_lock); - if (!list_empty(&conn->ksnc_tx_queue)) { - spin_unlock_bh(&sched->kss_lock); - /* there is an queued ACK, don't need keepalive */ - return 0; - } - - spin_unlock_bh(&sched->kss_lock); - } - - read_unlock(&ksocknal_data.ksnd_global_lock); - - /* cookie = 1 is reserved for keepalive PING */ - tx = ksocknal_alloc_tx_noop(1, 1); - if (!tx) { - read_lock(&ksocknal_data.ksnd_global_lock); - return -ENOMEM; - } - - if (!ksocknal_launch_packet(peer->ksnp_ni, tx, peer->ksnp_id)) { - read_lock(&ksocknal_data.ksnd_global_lock); - return 1; - } - - ksocknal_free_tx(tx); - read_lock(&ksocknal_data.ksnd_global_lock); - - return -EIO; -} - -static void -ksocknal_check_peer_timeouts(int idx) -{ - struct list_head *peers = 
&ksocknal_data.ksnd_peers[idx]; - struct ksock_peer *peer; - struct ksock_conn *conn; - struct ksock_tx *tx; - - again: - /* - * NB. We expect to have a look at all the peers and not find any - * connections to time out, so we just use a shared lock while we - * take a look... - */ - read_lock(&ksocknal_data.ksnd_global_lock); - - list_for_each_entry(peer, peers, ksnp_list) { - unsigned long deadline = 0; - struct ksock_tx *tx_stale; - int resid = 0; - int n = 0; - - if (ksocknal_send_keepalive_locked(peer)) { - read_unlock(&ksocknal_data.ksnd_global_lock); - goto again; - } - - conn = ksocknal_find_timed_out_conn(peer); - - if (conn) { - read_unlock(&ksocknal_data.ksnd_global_lock); - - ksocknal_close_conn_and_siblings(conn, -ETIMEDOUT); - - /* - * NB we won't find this one again, but we can't - * just proceed with the next peer, since we dropped - * ksnd_global_lock and it might be dead already! - */ - ksocknal_conn_decref(conn); - goto again; - } - - /* - * we can't process stale txs right here because we're - * holding only shared lock - */ - if (!list_empty(&peer->ksnp_tx_queue)) { - tx = list_entry(peer->ksnp_tx_queue.next, - struct ksock_tx, tx_list); - - if (cfs_time_aftereq(cfs_time_current(), - tx->tx_deadline)) { - ksocknal_peer_addref(peer); - read_unlock(&ksocknal_data.ksnd_global_lock); - - ksocknal_flush_stale_txs(peer); - - ksocknal_peer_decref(peer); - goto again; - } - } - - if (list_empty(&peer->ksnp_zc_req_list)) - continue; - - tx_stale = NULL; - spin_lock(&peer->ksnp_lock); - list_for_each_entry(tx, &peer->ksnp_zc_req_list, tx_zc_list) { - if (!cfs_time_aftereq(cfs_time_current(), - tx->tx_deadline)) - break; - /* ignore the TX if connection is being closed */ - if (tx->tx_conn->ksnc_closing) - continue; - if (!tx_stale) - tx_stale = tx; - n++; - } - - if (!tx_stale) { - spin_unlock(&peer->ksnp_lock); - continue; - } - - deadline = tx_stale->tx_deadline; - resid = tx_stale->tx_resid; - conn = tx_stale->tx_conn; - ksocknal_conn_addref(conn); - 
- spin_unlock(&peer->ksnp_lock); - read_unlock(&ksocknal_data.ksnd_global_lock); - - CERROR("Total %d stale ZC_REQs for peer %s detected; the oldest(%p) timed out %ld secs ago, resid: %d, wmem: %d\n", - n, libcfs_nid2str(peer->ksnp_id.nid), tx_stale, - cfs_duration_sec(cfs_time_current() - deadline), - resid, conn->ksnc_sock->sk->sk_wmem_queued); - - ksocknal_close_conn_and_siblings(conn, -ETIMEDOUT); - ksocknal_conn_decref(conn); - goto again; - } - - read_unlock(&ksocknal_data.ksnd_global_lock); -} - -int -ksocknal_reaper(void *arg) -{ - wait_queue_entry_t wait; - struct ksock_conn *conn; - struct ksock_sched *sched; - struct list_head enomem_conns; - int nenomem_conns; - long timeout; - int i; - int peer_index = 0; - unsigned long deadline = cfs_time_current(); - - INIT_LIST_HEAD(&enomem_conns); - init_waitqueue_entry(&wait, current); - - spin_lock_bh(&ksocknal_data.ksnd_reaper_lock); - - while (!ksocknal_data.ksnd_shuttingdown) { - if (!list_empty(&ksocknal_data.ksnd_deathrow_conns)) { - conn = list_entry(ksocknal_data.ksnd_deathrow_conns.next, - struct ksock_conn, ksnc_list); - list_del(&conn->ksnc_list); - - spin_unlock_bh(&ksocknal_data.ksnd_reaper_lock); - - ksocknal_terminate_conn(conn); - ksocknal_conn_decref(conn); - - spin_lock_bh(&ksocknal_data.ksnd_reaper_lock); - continue; - } - - if (!list_empty(&ksocknal_data.ksnd_zombie_conns)) { - conn = list_entry(ksocknal_data.ksnd_zombie_conns.next, - struct ksock_conn, ksnc_list); - list_del(&conn->ksnc_list); - - spin_unlock_bh(&ksocknal_data.ksnd_reaper_lock); - - ksocknal_destroy_conn(conn); - - spin_lock_bh(&ksocknal_data.ksnd_reaper_lock); - continue; - } - - if (!list_empty(&ksocknal_data.ksnd_enomem_conns)) { - list_add(&enomem_conns, - &ksocknal_data.ksnd_enomem_conns); - list_del_init(&ksocknal_data.ksnd_enomem_conns); - } - - spin_unlock_bh(&ksocknal_data.ksnd_reaper_lock); - - /* reschedule all the connections that stalled with ENOMEM... 
*/ - nenomem_conns = 0; - while (!list_empty(&enomem_conns)) { - conn = list_entry(enomem_conns.next, struct ksock_conn, - ksnc_tx_list); - list_del(&conn->ksnc_tx_list); - - sched = conn->ksnc_scheduler; - - spin_lock_bh(&sched->kss_lock); - - LASSERT(conn->ksnc_tx_scheduled); - conn->ksnc_tx_ready = 1; - list_add_tail(&conn->ksnc_tx_list, - &sched->kss_tx_conns); - wake_up(&sched->kss_waitq); - - spin_unlock_bh(&sched->kss_lock); - nenomem_conns++; - } - - /* careful with the jiffy wrap... */ - while ((timeout = cfs_time_sub(deadline, - cfs_time_current())) <= 0) { - const int n = 4; - const int p = 1; - int chunk = ksocknal_data.ksnd_peer_hash_size; - - /* - * Time to check for timeouts on a few more peers: I do - * checks every 'p' seconds on a proportion of the peer - * table and I need to check every connection 'n' times - * within a timeout interval, to ensure I detect a - * timeout on any connection within (n+1)/n times the - * timeout interval. - */ - if (*ksocknal_tunables.ksnd_timeout > n * p) - chunk = (chunk * n * p) / - *ksocknal_tunables.ksnd_timeout; - if (!chunk) - chunk = 1; - - for (i = 0; i < chunk; i++) { - ksocknal_check_peer_timeouts(peer_index); - peer_index = (peer_index + 1) % - ksocknal_data.ksnd_peer_hash_size; - } - - deadline = cfs_time_add(deadline, p * HZ); - } - - if (nenomem_conns) { - /* - * Reduce my timeout if I rescheduled ENOMEM conns. - * This also prevents me getting woken immediately - * if any go back on my enomem list. 
- */ - timeout = SOCKNAL_ENOMEM_RETRY; - } - ksocknal_data.ksnd_reaper_waketime = - cfs_time_add(cfs_time_current(), timeout); - - set_current_state(TASK_INTERRUPTIBLE); - add_wait_queue(&ksocknal_data.ksnd_reaper_waitq, &wait); - - if (!ksocknal_data.ksnd_shuttingdown && - list_empty(&ksocknal_data.ksnd_deathrow_conns) && - list_empty(&ksocknal_data.ksnd_zombie_conns)) - schedule_timeout(timeout); - - set_current_state(TASK_RUNNING); - remove_wait_queue(&ksocknal_data.ksnd_reaper_waitq, &wait); - - spin_lock_bh(&ksocknal_data.ksnd_reaper_lock); - } - - spin_unlock_bh(&ksocknal_data.ksnd_reaper_lock); - - ksocknal_thread_fini(); - return 0; -} diff --git a/drivers/staging/lustre/lnet/klnds/socklnd/socklnd_lib.c b/drivers/staging/lustre/lnet/klnds/socklnd/socklnd_lib.c deleted file mode 100644 index 7941cfa526bc..000000000000 --- a/drivers/staging/lustre/lnet/klnds/socklnd/socklnd_lib.c +++ /dev/null @@ -1,533 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -/* - * GPL HEADER START - * - * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 only, - * as published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, but - * WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * General Public License version 2 for more details (a copy is included - * in the LICENSE file that accompanied this code). - * - * You should have received a copy of the GNU General Public License - * version 2 along with this program; If not, see - * http://www.gnu.org/licenses/gpl-2.0.html - * - * GPL HEADER END - */ -/* - * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. - * Use is subject to license terms. - * - * Copyright (c) 2011, 2012, Intel Corporation. 
- */ -/* - * This file is part of Lustre, http://www.lustre.org/ - * Lustre is a trademark of Sun Microsystems, Inc. - */ - -#include "socklnd.h" - -int -ksocknal_lib_get_conn_addrs(struct ksock_conn *conn) -{ - int rc = lnet_sock_getaddr(conn->ksnc_sock, 1, &conn->ksnc_ipaddr, - &conn->ksnc_port); - - /* Didn't need the {get,put}connsock dance to deref ksnc_sock... */ - LASSERT(!conn->ksnc_closing); - - if (rc) { - CERROR("Error %d getting sock peer IP\n", rc); - return rc; - } - - rc = lnet_sock_getaddr(conn->ksnc_sock, 0, &conn->ksnc_myipaddr, NULL); - if (rc) { - CERROR("Error %d getting sock local IP\n", rc); - return rc; - } - - return 0; -} - -int -ksocknal_lib_zc_capable(struct ksock_conn *conn) -{ - int caps = conn->ksnc_sock->sk->sk_route_caps; - - if (conn->ksnc_proto == &ksocknal_protocol_v1x) - return 0; - - /* - * ZC if the socket supports scatter/gather and doesn't need software - * checksums - */ - return ((caps & NETIF_F_SG) && (caps & NETIF_F_CSUM_MASK)); -} - -int -ksocknal_lib_send_iov(struct ksock_conn *conn, struct ksock_tx *tx) -{ - struct msghdr msg = {.msg_flags = MSG_DONTWAIT}; - struct socket *sock = conn->ksnc_sock; - int nob, i; - - if (*ksocknal_tunables.ksnd_enable_csum && /* checksum enabled */ - conn->ksnc_proto == &ksocknal_protocol_v2x && /* V2.x connection */ - tx->tx_nob == tx->tx_resid && /* frist sending */ - !tx->tx_msg.ksm_csum) /* not checksummed */ - ksocknal_lib_csum_tx(tx); - - for (nob = i = 0; i < tx->tx_niov; i++) - nob += tx->tx_iov[i].iov_len; - - if (!list_empty(&conn->ksnc_tx_queue) || - nob < tx->tx_resid) - msg.msg_flags |= MSG_MORE; - - iov_iter_kvec(&msg.msg_iter, WRITE | ITER_KVEC, - tx->tx_iov, tx->tx_niov, nob); - return sock_sendmsg(sock, &msg); -} - -int -ksocknal_lib_send_kiov(struct ksock_conn *conn, struct ksock_tx *tx) -{ - struct socket *sock = conn->ksnc_sock; - struct bio_vec *kiov = tx->tx_kiov; - int rc; - int nob; - - /* Not NOOP message */ - LASSERT(tx->tx_lnetmsg); - - if 
(tx->tx_msg.ksm_zc_cookies[0]) { - /* Zero copy is enabled */ - struct sock *sk = sock->sk; - struct page *page = kiov->bv_page; - int offset = kiov->bv_offset; - int fragsize = kiov->bv_len; - int msgflg = MSG_DONTWAIT; - - CDEBUG(D_NET, "page %p + offset %x for %d\n", - page, offset, kiov->bv_len); - - if (!list_empty(&conn->ksnc_tx_queue) || - fragsize < tx->tx_resid) - msgflg |= MSG_MORE; - - if (sk->sk_prot->sendpage) { - rc = sk->sk_prot->sendpage(sk, page, - offset, fragsize, msgflg); - } else { - rc = tcp_sendpage(sk, page, offset, fragsize, msgflg); - } - } else { - struct msghdr msg = {.msg_flags = MSG_DONTWAIT}; - int i; - - for (nob = i = 0; i < tx->tx_nkiov; i++) - nob += kiov[i].bv_len; - - if (!list_empty(&conn->ksnc_tx_queue) || - nob < tx->tx_resid) - msg.msg_flags |= MSG_MORE; - - iov_iter_bvec(&msg.msg_iter, WRITE | ITER_BVEC, - kiov, tx->tx_nkiov, nob); - rc = sock_sendmsg(sock, &msg); - } - return rc; -} - -void -ksocknal_lib_eager_ack(struct ksock_conn *conn) -{ - int opt = 1; - struct socket *sock = conn->ksnc_sock; - - /* - * Remind the socket to ACK eagerly. If I don't, the socket might - * think I'm about to send something it could piggy-back the ACK - * on, introducing delay in completing zero-copy sends in my - * peer. 
- */ - kernel_setsockopt(sock, SOL_TCP, TCP_QUICKACK, (char *)&opt, - sizeof(opt)); -} - -static int lustre_csum(struct kvec *v, void *context) -{ - struct ksock_conn *conn = context; - conn->ksnc_rx_csum = crc32_le(conn->ksnc_rx_csum, - v->iov_base, v->iov_len); - return 0; -} - -int -ksocknal_lib_recv(struct ksock_conn *conn) -{ - struct msghdr msg = { .msg_iter = conn->ksnc_rx_to }; - __u32 saved_csum; - int rc; - - rc = sock_recvmsg(conn->ksnc_sock, &msg, MSG_DONTWAIT); - if (rc <= 0) - return rc; - - saved_csum = conn->ksnc_msg.ksm_csum; - if (!saved_csum) - return rc; - - /* header is included only in V2 - V3 checksums only the bulk data */ - if (!(conn->ksnc_rx_to.type & ITER_BVEC) && - conn->ksnc_proto != &ksocknal_protocol_v2x) - return rc; - - /* accumulate checksum */ - conn->ksnc_msg.ksm_csum = 0; - iov_iter_for_each_range(&conn->ksnc_rx_to, rc, lustre_csum, conn); - conn->ksnc_msg.ksm_csum = saved_csum; - - return rc; -} - -void -ksocknal_lib_csum_tx(struct ksock_tx *tx) -{ - int i; - __u32 csum; - void *base; - - LASSERT(tx->tx_iov[0].iov_base == &tx->tx_msg); - LASSERT(tx->tx_conn); - LASSERT(tx->tx_conn->ksnc_proto == &ksocknal_protocol_v2x); - - tx->tx_msg.ksm_csum = 0; - - csum = crc32_le(~0, tx->tx_iov[0].iov_base, - tx->tx_iov[0].iov_len); - - if (tx->tx_kiov) { - for (i = 0; i < tx->tx_nkiov; i++) { - base = kmap(tx->tx_kiov[i].bv_page) + - tx->tx_kiov[i].bv_offset; - - csum = crc32_le(csum, base, tx->tx_kiov[i].bv_len); - - kunmap(tx->tx_kiov[i].bv_page); - } - } else { - for (i = 1; i < tx->tx_niov; i++) - csum = crc32_le(csum, tx->tx_iov[i].iov_base, - tx->tx_iov[i].iov_len); - } - - if (*ksocknal_tunables.ksnd_inject_csum_error) { - csum++; - *ksocknal_tunables.ksnd_inject_csum_error = 0; - } - - tx->tx_msg.ksm_csum = csum; -} - -int -ksocknal_lib_get_conn_tunables(struct ksock_conn *conn, int *txmem, - int *rxmem, int *nagle) -{ - struct socket *sock = conn->ksnc_sock; - int len; - int rc; - - rc = ksocknal_connsock_addref(conn); - if (rc) 
{ - LASSERT(conn->ksnc_closing); - *txmem = *rxmem = *nagle = 0; - return -ESHUTDOWN; - } - - rc = lnet_sock_getbuf(sock, txmem, rxmem); - if (!rc) { - len = sizeof(*nagle); - rc = kernel_getsockopt(sock, SOL_TCP, TCP_NODELAY, - (char *)nagle, &len); - } - - ksocknal_connsock_decref(conn); - - if (!rc) - *nagle = !*nagle; - else - *txmem = *rxmem = *nagle = 0; - - return rc; -} - -int -ksocknal_lib_setup_sock(struct socket *sock) -{ - int rc; - int option; - int keep_idle; - int keep_intvl; - int keep_count; - int do_keepalive; - struct linger linger; - - sock->sk->sk_allocation = GFP_NOFS; - - /* - * Ensure this socket aborts active sends immediately when we close - * it. - */ - linger.l_onoff = 0; - linger.l_linger = 0; - - rc = kernel_setsockopt(sock, SOL_SOCKET, SO_LINGER, (char *)&linger, - sizeof(linger)); - if (rc) { - CERROR("Can't set SO_LINGER: %d\n", rc); - return rc; - } - - option = -1; - rc = kernel_setsockopt(sock, SOL_TCP, TCP_LINGER2, (char *)&option, - sizeof(option)); - if (rc) { - CERROR("Can't set SO_LINGER2: %d\n", rc); - return rc; - } - - if (!*ksocknal_tunables.ksnd_nagle) { - option = 1; - - rc = kernel_setsockopt(sock, SOL_TCP, TCP_NODELAY, - (char *)&option, sizeof(option)); - if (rc) { - CERROR("Can't disable nagle: %d\n", rc); - return rc; - } - } - - rc = lnet_sock_setbuf(sock, *ksocknal_tunables.ksnd_tx_buffer_size, - *ksocknal_tunables.ksnd_rx_buffer_size); - if (rc) { - CERROR("Can't set buffer tx %d, rx %d buffers: %d\n", - *ksocknal_tunables.ksnd_tx_buffer_size, - *ksocknal_tunables.ksnd_rx_buffer_size, rc); - return rc; - } - -/* TCP_BACKOFF_* sockopt tunables unsupported in stock kernels */ - - /* snapshot tunables */ - keep_idle = *ksocknal_tunables.ksnd_keepalive_idle; - keep_count = *ksocknal_tunables.ksnd_keepalive_count; - keep_intvl = *ksocknal_tunables.ksnd_keepalive_intvl; - - do_keepalive = (keep_idle > 0 && keep_count > 0 && keep_intvl > 0); - - option = (do_keepalive ? 
1 : 0); - rc = kernel_setsockopt(sock, SOL_SOCKET, SO_KEEPALIVE, (char *)&option, - sizeof(option)); - if (rc) { - CERROR("Can't set SO_KEEPALIVE: %d\n", rc); - return rc; - } - - if (!do_keepalive) - return 0; - - rc = kernel_setsockopt(sock, SOL_TCP, TCP_KEEPIDLE, (char *)&keep_idle, - sizeof(keep_idle)); - if (rc) { - CERROR("Can't set TCP_KEEPIDLE: %d\n", rc); - return rc; - } - - rc = kernel_setsockopt(sock, SOL_TCP, TCP_KEEPINTVL, - (char *)&keep_intvl, sizeof(keep_intvl)); - if (rc) { - CERROR("Can't set TCP_KEEPINTVL: %d\n", rc); - return rc; - } - - rc = kernel_setsockopt(sock, SOL_TCP, TCP_KEEPCNT, (char *)&keep_count, - sizeof(keep_count)); - if (rc) { - CERROR("Can't set TCP_KEEPCNT: %d\n", rc); - return rc; - } - - return 0; -} - -void -ksocknal_lib_push_conn(struct ksock_conn *conn) -{ - struct sock *sk; - struct tcp_sock *tp; - int nonagle; - int val = 1; - int rc; - - rc = ksocknal_connsock_addref(conn); - if (rc) /* being shut down */ - return; - - sk = conn->ksnc_sock->sk; - tp = tcp_sk(sk); - - lock_sock(sk); - nonagle = tp->nonagle; - tp->nonagle = 1; - release_sock(sk); - - rc = kernel_setsockopt(conn->ksnc_sock, SOL_TCP, TCP_NODELAY, - (char *)&val, sizeof(val)); - LASSERT(!rc); - - lock_sock(sk); - tp->nonagle = nonagle; - release_sock(sk); - - ksocknal_connsock_decref(conn); -} - -/* - * socket call back in Linux - */ -static void -ksocknal_data_ready(struct sock *sk) -{ - struct ksock_conn *conn; - - /* interleave correctly with closing sockets... */ - LASSERT(!in_irq()); - read_lock(&ksocknal_data.ksnd_global_lock); - - conn = sk->sk_user_data; - if (!conn) { /* raced with ksocknal_terminate_conn */ - LASSERT(sk->sk_data_ready != &ksocknal_data_ready); - sk->sk_data_ready(sk); - } else { - ksocknal_read_callback(conn); - } - - read_unlock(&ksocknal_data.ksnd_global_lock); -} - -static void -ksocknal_write_space(struct sock *sk) -{ - struct ksock_conn *conn; - int wspace; - int min_wpace; - - /* interleave correctly with closing sockets... 
*/ - LASSERT(!in_irq()); - read_lock(&ksocknal_data.ksnd_global_lock); - - conn = sk->sk_user_data; - wspace = sk_stream_wspace(sk); - min_wpace = sk_stream_min_wspace(sk); - - CDEBUG(D_NET, "sk %p wspace %d low water %d conn %p%s%s%s\n", - sk, wspace, min_wpace, conn, - !conn ? "" : (conn->ksnc_tx_ready ? - " ready" : " blocked"), - !conn ? "" : (conn->ksnc_tx_scheduled ? - " scheduled" : " idle"), - !conn ? "" : (list_empty(&conn->ksnc_tx_queue) ? - " empty" : " queued")); - - if (!conn) { /* raced with ksocknal_terminate_conn */ - LASSERT(sk->sk_write_space != &ksocknal_write_space); - sk->sk_write_space(sk); - - read_unlock(&ksocknal_data.ksnd_global_lock); - return; - } - - if (wspace >= min_wpace) { /* got enough space */ - ksocknal_write_callback(conn); - - /* - * Clear SOCK_NOSPACE _after_ ksocknal_write_callback so the - * ENOMEM check in ksocknal_transmit is race-free (think about - * it). - */ - clear_bit(SOCK_NOSPACE, &sk->sk_socket->flags); - } - - read_unlock(&ksocknal_data.ksnd_global_lock); -} - -void -ksocknal_lib_save_callback(struct socket *sock, struct ksock_conn *conn) -{ - conn->ksnc_saved_data_ready = sock->sk->sk_data_ready; - conn->ksnc_saved_write_space = sock->sk->sk_write_space; -} - -void -ksocknal_lib_set_callback(struct socket *sock, struct ksock_conn *conn) -{ - sock->sk->sk_user_data = conn; - sock->sk->sk_data_ready = ksocknal_data_ready; - sock->sk->sk_write_space = ksocknal_write_space; -} - -void -ksocknal_lib_reset_callback(struct socket *sock, struct ksock_conn *conn) -{ - /* - * Remove conn's network callbacks. - * NB I _have_ to restore the callback, rather than storing a noop, - * since the socket could survive past this module being unloaded!! - */ - sock->sk->sk_data_ready = conn->ksnc_saved_data_ready; - sock->sk->sk_write_space = conn->ksnc_saved_write_space; - - /* - * A callback could be in progress already; they hold a read lock - * on ksnd_global_lock (to serialise with me) and NOOP if - * sk_user_data is NULL. 
- */ - sock->sk->sk_user_data = NULL; -} - -int -ksocknal_lib_memory_pressure(struct ksock_conn *conn) -{ - int rc = 0; - struct ksock_sched *sched; - - sched = conn->ksnc_scheduler; - spin_lock_bh(&sched->kss_lock); - - if (!test_bit(SOCK_NOSPACE, &conn->ksnc_sock->flags) && - !conn->ksnc_tx_ready) { - /* - * SOCK_NOSPACE is set when the socket fills - * and cleared in the write_space callback - * (which also sets ksnc_tx_ready). If - * SOCK_NOSPACE and ksnc_tx_ready are BOTH - * zero, I didn't fill the socket and - * write_space won't reschedule me, so I - * return -ENOMEM to get my caller to retry - * after a timeout - */ - rc = -ENOMEM; - } - - spin_unlock_bh(&sched->kss_lock); - - return rc; -} diff --git a/drivers/staging/lustre/lnet/klnds/socklnd/socklnd_modparams.c b/drivers/staging/lustre/lnet/klnds/socklnd/socklnd_modparams.c deleted file mode 100644 index 5663a4ca94d4..000000000000 --- a/drivers/staging/lustre/lnet/klnds/socklnd/socklnd_modparams.c +++ /dev/null @@ -1,184 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -/* - * Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved. - * - * Copyright (c) 2011, 2012, Intel Corporation. - * - * Author: Eric Barton <eric@bartonsoftware.com> - * - * Portals is free software; you can redistribute it and/or - * modify it under the terms of version 2 of the GNU General Public - * License as published by the Free Software Foundation. - * - * Portals is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. 
- * - */ - -#include "socklnd.h" - -static int sock_timeout = 50; -module_param(sock_timeout, int, 0644); -MODULE_PARM_DESC(sock_timeout, "dead socket timeout (seconds)"); - -static int credits = 256; -module_param(credits, int, 0444); -MODULE_PARM_DESC(credits, "# concurrent sends"); - -static int peer_credits = 8; -module_param(peer_credits, int, 0444); -MODULE_PARM_DESC(peer_credits, "# concurrent sends to 1 peer"); - -static int peer_buffer_credits; -module_param(peer_buffer_credits, int, 0444); -MODULE_PARM_DESC(peer_buffer_credits, "# per-peer router buffer credits"); - -static int peer_timeout = 180; -module_param(peer_timeout, int, 0444); -MODULE_PARM_DESC(peer_timeout, "Seconds without aliveness news to declare peer dead (<=0 to disable)"); - -/* - * Number of daemons in each thread pool which is percpt, - * we will estimate reasonable value based on CPUs if it's not set. - */ -static unsigned int nscheds; -module_param(nscheds, int, 0444); -MODULE_PARM_DESC(nscheds, "# scheduler daemons in each pool while starting"); - -static int nconnds = 4; -module_param(nconnds, int, 0444); -MODULE_PARM_DESC(nconnds, "# connection daemons while starting"); - -static int nconnds_max = 64; -module_param(nconnds_max, int, 0444); -MODULE_PARM_DESC(nconnds_max, "max # connection daemons"); - -static int min_reconnectms = 1000; -module_param(min_reconnectms, int, 0644); -MODULE_PARM_DESC(min_reconnectms, "min connection retry interval (mS)"); - -static int max_reconnectms = 60000; -module_param(max_reconnectms, int, 0644); -MODULE_PARM_DESC(max_reconnectms, "max connection retry interval (mS)"); - -# define DEFAULT_EAGER_ACK 0 -static int eager_ack = DEFAULT_EAGER_ACK; -module_param(eager_ack, int, 0644); -MODULE_PARM_DESC(eager_ack, "send tcp ack packets eagerly"); - -static int typed_conns = 1; -module_param(typed_conns, int, 0444); -MODULE_PARM_DESC(typed_conns, "use different sockets for bulk"); - -static int min_bulk = 1 << 10; -module_param(min_bulk, int, 0644); 
-MODULE_PARM_DESC(min_bulk, "smallest 'large' message"); - -# define DEFAULT_BUFFER_SIZE 0 -static int tx_buffer_size = DEFAULT_BUFFER_SIZE; -module_param(tx_buffer_size, int, 0644); -MODULE_PARM_DESC(tx_buffer_size, "socket tx buffer size (0 for system default)"); - -static int rx_buffer_size = DEFAULT_BUFFER_SIZE; -module_param(rx_buffer_size, int, 0644); -MODULE_PARM_DESC(rx_buffer_size, "socket rx buffer size (0 for system default)"); - -static int nagle; -module_param(nagle, int, 0644); -MODULE_PARM_DESC(nagle, "enable NAGLE?"); - -static int round_robin = 1; -module_param(round_robin, int, 0644); -MODULE_PARM_DESC(round_robin, "Round robin for multiple interfaces"); - -static int keepalive = 30; -module_param(keepalive, int, 0644); -MODULE_PARM_DESC(keepalive, "# seconds before send keepalive"); - -static int keepalive_idle = 30; -module_param(keepalive_idle, int, 0644); -MODULE_PARM_DESC(keepalive_idle, "# idle seconds before probe"); - -#define DEFAULT_KEEPALIVE_COUNT 5 -static int keepalive_count = DEFAULT_KEEPALIVE_COUNT; -module_param(keepalive_count, int, 0644); -MODULE_PARM_DESC(keepalive_count, "# missed probes == dead"); - -static int keepalive_intvl = 5; -module_param(keepalive_intvl, int, 0644); -MODULE_PARM_DESC(keepalive_intvl, "seconds between probes"); - -static int enable_csum; -module_param(enable_csum, int, 0644); -MODULE_PARM_DESC(enable_csum, "enable check sum"); - -static int inject_csum_error; -module_param(inject_csum_error, int, 0644); -MODULE_PARM_DESC(inject_csum_error, "set non-zero to inject a checksum error"); - -static int nonblk_zcack = 1; -module_param(nonblk_zcack, int, 0644); -MODULE_PARM_DESC(nonblk_zcack, "always send ZC-ACK on non-blocking connection"); - -static unsigned int zc_min_payload = 16 << 10; -module_param(zc_min_payload, int, 0644); -MODULE_PARM_DESC(zc_min_payload, "minimum payload size to zero copy"); - -static unsigned int zc_recv; -module_param(zc_recv, int, 0644); -MODULE_PARM_DESC(zc_recv, "enable ZC recv 
for Chelsio driver"); - -static unsigned int zc_recv_min_nfrags = 16; -module_param(zc_recv_min_nfrags, int, 0644); -MODULE_PARM_DESC(zc_recv_min_nfrags, "minimum # of fragments to enable ZC recv"); - -#if SOCKNAL_VERSION_DEBUG -static int protocol = 3; -module_param(protocol, int, 0644); -MODULE_PARM_DESC(protocol, "protocol version"); -#endif - -struct ksock_tunables ksocknal_tunables; - -int ksocknal_tunables_init(void) -{ - /* initialize ksocknal_tunables structure */ - ksocknal_tunables.ksnd_timeout = &sock_timeout; - ksocknal_tunables.ksnd_nscheds = &nscheds; - ksocknal_tunables.ksnd_nconnds = &nconnds; - ksocknal_tunables.ksnd_nconnds_max = &nconnds_max; - ksocknal_tunables.ksnd_min_reconnectms = &min_reconnectms; - ksocknal_tunables.ksnd_max_reconnectms = &max_reconnectms; - ksocknal_tunables.ksnd_eager_ack = &eager_ack; - ksocknal_tunables.ksnd_typed_conns = &typed_conns; - ksocknal_tunables.ksnd_min_bulk = &min_bulk; - ksocknal_tunables.ksnd_tx_buffer_size = &tx_buffer_size; - ksocknal_tunables.ksnd_rx_buffer_size = &rx_buffer_size; - ksocknal_tunables.ksnd_nagle = &nagle; - ksocknal_tunables.ksnd_round_robin = &round_robin; - ksocknal_tunables.ksnd_keepalive = &keepalive; - ksocknal_tunables.ksnd_keepalive_idle = &keepalive_idle; - ksocknal_tunables.ksnd_keepalive_count = &keepalive_count; - ksocknal_tunables.ksnd_keepalive_intvl = &keepalive_intvl; - ksocknal_tunables.ksnd_credits = &credits; - ksocknal_tunables.ksnd_peertxcredits = &peer_credits; - ksocknal_tunables.ksnd_peerrtrcredits = &peer_buffer_credits; - ksocknal_tunables.ksnd_peertimeout = &peer_timeout; - ksocknal_tunables.ksnd_enable_csum = &enable_csum; - ksocknal_tunables.ksnd_inject_csum_error = &inject_csum_error; - ksocknal_tunables.ksnd_nonblk_zcack = &nonblk_zcack; - ksocknal_tunables.ksnd_zc_min_payload = &zc_min_payload; - ksocknal_tunables.ksnd_zc_recv = &zc_recv; - ksocknal_tunables.ksnd_zc_recv_min_nfrags = &zc_recv_min_nfrags; - -#if SOCKNAL_VERSION_DEBUG - 
ksocknal_tunables.ksnd_protocol = &protocol; -#endif - - if (*ksocknal_tunables.ksnd_zc_min_payload < (2 << 10)) - *ksocknal_tunables.ksnd_zc_min_payload = 2 << 10; - - return 0; -}; diff --git a/drivers/staging/lustre/lnet/klnds/socklnd/socklnd_proto.c b/drivers/staging/lustre/lnet/klnds/socklnd/socklnd_proto.c deleted file mode 100644 index 05982dac781c..000000000000 --- a/drivers/staging/lustre/lnet/klnds/socklnd/socklnd_proto.c +++ /dev/null @@ -1,810 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -/* - * Copyright (c) 2009, 2010, Oracle and/or its affiliates. All rights reserved. - * - * Copyright (c) 2012, Intel Corporation. - * - * Author: Zach Brown <zab@zabbo.net> - * Author: Peter J. Braam <braam@clusterfs.com> - * Author: Phil Schwan <phil@clusterfs.com> - * Author: Eric Barton <eric@bartonsoftware.com> - * - * This file is part of Portals, http://www.sf.net/projects/sandiaportals/ - * - * Portals is free software; you can redistribute it and/or - * modify it under the terms of version 2 of the GNU General Public - * License as published by the Free Software Foundation. - * - * Portals is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. 
- * - */ - -#include "socklnd.h" - -/* - * Protocol entries : - * pro_send_hello : send hello message - * pro_recv_hello : receive hello message - * pro_pack : pack message header - * pro_unpack : unpack message header - * pro_queue_tx_zcack() : Called holding BH lock: kss_lock - * return 1 if ACK is piggybacked, otherwise return 0 - * pro_queue_tx_msg() : Called holding BH lock: kss_lock - * return the ACK that piggybacked by my message, or NULL - * pro_handle_zcreq() : handler of incoming ZC-REQ - * pro_handle_zcack() : handler of incoming ZC-ACK - * pro_match_tx() : Called holding glock - */ - -static struct ksock_tx * -ksocknal_queue_tx_msg_v1(struct ksock_conn *conn, struct ksock_tx *tx_msg) -{ - /* V1.x, just enqueue it */ - list_add_tail(&tx_msg->tx_list, &conn->ksnc_tx_queue); - return NULL; -} - -void -ksocknal_next_tx_carrier(struct ksock_conn *conn) -{ - struct ksock_tx *tx = conn->ksnc_tx_carrier; - - /* Called holding BH lock: conn->ksnc_scheduler->kss_lock */ - LASSERT(!list_empty(&conn->ksnc_tx_queue)); - LASSERT(tx); - - /* Next TX that can carry ZC-ACK or LNet message */ - if (tx->tx_list.next == &conn->ksnc_tx_queue) { - /* no more packets queued */ - conn->ksnc_tx_carrier = NULL; - } else { - conn->ksnc_tx_carrier = list_next_entry(tx, tx_list); - LASSERT(conn->ksnc_tx_carrier->tx_msg.ksm_type == tx->tx_msg.ksm_type); - } -} - -static int -ksocknal_queue_tx_zcack_v2(struct ksock_conn *conn, - struct ksock_tx *tx_ack, __u64 cookie) -{ - struct ksock_tx *tx = conn->ksnc_tx_carrier; - - LASSERT(!tx_ack || - tx_ack->tx_msg.ksm_type == KSOCK_MSG_NOOP); - - /* - * Enqueue or piggyback tx_ack / cookie - * . no tx can piggyback cookie of tx_ack (or cookie), just - * enqueue the tx_ack (if tx_ack != NUL) and return NULL. - * . There is tx can piggyback cookie of tx_ack (or cookie), - * piggyback the cookie and return the tx. 
- */ - if (!tx) { - if (tx_ack) { - list_add_tail(&tx_ack->tx_list, - &conn->ksnc_tx_queue); - conn->ksnc_tx_carrier = tx_ack; - } - return 0; - } - - if (tx->tx_msg.ksm_type == KSOCK_MSG_NOOP) { - /* tx is noop zc-ack, can't piggyback zc-ack cookie */ - if (tx_ack) - list_add_tail(&tx_ack->tx_list, - &conn->ksnc_tx_queue); - return 0; - } - - LASSERT(tx->tx_msg.ksm_type == KSOCK_MSG_LNET); - LASSERT(!tx->tx_msg.ksm_zc_cookies[1]); - - if (tx_ack) - cookie = tx_ack->tx_msg.ksm_zc_cookies[1]; - - /* piggyback the zc-ack cookie */ - tx->tx_msg.ksm_zc_cookies[1] = cookie; - /* move on to the next TX which can carry cookie */ - ksocknal_next_tx_carrier(conn); - - return 1; -} - -static struct ksock_tx * -ksocknal_queue_tx_msg_v2(struct ksock_conn *conn, struct ksock_tx *tx_msg) -{ - struct ksock_tx *tx = conn->ksnc_tx_carrier; - - /* - * Enqueue tx_msg: - * . If there is no NOOP on the connection, just enqueue - * tx_msg and return NULL - * . If there is NOOP on the connection, piggyback the cookie - * and replace the NOOP tx, and return the NOOP tx. 
- */ - if (!tx) { /* nothing on queue */ - list_add_tail(&tx_msg->tx_list, &conn->ksnc_tx_queue); - conn->ksnc_tx_carrier = tx_msg; - return NULL; - } - - if (tx->tx_msg.ksm_type == KSOCK_MSG_LNET) { /* nothing to carry */ - list_add_tail(&tx_msg->tx_list, &conn->ksnc_tx_queue); - return NULL; - } - - LASSERT(tx->tx_msg.ksm_type == KSOCK_MSG_NOOP); - - /* There is a noop zc-ack can be piggybacked */ - tx_msg->tx_msg.ksm_zc_cookies[1] = tx->tx_msg.ksm_zc_cookies[1]; - ksocknal_next_tx_carrier(conn); - - /* use new_tx to replace the noop zc-ack packet */ - list_add(&tx_msg->tx_list, &tx->tx_list); - list_del(&tx->tx_list); - - return tx; -} - -static int -ksocknal_queue_tx_zcack_v3(struct ksock_conn *conn, - struct ksock_tx *tx_ack, __u64 cookie) -{ - struct ksock_tx *tx; - - if (conn->ksnc_type != SOCKLND_CONN_ACK) - return ksocknal_queue_tx_zcack_v2(conn, tx_ack, cookie); - - /* non-blocking ZC-ACK (to router) */ - LASSERT(!tx_ack || - tx_ack->tx_msg.ksm_type == KSOCK_MSG_NOOP); - - tx = conn->ksnc_tx_carrier; - if (!tx) { - if (tx_ack) { - list_add_tail(&tx_ack->tx_list, - &conn->ksnc_tx_queue); - conn->ksnc_tx_carrier = tx_ack; - } - return 0; - } - - /* conn->ksnc_tx_carrier */ - - if (tx_ack) - cookie = tx_ack->tx_msg.ksm_zc_cookies[1]; - - if (cookie == SOCKNAL_KEEPALIVE_PING) /* ignore keepalive PING */ - return 1; - - if (tx->tx_msg.ksm_zc_cookies[1] == SOCKNAL_KEEPALIVE_PING) { - /* replace the keepalive PING with a real ACK */ - LASSERT(!tx->tx_msg.ksm_zc_cookies[0]); - tx->tx_msg.ksm_zc_cookies[1] = cookie; - return 1; - } - - if (cookie == tx->tx_msg.ksm_zc_cookies[0] || - cookie == tx->tx_msg.ksm_zc_cookies[1]) { - CWARN("%s: duplicated ZC cookie: %llu\n", - libcfs_id2str(conn->ksnc_peer->ksnp_id), cookie); - return 1; /* XXX return error in the future */ - } - - if (!tx->tx_msg.ksm_zc_cookies[0]) { - /* - * NOOP tx has only one ZC-ACK cookie, - * can carry at least one more - */ - if (tx->tx_msg.ksm_zc_cookies[1] > cookie) { - 
tx->tx_msg.ksm_zc_cookies[0] = tx->tx_msg.ksm_zc_cookies[1]; - tx->tx_msg.ksm_zc_cookies[1] = cookie; - } else { - tx->tx_msg.ksm_zc_cookies[0] = cookie; - } - - if (tx->tx_msg.ksm_zc_cookies[0] - tx->tx_msg.ksm_zc_cookies[1] > 2) { - /* - * not likely to carry more ACKs, skip it - * to simplify logic - */ - ksocknal_next_tx_carrier(conn); - } - - return 1; - } - - /* takes two or more cookies already */ - - if (tx->tx_msg.ksm_zc_cookies[0] > tx->tx_msg.ksm_zc_cookies[1]) { - __u64 tmp = 0; - - /* two separated cookies: (a+2, a) or (a+1, a) */ - LASSERT(tx->tx_msg.ksm_zc_cookies[0] - - tx->tx_msg.ksm_zc_cookies[1] <= 2); - - if (tx->tx_msg.ksm_zc_cookies[0] - - tx->tx_msg.ksm_zc_cookies[1] == 2) { - if (cookie == tx->tx_msg.ksm_zc_cookies[1] + 1) - tmp = cookie; - } else if (cookie == tx->tx_msg.ksm_zc_cookies[1] - 1) { - tmp = tx->tx_msg.ksm_zc_cookies[1]; - } else if (cookie == tx->tx_msg.ksm_zc_cookies[0] + 1) { - tmp = tx->tx_msg.ksm_zc_cookies[0]; - } - - if (tmp) { - /* range of cookies */ - tx->tx_msg.ksm_zc_cookies[0] = tmp - 1; - tx->tx_msg.ksm_zc_cookies[1] = tmp + 1; - return 1; - } - - } else { - /* - * ksm_zc_cookies[0] < ksm_zc_cookies[1], - * it is range of cookies - */ - if (cookie >= tx->tx_msg.ksm_zc_cookies[0] && - cookie <= tx->tx_msg.ksm_zc_cookies[1]) { - CWARN("%s: duplicated ZC cookie: %llu\n", - libcfs_id2str(conn->ksnc_peer->ksnp_id), cookie); - return 1; /* XXX: return error in the future */ - } - - if (cookie == tx->tx_msg.ksm_zc_cookies[1] + 1) { - tx->tx_msg.ksm_zc_cookies[1] = cookie; - return 1; - } - - if (cookie == tx->tx_msg.ksm_zc_cookies[0] - 1) { - tx->tx_msg.ksm_zc_cookies[0] = cookie; - return 1; - } - } - - /* failed to piggyback ZC-ACK */ - if (tx_ack) { - list_add_tail(&tx_ack->tx_list, &conn->ksnc_tx_queue); - /* the next tx can piggyback at least 1 ACK */ - ksocknal_next_tx_carrier(conn); - } - - return 0; -} - -static int -ksocknal_match_tx(struct ksock_conn *conn, struct ksock_tx *tx, int nonblk) -{ - int nob; - -#if 
SOCKNAL_VERSION_DEBUG - if (!*ksocknal_tunables.ksnd_typed_conns) - return SOCKNAL_MATCH_YES; -#endif - - if (!tx || !tx->tx_lnetmsg) { - /* noop packet */ - nob = offsetof(struct ksock_msg, ksm_u); - } else { - nob = tx->tx_lnetmsg->msg_len + - ((conn->ksnc_proto == &ksocknal_protocol_v1x) ? - sizeof(struct lnet_hdr) : sizeof(struct ksock_msg)); - } - - /* default checking for typed connection */ - switch (conn->ksnc_type) { - default: - CERROR("ksnc_type bad: %u\n", conn->ksnc_type); - LBUG(); - case SOCKLND_CONN_ANY: - return SOCKNAL_MATCH_YES; - - case SOCKLND_CONN_BULK_IN: - return SOCKNAL_MATCH_MAY; - - case SOCKLND_CONN_BULK_OUT: - if (nob < *ksocknal_tunables.ksnd_min_bulk) - return SOCKNAL_MATCH_MAY; - else - return SOCKNAL_MATCH_YES; - - case SOCKLND_CONN_CONTROL: - if (nob >= *ksocknal_tunables.ksnd_min_bulk) - return SOCKNAL_MATCH_MAY; - else - return SOCKNAL_MATCH_YES; - } -} - -static int -ksocknal_match_tx_v3(struct ksock_conn *conn, struct ksock_tx *tx, int nonblk) -{ - int nob; - - if (!tx || !tx->tx_lnetmsg) - nob = offsetof(struct ksock_msg, ksm_u); - else - nob = tx->tx_lnetmsg->msg_len + sizeof(struct ksock_msg); - - switch (conn->ksnc_type) { - default: - CERROR("ksnc_type bad: %u\n", conn->ksnc_type); - LBUG(); - case SOCKLND_CONN_ANY: - return SOCKNAL_MATCH_NO; - - case SOCKLND_CONN_ACK: - if (nonblk) - return SOCKNAL_MATCH_YES; - else if (!tx || !tx->tx_lnetmsg) - return SOCKNAL_MATCH_MAY; - else - return SOCKNAL_MATCH_NO; - - case SOCKLND_CONN_BULK_OUT: - if (nonblk) - return SOCKNAL_MATCH_NO; - else if (nob < *ksocknal_tunables.ksnd_min_bulk) - return SOCKNAL_MATCH_MAY; - else - return SOCKNAL_MATCH_YES; - - case SOCKLND_CONN_CONTROL: - if (nonblk) - return SOCKNAL_MATCH_NO; - else if (nob >= *ksocknal_tunables.ksnd_min_bulk) - return SOCKNAL_MATCH_MAY; - else - return SOCKNAL_MATCH_YES; - } -} - -/* (Sink) handle incoming ZC request from sender */ -static int -ksocknal_handle_zcreq(struct ksock_conn *c, __u64 cookie, int remote) -{ - 
struct ksock_peer *peer = c->ksnc_peer; - struct ksock_conn *conn; - struct ksock_tx *tx; - int rc; - - read_lock(&ksocknal_data.ksnd_global_lock); - - conn = ksocknal_find_conn_locked(peer, NULL, !!remote); - if (conn) { - struct ksock_sched *sched = conn->ksnc_scheduler; - - LASSERT(conn->ksnc_proto->pro_queue_tx_zcack); - - spin_lock_bh(&sched->kss_lock); - - rc = conn->ksnc_proto->pro_queue_tx_zcack(conn, NULL, cookie); - - spin_unlock_bh(&sched->kss_lock); - - if (rc) { /* piggybacked */ - read_unlock(&ksocknal_data.ksnd_global_lock); - return 0; - } - } - - read_unlock(&ksocknal_data.ksnd_global_lock); - - /* ACK connection is not ready, or can't piggyback the ACK */ - tx = ksocknal_alloc_tx_noop(cookie, !!remote); - if (!tx) - return -ENOMEM; - - rc = ksocknal_launch_packet(peer->ksnp_ni, tx, peer->ksnp_id); - if (!rc) - return 0; - - ksocknal_free_tx(tx); - return rc; -} - -/* (Sender) handle ZC_ACK from sink */ -static int -ksocknal_handle_zcack(struct ksock_conn *conn, __u64 cookie1, __u64 cookie2) -{ - struct ksock_peer *peer = conn->ksnc_peer; - struct ksock_tx *tx; - struct ksock_tx *temp; - struct ksock_tx *tmp; - LIST_HEAD(zlist); - int count; - - if (!cookie1) - cookie1 = cookie2; - - count = (cookie1 > cookie2) ? 2 : (cookie2 - cookie1 + 1); - - if (cookie2 == SOCKNAL_KEEPALIVE_PING && - conn->ksnc_proto == &ksocknal_protocol_v3x) { - /* keepalive PING for V3.x, just ignore it */ - return count == 1 ? 0 : -EPROTO; - } - - spin_lock(&peer->ksnp_lock); - - list_for_each_entry_safe(tx, tmp, &peer->ksnp_zc_req_list, - tx_zc_list) { - __u64 c = tx->tx_msg.ksm_zc_cookies[0]; - - if (c == cookie1 || c == cookie2 || - (cookie1 < c && c < cookie2)) { - tx->tx_msg.ksm_zc_cookies[0] = 0; - list_del(&tx->tx_zc_list); - list_add(&tx->tx_zc_list, &zlist); - - if (!--count) - break; - } - } - - spin_unlock(&peer->ksnp_lock); - - list_for_each_entry_safe(tx, temp, &zlist, tx_zc_list) { - list_del(&tx->tx_zc_list); - ksocknal_tx_decref(tx); - } - - return !count ? 
0 : -EPROTO; -} - -static int -ksocknal_send_hello_v1(struct ksock_conn *conn, struct ksock_hello_msg *hello) -{ - struct socket *sock = conn->ksnc_sock; - struct lnet_hdr *hdr; - struct lnet_magicversion *hmv; - int rc; - int i; - - BUILD_BUG_ON(sizeof(struct lnet_magicversion) != offsetof(struct lnet_hdr, src_nid)); - - hdr = kzalloc(sizeof(*hdr), GFP_NOFS); - if (!hdr) { - CERROR("Can't allocate struct lnet_hdr\n"); - return -ENOMEM; - } - - hmv = (struct lnet_magicversion *)&hdr->dest_nid; - - /* - * Re-organize V2.x message header to V1.x (struct lnet_hdr) - * header and send out - */ - hmv->magic = cpu_to_le32(LNET_PROTO_TCP_MAGIC); - hmv->version_major = cpu_to_le16(KSOCK_PROTO_V1_MAJOR); - hmv->version_minor = cpu_to_le16(KSOCK_PROTO_V1_MINOR); - - if (the_lnet.ln_testprotocompat) { - /* single-shot proto check */ - LNET_LOCK(); - if (the_lnet.ln_testprotocompat & 1) { - hmv->version_major++; /* just different! */ - the_lnet.ln_testprotocompat &= ~1; - } - if (the_lnet.ln_testprotocompat & 2) { - hmv->magic = LNET_PROTO_MAGIC; - the_lnet.ln_testprotocompat &= ~2; - } - LNET_UNLOCK(); - } - - hdr->src_nid = cpu_to_le64(hello->kshm_src_nid); - hdr->src_pid = cpu_to_le32(hello->kshm_src_pid); - hdr->type = cpu_to_le32(LNET_MSG_HELLO); - hdr->payload_length = cpu_to_le32(hello->kshm_nips * sizeof(__u32)); - hdr->msg.hello.type = cpu_to_le32(hello->kshm_ctype); - hdr->msg.hello.incarnation = cpu_to_le64(hello->kshm_src_incarnation); - - rc = lnet_sock_write(sock, hdr, sizeof(*hdr), lnet_acceptor_timeout()); - if (rc) { - CNETERR("Error %d sending HELLO hdr to %pI4h/%d\n", - rc, &conn->ksnc_ipaddr, conn->ksnc_port); - goto out; - } - - if (!hello->kshm_nips) - goto out; - - for (i = 0; i < (int)hello->kshm_nips; i++) - hello->kshm_ips[i] = __cpu_to_le32(hello->kshm_ips[i]); - - rc = lnet_sock_write(sock, hello->kshm_ips, - hello->kshm_nips * sizeof(__u32), - lnet_acceptor_timeout()); - if (rc) { - CNETERR("Error %d sending HELLO payload (%d) to %pI4h/%d\n", - rc, 
hello->kshm_nips, - &conn->ksnc_ipaddr, conn->ksnc_port); - } -out: - kfree(hdr); - - return rc; -} - -static int -ksocknal_send_hello_v2(struct ksock_conn *conn, struct ksock_hello_msg *hello) -{ - struct socket *sock = conn->ksnc_sock; - int rc; - - hello->kshm_magic = LNET_PROTO_MAGIC; - hello->kshm_version = conn->ksnc_proto->pro_version; - - if (the_lnet.ln_testprotocompat) { - /* single-shot proto check */ - LNET_LOCK(); - if (the_lnet.ln_testprotocompat & 1) { - hello->kshm_version++; /* just different! */ - the_lnet.ln_testprotocompat &= ~1; - } - LNET_UNLOCK(); - } - - rc = lnet_sock_write(sock, hello, offsetof(struct ksock_hello_msg, kshm_ips), - lnet_acceptor_timeout()); - if (rc) { - CNETERR("Error %d sending HELLO hdr to %pI4h/%d\n", - rc, &conn->ksnc_ipaddr, conn->ksnc_port); - return rc; - } - - if (!hello->kshm_nips) - return 0; - - rc = lnet_sock_write(sock, hello->kshm_ips, - hello->kshm_nips * sizeof(__u32), - lnet_acceptor_timeout()); - if (rc) { - CNETERR("Error %d sending HELLO payload (%d) to %pI4h/%d\n", - rc, hello->kshm_nips, - &conn->ksnc_ipaddr, conn->ksnc_port); - } - - return rc; -} - -static int -ksocknal_recv_hello_v1(struct ksock_conn *conn, struct ksock_hello_msg *hello, - int timeout) -{ - struct socket *sock = conn->ksnc_sock; - struct lnet_hdr *hdr; - int rc; - int i; - - hdr = kzalloc(sizeof(*hdr), GFP_NOFS); - if (!hdr) { - CERROR("Can't allocate struct lnet_hdr\n"); - return -ENOMEM; - } - - rc = lnet_sock_read(sock, &hdr->src_nid, - sizeof(*hdr) - offsetof(struct lnet_hdr, src_nid), - timeout); - if (rc) { - CERROR("Error %d reading rest of HELLO hdr from %pI4h\n", - rc, &conn->ksnc_ipaddr); - LASSERT(rc < 0 && rc != -EALREADY); - goto out; - } - - /* ...and check we got what we expected */ - if (hdr->type != cpu_to_le32(LNET_MSG_HELLO)) { - CERROR("Expecting a HELLO hdr, but got type %d from %pI4h\n", - le32_to_cpu(hdr->type), - &conn->ksnc_ipaddr); - rc = -EPROTO; - goto out; - } - - hello->kshm_src_nid = 
le64_to_cpu(hdr->src_nid); - hello->kshm_src_pid = le32_to_cpu(hdr->src_pid); - hello->kshm_src_incarnation = le64_to_cpu(hdr->msg.hello.incarnation); - hello->kshm_ctype = le32_to_cpu(hdr->msg.hello.type); - hello->kshm_nips = le32_to_cpu(hdr->payload_length) / - sizeof(__u32); - - if (hello->kshm_nips > LNET_MAX_INTERFACES) { - CERROR("Bad nips %d from ip %pI4h\n", - hello->kshm_nips, &conn->ksnc_ipaddr); - rc = -EPROTO; - goto out; - } - - if (!hello->kshm_nips) - goto out; - - rc = lnet_sock_read(sock, hello->kshm_ips, - hello->kshm_nips * sizeof(__u32), timeout); - if (rc) { - CERROR("Error %d reading IPs from ip %pI4h\n", - rc, &conn->ksnc_ipaddr); - LASSERT(rc < 0 && rc != -EALREADY); - goto out; - } - - for (i = 0; i < (int)hello->kshm_nips; i++) { - hello->kshm_ips[i] = __le32_to_cpu(hello->kshm_ips[i]); - - if (!hello->kshm_ips[i]) { - CERROR("Zero IP[%d] from ip %pI4h\n", - i, &conn->ksnc_ipaddr); - rc = -EPROTO; - break; - } - } -out: - kfree(hdr); - - return rc; -} - -static int -ksocknal_recv_hello_v2(struct ksock_conn *conn, struct ksock_hello_msg *hello, - int timeout) -{ - struct socket *sock = conn->ksnc_sock; - int rc; - int i; - - if (hello->kshm_magic == LNET_PROTO_MAGIC) - conn->ksnc_flip = 0; - else - conn->ksnc_flip = 1; - - rc = lnet_sock_read(sock, &hello->kshm_src_nid, - offsetof(struct ksock_hello_msg, kshm_ips) - - offsetof(struct ksock_hello_msg, kshm_src_nid), - timeout); - if (rc) { - CERROR("Error %d reading HELLO from %pI4h\n", - rc, &conn->ksnc_ipaddr); - LASSERT(rc < 0 && rc != -EALREADY); - return rc; - } - - if (conn->ksnc_flip) { - __swab32s(&hello->kshm_src_pid); - __swab64s(&hello->kshm_src_nid); - __swab32s(&hello->kshm_dst_pid); - __swab64s(&hello->kshm_dst_nid); - __swab64s(&hello->kshm_src_incarnation); - __swab64s(&hello->kshm_dst_incarnation); - __swab32s(&hello->kshm_ctype); - __swab32s(&hello->kshm_nips); - } - - if (hello->kshm_nips > LNET_MAX_INTERFACES) { - CERROR("Bad nips %d from ip %pI4h\n", - hello->kshm_nips, 
&conn->ksnc_ipaddr); - return -EPROTO; - } - - if (!hello->kshm_nips) - return 0; - - rc = lnet_sock_read(sock, hello->kshm_ips, - hello->kshm_nips * sizeof(__u32), timeout); - if (rc) { - CERROR("Error %d reading IPs from ip %pI4h\n", - rc, &conn->ksnc_ipaddr); - LASSERT(rc < 0 && rc != -EALREADY); - return rc; - } - - for (i = 0; i < (int)hello->kshm_nips; i++) { - if (conn->ksnc_flip) - __swab32s(&hello->kshm_ips[i]); - - if (!hello->kshm_ips[i]) { - CERROR("Zero IP[%d] from ip %pI4h\n", - i, &conn->ksnc_ipaddr); - return -EPROTO; - } - } - - return 0; -} - -static void -ksocknal_pack_msg_v1(struct ksock_tx *tx) -{ - /* V1.x has no KSOCK_MSG_NOOP */ - LASSERT(tx->tx_msg.ksm_type != KSOCK_MSG_NOOP); - LASSERT(tx->tx_lnetmsg); - - tx->tx_iov[0].iov_base = &tx->tx_lnetmsg->msg_hdr; - tx->tx_iov[0].iov_len = sizeof(struct lnet_hdr); - - tx->tx_nob = tx->tx_lnetmsg->msg_len + sizeof(struct lnet_hdr); - tx->tx_resid = tx->tx_lnetmsg->msg_len + sizeof(struct lnet_hdr); -} - -static void -ksocknal_pack_msg_v2(struct ksock_tx *tx) -{ - tx->tx_iov[0].iov_base = &tx->tx_msg; - - if (tx->tx_lnetmsg) { - LASSERT(tx->tx_msg.ksm_type != KSOCK_MSG_NOOP); - - tx->tx_msg.ksm_u.lnetmsg.ksnm_hdr = tx->tx_lnetmsg->msg_hdr; - tx->tx_iov[0].iov_len = sizeof(struct ksock_msg); - tx->tx_nob = sizeof(struct ksock_msg) + tx->tx_lnetmsg->msg_len; - tx->tx_resid = sizeof(struct ksock_msg) + tx->tx_lnetmsg->msg_len; - } else { - LASSERT(tx->tx_msg.ksm_type == KSOCK_MSG_NOOP); - - tx->tx_iov[0].iov_len = offsetof(struct ksock_msg, ksm_u.lnetmsg.ksnm_hdr); - tx->tx_nob = offsetof(struct ksock_msg, ksm_u.lnetmsg.ksnm_hdr); - tx->tx_resid = offsetof(struct ksock_msg, ksm_u.lnetmsg.ksnm_hdr); - } - /* - * Don't checksum before start sending, because packet can be - * piggybacked with ACK - */ -} - -static void -ksocknal_unpack_msg_v1(struct ksock_msg *msg) -{ - msg->ksm_csum = 0; - msg->ksm_type = KSOCK_MSG_LNET; - msg->ksm_zc_cookies[0] = 0; - msg->ksm_zc_cookies[1] = 0; -} - -static void 
-ksocknal_unpack_msg_v2(struct ksock_msg *msg) -{ - return; /* Do nothing */ -} - -struct ksock_proto ksocknal_protocol_v1x = { - .pro_version = KSOCK_PROTO_V1, - .pro_send_hello = ksocknal_send_hello_v1, - .pro_recv_hello = ksocknal_recv_hello_v1, - .pro_pack = ksocknal_pack_msg_v1, - .pro_unpack = ksocknal_unpack_msg_v1, - .pro_queue_tx_msg = ksocknal_queue_tx_msg_v1, - .pro_handle_zcreq = NULL, - .pro_handle_zcack = NULL, - .pro_queue_tx_zcack = NULL, - .pro_match_tx = ksocknal_match_tx -}; - -struct ksock_proto ksocknal_protocol_v2x = { - .pro_version = KSOCK_PROTO_V2, - .pro_send_hello = ksocknal_send_hello_v2, - .pro_recv_hello = ksocknal_recv_hello_v2, - .pro_pack = ksocknal_pack_msg_v2, - .pro_unpack = ksocknal_unpack_msg_v2, - .pro_queue_tx_msg = ksocknal_queue_tx_msg_v2, - .pro_queue_tx_zcack = ksocknal_queue_tx_zcack_v2, - .pro_handle_zcreq = ksocknal_handle_zcreq, - .pro_handle_zcack = ksocknal_handle_zcack, - .pro_match_tx = ksocknal_match_tx -}; - -struct ksock_proto ksocknal_protocol_v3x = { - .pro_version = KSOCK_PROTO_V3, - .pro_send_hello = ksocknal_send_hello_v2, - .pro_recv_hello = ksocknal_recv_hello_v2, - .pro_pack = ksocknal_pack_msg_v2, - .pro_unpack = ksocknal_unpack_msg_v2, - .pro_queue_tx_msg = ksocknal_queue_tx_msg_v2, - .pro_queue_tx_zcack = ksocknal_queue_tx_zcack_v3, - .pro_handle_zcreq = ksocknal_handle_zcreq, - .pro_handle_zcack = ksocknal_handle_zcack, - .pro_match_tx = ksocknal_match_tx_v3 -}; diff --git a/drivers/staging/lustre/lnet/libcfs/Makefile b/drivers/staging/lustre/lnet/libcfs/Makefile deleted file mode 100644 index b7dc7ac11cc5..000000000000 --- a/drivers/staging/lustre/lnet/libcfs/Makefile +++ /dev/null @@ -1,19 +0,0 @@ -# SPDX-License-Identifier: GPL-2.0 -subdir-ccflags-y += -I$(srctree)/drivers/staging/lustre/include -subdir-ccflags-y += -I$(srctree)/drivers/staging/lustre/lustre/include - -obj-$(CONFIG_LNET) += libcfs.o - -libcfs-linux-objs := linux-tracefile.o linux-debug.o -libcfs-linux-objs += linux-cpu.o 
-libcfs-linux-objs += linux-module.o -libcfs-linux-objs += linux-crypto.o -libcfs-linux-objs += linux-crypto-adler.o - -libcfs-linux-objs := $(addprefix linux/,$(libcfs-linux-objs)) - -libcfs-all-objs := debug.o fail.o module.o tracefile.o \ - libcfs_string.o hash.o \ - libcfs_cpu.o libcfs_mem.o libcfs_lock.o - -libcfs-objs := $(libcfs-linux-objs) $(libcfs-all-objs) diff --git a/drivers/staging/lustre/lnet/libcfs/debug.c b/drivers/staging/lustre/lnet/libcfs/debug.c deleted file mode 100644 index 1371224a8cb9..000000000000 --- a/drivers/staging/lustre/lnet/libcfs/debug.c +++ /dev/null @@ -1,458 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -/* - * GPL HEADER START - * - * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 only, - * as published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, but - * WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * General Public License version 2 for more details (a copy is included - * in the LICENSE file that accompanied this code). - * - * You should have received a copy of the GNU General Public License - * version 2 along with this program; If not, see - * http://www.gnu.org/licenses/gpl-2.0.html - * - * GPL HEADER END - */ -/* - * Copyright (c) 2008, 2010, Oracle and/or its affiliates. All rights reserved. - * Use is subject to license terms. - * - * Copyright (c) 2011, 2012, Intel Corporation. - */ -/* - * This file is part of Lustre, http://www.lustre.org/ - * Lustre is a trademark of Sun Microsystems, Inc. 
- * - * libcfs/libcfs/debug.c - * - * Author: Phil Schwan <phil@clusterfs.com> - * - */ - -# define DEBUG_SUBSYSTEM S_LNET - -#include <linux/libcfs/libcfs.h> -#include "tracefile.h" - -static char debug_file_name[1024]; - -unsigned int libcfs_subsystem_debug = ~0; -EXPORT_SYMBOL(libcfs_subsystem_debug); -module_param(libcfs_subsystem_debug, int, 0644); -MODULE_PARM_DESC(libcfs_subsystem_debug, "Lustre kernel debug subsystem mask"); - -unsigned int libcfs_debug = (D_CANTMASK | - D_NETERROR | D_HA | D_CONFIG | D_IOCTL); -EXPORT_SYMBOL(libcfs_debug); -module_param(libcfs_debug, int, 0644); -MODULE_PARM_DESC(libcfs_debug, "Lustre kernel debug mask"); - -static int libcfs_param_debug_mb_set(const char *val, - const struct kernel_param *kp) -{ - int rc; - unsigned int num; - - rc = kstrtouint(val, 0, &num); - if (rc < 0) - return rc; - - if (!*((unsigned int *)kp->arg)) { - *((unsigned int *)kp->arg) = num; - return 0; - } - - rc = cfs_trace_set_debug_mb(num); - - if (!rc) - *((unsigned int *)kp->arg) = cfs_trace_get_debug_mb(); - - return rc; -} - -/* While debug_mb setting look like unsigned int, in fact - * it needs quite a bunch of extra processing, so we define special - * debugmb parameter type with corresponding methods to handle this case - */ -static const struct kernel_param_ops param_ops_debugmb = { - .set = libcfs_param_debug_mb_set, - .get = param_get_uint, -}; - -#define param_check_debugmb(name, p) \ - __param_check(name, p, unsigned int) - -static unsigned int libcfs_debug_mb; -module_param(libcfs_debug_mb, debugmb, 0644); -MODULE_PARM_DESC(libcfs_debug_mb, "Total debug buffer size."); - -unsigned int libcfs_printk = D_CANTMASK; -module_param(libcfs_printk, uint, 0644); -MODULE_PARM_DESC(libcfs_printk, "Lustre kernel debug console mask"); - -unsigned int libcfs_console_ratelimit = 1; -module_param(libcfs_console_ratelimit, uint, 0644); -MODULE_PARM_DESC(libcfs_console_ratelimit, "Lustre kernel debug console ratelimit (0 to disable)"); - -static int 
param_set_delay_minmax(const char *val,
-				  const struct kernel_param *kp,
-				  long min, long max)
-{
-	long d;
-	int sec;
-	int rc;
-
-	/* @val is parsed as a count of 1/100ths of a second (see below) */
-	rc = kstrtoint(val, 0, &sec);
-	if (rc)
-		return -EINVAL;
-
-	/*
-	 * Convert to jiffies.  Widen to long before multiplying so that a
-	 * large @val cannot overflow int arithmetic (where long is wider
-	 * than int) and slip past the range check below.
-	 */
-	d = (long)sec * HZ / 100;
-	if (d < min || d > max)
-		return -EINVAL;
-
-	*((unsigned int *)kp->arg) = d;
-
-	return 0;
-}
-
-static int param_get_delay(char *buffer, const struct kernel_param *kp)
-{
-	unsigned int d = *(unsigned int *)kp->arg;
-
-	return sprintf(buffer, "%u", (unsigned int)cfs_duration_sec(d * 100));
-}
-
-unsigned int libcfs_console_max_delay;
-unsigned int libcfs_console_min_delay;
-
-static int param_set_console_max_delay(const char *val,
-				       const struct kernel_param *kp)
-{
-	return param_set_delay_minmax(val, kp,
-				      libcfs_console_min_delay, INT_MAX);
-}
-
-static const struct kernel_param_ops param_ops_console_max_delay = {
-	.set = param_set_console_max_delay,
-	.get = param_get_delay,
-};
-
-#define param_check_console_max_delay(name, p) \
-		__param_check(name, p, unsigned int)
-
-module_param(libcfs_console_max_delay, console_max_delay, 0644);
-MODULE_PARM_DESC(libcfs_console_max_delay, "Lustre kernel debug console max delay (jiffies)");
-
-static int param_set_console_min_delay(const char *val,
-				       const struct kernel_param *kp)
-{
-	return param_set_delay_minmax(val, kp,
-				      1, libcfs_console_max_delay);
-}
-
-static const struct kernel_param_ops param_ops_console_min_delay = {
-	.set = param_set_console_min_delay,
-	.get = param_get_delay,
-};
-
-#define param_check_console_min_delay(name, p) \
-		__param_check(name, p, unsigned int)
-
-module_param(libcfs_console_min_delay, console_min_delay, 0644);
-MODULE_PARM_DESC(libcfs_console_min_delay, "Lustre kernel debug console min delay (jiffies)");
-
-static int param_set_uint_minmax(const char *val,
-				 const struct kernel_param *kp,
-				 unsigned int min, unsigned int max)
-{
-	unsigned int num;
-	int ret;
-
-	if (!val)
-		return -EINVAL;
-	ret = kstrtouint(val, 0, &num);
-	if (ret < 0 || num < min || num > max)
-		
return -EINVAL; - *((unsigned int *)kp->arg) = num; - return 0; -} - -static int param_set_uintpos(const char *val, const struct kernel_param *kp) -{ - return param_set_uint_minmax(val, kp, 1, -1); -} - -static const struct kernel_param_ops param_ops_uintpos = { - .set = param_set_uintpos, - .get = param_get_uint, -}; - -#define param_check_uintpos(name, p) \ - __param_check(name, p, unsigned int) - -unsigned int libcfs_console_backoff = CDEBUG_DEFAULT_BACKOFF; -module_param(libcfs_console_backoff, uintpos, 0644); -MODULE_PARM_DESC(libcfs_console_backoff, "Lustre kernel debug console backoff factor"); - -unsigned int libcfs_debug_binary = 1; - -unsigned int libcfs_stack = 3 * THREAD_SIZE / 4; -EXPORT_SYMBOL(libcfs_stack); - -unsigned int libcfs_catastrophe; -EXPORT_SYMBOL(libcfs_catastrophe); - -unsigned int libcfs_panic_on_lbug = 1; -module_param(libcfs_panic_on_lbug, uint, 0644); -MODULE_PARM_DESC(libcfs_panic_on_lbug, "Lustre kernel panic on LBUG"); - -static wait_queue_head_t debug_ctlwq; - -char libcfs_debug_file_path_arr[PATH_MAX] = LIBCFS_DEBUG_FILE_PATH_DEFAULT; - -/* We need to pass a pointer here, but elsewhere this must be a const */ -static char *libcfs_debug_file_path; -module_param(libcfs_debug_file_path, charp, 0644); -MODULE_PARM_DESC(libcfs_debug_file_path, - "Path for dumping debug logs, set 'NONE' to prevent log dumping"); - -int libcfs_panic_in_progress; - -/* libcfs_debug_token2mask() expects the returned string in lower-case */ -static const char * -libcfs_debug_subsys2str(int subsys) -{ - static const char * const libcfs_debug_subsystems[] = - LIBCFS_DEBUG_SUBSYS_NAMES; - - if (subsys >= ARRAY_SIZE(libcfs_debug_subsystems)) - return NULL; - - return libcfs_debug_subsystems[subsys]; -} - -/* libcfs_debug_token2mask() expects the returned string in lower-case */ -static const char * -libcfs_debug_dbg2str(int debug) -{ - static const char * const libcfs_debug_masks[] = - LIBCFS_DEBUG_MASKS_NAMES; - - if (debug >= ARRAY_SIZE(libcfs_debug_masks)) 
-		return NULL;
-
-	return libcfs_debug_masks[debug];
-}
-
-/**
- * Render the bits set in @mask as a space-separated list of token names.
- *
- * At most @size bytes (including the terminating NUL) are stored in @str;
- * characters that do not fit are dropped.  A zero @mask is rendered as "0",
- * and bits for which the name lookup returns NULL are skipped.
- *
- * \param str        destination buffer
- * \param size       capacity of @str in bytes; nothing is stored if <= 0
- * \param mask       bit mask to translate
- * \param is_subsys  non-zero selects subsystem names, zero debug-mask names
- *
- * \retval length the complete string would have (excluding the NUL), even
- *         when the stored string was truncated
- */
-int
-libcfs_debug_mask2str(char *str, int size, int mask, int is_subsys)
-{
-	const char *(*fn)(int bit) = is_subsys ? libcfs_debug_subsys2str :
-						 libcfs_debug_dbg2str;
-	int len = 0;
-	const char *token;
-	int i;
-
-	if (!mask) {			/* "0" */
-		if (size > 0)
-			str[0] = '0';
-		len = 1;
-	} else {			/* space-separated tokens */
-		for (i = 0; i < 32; i++) {
-			/* unsigned shift: 1 << 31 overflows a signed int */
-			if (!(mask & (1U << i)))
-				continue;
-
-			token = fn(i);
-			if (!token)	/* unused bit */
-				continue;
-
-			if (len > 0) {	/* separator? */
-				if (len < size)
-					str[len] = ' ';
-				len++;
-			}
-
-			while (*token) {
-				if (len < size)
-					str[len] = *token;
-				token++;
-				len++;
-			}
-		}
-	}
-
-	/* terminate 'str'; an empty buffer (size <= 0) must not be written */
-	if (len < size)
-		str[len] = 0;
-	else if (size > 0)
-		str[size - 1] = 0;
-
-	return len;
-}
-
-int
-libcfs_debug_str2mask(int *mask, const char *str, int is_subsys)
-{
-	const char *(*fn)(int bit) = is_subsys ? libcfs_debug_subsys2str :
-						 libcfs_debug_dbg2str;
-	int m = 0;
-	int matched;
-	int n;
-	int t;
-
-	/* Allow a number for backwards compatibility */
-
-	for (n = strlen(str); n > 0; n--)
-		if (!isspace(str[n - 1]))
-			break;
-	matched = n;
-	t = sscanf(str, "%i%n", &m, &matched);
-	if (t >= 1 && matched == n) {
-		/* don't print warning for lctl set_param debug=0 or -1 */
-		if (m && m != -1)
-			CWARN("You are trying to use a numerical value for the mask - this will be deprecated in a future release.\n");
-		*mask = m;
-		return 0;
-	}
-
-	return cfs_str2mask(str, fn, mask, is_subsys ? 
0 : D_CANTMASK, - 0xffffffff); -} - -/** - * Dump Lustre log to ::debug_file_path by calling tracefile_dump_all_pages() - */ -void libcfs_debug_dumplog_internal(void *arg) -{ - static time64_t last_dump_time; - time64_t current_time; - void *journal_info; - - journal_info = current->journal_info; - current->journal_info = NULL; - current_time = ktime_get_real_seconds(); - - if (strncmp(libcfs_debug_file_path_arr, "NONE", 4) && - current_time > last_dump_time) { - last_dump_time = current_time; - snprintf(debug_file_name, sizeof(debug_file_name) - 1, - "%s.%lld.%ld", libcfs_debug_file_path_arr, - (s64)current_time, (long)arg); - pr_alert("LustreError: dumping log to %s\n", debug_file_name); - cfs_tracefile_dump_all_pages(debug_file_name); - libcfs_run_debug_log_upcall(debug_file_name); - } - - current->journal_info = journal_info; -} - -static int libcfs_debug_dumplog_thread(void *arg) -{ - libcfs_debug_dumplog_internal(arg); - wake_up(&debug_ctlwq); - return 0; -} - -void libcfs_debug_dumplog(void) -{ - wait_queue_entry_t wait; - struct task_struct *dumper; - - /* we're being careful to ensure that the kernel thread is - * able to set our state to running as it exits before we - * get to schedule() - */ - init_waitqueue_entry(&wait, current); - add_wait_queue(&debug_ctlwq, &wait); - - dumper = kthread_run(libcfs_debug_dumplog_thread, - (void *)(long)current_pid(), - "libcfs_debug_dumper"); - set_current_state(TASK_INTERRUPTIBLE); - if (IS_ERR(dumper)) - pr_err("LustreError: cannot start log dump thread: %ld\n", - PTR_ERR(dumper)); - else - schedule(); - - /* be sure to teardown if cfs_create_thread() failed */ - remove_wait_queue(&debug_ctlwq, &wait); - set_current_state(TASK_RUNNING); -} -EXPORT_SYMBOL(libcfs_debug_dumplog); - -int libcfs_debug_init(unsigned long bufsize) -{ - unsigned int max = libcfs_debug_mb; - int rc = 0; - - init_waitqueue_head(&debug_ctlwq); - - if (libcfs_console_max_delay <= 0 || /* not set by user or */ - libcfs_console_min_delay <= 0 || 
/* set to invalid values */ - libcfs_console_min_delay >= libcfs_console_max_delay) { - libcfs_console_max_delay = CDEBUG_DEFAULT_MAX_DELAY; - libcfs_console_min_delay = CDEBUG_DEFAULT_MIN_DELAY; - } - - if (libcfs_debug_file_path) { - strlcpy(libcfs_debug_file_path_arr, - libcfs_debug_file_path, - sizeof(libcfs_debug_file_path_arr)); - } - - /* If libcfs_debug_mb is set to an invalid value or uninitialized - * then just make the total buffers smp_num_cpus * TCD_MAX_PAGES - */ - if (max > cfs_trace_max_debug_mb() || max < num_possible_cpus()) { - max = TCD_MAX_PAGES; - } else { - max = max / num_possible_cpus(); - max <<= (20 - PAGE_SHIFT); - } - - rc = cfs_tracefile_init(max); - if (!rc) { - libcfs_register_panic_notifier(); - libcfs_debug_mb = cfs_trace_get_debug_mb(); - } - - return rc; -} - -int libcfs_debug_cleanup(void) -{ - libcfs_unregister_panic_notifier(); - cfs_tracefile_exit(); - return 0; -} - -int libcfs_debug_clear_buffer(void) -{ - cfs_trace_flush_pages(); - return 0; -} - -/* Debug markers, although printed by S_LNET should not be marked as such. */ -#undef DEBUG_SUBSYSTEM -#define DEBUG_SUBSYSTEM S_UNDEFINED -int libcfs_debug_mark_buffer(const char *text) -{ - CDEBUG(D_TRACE, - "***************************************************\n"); - LCONSOLE(D_WARNING, "DEBUG MARKER: %s\n", text); - CDEBUG(D_TRACE, - "***************************************************\n"); - - return 0; -} - -#undef DEBUG_SUBSYSTEM -#define DEBUG_SUBSYSTEM S_LNET diff --git a/drivers/staging/lustre/lnet/libcfs/fail.c b/drivers/staging/lustre/lnet/libcfs/fail.c deleted file mode 100644 index d3f1e866c6a7..000000000000 --- a/drivers/staging/lustre/lnet/libcfs/fail.c +++ /dev/null @@ -1,142 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -/* - * GPL HEADER START - * - * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
- * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 only, - * as published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, but - * WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * General Public License version 2 for more details (a copy is included - * in the LICENSE file that accompanied this code). - * - * You should have received a copy of the GNU General Public License - * version 2 along with this program; If not, see http://www.gnu.org/licenses - * - * GPL HEADER END - */ -/* - * Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved. - * Use is subject to license terms. - * - * Copyright (c) 2011, 2015, Intel Corporation. - */ -/* - * This file is part of Lustre, http://www.lustre.org/ - * Lustre is a trademark of Oracle Corporation, Inc. - */ - -#include <linux/libcfs/libcfs.h> - -unsigned long cfs_fail_loc; -EXPORT_SYMBOL(cfs_fail_loc); - -unsigned int cfs_fail_val; -EXPORT_SYMBOL(cfs_fail_val); - -int cfs_fail_err; -EXPORT_SYMBOL(cfs_fail_err); - -DECLARE_WAIT_QUEUE_HEAD(cfs_race_waitq); -EXPORT_SYMBOL(cfs_race_waitq); - -int cfs_race_state; -EXPORT_SYMBOL(cfs_race_state); - -int __cfs_fail_check_set(u32 id, u32 value, int set) -{ - static atomic_t cfs_fail_count = ATOMIC_INIT(0); - - LASSERT(!(id & CFS_FAIL_ONCE)); - - if ((cfs_fail_loc & (CFS_FAILED | CFS_FAIL_ONCE)) == - (CFS_FAILED | CFS_FAIL_ONCE)) { - atomic_set(&cfs_fail_count, 0); /* paranoia */ - return 0; - } - - /* Fail 1/cfs_fail_val times */ - if (cfs_fail_loc & CFS_FAIL_RAND) { - if (cfs_fail_val < 2 || prandom_u32_max(cfs_fail_val) > 0) - return 0; - } - - /* Skip the first cfs_fail_val, then fail */ - if (cfs_fail_loc & CFS_FAIL_SKIP) { - if (atomic_inc_return(&cfs_fail_count) <= cfs_fail_val) - return 0; - } - - /* check cfs_fail_val... 
*/ - if (set == CFS_FAIL_LOC_VALUE) { - if (cfs_fail_val != -1 && cfs_fail_val != value) - return 0; - } - - /* Fail cfs_fail_val times, overridden by FAIL_ONCE */ - if (cfs_fail_loc & CFS_FAIL_SOME && - (!(cfs_fail_loc & CFS_FAIL_ONCE) || cfs_fail_val <= 1)) { - int count = atomic_inc_return(&cfs_fail_count); - - if (count >= cfs_fail_val) { - set_bit(CFS_FAIL_ONCE_BIT, &cfs_fail_loc); - atomic_set(&cfs_fail_count, 0); - /* we are lost race to increase */ - if (count > cfs_fail_val) - return 0; - } - } - - /* Take into account the current call for FAIL_ONCE for ORSET only, - * as RESET is a new fail_loc, it does not change the current call - */ - if ((set == CFS_FAIL_LOC_ORSET) && (value & CFS_FAIL_ONCE)) - set_bit(CFS_FAIL_ONCE_BIT, &cfs_fail_loc); - /* Lost race to set CFS_FAILED_BIT. */ - if (test_and_set_bit(CFS_FAILED_BIT, &cfs_fail_loc)) { - /* If CFS_FAIL_ONCE is valid, only one process can fail, - * otherwise multi-process can fail at the same time. - */ - if (cfs_fail_loc & CFS_FAIL_ONCE) - return 0; - } - - switch (set) { - case CFS_FAIL_LOC_NOSET: - case CFS_FAIL_LOC_VALUE: - break; - case CFS_FAIL_LOC_ORSET: - cfs_fail_loc |= value & ~(CFS_FAILED | CFS_FAIL_ONCE); - break; - case CFS_FAIL_LOC_RESET: - cfs_fail_loc = value; - atomic_set(&cfs_fail_count, 0); - break; - default: - LASSERTF(0, "called with bad set %u\n", set); - break; - } - - return 1; -} -EXPORT_SYMBOL(__cfs_fail_check_set); - -int __cfs_fail_timeout_set(u32 id, u32 value, int ms, int set) -{ - int ret; - - ret = __cfs_fail_check_set(id, value, set); - if (ret && likely(ms > 0)) { - CERROR("cfs_fail_timeout id %x sleeping for %dms\n", - id, ms); - set_current_state(TASK_UNINTERRUPTIBLE); - schedule_timeout(ms * HZ / 1000); - CERROR("cfs_fail_timeout id %x awake\n", id); - } - return ret; -} -EXPORT_SYMBOL(__cfs_fail_timeout_set); diff --git a/drivers/staging/lustre/lnet/libcfs/hash.c b/drivers/staging/lustre/lnet/libcfs/hash.c deleted file mode 100644 index f7b3c9306456..000000000000 --- 
a/drivers/staging/lustre/lnet/libcfs/hash.c +++ /dev/null @@ -1,2064 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -/* - * GPL HEADER START - * - * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 only, - * as published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, but - * WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * General Public License version 2 for more details (a copy is included - * in the LICENSE file that accompanied this code). - * - * You should have received a copy of the GNU General Public License - * version 2 along with this program; If not, see - * http://www.gnu.org/licenses/gpl-2.0.html - * - * GPL HEADER END - */ -/* - * Copyright (c) 2009, 2010, Oracle and/or its affiliates. All rights reserved. - * Use is subject to license terms. - * - * Copyright (c) 2011, 2012, Intel Corporation. - */ -/* - * This file is part of Lustre, http://www.lustre.org/ - * Lustre is a trademark of Sun Microsystems, Inc. - * - * libcfs/libcfs/hash.c - * - * Implement a hash class for hash process in lustre system. 
- * - * Author: YuZhangyong <yzy@clusterfs.com> - * - * 2008-08-15: Brian Behlendorf <behlendorf1@llnl.gov> - * - Simplified API and improved documentation - * - Added per-hash feature flags: - * * CFS_HASH_DEBUG additional validation - * * CFS_HASH_REHASH dynamic rehashing - * - Added per-hash statistics - * - General performance enhancements - * - * 2009-07-31: Liang Zhen <zhen.liang@sun.com> - * - move all stuff to libcfs - * - don't allow cur_bits != max_bits without setting of CFS_HASH_REHASH - * - ignore hs_rwlock if without CFS_HASH_REHASH setting - * - buckets are allocated one by one(instead of contiguous memory), - * to avoid unnecessary cacheline conflict - * - * 2010-03-01: Liang Zhen <zhen.liang@sun.com> - * - "bucket" is a group of hlist_head now, user can specify bucket size - * by bkt_bits of cfs_hash_create(), all hlist_heads in a bucket share - * one lock for reducing memory overhead. - * - * - support lockless hash, caller will take care of locks: - * avoid lock overhead for hash tables that are already protected - * by locking in the caller for another reason - * - * - support both spin_lock/rwlock for bucket: - * overhead of spinlock contention is lower than read/write - * contention of rwlock, so using spinlock to serialize operations on - * bucket is more reasonable for those frequently changed hash tables - * - * - support one-single lock mode: - * one lock to protect all hash operations to avoid overhead of - * multiple locks if hash table is always small - * - * - removed a lot of unnecessary addref & decref on hash element: - * addref & decref are atomic operations in many use-cases which - * are expensive. - * - * - support non-blocking cfs_hash_add() and cfs_hash_findadd(): - * some lustre use-cases require these functions to be strictly - * non-blocking, we need to schedule required rehash on a different - * thread on those cases. 
- * - * - safer rehash on large hash table - * In the old implementation, the rehash function exclusively locked the - * hash table and finished the rehash in one batch, which is dangerous on - * SMP systems because rehashing millions of elements could take a long - * time. The new rehash can release the lock and relax the CPU in the - * middle of a rehash, so it is safe for another thread to search/change - * the hash table even while it is being rehashed. - * - * - support two different refcount modes - * . hash table has refcount on element - * . hash table doesn't change refcount on adding/removing element - * - * - support long name hash table (for param-tree) - * - * - fix a bug for cfs_hash_rehash_key: - * in the old implementation, cfs_hash_rehash_key could screw up the - * hash-table because @key is overwritten without any protection. - * Now we need the user to define hs_keycpy for those rehash-enabled - * hash tables; cfs_hash_rehash_key will overwrite the hash-key - * inside the lock by calling hs_keycpy. - * - * - better hash iteration: - * Now we support both locked iteration & lockless iteration of the hash - * table. Also, the user can break the iteration by returning 1 from the callback. 
- */ -#include <linux/seq_file.h> -#include <linux/log2.h> - -#include <linux/libcfs/libcfs.h> - -#if CFS_HASH_DEBUG_LEVEL >= CFS_HASH_DEBUG_1 -static unsigned int warn_on_depth = 8; -module_param(warn_on_depth, uint, 0644); -MODULE_PARM_DESC(warn_on_depth, "warning when hash depth is high."); -#endif - -struct workqueue_struct *cfs_rehash_wq; - -static inline void -cfs_hash_nl_lock(union cfs_hash_lock *lock, int exclusive) {} - -static inline void -cfs_hash_nl_unlock(union cfs_hash_lock *lock, int exclusive) {} - -static inline void -cfs_hash_spin_lock(union cfs_hash_lock *lock, int exclusive) - __acquires(&lock->spin) -{ - spin_lock(&lock->spin); -} - -static inline void -cfs_hash_spin_unlock(union cfs_hash_lock *lock, int exclusive) - __releases(&lock->spin) -{ - spin_unlock(&lock->spin); -} - -static inline void -cfs_hash_rw_lock(union cfs_hash_lock *lock, int exclusive) - __acquires(&lock->rw) -{ - if (!exclusive) - read_lock(&lock->rw); - else - write_lock(&lock->rw); -} - -static inline void -cfs_hash_rw_unlock(union cfs_hash_lock *lock, int exclusive) - __releases(&lock->rw) -{ - if (!exclusive) - read_unlock(&lock->rw); - else - write_unlock(&lock->rw); -} - -/** No lock hash */ -static struct cfs_hash_lock_ops cfs_hash_nl_lops = { - .hs_lock = cfs_hash_nl_lock, - .hs_unlock = cfs_hash_nl_unlock, - .hs_bkt_lock = cfs_hash_nl_lock, - .hs_bkt_unlock = cfs_hash_nl_unlock, -}; - -/** no bucket lock, one spinlock to protect everything */ -static struct cfs_hash_lock_ops cfs_hash_nbl_lops = { - .hs_lock = cfs_hash_spin_lock, - .hs_unlock = cfs_hash_spin_unlock, - .hs_bkt_lock = cfs_hash_nl_lock, - .hs_bkt_unlock = cfs_hash_nl_unlock, -}; - -/** spin bucket lock, rehash is enabled */ -static struct cfs_hash_lock_ops cfs_hash_bkt_spin_lops = { - .hs_lock = cfs_hash_rw_lock, - .hs_unlock = cfs_hash_rw_unlock, - .hs_bkt_lock = cfs_hash_spin_lock, - .hs_bkt_unlock = cfs_hash_spin_unlock, -}; - -/** rw bucket lock, rehash is enabled */ -static struct cfs_hash_lock_ops 
cfs_hash_bkt_rw_lops = { - .hs_lock = cfs_hash_rw_lock, - .hs_unlock = cfs_hash_rw_unlock, - .hs_bkt_lock = cfs_hash_rw_lock, - .hs_bkt_unlock = cfs_hash_rw_unlock, -}; - -/** spin bucket lock, rehash is disabled */ -static struct cfs_hash_lock_ops cfs_hash_nr_bkt_spin_lops = { - .hs_lock = cfs_hash_nl_lock, - .hs_unlock = cfs_hash_nl_unlock, - .hs_bkt_lock = cfs_hash_spin_lock, - .hs_bkt_unlock = cfs_hash_spin_unlock, -}; - -/** rw bucket lock, rehash is disabled */ -static struct cfs_hash_lock_ops cfs_hash_nr_bkt_rw_lops = { - .hs_lock = cfs_hash_nl_lock, - .hs_unlock = cfs_hash_nl_unlock, - .hs_bkt_lock = cfs_hash_rw_lock, - .hs_bkt_unlock = cfs_hash_rw_unlock, -}; - -static void -cfs_hash_lock_setup(struct cfs_hash *hs) -{ - if (cfs_hash_with_no_lock(hs)) { - hs->hs_lops = &cfs_hash_nl_lops; - - } else if (cfs_hash_with_no_bktlock(hs)) { - hs->hs_lops = &cfs_hash_nbl_lops; - spin_lock_init(&hs->hs_lock.spin); - - } else if (cfs_hash_with_rehash(hs)) { - rwlock_init(&hs->hs_lock.rw); - - if (cfs_hash_with_rw_bktlock(hs)) - hs->hs_lops = &cfs_hash_bkt_rw_lops; - else if (cfs_hash_with_spin_bktlock(hs)) - hs->hs_lops = &cfs_hash_bkt_spin_lops; - else - LBUG(); - } else { - if (cfs_hash_with_rw_bktlock(hs)) - hs->hs_lops = &cfs_hash_nr_bkt_rw_lops; - else if (cfs_hash_with_spin_bktlock(hs)) - hs->hs_lops = &cfs_hash_nr_bkt_spin_lops; - else - LBUG(); - } -} - -/** - * Simple hash head without depth tracking - * new element is always added to head of hlist - */ -struct cfs_hash_head { - struct hlist_head hh_head; /**< entries list */ -}; - -static int -cfs_hash_hh_hhead_size(struct cfs_hash *hs) -{ - return sizeof(struct cfs_hash_head); -} - -static struct hlist_head * -cfs_hash_hh_hhead(struct cfs_hash *hs, struct cfs_hash_bd *bd) -{ - struct cfs_hash_head *head; - - head = (struct cfs_hash_head *)&bd->bd_bucket->hsb_head[0]; - return &head[bd->bd_offset].hh_head; -} - -static int -cfs_hash_hh_hnode_add(struct cfs_hash *hs, struct cfs_hash_bd *bd, - struct 
hlist_node *hnode) -{ - hlist_add_head(hnode, cfs_hash_hh_hhead(hs, bd)); - return -1; /* unknown depth */ -} - -static int -cfs_hash_hh_hnode_del(struct cfs_hash *hs, struct cfs_hash_bd *bd, - struct hlist_node *hnode) -{ - hlist_del_init(hnode); - return -1; /* unknown depth */ -} - -/** - * Simple hash head with depth tracking - * new element is always added to head of hlist - */ -struct cfs_hash_head_dep { - struct hlist_head hd_head; /**< entries list */ - unsigned int hd_depth; /**< list length */ -}; - -static int -cfs_hash_hd_hhead_size(struct cfs_hash *hs) -{ - return sizeof(struct cfs_hash_head_dep); -} - -static struct hlist_head * -cfs_hash_hd_hhead(struct cfs_hash *hs, struct cfs_hash_bd *bd) -{ - struct cfs_hash_head_dep *head; - - head = (struct cfs_hash_head_dep *)&bd->bd_bucket->hsb_head[0]; - return &head[bd->bd_offset].hd_head; -} - -static int -cfs_hash_hd_hnode_add(struct cfs_hash *hs, struct cfs_hash_bd *bd, - struct hlist_node *hnode) -{ - struct cfs_hash_head_dep *hh; - - hh = container_of(cfs_hash_hd_hhead(hs, bd), - struct cfs_hash_head_dep, hd_head); - hlist_add_head(hnode, &hh->hd_head); - return ++hh->hd_depth; -} - -static int -cfs_hash_hd_hnode_del(struct cfs_hash *hs, struct cfs_hash_bd *bd, - struct hlist_node *hnode) -{ - struct cfs_hash_head_dep *hh; - - hh = container_of(cfs_hash_hd_hhead(hs, bd), - struct cfs_hash_head_dep, hd_head); - hlist_del_init(hnode); - return --hh->hd_depth; -} - -/** - * double links hash head without depth tracking - * new element is always added to tail of hlist - */ -struct cfs_hash_dhead { - struct hlist_head dh_head; /**< entries list */ - struct hlist_node *dh_tail; /**< the last entry */ -}; - -static int -cfs_hash_dh_hhead_size(struct cfs_hash *hs) -{ - return sizeof(struct cfs_hash_dhead); -} - -static struct hlist_head * -cfs_hash_dh_hhead(struct cfs_hash *hs, struct cfs_hash_bd *bd) -{ - struct cfs_hash_dhead *head; - - head = (struct cfs_hash_dhead *)&bd->bd_bucket->hsb_head[0]; - return 
&head[bd->bd_offset].dh_head; -} - -static int -cfs_hash_dh_hnode_add(struct cfs_hash *hs, struct cfs_hash_bd *bd, - struct hlist_node *hnode) -{ - struct cfs_hash_dhead *dh; - - dh = container_of(cfs_hash_dh_hhead(hs, bd), - struct cfs_hash_dhead, dh_head); - if (dh->dh_tail) /* not empty */ - hlist_add_behind(hnode, dh->dh_tail); - else /* empty list */ - hlist_add_head(hnode, &dh->dh_head); - dh->dh_tail = hnode; - return -1; /* unknown depth */ -} - -static int -cfs_hash_dh_hnode_del(struct cfs_hash *hs, struct cfs_hash_bd *bd, - struct hlist_node *hnd) -{ - struct cfs_hash_dhead *dh; - - dh = container_of(cfs_hash_dh_hhead(hs, bd), - struct cfs_hash_dhead, dh_head); - if (!hnd->next) { /* it's the tail */ - dh->dh_tail = (hnd->pprev == &dh->dh_head.first) ? NULL : - container_of(hnd->pprev, struct hlist_node, next); - } - hlist_del_init(hnd); - return -1; /* unknown depth */ -} - -/** - * double links hash head with depth tracking - * new element is always added to tail of hlist - */ -struct cfs_hash_dhead_dep { - struct hlist_head dd_head; /**< entries list */ - struct hlist_node *dd_tail; /**< the last entry */ - unsigned int dd_depth; /**< list length */ -}; - -static int -cfs_hash_dd_hhead_size(struct cfs_hash *hs) -{ - return sizeof(struct cfs_hash_dhead_dep); -} - -static struct hlist_head * -cfs_hash_dd_hhead(struct cfs_hash *hs, struct cfs_hash_bd *bd) -{ - struct cfs_hash_dhead_dep *head; - - head = (struct cfs_hash_dhead_dep *)&bd->bd_bucket->hsb_head[0]; - return &head[bd->bd_offset].dd_head; -} - -static int -cfs_hash_dd_hnode_add(struct cfs_hash *hs, struct cfs_hash_bd *bd, - struct hlist_node *hnode) -{ - struct cfs_hash_dhead_dep *dh; - - dh = container_of(cfs_hash_dd_hhead(hs, bd), - struct cfs_hash_dhead_dep, dd_head); - if (dh->dd_tail) /* not empty */ - hlist_add_behind(hnode, dh->dd_tail); - else /* empty list */ - hlist_add_head(hnode, &dh->dd_head); - dh->dd_tail = hnode; - return ++dh->dd_depth; -} - -static int 
-cfs_hash_dd_hnode_del(struct cfs_hash *hs, struct cfs_hash_bd *bd, - struct hlist_node *hnd) -{ - struct cfs_hash_dhead_dep *dh; - - dh = container_of(cfs_hash_dd_hhead(hs, bd), - struct cfs_hash_dhead_dep, dd_head); - if (!hnd->next) { /* it's the tail */ - dh->dd_tail = (hnd->pprev == &dh->dd_head.first) ? NULL : - container_of(hnd->pprev, struct hlist_node, next); - } - hlist_del_init(hnd); - return --dh->dd_depth; -} - -static struct cfs_hash_hlist_ops cfs_hash_hh_hops = { - .hop_hhead = cfs_hash_hh_hhead, - .hop_hhead_size = cfs_hash_hh_hhead_size, - .hop_hnode_add = cfs_hash_hh_hnode_add, - .hop_hnode_del = cfs_hash_hh_hnode_del, -}; - -static struct cfs_hash_hlist_ops cfs_hash_hd_hops = { - .hop_hhead = cfs_hash_hd_hhead, - .hop_hhead_size = cfs_hash_hd_hhead_size, - .hop_hnode_add = cfs_hash_hd_hnode_add, - .hop_hnode_del = cfs_hash_hd_hnode_del, -}; - -static struct cfs_hash_hlist_ops cfs_hash_dh_hops = { - .hop_hhead = cfs_hash_dh_hhead, - .hop_hhead_size = cfs_hash_dh_hhead_size, - .hop_hnode_add = cfs_hash_dh_hnode_add, - .hop_hnode_del = cfs_hash_dh_hnode_del, -}; - -static struct cfs_hash_hlist_ops cfs_hash_dd_hops = { - .hop_hhead = cfs_hash_dd_hhead, - .hop_hhead_size = cfs_hash_dd_hhead_size, - .hop_hnode_add = cfs_hash_dd_hnode_add, - .hop_hnode_del = cfs_hash_dd_hnode_del, -}; - -static void -cfs_hash_hlist_setup(struct cfs_hash *hs) -{ - if (cfs_hash_with_add_tail(hs)) { - hs->hs_hops = cfs_hash_with_depth(hs) ? - &cfs_hash_dd_hops : &cfs_hash_dh_hops; - } else { - hs->hs_hops = cfs_hash_with_depth(hs) ? 
- &cfs_hash_hd_hops : &cfs_hash_hh_hops; - } -} - -static void -cfs_hash_bd_from_key(struct cfs_hash *hs, struct cfs_hash_bucket **bkts, - unsigned int bits, const void *key, struct cfs_hash_bd *bd) -{ - unsigned int index = cfs_hash_id(hs, key, (1U << bits) - 1); - - LASSERT(bits == hs->hs_cur_bits || bits == hs->hs_rehash_bits); - - bd->bd_bucket = bkts[index & ((1U << (bits - hs->hs_bkt_bits)) - 1)]; - bd->bd_offset = index >> (bits - hs->hs_bkt_bits); -} - -void -cfs_hash_bd_get(struct cfs_hash *hs, const void *key, struct cfs_hash_bd *bd) -{ - /* NB: caller should hold hs->hs_rwlock if REHASH is set */ - if (likely(!hs->hs_rehash_buckets)) { - cfs_hash_bd_from_key(hs, hs->hs_buckets, - hs->hs_cur_bits, key, bd); - } else { - LASSERT(hs->hs_rehash_bits); - cfs_hash_bd_from_key(hs, hs->hs_rehash_buckets, - hs->hs_rehash_bits, key, bd); - } -} -EXPORT_SYMBOL(cfs_hash_bd_get); - -static inline void -cfs_hash_bd_dep_record(struct cfs_hash *hs, struct cfs_hash_bd *bd, int dep_cur) -{ - if (likely(dep_cur <= bd->bd_bucket->hsb_depmax)) - return; - - bd->bd_bucket->hsb_depmax = dep_cur; -# if CFS_HASH_DEBUG_LEVEL >= CFS_HASH_DEBUG_1 - if (likely(!warn_on_depth || - max(warn_on_depth, hs->hs_dep_max) >= dep_cur)) - return; - - spin_lock(&hs->hs_dep_lock); - hs->hs_dep_max = dep_cur; - hs->hs_dep_bkt = bd->bd_bucket->hsb_index; - hs->hs_dep_off = bd->bd_offset; - hs->hs_dep_bits = hs->hs_cur_bits; - spin_unlock(&hs->hs_dep_lock); - - queue_work(cfs_rehash_wq, &hs->hs_dep_work); -# endif -} - -void -cfs_hash_bd_add_locked(struct cfs_hash *hs, struct cfs_hash_bd *bd, - struct hlist_node *hnode) -{ - int rc; - - rc = hs->hs_hops->hop_hnode_add(hs, bd, hnode); - cfs_hash_bd_dep_record(hs, bd, rc); - bd->bd_bucket->hsb_version++; - if (unlikely(!bd->bd_bucket->hsb_version)) - bd->bd_bucket->hsb_version++; - bd->bd_bucket->hsb_count++; - - if (cfs_hash_with_counter(hs)) - atomic_inc(&hs->hs_count); - if (!cfs_hash_with_no_itemref(hs)) - cfs_hash_get(hs, hnode); -} 
-EXPORT_SYMBOL(cfs_hash_bd_add_locked); - -void -cfs_hash_bd_del_locked(struct cfs_hash *hs, struct cfs_hash_bd *bd, - struct hlist_node *hnode) -{ - hs->hs_hops->hop_hnode_del(hs, bd, hnode); - - LASSERT(bd->bd_bucket->hsb_count > 0); - bd->bd_bucket->hsb_count--; - bd->bd_bucket->hsb_version++; - if (unlikely(!bd->bd_bucket->hsb_version)) - bd->bd_bucket->hsb_version++; - - if (cfs_hash_with_counter(hs)) { - LASSERT(atomic_read(&hs->hs_count) > 0); - atomic_dec(&hs->hs_count); - } - if (!cfs_hash_with_no_itemref(hs)) - cfs_hash_put_locked(hs, hnode); -} -EXPORT_SYMBOL(cfs_hash_bd_del_locked); - -void -cfs_hash_bd_move_locked(struct cfs_hash *hs, struct cfs_hash_bd *bd_old, - struct cfs_hash_bd *bd_new, struct hlist_node *hnode) -{ - struct cfs_hash_bucket *obkt = bd_old->bd_bucket; - struct cfs_hash_bucket *nbkt = bd_new->bd_bucket; - int rc; - - if (!cfs_hash_bd_compare(bd_old, bd_new)) - return; - - /* use cfs_hash_bd_hnode_add/del, to avoid atomic & refcount ops - * in cfs_hash_bd_del/add_locked - */ - hs->hs_hops->hop_hnode_del(hs, bd_old, hnode); - rc = hs->hs_hops->hop_hnode_add(hs, bd_new, hnode); - cfs_hash_bd_dep_record(hs, bd_new, rc); - - LASSERT(obkt->hsb_count > 0); - obkt->hsb_count--; - obkt->hsb_version++; - if (unlikely(!obkt->hsb_version)) - obkt->hsb_version++; - nbkt->hsb_count++; - nbkt->hsb_version++; - if (unlikely(!nbkt->hsb_version)) - nbkt->hsb_version++; -} - -enum { - /** always set, for sanity (avoid ZERO intent) */ - CFS_HS_LOOKUP_MASK_FIND = BIT(0), - /** return entry with a ref */ - CFS_HS_LOOKUP_MASK_REF = BIT(1), - /** add entry if not existing */ - CFS_HS_LOOKUP_MASK_ADD = BIT(2), - /** delete entry, ignore other masks */ - CFS_HS_LOOKUP_MASK_DEL = BIT(3), -}; - -enum cfs_hash_lookup_intent { - /** return item w/o refcount */ - CFS_HS_LOOKUP_IT_PEEK = CFS_HS_LOOKUP_MASK_FIND, - /** return item with refcount */ - CFS_HS_LOOKUP_IT_FIND = (CFS_HS_LOOKUP_MASK_FIND | - CFS_HS_LOOKUP_MASK_REF), - /** return item w/o refcount if 
existed, otherwise add */ - CFS_HS_LOOKUP_IT_ADD = (CFS_HS_LOOKUP_MASK_FIND | - CFS_HS_LOOKUP_MASK_ADD), - /** return item with refcount if existed, otherwise add */ - CFS_HS_LOOKUP_IT_FINDADD = (CFS_HS_LOOKUP_IT_FIND | - CFS_HS_LOOKUP_MASK_ADD), - /** delete if existed */ - CFS_HS_LOOKUP_IT_FINDDEL = (CFS_HS_LOOKUP_MASK_FIND | - CFS_HS_LOOKUP_MASK_DEL) -}; - -static struct hlist_node * -cfs_hash_bd_lookup_intent(struct cfs_hash *hs, struct cfs_hash_bd *bd, - const void *key, struct hlist_node *hnode, - enum cfs_hash_lookup_intent intent) - -{ - struct hlist_head *hhead = cfs_hash_bd_hhead(hs, bd); - struct hlist_node *ehnode; - struct hlist_node *match; - int intent_add = intent & CFS_HS_LOOKUP_MASK_ADD; - - /* with this function, we can avoid a lot of useless refcount ops, - * which are expensive atomic operations most time. - */ - match = intent_add ? NULL : hnode; - hlist_for_each(ehnode, hhead) { - if (!cfs_hash_keycmp(hs, key, ehnode)) - continue; - - if (match && match != ehnode) /* can't match */ - continue; - - /* match and ... */ - if (intent & CFS_HS_LOOKUP_MASK_DEL) { - cfs_hash_bd_del_locked(hs, bd, ehnode); - return ehnode; - } - - /* caller wants refcount? 
*/ - if (intent & CFS_HS_LOOKUP_MASK_REF) - cfs_hash_get(hs, ehnode); - return ehnode; - } - /* no match item */ - if (!intent_add) - return NULL; - - LASSERT(hnode); - cfs_hash_bd_add_locked(hs, bd, hnode); - return hnode; -} - -struct hlist_node * -cfs_hash_bd_lookup_locked(struct cfs_hash *hs, struct cfs_hash_bd *bd, - const void *key) -{ - return cfs_hash_bd_lookup_intent(hs, bd, key, NULL, - CFS_HS_LOOKUP_IT_FIND); -} -EXPORT_SYMBOL(cfs_hash_bd_lookup_locked); - -struct hlist_node * -cfs_hash_bd_peek_locked(struct cfs_hash *hs, struct cfs_hash_bd *bd, - const void *key) -{ - return cfs_hash_bd_lookup_intent(hs, bd, key, NULL, - CFS_HS_LOOKUP_IT_PEEK); -} -EXPORT_SYMBOL(cfs_hash_bd_peek_locked); - -static void -cfs_hash_multi_bd_lock(struct cfs_hash *hs, struct cfs_hash_bd *bds, - unsigned int n, int excl) -{ - struct cfs_hash_bucket *prev = NULL; - int i; - - /** - * bds must be ascendantly ordered by bd->bd_bucket->hsb_index. - * NB: it's possible that several bds point to the same bucket but - * have different bd::bd_offset, so need take care of deadlock. 
- */ - cfs_hash_for_each_bd(bds, n, i) { - if (prev == bds[i].bd_bucket) - continue; - - LASSERT(!prev || prev->hsb_index < bds[i].bd_bucket->hsb_index); - cfs_hash_bd_lock(hs, &bds[i], excl); - prev = bds[i].bd_bucket; - } -} - -static void -cfs_hash_multi_bd_unlock(struct cfs_hash *hs, struct cfs_hash_bd *bds, - unsigned int n, int excl) -{ - struct cfs_hash_bucket *prev = NULL; - int i; - - cfs_hash_for_each_bd(bds, n, i) { - if (prev != bds[i].bd_bucket) { - cfs_hash_bd_unlock(hs, &bds[i], excl); - prev = bds[i].bd_bucket; - } - } -} - -static struct hlist_node * -cfs_hash_multi_bd_lookup_locked(struct cfs_hash *hs, struct cfs_hash_bd *bds, - unsigned int n, const void *key) -{ - struct hlist_node *ehnode; - unsigned int i; - - cfs_hash_for_each_bd(bds, n, i) { - ehnode = cfs_hash_bd_lookup_intent(hs, &bds[i], key, NULL, - CFS_HS_LOOKUP_IT_FIND); - if (ehnode) - return ehnode; - } - return NULL; -} - -static struct hlist_node * -cfs_hash_multi_bd_findadd_locked(struct cfs_hash *hs, struct cfs_hash_bd *bds, - unsigned int n, const void *key, - struct hlist_node *hnode, int noref) -{ - struct hlist_node *ehnode; - int intent; - unsigned int i; - - LASSERT(hnode); - intent = (!noref * CFS_HS_LOOKUP_MASK_REF) | CFS_HS_LOOKUP_IT_PEEK; - - cfs_hash_for_each_bd(bds, n, i) { - ehnode = cfs_hash_bd_lookup_intent(hs, &bds[i], key, - NULL, intent); - if (ehnode) - return ehnode; - } - - if (i == 1) { /* only one bucket */ - cfs_hash_bd_add_locked(hs, &bds[0], hnode); - } else { - struct cfs_hash_bd mybd; - - cfs_hash_bd_get(hs, key, &mybd); - cfs_hash_bd_add_locked(hs, &mybd, hnode); - } - - return hnode; -} - -static struct hlist_node * -cfs_hash_multi_bd_finddel_locked(struct cfs_hash *hs, struct cfs_hash_bd *bds, - unsigned int n, const void *key, - struct hlist_node *hnode) -{ - struct hlist_node *ehnode; - unsigned int i; - - cfs_hash_for_each_bd(bds, n, i) { - ehnode = cfs_hash_bd_lookup_intent(hs, &bds[i], key, hnode, - CFS_HS_LOOKUP_IT_FINDDEL); - if (ehnode) - 
return ehnode; - } - return NULL; -} - -static void -cfs_hash_bd_order(struct cfs_hash_bd *bd1, struct cfs_hash_bd *bd2) -{ - int rc; - - if (!bd2->bd_bucket) - return; - - if (!bd1->bd_bucket) { - *bd1 = *bd2; - bd2->bd_bucket = NULL; - return; - } - - rc = cfs_hash_bd_compare(bd1, bd2); - if (!rc) - bd2->bd_bucket = NULL; - else if (rc > 0) - swap(*bd1, *bd2); /* swap bd1 and bd2 */ -} - -void -cfs_hash_dual_bd_get(struct cfs_hash *hs, const void *key, - struct cfs_hash_bd *bds) -{ - /* NB: caller should hold hs_lock.rw if REHASH is set */ - cfs_hash_bd_from_key(hs, hs->hs_buckets, - hs->hs_cur_bits, key, &bds[0]); - if (likely(!hs->hs_rehash_buckets)) { - /* no rehash or not rehashing */ - bds[1].bd_bucket = NULL; - return; - } - - LASSERT(hs->hs_rehash_bits); - cfs_hash_bd_from_key(hs, hs->hs_rehash_buckets, - hs->hs_rehash_bits, key, &bds[1]); - - cfs_hash_bd_order(&bds[0], &bds[1]); -} - -void -cfs_hash_dual_bd_lock(struct cfs_hash *hs, struct cfs_hash_bd *bds, int excl) -{ - cfs_hash_multi_bd_lock(hs, bds, 2, excl); -} - -void -cfs_hash_dual_bd_unlock(struct cfs_hash *hs, struct cfs_hash_bd *bds, int excl) -{ - cfs_hash_multi_bd_unlock(hs, bds, 2, excl); -} - -struct hlist_node * -cfs_hash_dual_bd_lookup_locked(struct cfs_hash *hs, struct cfs_hash_bd *bds, - const void *key) -{ - return cfs_hash_multi_bd_lookup_locked(hs, bds, 2, key); -} - -struct hlist_node * -cfs_hash_dual_bd_findadd_locked(struct cfs_hash *hs, struct cfs_hash_bd *bds, - const void *key, struct hlist_node *hnode, - int noref) -{ - return cfs_hash_multi_bd_findadd_locked(hs, bds, 2, key, - hnode, noref); -} - -struct hlist_node * -cfs_hash_dual_bd_finddel_locked(struct cfs_hash *hs, struct cfs_hash_bd *bds, - const void *key, struct hlist_node *hnode) -{ - return cfs_hash_multi_bd_finddel_locked(hs, bds, 2, key, hnode); -} - -static void -cfs_hash_buckets_free(struct cfs_hash_bucket **buckets, - int bkt_size, int prev_size, int size) -{ - int i; - - for (i = prev_size; i < size; i++) - 
kfree(buckets[i]); - - kvfree(buckets); -} - -/* - * Create or grow bucket memory. Return old_buckets if no allocation was - * needed, the newly allocated buckets if allocation was needed and - * successful, and NULL on error. - */ -static struct cfs_hash_bucket ** -cfs_hash_buckets_realloc(struct cfs_hash *hs, struct cfs_hash_bucket **old_bkts, - unsigned int old_size, unsigned int new_size) -{ - struct cfs_hash_bucket **new_bkts; - int i; - - LASSERT(!old_size || old_bkts); - - if (old_bkts && old_size == new_size) - return old_bkts; - - new_bkts = kvmalloc_array(new_size, sizeof(new_bkts[0]), GFP_KERNEL); - if (!new_bkts) - return NULL; - - if (old_bkts) { - memcpy(new_bkts, old_bkts, - min(old_size, new_size) * sizeof(*old_bkts)); - } - - for (i = old_size; i < new_size; i++) { - struct hlist_head *hhead; - struct cfs_hash_bd bd; - - new_bkts[i] = kzalloc(cfs_hash_bkt_size(hs), GFP_KERNEL); - if (!new_bkts[i]) { - cfs_hash_buckets_free(new_bkts, cfs_hash_bkt_size(hs), - old_size, new_size); - return NULL; - } - - new_bkts[i]->hsb_index = i; - new_bkts[i]->hsb_version = 1; /* shouldn't be zero */ - new_bkts[i]->hsb_depmax = -1; /* unknown */ - bd.bd_bucket = new_bkts[i]; - cfs_hash_bd_for_each_hlist(hs, &bd, hhead) - INIT_HLIST_HEAD(hhead); - - if (cfs_hash_with_no_lock(hs) || - cfs_hash_with_no_bktlock(hs)) - continue; - - if (cfs_hash_with_rw_bktlock(hs)) - rwlock_init(&new_bkts[i]->hsb_lock.rw); - else if (cfs_hash_with_spin_bktlock(hs)) - spin_lock_init(&new_bkts[i]->hsb_lock.spin); - else - LBUG(); /* invalid use-case */ - } - return new_bkts; -} - -/** - * Initialize new libcfs hash, where: - * @name - Descriptive hash name - * @cur_bits - Initial hash table size, in bits - * @max_bits - Maximum allowed hash table resize, in bits - * @ops - Registered hash table operations - * @flags - CFS_HASH_REHASH enable synamic hash resizing - * - CFS_HASH_SORT enable chained hash sort - */ -static void cfs_hash_rehash_worker(struct work_struct *work); - -#if 
CFS_HASH_DEBUG_LEVEL >= CFS_HASH_DEBUG_1 -/* - * Work handler for hs_dep_work: snapshot the hs_dep_* fields under - * hs_dep_lock, print them on the console, then clear hs_dep_bits to - * mark the report as done. - */ -static void cfs_hash_dep_print(struct work_struct *work) -{ - struct cfs_hash *hs = container_of(work, struct cfs_hash, hs_dep_work); - int dep; - int bkt; - int off; - int bits; - - spin_lock(&hs->hs_dep_lock); - dep = hs->hs_dep_max; - bkt = hs->hs_dep_bkt; - off = hs->hs_dep_off; - bits = hs->hs_dep_bits; - spin_unlock(&hs->hs_dep_lock); - - LCONSOLE_WARN("#### HASH %s (bits: %d): max depth %d at bucket %d/%d\n", - hs->hs_name, bits, dep, bkt, off); - spin_lock(&hs->hs_dep_lock); - hs->hs_dep_bits = 0; /* mark as workitem done */ - spin_unlock(&hs->hs_dep_lock); -} - -static void cfs_hash_depth_wi_init(struct cfs_hash *hs) -{ - spin_lock_init(&hs->hs_dep_lock); - INIT_WORK(&hs->hs_dep_work, cfs_hash_dep_print); -} - -static void cfs_hash_depth_wi_cancel(struct cfs_hash *hs) -{ - cancel_work_sync(&hs->hs_dep_work); -} - -#else /* CFS_HASH_DEBUG_LEVEL < CFS_HASH_DEBUG_1 */ - -static inline void cfs_hash_depth_wi_init(struct cfs_hash *hs) {} -static inline void cfs_hash_depth_wi_cancel(struct cfs_hash *hs) {} - -#endif /* CFS_HASH_DEBUG_LEVEL >= CFS_HASH_DEBUG_1 */ - -struct cfs_hash * -cfs_hash_create(char *name, unsigned int cur_bits, unsigned int max_bits, - unsigned int bkt_bits, unsigned int extra_bytes, - unsigned int min_theta, unsigned int max_theta, - struct cfs_hash_ops *ops, unsigned int flags) -{ - struct cfs_hash *hs; - int len; - - BUILD_BUG_ON(CFS_HASH_THETA_BITS >= 15); - - LASSERT(name); - LASSERT(ops->hs_key); - LASSERT(ops->hs_hash); - LASSERT(ops->hs_object); - LASSERT(ops->hs_keycmp); - LASSERT(ops->hs_get); - LASSERT(ops->hs_put || ops->hs_put_locked); - - if (flags & CFS_HASH_REHASH) - flags |= CFS_HASH_COUNTER; /* must have counter */ - - LASSERT(cur_bits > 0); - LASSERT(cur_bits >= bkt_bits); - LASSERT(max_bits >= cur_bits && max_bits < 31); - LASSERT(ergo(!(flags & CFS_HASH_REHASH), cur_bits == max_bits)); - LASSERT(ergo(flags & CFS_HASH_REHASH, !(flags & CFS_HASH_NO_LOCK))); - 
LASSERT(ergo(flags & CFS_HASH_REHASH_KEY, ops->hs_keycpy)); - - len = !(flags & CFS_HASH_BIGNAME) ? - CFS_HASH_NAME_LEN : CFS_HASH_BIGNAME_LEN; - hs = kzalloc(offsetof(struct cfs_hash, hs_name[len]), GFP_KERNEL); - if (!hs) - return NULL; - - strlcpy(hs->hs_name, name, len); - hs->hs_flags = flags; - - atomic_set(&hs->hs_refcount, 1); - atomic_set(&hs->hs_count, 0); - - cfs_hash_lock_setup(hs); - cfs_hash_hlist_setup(hs); - - hs->hs_cur_bits = (u8)cur_bits; - hs->hs_min_bits = (u8)cur_bits; - hs->hs_max_bits = (u8)max_bits; - hs->hs_bkt_bits = (u8)bkt_bits; - - hs->hs_ops = ops; - hs->hs_extra_bytes = extra_bytes; - hs->hs_rehash_bits = 0; - INIT_WORK(&hs->hs_rehash_work, cfs_hash_rehash_worker); - cfs_hash_depth_wi_init(hs); - - if (cfs_hash_with_rehash(hs)) - __cfs_hash_set_theta(hs, min_theta, max_theta); - - hs->hs_buckets = cfs_hash_buckets_realloc(hs, NULL, 0, - CFS_HASH_NBKT(hs)); - if (hs->hs_buckets) - return hs; - - kfree(hs); - return NULL; -} -EXPORT_SYMBOL(cfs_hash_create); - -/** - * Cleanup libcfs hash @hs. - */ -static void -cfs_hash_destroy(struct cfs_hash *hs) -{ - struct hlist_node *hnode; - struct hlist_node *pos; - struct cfs_hash_bd bd; - int i; - - LASSERT(hs); - LASSERT(!cfs_hash_is_exiting(hs) && - !cfs_hash_is_iterating(hs)); - - /** - * prohibit further rehashes, don't need any lock because - * I'm the only (last) one can change it. 
- */ - hs->hs_exiting = 1; - if (cfs_hash_with_rehash(hs)) - cfs_hash_rehash_cancel(hs); - - cfs_hash_depth_wi_cancel(hs); - /* rehash should be done/canceled */ - LASSERT(hs->hs_buckets && !hs->hs_rehash_buckets); - - cfs_hash_for_each_bucket(hs, &bd, i) { - struct hlist_head *hhead; - - LASSERT(bd.bd_bucket); - /* no need to take this lock, just for consistent code */ - cfs_hash_bd_lock(hs, &bd, 1); - - cfs_hash_bd_for_each_hlist(hs, &bd, hhead) { - hlist_for_each_safe(hnode, pos, hhead) { - LASSERTF(!cfs_hash_with_assert_empty(hs), - "hash %s bucket %u(%u) is not empty: %u items left\n", - hs->hs_name, bd.bd_bucket->hsb_index, - bd.bd_offset, bd.bd_bucket->hsb_count); - /* can't assert that the key is valid here, - * because we may have interrupted a rehash - */ - cfs_hash_bd_del_locked(hs, &bd, hnode); - cfs_hash_exit(hs, hnode); - } - } - LASSERT(!bd.bd_bucket->hsb_count); - cfs_hash_bd_unlock(hs, &bd, 1); - cond_resched(); - } - - LASSERT(!atomic_read(&hs->hs_count)); - - cfs_hash_buckets_free(hs->hs_buckets, cfs_hash_bkt_size(hs), - 0, CFS_HASH_NBKT(hs)); - /* NOTE(review): the value stored in i here is never read afterwards */ - i = cfs_hash_with_bigname(hs) ? 
- CFS_HASH_BIGNAME_LEN : CFS_HASH_NAME_LEN; - kfree(hs); -} - -struct cfs_hash *cfs_hash_getref(struct cfs_hash *hs) -{ - if (atomic_inc_not_zero(&hs->hs_refcount)) - return hs; - return NULL; -} -EXPORT_SYMBOL(cfs_hash_getref); - -void cfs_hash_putref(struct cfs_hash *hs) -{ - if (atomic_dec_and_test(&hs->hs_refcount)) - cfs_hash_destroy(hs); -} -EXPORT_SYMBOL(cfs_hash_putref); - -static inline int -cfs_hash_rehash_bits(struct cfs_hash *hs) -{ - if (cfs_hash_with_no_lock(hs) || - !cfs_hash_with_rehash(hs)) - return -EOPNOTSUPP; - - if (unlikely(cfs_hash_is_exiting(hs))) - return -ESRCH; - - if (unlikely(cfs_hash_is_rehashing(hs))) - return -EALREADY; - - if (unlikely(cfs_hash_is_iterating(hs))) - return -EAGAIN; - - /* XXX: need to handle case with max_theta != 2.0 - * and the case with min_theta != 0.5 - */ - if ((hs->hs_cur_bits < hs->hs_max_bits) && - (__cfs_hash_theta(hs) > hs->hs_max_theta)) - return hs->hs_cur_bits + 1; - - if (!cfs_hash_with_shrink(hs)) - return 0; - - if ((hs->hs_cur_bits > hs->hs_min_bits) && - (__cfs_hash_theta(hs) < hs->hs_min_theta)) - return hs->hs_cur_bits - 1; - - return 0; -} - -/** - * don't allow inline rehash if: - * - user wants non-blocking change (add/del) on hash table - * - too many elements - */ -static inline int -cfs_hash_rehash_inline(struct cfs_hash *hs) -{ - return !cfs_hash_with_nblk_change(hs) && - atomic_read(&hs->hs_count) < CFS_HASH_LOOP_HOG; -} - -/** - * Add item @hnode to libcfs hash @hs using @key. The registered - * ops->hs_get function will be called when the item is added. 
- */ -void -cfs_hash_add(struct cfs_hash *hs, const void *key, struct hlist_node *hnode) -{ - struct cfs_hash_bd bd; - int bits; - - LASSERT(hlist_unhashed(hnode)); - - cfs_hash_lock(hs, 0); - cfs_hash_bd_get_and_lock(hs, key, &bd, 1); - - cfs_hash_key_validate(hs, key, hnode); - cfs_hash_bd_add_locked(hs, &bd, hnode); - - cfs_hash_bd_unlock(hs, &bd, 1); - - bits = cfs_hash_rehash_bits(hs); - cfs_hash_unlock(hs, 0); - if (bits > 0) - cfs_hash_rehash(hs, cfs_hash_rehash_inline(hs)); -} -EXPORT_SYMBOL(cfs_hash_add); - -static struct hlist_node * -cfs_hash_find_or_add(struct cfs_hash *hs, const void *key, - struct hlist_node *hnode, int noref) -{ - struct hlist_node *ehnode; - struct cfs_hash_bd bds[2]; - int bits = 0; - - LASSERTF(hlist_unhashed(hnode), "hnode = %p\n", hnode); - - cfs_hash_lock(hs, 0); - cfs_hash_dual_bd_get_and_lock(hs, key, bds, 1); - - cfs_hash_key_validate(hs, key, hnode); - ehnode = cfs_hash_dual_bd_findadd_locked(hs, bds, key, - hnode, noref); - cfs_hash_dual_bd_unlock(hs, bds, 1); - - if (ehnode == hnode) /* new item added */ - bits = cfs_hash_rehash_bits(hs); - cfs_hash_unlock(hs, 0); - if (bits > 0) - cfs_hash_rehash(hs, cfs_hash_rehash_inline(hs)); - - return ehnode; -} - -/** - * Add item @hnode to libcfs hash @hs using @key. The registered - * ops->hs_get function will be called if the item was added. - * Returns 0 on success or -EALREADY on key collisions. - */ -int -cfs_hash_add_unique(struct cfs_hash *hs, const void *key, - struct hlist_node *hnode) -{ - return cfs_hash_find_or_add(hs, key, hnode, 1) != hnode ? - -EALREADY : 0; -} -EXPORT_SYMBOL(cfs_hash_add_unique); - -/** - * Add item @hnode to libcfs hash @hs using @key. If this @key - * already exists in the hash then ops->hs_get will be called on the - * conflicting entry and that entry will be returned to the caller. - * Otherwise ops->hs_get is called on the item which was added. 
- */ -void * -cfs_hash_findadd_unique(struct cfs_hash *hs, const void *key, - struct hlist_node *hnode) -{ - hnode = cfs_hash_find_or_add(hs, key, hnode, 0); - - return cfs_hash_object(hs, hnode); -} -EXPORT_SYMBOL(cfs_hash_findadd_unique); - -/** - * Delete item @hnode from the libcfs hash @hs using @key. The @key - * is required to ensure the correct hash bucket is locked since there - * is no direct linkage from the item to the bucket. The object - * removed from the hash will be returned and ops->hs_put is called - * on the removed object. - */ -void * -cfs_hash_del(struct cfs_hash *hs, const void *key, struct hlist_node *hnode) -{ - void *obj = NULL; - int bits = 0; - struct cfs_hash_bd bds[2]; - - cfs_hash_lock(hs, 0); - cfs_hash_dual_bd_get_and_lock(hs, key, bds, 1); - - /* NB: nothing is removed if @hnode is not in the hash table - * (already unhashed); its object is still returned below. - * A NULL @hnode means "delete the first entry matching @key". - */ - if (!hnode || !hlist_unhashed(hnode)) { - if (!bds[1].bd_bucket && hnode) { - cfs_hash_bd_del_locked(hs, &bds[0], hnode); - } else { - hnode = cfs_hash_dual_bd_finddel_locked(hs, bds, - key, hnode); - } - } - - if (hnode) { - obj = cfs_hash_object(hs, hnode); - bits = cfs_hash_rehash_bits(hs); - } - - cfs_hash_dual_bd_unlock(hs, bds, 1); - cfs_hash_unlock(hs, 0); - if (bits > 0) - cfs_hash_rehash(hs, cfs_hash_rehash_inline(hs)); - - return obj; -} -EXPORT_SYMBOL(cfs_hash_del); - -/** - * Delete item given @key in libcfs hash @hs. The first @key found in - * the hash will be removed, if the key exists multiple times in the hash - * @hs this function must be called once per key. The removed object - * will be returned and ops->hs_put is called on the removed object. - */ -void * -cfs_hash_del_key(struct cfs_hash *hs, const void *key) -{ - return cfs_hash_del(hs, key, NULL); -} -EXPORT_SYMBOL(cfs_hash_del_key); - -/** - * Lookup an item using @key in the libcfs hash @hs and return it. - * If the @key is found in the hash hs->hs_get() is called and the - * matching object is returned. 
It is the caller's responsibility - * to call the counterpart ops->hs_put using the cfs_hash_put() macro - * when finished with the object. If the @key was not found - * in the hash @hs NULL is returned. - */ -void * -cfs_hash_lookup(struct cfs_hash *hs, const void *key) -{ - void *obj = NULL; - struct hlist_node *hnode; - struct cfs_hash_bd bds[2]; - - cfs_hash_lock(hs, 0); - cfs_hash_dual_bd_get_and_lock(hs, key, bds, 0); - - hnode = cfs_hash_dual_bd_lookup_locked(hs, bds, key); - if (hnode) - obj = cfs_hash_object(hs, hnode); - - cfs_hash_dual_bd_unlock(hs, bds, 0); - cfs_hash_unlock(hs, 0); - - return obj; -} -EXPORT_SYMBOL(cfs_hash_lookup); - -/* - * Mark @hs as being iterated: set hs_iterating, bump hs_iterators and - * cancel a rehash that is in progress. Does nothing (beyond the exiting - * assertion) for tables without rehash support. - */ -static void -cfs_hash_for_each_enter(struct cfs_hash *hs) -{ - LASSERT(!cfs_hash_is_exiting(hs)); - - if (!cfs_hash_with_rehash(hs)) - return; - /* - * NB: the update of cfs_hash::hs_iterating is racy, but that doesn't - * matter because it's just an unreliable signal to rehash-thread, - * rehash-thread will try to finish rehash ASAP when seeing this. - */ - hs->hs_iterating = 1; - - cfs_hash_lock(hs, 1); - hs->hs_iterators++; - cfs_hash_unlock(hs, 1); - - /* NB: iteration is mostly called by service thread, - * we tend to cancel pending rehash-request, instead of - * blocking service thread, we will relaunch rehash request - * after iteration - */ - if (cfs_hash_is_rehashing(hs)) - cfs_hash_rehash_cancel(hs); -} - -/* - * Drop one iterator from @hs; clear hs_iterating when none remain and - * relaunch a rehash if cfs_hash_rehash_bits() asks for one. - */ -static void -cfs_hash_for_each_exit(struct cfs_hash *hs) -{ - int remained; - int bits; - - if (!cfs_hash_with_rehash(hs)) - return; - cfs_hash_lock(hs, 1); - remained = --hs->hs_iterators; - bits = cfs_hash_rehash_bits(hs); - cfs_hash_unlock(hs, 1); - /* NB: the update of cfs_hash::hs_iterating is racy, see above */ - if (!remained) - hs->hs_iterating = 0; - if (bits > 0) { - cfs_hash_rehash(hs, atomic_read(&hs->hs_count) < - CFS_HASH_LOOP_HOG); - } -} - -/** - * For each item in the libcfs hash @hs call the passed callback @func - * and pass to it as an argument each hash item and the private @data. 
- * If @func is NULL only the per-bucket item counts are summed. - * Iteration stops early when @func returns non-zero. - * Returns the number of items visited (or counted). - * - * a) the function itself may sleep (it calls cond_resched()). - * b) during the callback: - * . the bucket lock is held so the callback must never sleep. - * . if @remove_safe is true, the user can remove the current item - * with cfs_hash_bd_del_locked - */ -static u64 -cfs_hash_for_each_tight(struct cfs_hash *hs, cfs_hash_for_each_cb_t func, - void *data, int remove_safe) -{ - struct hlist_node *hnode; - struct hlist_node *pos; - struct cfs_hash_bd bd; - u64 count = 0; - int excl = !!remove_safe; - int loop = 0; - int i; - - cfs_hash_for_each_enter(hs); - - cfs_hash_lock(hs, 0); - LASSERT(!cfs_hash_is_rehashing(hs)); - - cfs_hash_for_each_bucket(hs, &bd, i) { - struct hlist_head *hhead; - - cfs_hash_bd_lock(hs, &bd, excl); - if (!func) { /* only glimpse size */ - count += bd.bd_bucket->hsb_count; - cfs_hash_bd_unlock(hs, &bd, excl); - continue; - } - - cfs_hash_bd_for_each_hlist(hs, &bd, hhead) { - hlist_for_each_safe(hnode, pos, hhead) { - cfs_hash_bucket_validate(hs, &bd, hnode); - count++; - loop++; - if (func(hs, &bd, hnode, data)) { - cfs_hash_bd_unlock(hs, &bd, excl); - goto out; - } - } - } - cfs_hash_bd_unlock(hs, &bd, excl); - if (loop < CFS_HASH_LOOP_HOG) - continue; - /* visited CFS_HASH_LOOP_HOG items: drop the table lock and yield */ - loop = 0; - cfs_hash_unlock(hs, 0); - cond_resched(); - cfs_hash_lock(hs, 0); - } - out: - cfs_hash_unlock(hs, 0); - - cfs_hash_for_each_exit(hs); - return count; -} - -/* user predicate and its private argument, as used by cfs_hash_cond_del_locked() */ -struct cfs_hash_cond_arg { - cfs_hash_cond_opt_cb_t func; - void *arg; -}; - -/* - * Iteration callback: delete @hnode when the user predicate in @data - * returns true. Always returns 0 so that the iteration continues. - */ -static int -cfs_hash_cond_del_locked(struct cfs_hash *hs, struct cfs_hash_bd *bd, - struct hlist_node *hnode, void *data) -{ - struct cfs_hash_cond_arg *cond = data; - - if (cond->func(cfs_hash_object(hs, hnode), cond->arg)) - cfs_hash_bd_del_locked(hs, bd, hnode); - return 0; -} - -/** - * Delete item from the libcfs hash @hs when @func return true. - * The write lock being hold during loop for each bucket to avoid - * any object be reference. 
- */ -void -cfs_hash_cond_del(struct cfs_hash *hs, cfs_hash_cond_opt_cb_t func, void *data) -{ - struct cfs_hash_cond_arg arg = { - .func = func, - .arg = data, - }; - - cfs_hash_for_each_tight(hs, cfs_hash_cond_del_locked, &arg, 1); -} -EXPORT_SYMBOL(cfs_hash_cond_del); - -void -cfs_hash_for_each(struct cfs_hash *hs, cfs_hash_for_each_cb_t func, - void *data) -{ - cfs_hash_for_each_tight(hs, func, data, 0); -} -EXPORT_SYMBOL(cfs_hash_for_each); - -void -cfs_hash_for_each_safe(struct cfs_hash *hs, cfs_hash_for_each_cb_t func, - void *data) -{ - cfs_hash_for_each_tight(hs, func, data, 1); -} -EXPORT_SYMBOL(cfs_hash_for_each_safe); - -static int -cfs_hash_peek(struct cfs_hash *hs, struct cfs_hash_bd *bd, - struct hlist_node *hnode, void *data) -{ - *(int *)data = 0; - return 1; /* return 1 to break the loop */ -} - -int -cfs_hash_is_empty(struct cfs_hash *hs) -{ - int empty = 1; - - cfs_hash_for_each_tight(hs, cfs_hash_peek, &empty, 0); - return empty; -} -EXPORT_SYMBOL(cfs_hash_is_empty); - -u64 -cfs_hash_size_get(struct cfs_hash *hs) -{ - return cfs_hash_with_counter(hs) ? - atomic_read(&hs->hs_count) : - cfs_hash_for_each_tight(hs, NULL, NULL, 0); -} -EXPORT_SYMBOL(cfs_hash_size_get); - -/* - * cfs_hash_for_each_relax: - * Iterate the hash table and call @func on each item without - * any lock. This function can't guarantee to finish iteration - * if these features are enabled: - * - * a. if rehash_key is enabled, an item can be moved from - * one bucket to another bucket - * b. user can remove non-zero-ref item from hash-table, - * so the item can be removed from hash-table, even worse, - * it's possible that user changed key and insert to another - * hash bucket. - * there's no way for us to finish iteration correctly on previous - * two cases, so iteration has to be stopped on change. 
- */ -static int -cfs_hash_for_each_relax(struct cfs_hash *hs, cfs_hash_for_each_cb_t func, - void *data, int start) -{ - struct hlist_node *next = NULL; - struct hlist_node *hnode; - struct cfs_hash_bd bd; - u32 version; - int count = 0; - int stop_on_change; - int has_put_locked; - int end = -1; - int rc = 0; - int i; - - stop_on_change = cfs_hash_with_rehash_key(hs) || - !cfs_hash_with_no_itemref(hs); - has_put_locked = hs->hs_ops->hs_put_locked != NULL; - cfs_hash_lock(hs, 0); -again: - LASSERT(!cfs_hash_is_rehashing(hs)); - - cfs_hash_for_each_bucket(hs, &bd, i) { - struct hlist_head *hhead; - - if (i < start) - continue; - else if (end > 0 && i >= end) - break; - - cfs_hash_bd_lock(hs, &bd, 0); - version = cfs_hash_bd_version_get(&bd); - - cfs_hash_bd_for_each_hlist(hs, &bd, hhead) { - hnode = hhead->first; - if (!hnode) - continue; - cfs_hash_get(hs, hnode); - - for (; hnode; hnode = next) { - cfs_hash_bucket_validate(hs, &bd, hnode); - next = hnode->next; - if (next) - cfs_hash_get(hs, next); - cfs_hash_bd_unlock(hs, &bd, 0); - cfs_hash_unlock(hs, 0); - - rc = func(hs, &bd, hnode, data); - if (stop_on_change || !has_put_locked) - cfs_hash_put(hs, hnode); - cond_resched(); - count++; - - cfs_hash_lock(hs, 0); - cfs_hash_bd_lock(hs, &bd, 0); - if (stop_on_change) { - if (version != - cfs_hash_bd_version_get(&bd)) - rc = -EINTR; - } else if (has_put_locked) { - cfs_hash_put_locked(hs, hnode); - } - if (rc) /* callback wants to break iteration */ - break; - } - if (next) { - if (has_put_locked) { - cfs_hash_put_locked(hs, next); - next = NULL; - } - break; - } else if (rc) { - break; - } - } - cfs_hash_bd_unlock(hs, &bd, 0); - if (next && !has_put_locked) { - cfs_hash_put(hs, next); - next = NULL; - } - if (rc) /* callback wants to break iteration */ - break; - } - if (start > 0 && !rc) { - end = start; - start = 0; - goto again; - } - - cfs_hash_unlock(hs, 0); - return count; -} - -int -cfs_hash_for_each_nolock(struct cfs_hash *hs, cfs_hash_for_each_cb_t func, 
- void *data, int start) -{ - if (cfs_hash_with_no_lock(hs) || - cfs_hash_with_rehash_key(hs) || - !cfs_hash_with_no_itemref(hs)) - return -EOPNOTSUPP; - - if (!hs->hs_ops->hs_get || - (!hs->hs_ops->hs_put && !hs->hs_ops->hs_put_locked)) - return -EOPNOTSUPP; - - cfs_hash_for_each_enter(hs); - cfs_hash_for_each_relax(hs, func, data, start); - cfs_hash_for_each_exit(hs); - - return 0; -} -EXPORT_SYMBOL(cfs_hash_for_each_nolock); - -/** - * For each hash bucket in the libcfs hash @hs call the passed callback - * @func until all the hash buckets are empty. The passed callback @func - * or the previously registered callback hs->hs_put must remove the item - * from the hash. You may either use the cfs_hash_del() or hlist_del() - * functions. No rwlocks will be held during the callback @func it is - * safe to sleep if needed. This function will not terminate until the - * hash is empty. Note it is still possible to concurrently add new - * items in to the hash. It is the callers responsibility to ensure - * the required locking is in place to prevent concurrent insertions. 
- */ -int -cfs_hash_for_each_empty(struct cfs_hash *hs, cfs_hash_for_each_cb_t func, - void *data) -{ - unsigned int i = 0; - - if (cfs_hash_with_no_lock(hs)) - return -EOPNOTSUPP; - - if (!hs->hs_ops->hs_get || - (!hs->hs_ops->hs_put && !hs->hs_ops->hs_put_locked)) - return -EOPNOTSUPP; - - cfs_hash_for_each_enter(hs); - while (cfs_hash_for_each_relax(hs, func, data, 0)) { - CDEBUG(D_INFO, "Try to empty hash: %s, loop: %u\n", - hs->hs_name, i++); - } - cfs_hash_for_each_exit(hs); - return 0; -} -EXPORT_SYMBOL(cfs_hash_for_each_empty); - -void -cfs_hash_hlist_for_each(struct cfs_hash *hs, unsigned int hindex, - cfs_hash_for_each_cb_t func, void *data) -{ - struct hlist_head *hhead; - struct hlist_node *hnode; - struct cfs_hash_bd bd; - - cfs_hash_for_each_enter(hs); - cfs_hash_lock(hs, 0); - if (hindex >= CFS_HASH_NHLIST(hs)) - goto out; - - cfs_hash_bd_index_set(hs, hindex, &bd); - - cfs_hash_bd_lock(hs, &bd, 0); - hhead = cfs_hash_bd_hhead(hs, &bd); - hlist_for_each(hnode, hhead) { - if (func(hs, &bd, hnode, data)) - break; - } - cfs_hash_bd_unlock(hs, &bd, 0); -out: - cfs_hash_unlock(hs, 0); - cfs_hash_for_each_exit(hs); -} -EXPORT_SYMBOL(cfs_hash_hlist_for_each); - -/* - * For each item in the libcfs hash @hs which matches the @key call - * the passed callback @func and pass to it as an argument each hash - * item and the private @data. During the callback the bucket lock - * is held so the callback must never sleep. 
- */ -void -cfs_hash_for_each_key(struct cfs_hash *hs, const void *key, - cfs_hash_for_each_cb_t func, void *data) -{ - struct hlist_node *hnode; - struct cfs_hash_bd bds[2]; - unsigned int i; - - cfs_hash_lock(hs, 0); - - cfs_hash_dual_bd_get_and_lock(hs, key, bds, 0); - - cfs_hash_for_each_bd(bds, 2, i) { - struct hlist_head *hlist = cfs_hash_bd_hhead(hs, &bds[i]); - - hlist_for_each(hnode, hlist) { - cfs_hash_bucket_validate(hs, &bds[i], hnode); - - if (cfs_hash_keycmp(hs, key, hnode)) { - if (func(hs, &bds[i], hnode, data)) - break; - } - } - } - - cfs_hash_dual_bd_unlock(hs, bds, 0); - cfs_hash_unlock(hs, 0); -} -EXPORT_SYMBOL(cfs_hash_for_each_key); - -/** - * Rehash the libcfs hash @hs to the given @bits. This can be used - * to grow the hash size when excessive chaining is detected, or to - * shrink the hash when it is larger than needed. When the CFS_HASH_REHASH - * flag is set in @hs the libcfs hash may be dynamically rehashed - * during addition or removal if the hash's theta value exceeds - * either the hs->hs_min_theta or hs->max_theta values. By default - * these values are tuned to keep the chained hash depth small, and - * this approach assumes a reasonably uniform hashing function. The - * theta thresholds for @hs are tunable via cfs_hash_set_theta(). 
- */ -void -cfs_hash_rehash_cancel(struct cfs_hash *hs) -{ - LASSERT(cfs_hash_with_rehash(hs)); - cancel_work_sync(&hs->hs_rehash_work); -} - -void -cfs_hash_rehash(struct cfs_hash *hs, int do_rehash) -{ - int rc; - - LASSERT(cfs_hash_with_rehash(hs) && !cfs_hash_with_no_lock(hs)); - - cfs_hash_lock(hs, 1); - - rc = cfs_hash_rehash_bits(hs); - if (rc <= 0) { - cfs_hash_unlock(hs, 1); - return; - } - - hs->hs_rehash_bits = rc; - if (!do_rehash) { - /* launch and return */ - queue_work(cfs_rehash_wq, &hs->hs_rehash_work); - cfs_hash_unlock(hs, 1); - return; - } - - /* rehash right now */ - cfs_hash_unlock(hs, 1); - - cfs_hash_rehash_worker(&hs->hs_rehash_work); -} - -static int -cfs_hash_rehash_bd(struct cfs_hash *hs, struct cfs_hash_bd *old) -{ - struct cfs_hash_bd new; - struct hlist_head *hhead; - struct hlist_node *hnode; - struct hlist_node *pos; - void *key; - int c = 0; - - /* hold cfs_hash_lock(hs, 1), so don't need any bucket lock */ - cfs_hash_bd_for_each_hlist(hs, old, hhead) { - hlist_for_each_safe(hnode, pos, hhead) { - key = cfs_hash_key(hs, hnode); - LASSERT(key); - /* Validate hnode is in the correct bucket. */ - cfs_hash_bucket_validate(hs, old, hnode); - /* - * Delete from old hash bucket; move to new bucket. - * ops->hs_key must be defined. 
- */ - cfs_hash_bd_from_key(hs, hs->hs_rehash_buckets, - hs->hs_rehash_bits, key, &new); - cfs_hash_bd_move_locked(hs, old, &new, hnode); - c++; - } - } - - return c; -} - -static void -cfs_hash_rehash_worker(struct work_struct *work) -{ - struct cfs_hash *hs = container_of(work, struct cfs_hash, hs_rehash_work); - struct cfs_hash_bucket **bkts; - struct cfs_hash_bd bd; - unsigned int old_size; - unsigned int new_size; - int bsize; - int count = 0; - int rc = 0; - int i; - - LASSERT(hs && cfs_hash_with_rehash(hs)); - - cfs_hash_lock(hs, 0); - LASSERT(cfs_hash_is_rehashing(hs)); - - old_size = CFS_HASH_NBKT(hs); - new_size = CFS_HASH_RH_NBKT(hs); - - cfs_hash_unlock(hs, 0); - - /* - * don't need hs::hs_rwlock for hs::hs_buckets, - * because nobody can change bkt-table except me. - */ - bkts = cfs_hash_buckets_realloc(hs, hs->hs_buckets, - old_size, new_size); - cfs_hash_lock(hs, 1); - if (!bkts) { - rc = -ENOMEM; - goto out; - } - - if (bkts == hs->hs_buckets) { - bkts = NULL; /* do nothing */ - goto out; - } - - rc = __cfs_hash_theta(hs); - if ((rc >= hs->hs_min_theta) && (rc <= hs->hs_max_theta)) { - /* free the new allocated bkt-table */ - old_size = new_size; - new_size = CFS_HASH_NBKT(hs); - rc = -EALREADY; - goto out; - } - - LASSERT(!hs->hs_rehash_buckets); - hs->hs_rehash_buckets = bkts; - - rc = 0; - cfs_hash_for_each_bucket(hs, &bd, i) { - if (cfs_hash_is_exiting(hs)) { - rc = -ESRCH; - /* someone wants to destroy the hash, abort now */ - if (old_size < new_size) /* OK to free old bkt-table */ - break; - /* it's shrinking, need free new bkt-table */ - hs->hs_rehash_buckets = NULL; - old_size = new_size; - new_size = CFS_HASH_NBKT(hs); - goto out; - } - - count += cfs_hash_rehash_bd(hs, &bd); - if (count < CFS_HASH_LOOP_HOG || - cfs_hash_is_iterating(hs)) { /* need to finish ASAP */ - continue; - } - - count = 0; - cfs_hash_unlock(hs, 1); - cond_resched(); - cfs_hash_lock(hs, 1); - } - - hs->hs_rehash_count++; - - bkts = hs->hs_buckets; - hs->hs_buckets = 
hs->hs_rehash_buckets; - hs->hs_rehash_buckets = NULL; - - hs->hs_cur_bits = hs->hs_rehash_bits; -out: - hs->hs_rehash_bits = 0; - bsize = cfs_hash_bkt_size(hs); - cfs_hash_unlock(hs, 1); - /* can't refer to @hs anymore because it could be destroyed */ - if (bkts) - cfs_hash_buckets_free(bkts, bsize, new_size, old_size); - if (rc) - CDEBUG(D_INFO, "early quit of rehashing: %d\n", rc); -} - -/** - * Rehash the object referenced by @hnode in the libcfs hash @hs. The - * @old_key must be provided to locate the objects previous location - * in the hash, and the @new_key will be used to reinsert the object. - * Use this function instead of a cfs_hash_add() + cfs_hash_del() - * combo when it is critical that there is no window in time where the - * object is missing from the hash. When an object is being rehashed - * the registered cfs_hash_get() and cfs_hash_put() functions will - * not be called. - */ -void cfs_hash_rehash_key(struct cfs_hash *hs, const void *old_key, - void *new_key, struct hlist_node *hnode) -{ - struct cfs_hash_bd bds[3]; - struct cfs_hash_bd old_bds[2]; - struct cfs_hash_bd new_bd; - - LASSERT(!hlist_unhashed(hnode)); - - cfs_hash_lock(hs, 0); - - cfs_hash_dual_bd_get(hs, old_key, old_bds); - cfs_hash_bd_get(hs, new_key, &new_bd); - - bds[0] = old_bds[0]; - bds[1] = old_bds[1]; - bds[2] = new_bd; - - /* NB: bds[0] and bds[1] are ordered already */ - cfs_hash_bd_order(&bds[1], &bds[2]); - cfs_hash_bd_order(&bds[0], &bds[1]); - - cfs_hash_multi_bd_lock(hs, bds, 3, 1); - if (likely(!old_bds[1].bd_bucket)) { - cfs_hash_bd_move_locked(hs, &old_bds[0], &new_bd, hnode); - } else { - cfs_hash_dual_bd_finddel_locked(hs, old_bds, old_key, hnode); - cfs_hash_bd_add_locked(hs, &new_bd, hnode); - } - /* overwrite key inside locks, otherwise may screw up with - * other operations, i.e: rehash - */ - cfs_hash_keycpy(hs, hnode, new_key); - - cfs_hash_multi_bd_unlock(hs, bds, 3, 1); - cfs_hash_unlock(hs, 0); -} -EXPORT_SYMBOL(cfs_hash_rehash_key); - -void 
cfs_hash_debug_header(struct seq_file *m) -{ - seq_printf(m, "%-*s cur min max theta t-min t-max flags rehash count maxdep maxdepb distribution\n", - CFS_HASH_BIGNAME_LEN, "name"); -} -EXPORT_SYMBOL(cfs_hash_debug_header); - -static struct cfs_hash_bucket ** -cfs_hash_full_bkts(struct cfs_hash *hs) -{ - /* NB: caller should hold hs->hs_rwlock if REHASH is set */ - if (!hs->hs_rehash_buckets) - return hs->hs_buckets; - - LASSERT(hs->hs_rehash_bits); - return hs->hs_rehash_bits > hs->hs_cur_bits ? - hs->hs_rehash_buckets : hs->hs_buckets; -} - -static unsigned int -cfs_hash_full_nbkt(struct cfs_hash *hs) -{ - /* NB: caller should hold hs->hs_rwlock if REHASH is set */ - if (!hs->hs_rehash_buckets) - return CFS_HASH_NBKT(hs); - - LASSERT(hs->hs_rehash_bits); - return hs->hs_rehash_bits > hs->hs_cur_bits ? - CFS_HASH_RH_NBKT(hs) : CFS_HASH_NBKT(hs); -} - -void cfs_hash_debug_str(struct cfs_hash *hs, struct seq_file *m) -{ - int dist[8] = { 0, }; - int maxdep = -1; - int maxdepb = -1; - int total = 0; - int theta; - int i; - - cfs_hash_lock(hs, 0); - theta = __cfs_hash_theta(hs); - - seq_printf(m, "%-*s %5d %5d %5d %d.%03d %d.%03d %d.%03d 0x%02x %6d ", - CFS_HASH_BIGNAME_LEN, hs->hs_name, - 1 << hs->hs_cur_bits, 1 << hs->hs_min_bits, - 1 << hs->hs_max_bits, - __cfs_hash_theta_int(theta), __cfs_hash_theta_frac(theta), - __cfs_hash_theta_int(hs->hs_min_theta), - __cfs_hash_theta_frac(hs->hs_min_theta), - __cfs_hash_theta_int(hs->hs_max_theta), - __cfs_hash_theta_frac(hs->hs_max_theta), - hs->hs_flags, hs->hs_rehash_count); - - /* - * The distribution is a summary of the chained hash depth in - * each of the libcfs hash buckets. Each buckets hsb_count is - * divided by the hash theta value and used to generate a - * histogram of the hash distribution. A uniform hash will - * result in all hash buckets being close to the average thus - * only the first few entries in the histogram will be non-zero. 
- * If you hash function results in a non-uniform hash the will - * be observable by outlier bucks in the distribution histogram. - * - * Uniform hash distribution: 128/128/0/0/0/0/0/0 - * Non-Uniform hash distribution: 128/125/0/0/0/0/2/1 - */ - for (i = 0; i < cfs_hash_full_nbkt(hs); i++) { - struct cfs_hash_bd bd; - - bd.bd_bucket = cfs_hash_full_bkts(hs)[i]; - cfs_hash_bd_lock(hs, &bd, 0); - if (maxdep < bd.bd_bucket->hsb_depmax) { - maxdep = bd.bd_bucket->hsb_depmax; - maxdepb = ffz(~maxdep); - } - total += bd.bd_bucket->hsb_count; - dist[min(fls(bd.bd_bucket->hsb_count / max(theta, 1)), 7)]++; - cfs_hash_bd_unlock(hs, &bd, 0); - } - - seq_printf(m, "%7d %7d %7d ", total, maxdep, maxdepb); - for (i = 0; i < 8; i++) - seq_printf(m, "%d%c", dist[i], (i == 7) ? '\n' : '/'); - - cfs_hash_unlock(hs, 0); -} -EXPORT_SYMBOL(cfs_hash_debug_str); diff --git a/drivers/staging/lustre/lnet/libcfs/libcfs_cpu.c b/drivers/staging/lustre/lnet/libcfs/libcfs_cpu.c deleted file mode 100644 index 76291a350406..000000000000 --- a/drivers/staging/lustre/lnet/libcfs/libcfs_cpu.c +++ /dev/null @@ -1,228 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -/* - * GPL HEADER START - * - * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 only, - * as published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, but - * WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * General Public License version 2 for more details (a copy is included - * in the LICENSE file that accompanied this code). - * - * GPL HEADER END - */ -/* - * Copyright (c) 2010, Oracle and/or its affiliates. All rights reserved. - * Copyright (c) 2012, Intel Corporation. 
- */ -/* - * This file is part of Lustre, http://www.lustre.org/ - * Lustre is a trademark of Sun Microsystems, Inc. - * - * Please see comments in libcfs/include/libcfs/libcfs_cpu.h for introduction - * - * Author: liang@whamcloud.com - */ - -#define DEBUG_SUBSYSTEM S_LNET - -#include <linux/libcfs/libcfs.h> - -/** Global CPU partition table */ -struct cfs_cpt_table *cfs_cpt_table __read_mostly; -EXPORT_SYMBOL(cfs_cpt_table); - -#ifndef HAVE_LIBCFS_CPT - -#define CFS_CPU_VERSION_MAGIC 0xbabecafe - -struct cfs_cpt_table * -cfs_cpt_table_alloc(unsigned int ncpt) -{ - struct cfs_cpt_table *cptab; - - if (ncpt != 1) { - CERROR("Can't support cpu partition number %d\n", ncpt); - return NULL; - } - - cptab = kzalloc(sizeof(*cptab), GFP_NOFS); - if (cptab) { - cptab->ctb_version = CFS_CPU_VERSION_MAGIC; - node_set(0, cptab->ctb_nodemask); - cptab->ctb_nparts = ncpt; - } - - return cptab; -} -EXPORT_SYMBOL(cfs_cpt_table_alloc); - -void -cfs_cpt_table_free(struct cfs_cpt_table *cptab) -{ - LASSERT(cptab->ctb_version == CFS_CPU_VERSION_MAGIC); - - kfree(cptab); -} -EXPORT_SYMBOL(cfs_cpt_table_free); - -#ifdef CONFIG_SMP -int -cfs_cpt_table_print(struct cfs_cpt_table *cptab, char *buf, int len) -{ - int rc; - - rc = snprintf(buf, len, "%d\t: %d\n", 0, 0); - len -= rc; - if (len <= 0) - return -EFBIG; - - return rc; -} -EXPORT_SYMBOL(cfs_cpt_table_print); -#endif /* CONFIG_SMP */ - -int -cfs_cpt_number(struct cfs_cpt_table *cptab) -{ - return 1; -} -EXPORT_SYMBOL(cfs_cpt_number); - -int -cfs_cpt_weight(struct cfs_cpt_table *cptab, int cpt) -{ - return 1; -} -EXPORT_SYMBOL(cfs_cpt_weight); - -int -cfs_cpt_online(struct cfs_cpt_table *cptab, int cpt) -{ - return 1; -} -EXPORT_SYMBOL(cfs_cpt_online); - -nodemask_t * -cfs_cpt_nodemask(struct cfs_cpt_table *cptab, int cpt) -{ - return &cptab->ctb_nodemask; -} -EXPORT_SYMBOL(cfs_cpt_nodemask); - -int -cfs_cpt_set_cpu(struct cfs_cpt_table *cptab, int cpt, int cpu) -{ - return 1; -} -EXPORT_SYMBOL(cfs_cpt_set_cpu); - -void 
-cfs_cpt_unset_cpu(struct cfs_cpt_table *cptab, int cpt, int cpu) -{ -} -EXPORT_SYMBOL(cfs_cpt_unset_cpu); - -int -cfs_cpt_set_cpumask(struct cfs_cpt_table *cptab, int cpt, cpumask_t *mask) -{ - return 1; -} -EXPORT_SYMBOL(cfs_cpt_set_cpumask); - -void -cfs_cpt_unset_cpumask(struct cfs_cpt_table *cptab, int cpt, cpumask_t *mask) -{ -} -EXPORT_SYMBOL(cfs_cpt_unset_cpumask); - -int -cfs_cpt_set_node(struct cfs_cpt_table *cptab, int cpt, int node) -{ - return 1; -} -EXPORT_SYMBOL(cfs_cpt_set_node); - -void -cfs_cpt_unset_node(struct cfs_cpt_table *cptab, int cpt, int node) -{ -} -EXPORT_SYMBOL(cfs_cpt_unset_node); - -int -cfs_cpt_set_nodemask(struct cfs_cpt_table *cptab, int cpt, nodemask_t *mask) -{ - return 1; -} -EXPORT_SYMBOL(cfs_cpt_set_nodemask); - -void -cfs_cpt_unset_nodemask(struct cfs_cpt_table *cptab, int cpt, nodemask_t *mask) -{ -} -EXPORT_SYMBOL(cfs_cpt_unset_nodemask); - -void -cfs_cpt_clear(struct cfs_cpt_table *cptab, int cpt) -{ -} -EXPORT_SYMBOL(cfs_cpt_clear); - -int -cfs_cpt_spread_node(struct cfs_cpt_table *cptab, int cpt) -{ - return 0; -} -EXPORT_SYMBOL(cfs_cpt_spread_node); - -int -cfs_cpu_ht_nsiblings(int cpu) -{ - return 1; -} -EXPORT_SYMBOL(cfs_cpu_ht_nsiblings); - -int -cfs_cpt_current(struct cfs_cpt_table *cptab, int remap) -{ - return 0; -} -EXPORT_SYMBOL(cfs_cpt_current); - -int -cfs_cpt_of_cpu(struct cfs_cpt_table *cptab, int cpu) -{ - return 0; -} -EXPORT_SYMBOL(cfs_cpt_of_cpu); - -int -cfs_cpt_bind(struct cfs_cpt_table *cptab, int cpt) -{ - return 0; -} -EXPORT_SYMBOL(cfs_cpt_bind); - -void -cfs_cpu_fini(void) -{ - if (cfs_cpt_table) { - cfs_cpt_table_free(cfs_cpt_table); - cfs_cpt_table = NULL; - } -} - -int -cfs_cpu_init(void) -{ - cfs_cpt_table = cfs_cpt_table_alloc(1); - - return cfs_cpt_table ? 
0 : -1; -} - -#endif /* HAVE_LIBCFS_CPT */ diff --git a/drivers/staging/lustre/lnet/libcfs/libcfs_lock.c b/drivers/staging/lustre/lnet/libcfs/libcfs_lock.c deleted file mode 100644 index 670ad5a34224..000000000000 --- a/drivers/staging/lustre/lnet/libcfs/libcfs_lock.c +++ /dev/null @@ -1,152 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -/* - * GPL HEADER START - * - * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 only, - * as published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, but - * WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * General Public License version 2 for more details (a copy is included - * in the LICENSE file that accompanied this code). - * - * GPL HEADER END - */ -/* Copyright (c) 2010, Oracle and/or its affiliates. All rights reserved. - * Copyright (c) 2012, 2015 Intel Corporation. - */ -/* - * This file is part of Lustre, http://www.lustre.org/ - * Lustre is a trademark of Sun Microsystems, Inc. - * - * Author: liang@whamcloud.com - */ - -#define DEBUG_SUBSYSTEM S_LNET - -#include <linux/libcfs/libcfs.h> - -/** destroy cpu-partition lock, see libcfs_private.h for more detail */ -void -cfs_percpt_lock_free(struct cfs_percpt_lock *pcl) -{ - LASSERT(pcl->pcl_locks); - LASSERT(!pcl->pcl_locked); - - cfs_percpt_free(pcl->pcl_locks); - kfree(pcl); -} -EXPORT_SYMBOL(cfs_percpt_lock_free); - -/** - * create cpu-partition lock, see libcfs_private.h for more detail. - * - * cpu-partition lock is designed for large-scale SMP system, so we need to - * reduce cacheline conflict as possible as we can, that's the - * reason we always allocate cacheline-aligned memory block. 
- */ -struct cfs_percpt_lock * -cfs_percpt_lock_create(struct cfs_cpt_table *cptab, - struct lock_class_key *keys) -{ - struct cfs_percpt_lock *pcl; - spinlock_t *lock; - int i; - - /* NB: cptab can be NULL, pcl will be for HW CPUs on that case */ - pcl = kzalloc(sizeof(*pcl), GFP_NOFS); - if (!pcl) - return NULL; - - pcl->pcl_cptab = cptab; - pcl->pcl_locks = cfs_percpt_alloc(cptab, sizeof(*lock)); - if (!pcl->pcl_locks) { - kfree(pcl); - return NULL; - } - - if (!keys) - CWARN("Cannot setup class key for percpt lock, you may see recursive locking warnings which are actually fake.\n"); - - cfs_percpt_for_each(lock, i, pcl->pcl_locks) { - spin_lock_init(lock); - if (keys) - lockdep_set_class(lock, &keys[i]); - } - - return pcl; -} -EXPORT_SYMBOL(cfs_percpt_lock_create); - -/** - * lock a CPU partition - * - * \a index != CFS_PERCPT_LOCK_EX - * hold private lock indexed by \a index - * - * \a index == CFS_PERCPT_LOCK_EX - * exclusively lock @pcl and nobody can take private lock - */ -void -cfs_percpt_lock(struct cfs_percpt_lock *pcl, int index) - __acquires(pcl->pcl_locks) -{ - int ncpt = cfs_cpt_number(pcl->pcl_cptab); - int i; - - LASSERT(index >= CFS_PERCPT_LOCK_EX && index < ncpt); - - if (ncpt == 1) { - index = 0; - } else { /* serialize with exclusive lock */ - while (pcl->pcl_locked) - cpu_relax(); - } - - if (likely(index != CFS_PERCPT_LOCK_EX)) { - spin_lock(pcl->pcl_locks[index]); - return; - } - - /* exclusive lock request */ - for (i = 0; i < ncpt; i++) { - spin_lock(pcl->pcl_locks[i]); - if (!i) { - LASSERT(!pcl->pcl_locked); - /* nobody should take private lock after this - * so I wouldn't starve for too long time - */ - pcl->pcl_locked = 1; - } - } -} -EXPORT_SYMBOL(cfs_percpt_lock); - -/** unlock a CPU partition */ -void -cfs_percpt_unlock(struct cfs_percpt_lock *pcl, int index) - __releases(pcl->pcl_locks) -{ - int ncpt = cfs_cpt_number(pcl->pcl_cptab); - int i; - - index = ncpt == 1 ? 
0 : index; - - if (likely(index != CFS_PERCPT_LOCK_EX)) { - spin_unlock(pcl->pcl_locks[index]); - return; - } - - for (i = ncpt - 1; i >= 0; i--) { - if (!i) { - LASSERT(pcl->pcl_locked); - pcl->pcl_locked = 0; - } - spin_unlock(pcl->pcl_locks[i]); - } -} -EXPORT_SYMBOL(cfs_percpt_unlock); diff --git a/drivers/staging/lustre/lnet/libcfs/libcfs_mem.c b/drivers/staging/lustre/lnet/libcfs/libcfs_mem.c deleted file mode 100644 index 7faed94994ea..000000000000 --- a/drivers/staging/lustre/lnet/libcfs/libcfs_mem.c +++ /dev/null @@ -1,167 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -/* - * GPL HEADER START - * - * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 only, - * as published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, but - * WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * General Public License version 2 for more details (a copy is included - * in the LICENSE file that accompanied this code). - * - * GPL HEADER END - */ -/* - * Copyright (c) 2010, Oracle and/or its affiliates. All rights reserved. - * Copyright (c) 2012, Intel Corporation. - */ -/* - * This file is part of Lustre, http://www.lustre.org/ - * Lustre is a trademark of Sun Microsystems, Inc. 
- * - * Author: liang@whamcloud.com - */ - -#define DEBUG_SUBSYSTEM S_LNET - -#include <linux/libcfs/libcfs.h> - -struct cfs_var_array { - unsigned int va_count; /* # of buffers */ - unsigned int va_size; /* size of each var */ - struct cfs_cpt_table *va_cptab; /* cpu partition table */ - void *va_ptrs[0]; /* buffer addresses */ -}; - -/* - * free per-cpu data, see more detail in cfs_percpt_free - */ -void -cfs_percpt_free(void *vars) -{ - struct cfs_var_array *arr; - int i; - - arr = container_of(vars, struct cfs_var_array, va_ptrs[0]); - - for (i = 0; i < arr->va_count; i++) - kfree(arr->va_ptrs[i]); - - kvfree(arr); -} -EXPORT_SYMBOL(cfs_percpt_free); - -/* - * allocate per cpu-partition variables, returned value is an array of pointers, - * variable can be indexed by CPU partition ID, i.e: - * - * arr = cfs_percpt_alloc(cfs_cpu_pt, size); - * then caller can access memory block for CPU 0 by arr[0], - * memory block for CPU 1 by arr[1]... - * memory block for CPU N by arr[N]... - * - * cacheline aligned. 
- */ -void * -cfs_percpt_alloc(struct cfs_cpt_table *cptab, unsigned int size) -{ - struct cfs_var_array *arr; - int count; - int i; - - count = cfs_cpt_number(cptab); - - arr = kvzalloc(offsetof(struct cfs_var_array, va_ptrs[count]), - GFP_KERNEL); - if (!arr) - return NULL; - - size = L1_CACHE_ALIGN(size); - arr->va_size = size; - arr->va_count = count; - arr->va_cptab = cptab; - - for (i = 0; i < count; i++) { - arr->va_ptrs[i] = kzalloc_node(size, GFP_KERNEL, - cfs_cpt_spread_node(cptab, i)); - if (!arr->va_ptrs[i]) { - cfs_percpt_free((void *)&arr->va_ptrs[0]); - return NULL; - } - } - - return (void *)&arr->va_ptrs[0]; -} -EXPORT_SYMBOL(cfs_percpt_alloc); - -/* - * return number of CPUs (or number of elements in per-cpu data) - * according to cptab of @vars - */ -int -cfs_percpt_number(void *vars) -{ - struct cfs_var_array *arr; - - arr = container_of(vars, struct cfs_var_array, va_ptrs[0]); - - return arr->va_count; -} -EXPORT_SYMBOL(cfs_percpt_number); - -/* - * free variable array, see more detail in cfs_array_alloc - */ -void -cfs_array_free(void *vars) -{ - struct cfs_var_array *arr; - int i; - - arr = container_of(vars, struct cfs_var_array, va_ptrs[0]); - - for (i = 0; i < arr->va_count; i++) { - if (!arr->va_ptrs[i]) - continue; - - kvfree(arr->va_ptrs[i]); - } - kvfree(arr); -} -EXPORT_SYMBOL(cfs_array_free); - -/* - * allocate a variable array, returned value is an array of pointers. - * Caller can specify length of array by @count, @size is size of each - * memory block in array. 
- */ -void * -cfs_array_alloc(int count, unsigned int size) -{ - struct cfs_var_array *arr; - int i; - - arr = kvmalloc(offsetof(struct cfs_var_array, va_ptrs[count]), GFP_KERNEL); - if (!arr) - return NULL; - - arr->va_count = count; - arr->va_size = size; - - for (i = 0; i < count; i++) { - arr->va_ptrs[i] = kvzalloc(size, GFP_KERNEL); - - if (!arr->va_ptrs[i]) { - cfs_array_free((void *)&arr->va_ptrs[0]); - return NULL; - } - } - - return (void *)&arr->va_ptrs[0]; -} -EXPORT_SYMBOL(cfs_array_alloc); diff --git a/drivers/staging/lustre/lnet/libcfs/libcfs_string.c b/drivers/staging/lustre/lnet/libcfs/libcfs_string.c deleted file mode 100644 index 442889a3d729..000000000000 --- a/drivers/staging/lustre/lnet/libcfs/libcfs_string.c +++ /dev/null @@ -1,556 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -/* - * GPL HEADER START - * - * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 only, - * as published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, but - * WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * General Public License version 2 for more details (a copy is included - * in the LICENSE file that accompanied this code). - * - * You should have received a copy of the GNU General Public License - * version 2 along with this program; If not, see - * http://www.gnu.org/licenses/gpl-2.0.html - * - * GPL HEADER END - */ -/* - * Copyright (c) 2008, 2010, Oracle and/or its affiliates. All rights reserved. - * Use is subject to license terms. - * - * Copyright (c) 2012, 2015 Intel Corporation. - */ -/* - * This file is part of Lustre, http://www.lustre.org/ - * Lustre is a trademark of Sun Microsystems, Inc. - * - * String manipulation functions. 
- * - * libcfs/libcfs/libcfs_string.c - * - * Author: Nathan Rutman <nathan.rutman@sun.com> - */ - -#include <linux/libcfs/libcfs.h> - -/* Convert a text string to a bitmask */ -int cfs_str2mask(const char *str, const char *(*bit2str)(int bit), - int *oldmask, int minmask, int allmask) -{ - const char *debugstr; - char op = '\0'; - int newmask = minmask, i, len, found = 0; - - /* <str> must be a list of tokens separated by whitespace - * and optionally an operator ('+' or '-'). If an operator - * appears first in <str>, '*oldmask' is used as the starting point - * (relative), otherwise minmask is used (absolute). An operator - * applies to all following tokens up to the next operator. - */ - while (*str != '\0') { - while (isspace(*str)) - str++; - if (*str == '\0') - break; - if (*str == '+' || *str == '-') { - op = *str++; - if (!found) - /* only if first token is relative */ - newmask = *oldmask; - while (isspace(*str)) - str++; - if (*str == '\0') /* trailing op */ - return -EINVAL; - } - - /* find token length */ - len = 0; - while (str[len] != '\0' && !isspace(str[len]) && - str[len] != '+' && str[len] != '-') - len++; - - /* match token */ - found = 0; - for (i = 0; i < 32; i++) { - debugstr = bit2str(i); - if (debugstr && strlen(debugstr) == len && - !strncasecmp(str, debugstr, len)) { - if (op == '-') - newmask &= ~(1 << i); - else - newmask |= (1 << i); - found = 1; - break; - } - } - if (!found && len == 3 && - !strncasecmp(str, "ALL", len)) { - if (op == '-') - newmask = minmask; - else - newmask = allmask; - found = 1; - } - if (!found) { - CWARN("unknown mask '%.*s'.\n" - "mask usage: [+|-]<all|type> ...\n", len, str); - return -EINVAL; - } - str += len; - } - - *oldmask = newmask; - return 0; -} - -/* get the first string out of @str */ -char *cfs_firststr(char *str, size_t size) -{ - size_t i = 0; - char *end; - - /* trim leading spaces */ - while (i < size && *str && isspace(*str)) { - ++i; - ++str; - } - - /* string with all spaces */ - if (*str 
== '\0') - goto out; - - end = str; - while (i < size && *end != '\0' && !isspace(*end)) { - ++i; - ++end; - } - - *end = '\0'; -out: - return str; -} -EXPORT_SYMBOL(cfs_firststr); - -/** - * Extracts tokens from strings. - * - * Looks for \a delim in string \a next, sets \a res to point to - * substring before the delimiter, sets \a next right after the found - * delimiter. - * - * \retval 1 if \a res points to a string of non-whitespace characters - * \retval 0 otherwise - */ -int -cfs_gettok(struct cfs_lstr *next, char delim, struct cfs_lstr *res) -{ - char *end; - - if (!next->ls_str) - return 0; - - /* skip leading white spaces */ - while (next->ls_len) { - if (!isspace(*next->ls_str)) - break; - next->ls_str++; - next->ls_len--; - } - - if (!next->ls_len) /* whitespaces only */ - return 0; - - if (*next->ls_str == delim) { - /* first non-writespace is the delimiter */ - return 0; - } - - res->ls_str = next->ls_str; - end = memchr(next->ls_str, delim, next->ls_len); - if (!end) { - /* there is no the delimeter in the string */ - end = next->ls_str + next->ls_len; - next->ls_str = NULL; - } else { - next->ls_str = end + 1; - next->ls_len -= (end - res->ls_str + 1); - } - - /* skip ending whitespaces */ - while (--end != res->ls_str) { - if (!isspace(*end)) - break; - } - - res->ls_len = end - res->ls_str + 1; - return 1; -} -EXPORT_SYMBOL(cfs_gettok); - -/** - * Converts string to integer. - * - * Accepts decimal and hexadecimal number recordings. - * - * \retval 1 if first \a nob chars of \a str convert to decimal or - * hexadecimal integer in the range [\a min, \a max] - * \retval 0 otherwise - */ -int -cfs_str2num_check(char *str, int nob, unsigned int *num, - unsigned int min, unsigned int max) -{ - bool all_numbers = true; - char *endp, cache; - int rc; - - /** - * kstrouint can only handle strings composed - * of only numbers. We need to scan the string - * passed in for the first non-digit character - * and end the string at that location. 
If we - * don't find any non-digit character we still - * need to place a '\0' at position nob since - * we are not interested in the rest of the - * string which is longer than nob in size. - * After we are done the character at the - * position we placed '\0' must be restored. - */ - for (endp = str; endp < str + nob; endp++) { - if (!isdigit(*endp)) { - all_numbers = false; - break; - } - } - cache = *endp; - *endp = '\0'; - - rc = kstrtouint(str, 10, num); - *endp = cache; - if (rc || !all_numbers) - return 0; - - return (*num >= min && *num <= max); -} -EXPORT_SYMBOL(cfs_str2num_check); - -/** - * Parses \<range_expr\> token of the syntax. If \a bracketed is false, - * \a src should only have a single token which can be \<number\> or \* - * - * \retval pointer to allocated range_expr and initialized - * range_expr::re_lo, range_expr::re_hi and range_expr:re_stride if \a - `* src parses to - * \<number\> | - * \<number\> '-' \<number\> | - * \<number\> '-' \<number\> '/' \<number\> - * \retval 0 will be returned if it can be parsed, otherwise -EINVAL or - * -ENOMEM will be returned. 
- */ -static int -cfs_range_expr_parse(struct cfs_lstr *src, unsigned int min, unsigned int max, - int bracketed, struct cfs_range_expr **expr) -{ - struct cfs_range_expr *re; - struct cfs_lstr tok; - - re = kzalloc(sizeof(*re), GFP_NOFS); - if (!re) - return -ENOMEM; - - if (src->ls_len == 1 && src->ls_str[0] == '*') { - re->re_lo = min; - re->re_hi = max; - re->re_stride = 1; - goto out; - } - - if (cfs_str2num_check(src->ls_str, src->ls_len, - &re->re_lo, min, max)) { - /* <number> is parsed */ - re->re_hi = re->re_lo; - re->re_stride = 1; - goto out; - } - - if (!bracketed || !cfs_gettok(src, '-', &tok)) - goto failed; - - if (!cfs_str2num_check(tok.ls_str, tok.ls_len, - &re->re_lo, min, max)) - goto failed; - - /* <number> - */ - if (cfs_str2num_check(src->ls_str, src->ls_len, - &re->re_hi, min, max)) { - /* <number> - <number> is parsed */ - re->re_stride = 1; - goto out; - } - - /* go to check <number> '-' <number> '/' <number> */ - if (cfs_gettok(src, '/', &tok)) { - if (!cfs_str2num_check(tok.ls_str, tok.ls_len, - &re->re_hi, min, max)) - goto failed; - - /* <number> - <number> / ... */ - if (cfs_str2num_check(src->ls_str, src->ls_len, - &re->re_stride, min, max)) { - /* <number> - <number> / <number> is parsed */ - goto out; - } - } - - out: - *expr = re; - return 0; - - failed: - kfree(re); - return -EINVAL; -} - -/** - * Print the range expression \a re into specified \a buffer. - * If \a bracketed is true, expression does not need additional - * brackets. 
- * - * \retval number of characters written - */ -static int -cfs_range_expr_print(char *buffer, int count, struct cfs_range_expr *expr, - bool bracketed) -{ - int i; - char s[] = "["; - char e[] = "]"; - - if (bracketed) { - s[0] = '\0'; - e[0] = '\0'; - } - - if (expr->re_lo == expr->re_hi) - i = scnprintf(buffer, count, "%u", expr->re_lo); - else if (expr->re_stride == 1) - i = scnprintf(buffer, count, "%s%u-%u%s", - s, expr->re_lo, expr->re_hi, e); - else - i = scnprintf(buffer, count, "%s%u-%u/%u%s", - s, expr->re_lo, expr->re_hi, expr->re_stride, e); - return i; -} - -/** - * Print a list of range expressions (\a expr_list) into specified \a buffer. - * If the list contains several expressions, separate them with comma - * and surround the list with brackets. - * - * \retval number of characters written - */ -int -cfs_expr_list_print(char *buffer, int count, struct cfs_expr_list *expr_list) -{ - struct cfs_range_expr *expr; - int i = 0, j = 0; - int numexprs = 0; - - if (count <= 0) - return 0; - - list_for_each_entry(expr, &expr_list->el_exprs, re_link) - numexprs++; - - if (numexprs > 1) - i += scnprintf(buffer + i, count - i, "["); - - list_for_each_entry(expr, &expr_list->el_exprs, re_link) { - if (j++) - i += scnprintf(buffer + i, count - i, ","); - i += cfs_range_expr_print(buffer + i, count - i, expr, - numexprs > 1); - } - - if (numexprs > 1) - i += scnprintf(buffer + i, count - i, "]"); - - return i; -} -EXPORT_SYMBOL(cfs_expr_list_print); - -/** - * Matches value (\a value) against ranges expression list \a expr_list. 
- * - * \retval 1 if \a value matches - * \retval 0 otherwise - */ -int -cfs_expr_list_match(u32 value, struct cfs_expr_list *expr_list) -{ - struct cfs_range_expr *expr; - - list_for_each_entry(expr, &expr_list->el_exprs, re_link) { - if (value >= expr->re_lo && value <= expr->re_hi && - !((value - expr->re_lo) % expr->re_stride)) - return 1; - } - - return 0; -} -EXPORT_SYMBOL(cfs_expr_list_match); - -/** - * Convert express list (\a expr_list) to an array of all matched values - * - * \retval N N is total number of all matched values - * \retval 0 if expression list is empty - * \retval < 0 for failure - */ -int -cfs_expr_list_values(struct cfs_expr_list *expr_list, int max, u32 **valpp) -{ - struct cfs_range_expr *expr; - u32 *val; - int count = 0; - int i; - - list_for_each_entry(expr, &expr_list->el_exprs, re_link) { - for (i = expr->re_lo; i <= expr->re_hi; i++) { - if (!((i - expr->re_lo) % expr->re_stride)) - count++; - } - } - - if (!count) /* empty expression list */ - return 0; - - if (count > max) { - CERROR("Number of values %d exceeds max allowed %d\n", - max, count); - return -EINVAL; - } - - val = kvmalloc_array(count, sizeof(val[0]), GFP_KERNEL | __GFP_ZERO); - if (!val) - return -ENOMEM; - - count = 0; - list_for_each_entry(expr, &expr_list->el_exprs, re_link) { - for (i = expr->re_lo; i <= expr->re_hi; i++) { - if (!((i - expr->re_lo) % expr->re_stride)) - val[count++] = i; - } - } - - *valpp = val; - return count; -} -EXPORT_SYMBOL(cfs_expr_list_values); - -/** - * Frees cfs_range_expr structures of \a expr_list. - * - * \retval none - */ -void -cfs_expr_list_free(struct cfs_expr_list *expr_list) -{ - while (!list_empty(&expr_list->el_exprs)) { - struct cfs_range_expr *expr; - - expr = list_entry(expr_list->el_exprs.next, - struct cfs_range_expr, re_link); - list_del(&expr->re_link); - kfree(expr); - } - - kfree(expr_list); -} -EXPORT_SYMBOL(cfs_expr_list_free); - -/** - * Parses \<cfs_expr_list\> token of the syntax. 
- * - * \retval 0 if \a str parses to \<number\> | \<expr_list\> - * \retval -errno otherwise - */ -int -cfs_expr_list_parse(char *str, int len, unsigned int min, unsigned int max, - struct cfs_expr_list **elpp) -{ - struct cfs_expr_list *expr_list; - struct cfs_range_expr *expr; - struct cfs_lstr src; - int rc; - - expr_list = kzalloc(sizeof(*expr_list), GFP_NOFS); - if (!expr_list) - return -ENOMEM; - - src.ls_str = str; - src.ls_len = len; - - INIT_LIST_HEAD(&expr_list->el_exprs); - - if (src.ls_str[0] == '[' && - src.ls_str[src.ls_len - 1] == ']') { - src.ls_str++; - src.ls_len -= 2; - - rc = -EINVAL; - while (src.ls_str) { - struct cfs_lstr tok; - - if (!cfs_gettok(&src, ',', &tok)) { - rc = -EINVAL; - break; - } - - rc = cfs_range_expr_parse(&tok, min, max, 1, &expr); - if (rc) - break; - - list_add_tail(&expr->re_link, &expr_list->el_exprs); - } - } else { - rc = cfs_range_expr_parse(&src, min, max, 0, &expr); - if (!rc) - list_add_tail(&expr->re_link, &expr_list->el_exprs); - } - - if (rc) - cfs_expr_list_free(expr_list); - else - *elpp = expr_list; - - return rc; -} -EXPORT_SYMBOL(cfs_expr_list_parse); - -/** - * Frees cfs_expr_list structures of \a list. - * - * For each struct cfs_expr_list structure found on \a list it frees - * range_expr list attached to it and frees the cfs_expr_list itself. 
- * - * \retval none - */ -void -cfs_expr_list_free_list(struct list_head *list) -{ - struct cfs_expr_list *el; - - while (!list_empty(list)) { - el = list_entry(list->next, struct cfs_expr_list, el_link); - list_del(&el->el_link); - cfs_expr_list_free(el); - } -} -EXPORT_SYMBOL(cfs_expr_list_free_list); diff --git a/drivers/staging/lustre/lnet/libcfs/linux/linux-cpu.c b/drivers/staging/lustre/lnet/libcfs/linux/linux-cpu.c deleted file mode 100644 index 388521e4e354..000000000000 --- a/drivers/staging/lustre/lnet/libcfs/linux/linux-cpu.c +++ /dev/null @@ -1,1079 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -/* - * GPL HEADER START - * - * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 only, - * as published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, but - * WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * General Public License version 2 for more details (a copy is included - * in the LICENSE file that accompanied this code). - * - * GPL HEADER END - */ -/* - * Copyright (c) 2010, Oracle and/or its affiliates. All rights reserved. - * - * Copyright (c) 2012, 2015 Intel Corporation. - */ -/* - * This file is part of Lustre, http://www.lustre.org/ - * Lustre is a trademark of Sun Microsystems, Inc. 
- * - * Author: liang@whamcloud.com - */ - -#define DEBUG_SUBSYSTEM S_LNET - -#include <linux/cpu.h> -#include <linux/sched.h> -#include <linux/libcfs/libcfs.h> - -#ifdef CONFIG_SMP - -/** - * modparam for setting number of partitions - * - * 0 : estimate best value based on cores or NUMA nodes - * 1 : disable multiple partitions - * >1 : specify number of partitions - */ -static int cpu_npartitions; -module_param(cpu_npartitions, int, 0444); -MODULE_PARM_DESC(cpu_npartitions, "# of CPU partitions"); - -/** - * modparam for setting CPU partitions patterns: - * - * i.e: "0[0,1,2,3] 1[4,5,6,7]", number before bracket is CPU partition ID, - * number in bracket is processor ID (core or HT) - * - * i.e: "N 0[0,1] 1[2,3]" the first character 'N' means numbers in bracket - * are NUMA node ID, number before bracket is CPU partition ID. - * - * i.e: "N", shortcut expression to create CPT from NUMA & CPU topology - * - * NB: If user specified cpu_pattern, cpu_npartitions will be ignored - */ -static char *cpu_pattern = "N"; -module_param(cpu_pattern, charp, 0444); -MODULE_PARM_DESC(cpu_pattern, "CPU partitions pattern"); - -struct cfs_cpt_data { - /* serialize hotplug etc */ - spinlock_t cpt_lock; - /* reserved for hotplug */ - unsigned long cpt_version; - /* mutex to protect cpt_cpumask */ - struct mutex cpt_mutex; - /* scratch buffer for set/unset_node */ - cpumask_var_t cpt_cpumask; -}; - -static struct cfs_cpt_data cpt_data; - -static void -cfs_node_to_cpumask(int node, cpumask_t *mask) -{ - const cpumask_t *tmp = cpumask_of_node(node); - - if (tmp) - cpumask_copy(mask, tmp); - else - cpumask_clear(mask); -} - -void -cfs_cpt_table_free(struct cfs_cpt_table *cptab) -{ - int i; - - kvfree(cptab->ctb_cpu2cpt); - - for (i = 0; cptab->ctb_parts && i < cptab->ctb_nparts; i++) { - struct cfs_cpu_partition *part = &cptab->ctb_parts[i]; - - kfree(part->cpt_nodemask); - free_cpumask_var(part->cpt_cpumask); - } - - kvfree(cptab->ctb_parts); - - kfree(cptab->ctb_nodemask); - 
free_cpumask_var(cptab->ctb_cpumask); - - kfree(cptab); -} -EXPORT_SYMBOL(cfs_cpt_table_free); - -struct cfs_cpt_table * -cfs_cpt_table_alloc(unsigned int ncpt) -{ - struct cfs_cpt_table *cptab; - int i; - - cptab = kzalloc(sizeof(*cptab), GFP_NOFS); - if (!cptab) - return NULL; - - cptab->ctb_nparts = ncpt; - - cptab->ctb_nodemask = kzalloc(sizeof(*cptab->ctb_nodemask), - GFP_NOFS); - if (!zalloc_cpumask_var(&cptab->ctb_cpumask, GFP_NOFS) || - !cptab->ctb_nodemask) - goto failed; - - cptab->ctb_cpu2cpt = kvmalloc_array(num_possible_cpus(), - sizeof(cptab->ctb_cpu2cpt[0]), - GFP_KERNEL); - if (!cptab->ctb_cpu2cpt) - goto failed; - - memset(cptab->ctb_cpu2cpt, -1, - num_possible_cpus() * sizeof(cptab->ctb_cpu2cpt[0])); - - cptab->ctb_parts = kvmalloc_array(ncpt, sizeof(cptab->ctb_parts[0]), - GFP_KERNEL); - if (!cptab->ctb_parts) - goto failed; - - for (i = 0; i < ncpt; i++) { - struct cfs_cpu_partition *part = &cptab->ctb_parts[i]; - - part->cpt_nodemask = kzalloc(sizeof(*part->cpt_nodemask), - GFP_NOFS); - if (!zalloc_cpumask_var(&part->cpt_cpumask, GFP_NOFS) || - !part->cpt_nodemask) - goto failed; - } - - spin_lock(&cpt_data.cpt_lock); - /* Reserved for hotplug */ - cptab->ctb_version = cpt_data.cpt_version; - spin_unlock(&cpt_data.cpt_lock); - - return cptab; - - failed: - cfs_cpt_table_free(cptab); - return NULL; -} -EXPORT_SYMBOL(cfs_cpt_table_alloc); - -int -cfs_cpt_table_print(struct cfs_cpt_table *cptab, char *buf, int len) -{ - char *tmp = buf; - int rc = 0; - int i; - int j; - - for (i = 0; i < cptab->ctb_nparts; i++) { - if (len > 0) { - rc = snprintf(tmp, len, "%d\t: ", i); - len -= rc; - } - - if (len <= 0) { - rc = -EFBIG; - goto out; - } - - tmp += rc; - for_each_cpu(j, cptab->ctb_parts[i].cpt_cpumask) { - rc = snprintf(tmp, len, "%d ", j); - len -= rc; - if (len <= 0) { - rc = -EFBIG; - goto out; - } - tmp += rc; - } - - *tmp = '\n'; - tmp++; - len--; - } - - out: - if (rc < 0) - return rc; - - return tmp - buf; -} 
-EXPORT_SYMBOL(cfs_cpt_table_print); - -int -cfs_cpt_number(struct cfs_cpt_table *cptab) -{ - return cptab->ctb_nparts; -} -EXPORT_SYMBOL(cfs_cpt_number); - -int -cfs_cpt_weight(struct cfs_cpt_table *cptab, int cpt) -{ - LASSERT(cpt == CFS_CPT_ANY || (cpt >= 0 && cpt < cptab->ctb_nparts)); - - return cpt == CFS_CPT_ANY ? - cpumask_weight(cptab->ctb_cpumask) : - cpumask_weight(cptab->ctb_parts[cpt].cpt_cpumask); -} -EXPORT_SYMBOL(cfs_cpt_weight); - -int -cfs_cpt_online(struct cfs_cpt_table *cptab, int cpt) -{ - LASSERT(cpt == CFS_CPT_ANY || (cpt >= 0 && cpt < cptab->ctb_nparts)); - - return cpt == CFS_CPT_ANY ? - cpumask_any_and(cptab->ctb_cpumask, - cpu_online_mask) < nr_cpu_ids : - cpumask_any_and(cptab->ctb_parts[cpt].cpt_cpumask, - cpu_online_mask) < nr_cpu_ids; -} -EXPORT_SYMBOL(cfs_cpt_online); - -cpumask_var_t * -cfs_cpt_cpumask(struct cfs_cpt_table *cptab, int cpt) -{ - LASSERT(cpt == CFS_CPT_ANY || (cpt >= 0 && cpt < cptab->ctb_nparts)); - - return cpt == CFS_CPT_ANY ? - &cptab->ctb_cpumask : &cptab->ctb_parts[cpt].cpt_cpumask; -} -EXPORT_SYMBOL(cfs_cpt_cpumask); - -nodemask_t * -cfs_cpt_nodemask(struct cfs_cpt_table *cptab, int cpt) -{ - LASSERT(cpt == CFS_CPT_ANY || (cpt >= 0 && cpt < cptab->ctb_nparts)); - - return cpt == CFS_CPT_ANY ? 
- cptab->ctb_nodemask : cptab->ctb_parts[cpt].cpt_nodemask; -} -EXPORT_SYMBOL(cfs_cpt_nodemask); - -int -cfs_cpt_set_cpu(struct cfs_cpt_table *cptab, int cpt, int cpu) -{ - int node; - - LASSERT(cpt >= 0 && cpt < cptab->ctb_nparts); - - if (cpu < 0 || cpu >= nr_cpu_ids || !cpu_online(cpu)) { - CDEBUG(D_INFO, "CPU %d is invalid or it's offline\n", cpu); - return 0; - } - - if (cptab->ctb_cpu2cpt[cpu] != -1) { - CDEBUG(D_INFO, "CPU %d is already in partition %d\n", - cpu, cptab->ctb_cpu2cpt[cpu]); - return 0; - } - - cptab->ctb_cpu2cpt[cpu] = cpt; - - LASSERT(!cpumask_test_cpu(cpu, cptab->ctb_cpumask)); - LASSERT(!cpumask_test_cpu(cpu, cptab->ctb_parts[cpt].cpt_cpumask)); - - cpumask_set_cpu(cpu, cptab->ctb_cpumask); - cpumask_set_cpu(cpu, cptab->ctb_parts[cpt].cpt_cpumask); - - node = cpu_to_node(cpu); - - /* first CPU of @node in this CPT table */ - if (!node_isset(node, *cptab->ctb_nodemask)) - node_set(node, *cptab->ctb_nodemask); - - /* first CPU of @node in this partition */ - if (!node_isset(node, *cptab->ctb_parts[cpt].cpt_nodemask)) - node_set(node, *cptab->ctb_parts[cpt].cpt_nodemask); - - return 1; -} -EXPORT_SYMBOL(cfs_cpt_set_cpu); - -void -cfs_cpt_unset_cpu(struct cfs_cpt_table *cptab, int cpt, int cpu) -{ - int node; - int i; - - LASSERT(cpt == CFS_CPT_ANY || (cpt >= 0 && cpt < cptab->ctb_nparts)); - - if (cpu < 0 || cpu >= nr_cpu_ids) { - CDEBUG(D_INFO, "Invalid CPU id %d\n", cpu); - return; - } - - if (cpt == CFS_CPT_ANY) { - /* caller doesn't know the partition ID */ - cpt = cptab->ctb_cpu2cpt[cpu]; - if (cpt < 0) { /* not set in this CPT-table */ - CDEBUG(D_INFO, "Try to unset cpu %d which is not in CPT-table %p\n", - cpt, cptab); - return; - } - - } else if (cpt != cptab->ctb_cpu2cpt[cpu]) { - CDEBUG(D_INFO, - "CPU %d is not in cpu-partition %d\n", cpu, cpt); - return; - } - - LASSERT(cpumask_test_cpu(cpu, cptab->ctb_parts[cpt].cpt_cpumask)); - LASSERT(cpumask_test_cpu(cpu, cptab->ctb_cpumask)); - - cpumask_clear_cpu(cpu, 
cptab->ctb_parts[cpt].cpt_cpumask); - cpumask_clear_cpu(cpu, cptab->ctb_cpumask); - cptab->ctb_cpu2cpt[cpu] = -1; - - node = cpu_to_node(cpu); - - LASSERT(node_isset(node, *cptab->ctb_parts[cpt].cpt_nodemask)); - LASSERT(node_isset(node, *cptab->ctb_nodemask)); - - for_each_cpu(i, cptab->ctb_parts[cpt].cpt_cpumask) { - /* this CPT has other CPU belonging to this node? */ - if (cpu_to_node(i) == node) - break; - } - - if (i >= nr_cpu_ids) - node_clear(node, *cptab->ctb_parts[cpt].cpt_nodemask); - - for_each_cpu(i, cptab->ctb_cpumask) { - /* this CPT-table has other CPU belonging to this node? */ - if (cpu_to_node(i) == node) - break; - } - - if (i >= nr_cpu_ids) - node_clear(node, *cptab->ctb_nodemask); -} -EXPORT_SYMBOL(cfs_cpt_unset_cpu); - -int -cfs_cpt_set_cpumask(struct cfs_cpt_table *cptab, int cpt, cpumask_t *mask) -{ - int i; - - if (!cpumask_weight(mask) || - cpumask_any_and(mask, cpu_online_mask) >= nr_cpu_ids) { - CDEBUG(D_INFO, "No online CPU is found in the CPU mask for CPU partition %d\n", - cpt); - return 0; - } - - for_each_cpu(i, mask) { - if (!cfs_cpt_set_cpu(cptab, cpt, i)) - return 0; - } - - return 1; -} -EXPORT_SYMBOL(cfs_cpt_set_cpumask); - -void -cfs_cpt_unset_cpumask(struct cfs_cpt_table *cptab, int cpt, cpumask_t *mask) -{ - int i; - - for_each_cpu(i, mask) - cfs_cpt_unset_cpu(cptab, cpt, i); -} -EXPORT_SYMBOL(cfs_cpt_unset_cpumask); - -int -cfs_cpt_set_node(struct cfs_cpt_table *cptab, int cpt, int node) -{ - int rc; - - if (node < 0 || node >= MAX_NUMNODES) { - CDEBUG(D_INFO, - "Invalid NUMA id %d for CPU partition %d\n", node, cpt); - return 0; - } - - mutex_lock(&cpt_data.cpt_mutex); - - cfs_node_to_cpumask(node, cpt_data.cpt_cpumask); - - rc = cfs_cpt_set_cpumask(cptab, cpt, cpt_data.cpt_cpumask); - - mutex_unlock(&cpt_data.cpt_mutex); - - return rc; -} -EXPORT_SYMBOL(cfs_cpt_set_node); - -void -cfs_cpt_unset_node(struct cfs_cpt_table *cptab, int cpt, int node) -{ - if (node < 0 || node >= MAX_NUMNODES) { - CDEBUG(D_INFO, - "Invalid 
NUMA id %d for CPU partition %d\n", node, cpt); - return; - } - - mutex_lock(&cpt_data.cpt_mutex); - - cfs_node_to_cpumask(node, cpt_data.cpt_cpumask); - - cfs_cpt_unset_cpumask(cptab, cpt, cpt_data.cpt_cpumask); - - mutex_unlock(&cpt_data.cpt_mutex); -} -EXPORT_SYMBOL(cfs_cpt_unset_node); - -int -cfs_cpt_set_nodemask(struct cfs_cpt_table *cptab, int cpt, nodemask_t *mask) -{ - int i; - - for_each_node_mask(i, *mask) { - if (!cfs_cpt_set_node(cptab, cpt, i)) - return 0; - } - - return 1; -} -EXPORT_SYMBOL(cfs_cpt_set_nodemask); - -void -cfs_cpt_unset_nodemask(struct cfs_cpt_table *cptab, int cpt, nodemask_t *mask) -{ - int i; - - for_each_node_mask(i, *mask) - cfs_cpt_unset_node(cptab, cpt, i); -} -EXPORT_SYMBOL(cfs_cpt_unset_nodemask); - -void -cfs_cpt_clear(struct cfs_cpt_table *cptab, int cpt) -{ - int last; - int i; - - if (cpt == CFS_CPT_ANY) { - last = cptab->ctb_nparts - 1; - cpt = 0; - } else { - last = cpt; - } - - for (; cpt <= last; cpt++) { - for_each_cpu(i, cptab->ctb_parts[cpt].cpt_cpumask) - cfs_cpt_unset_cpu(cptab, cpt, i); - } -} -EXPORT_SYMBOL(cfs_cpt_clear); - -int -cfs_cpt_spread_node(struct cfs_cpt_table *cptab, int cpt) -{ - nodemask_t *mask; - int weight; - int rotor; - int node; - - /* convert CPU partition ID to HW node id */ - - if (cpt < 0 || cpt >= cptab->ctb_nparts) { - mask = cptab->ctb_nodemask; - rotor = cptab->ctb_spread_rotor++; - } else { - mask = cptab->ctb_parts[cpt].cpt_nodemask; - rotor = cptab->ctb_parts[cpt].cpt_spread_rotor++; - } - - weight = nodes_weight(*mask); - LASSERT(weight > 0); - - rotor %= weight; - - for_each_node_mask(node, *mask) { - if (!rotor--) - return node; - } - - LBUG(); - return 0; -} -EXPORT_SYMBOL(cfs_cpt_spread_node); - -int -cfs_cpt_current(struct cfs_cpt_table *cptab, int remap) -{ - int cpu; - int cpt; - - preempt_disable(); - cpu = smp_processor_id(); - cpt = cptab->ctb_cpu2cpt[cpu]; - - if (cpt < 0 && remap) { - /* don't return negative value for safety of upper layer, - * instead we shadow the 
unknown cpu to a valid partition ID - */ - cpt = cpu % cptab->ctb_nparts; - } - preempt_enable(); - return cpt; -} -EXPORT_SYMBOL(cfs_cpt_current); - -int -cfs_cpt_of_cpu(struct cfs_cpt_table *cptab, int cpu) -{ - LASSERT(cpu >= 0 && cpu < nr_cpu_ids); - - return cptab->ctb_cpu2cpt[cpu]; -} -EXPORT_SYMBOL(cfs_cpt_of_cpu); - -int -cfs_cpt_bind(struct cfs_cpt_table *cptab, int cpt) -{ - cpumask_var_t *cpumask; - nodemask_t *nodemask; - int rc; - int i; - - LASSERT(cpt == CFS_CPT_ANY || (cpt >= 0 && cpt < cptab->ctb_nparts)); - - if (cpt == CFS_CPT_ANY) { - cpumask = &cptab->ctb_cpumask; - nodemask = cptab->ctb_nodemask; - } else { - cpumask = &cptab->ctb_parts[cpt].cpt_cpumask; - nodemask = cptab->ctb_parts[cpt].cpt_nodemask; - } - - if (cpumask_any_and(*cpumask, cpu_online_mask) >= nr_cpu_ids) { - CERROR("No online CPU found in CPU partition %d, did someone do CPU hotplug on system? You might need to reload Lustre modules to keep system working well.\n", - cpt); - return -EINVAL; - } - - for_each_online_cpu(i) { - if (cpumask_test_cpu(i, *cpumask)) - continue; - - rc = set_cpus_allowed_ptr(current, *cpumask); - set_mems_allowed(*nodemask); - if (!rc) - schedule(); /* switch to allowed CPU */ - - return rc; - } - - /* don't need to set affinity because all online CPUs are covered */ - return 0; -} -EXPORT_SYMBOL(cfs_cpt_bind); - -/** - * Choose max to \a number CPUs from \a node and set them in \a cpt. - * We always prefer to choose CPU in the same core/socket. 
- */ -static int -cfs_cpt_choose_ncpus(struct cfs_cpt_table *cptab, int cpt, - cpumask_t *node, int number) -{ - cpumask_var_t socket; - cpumask_var_t core; - int rc = 0; - int cpu; - - LASSERT(number > 0); - - if (number >= cpumask_weight(node)) { - while (!cpumask_empty(node)) { - cpu = cpumask_first(node); - - rc = cfs_cpt_set_cpu(cptab, cpt, cpu); - if (!rc) - return -EINVAL; - cpumask_clear_cpu(cpu, node); - } - return 0; - } - - /* - * Allocate scratch buffers - * As we cannot initialize a cpumask_var_t, we need - * to alloc both before we can risk trying to free either - */ - if (!zalloc_cpumask_var(&socket, GFP_NOFS)) - rc = -ENOMEM; - if (!zalloc_cpumask_var(&core, GFP_NOFS)) - rc = -ENOMEM; - if (rc) - goto out; - - while (!cpumask_empty(node)) { - cpu = cpumask_first(node); - - /* get cpumask for cores in the same socket */ - cpumask_copy(socket, topology_core_cpumask(cpu)); - cpumask_and(socket, socket, node); - - LASSERT(!cpumask_empty(socket)); - - while (!cpumask_empty(socket)) { - int i; - - /* get cpumask for hts in the same core */ - cpumask_copy(core, topology_sibling_cpumask(cpu)); - cpumask_and(core, core, node); - - LASSERT(!cpumask_empty(core)); - - for_each_cpu(i, core) { - cpumask_clear_cpu(i, socket); - cpumask_clear_cpu(i, node); - - rc = cfs_cpt_set_cpu(cptab, cpt, i); - if (!rc) { - rc = -EINVAL; - goto out; - } - - if (!--number) - goto out; - } - cpu = cpumask_first(socket); - } - } - -out: - free_cpumask_var(socket); - free_cpumask_var(core); - return rc; -} - -#define CPT_WEIGHT_MIN 4u - -static unsigned int -cfs_cpt_num_estimate(void) -{ - unsigned int nnode = num_online_nodes(); - unsigned int ncpu = num_online_cpus(); - unsigned int ncpt; - - if (ncpu <= CPT_WEIGHT_MIN) { - ncpt = 1; - goto out; - } - - /* generate reasonable number of CPU partitions based on total number - * of CPUs, Preferred N should be power2 and match this condition: - * 2 * (N - 1)^2 < NCPUS <= 2 * N^2 - */ - for (ncpt = 2; ncpu > 2 * ncpt * ncpt; ncpt <<= 
1) - ; - - if (ncpt <= nnode) { /* fat numa system */ - while (nnode > ncpt) - nnode >>= 1; - - } else { /* ncpt > nnode */ - while ((nnode << 1) <= ncpt) - nnode <<= 1; - } - - ncpt = nnode; - -out: -#if (BITS_PER_LONG == 32) - /* config many CPU partitions on 32-bit system could consume - * too much memory - */ - ncpt = min(2U, ncpt); -#endif - while (ncpu % ncpt) - ncpt--; /* worst case is 1 */ - - return ncpt; -} - -static struct cfs_cpt_table * -cfs_cpt_table_create(int ncpt) -{ - struct cfs_cpt_table *cptab = NULL; - cpumask_var_t mask; - int cpt = 0; - int num; - int rc; - int i; - - rc = cfs_cpt_num_estimate(); - if (ncpt <= 0) - ncpt = rc; - - if (ncpt > num_online_cpus() || ncpt > 4 * rc) { - CWARN("CPU partition number %d is larger than suggested value (%d), your system may have performance issue or run out of memory while under pressure\n", - ncpt, rc); - } - - if (num_online_cpus() % ncpt) { - CERROR("CPU number %d is not multiple of cpu_npartition %d, please try different cpu_npartitions value or set pattern string by cpu_pattern=STRING\n", - (int)num_online_cpus(), ncpt); - goto failed; - } - - cptab = cfs_cpt_table_alloc(ncpt); - if (!cptab) { - CERROR("Failed to allocate CPU map(%d)\n", ncpt); - goto failed; - } - - num = num_online_cpus() / ncpt; - if (!num) { - CERROR("CPU changed while setting CPU partition\n"); - goto failed; - } - - if (!zalloc_cpumask_var(&mask, GFP_NOFS)) { - CERROR("Failed to allocate scratch cpumask\n"); - goto failed; - } - - for_each_online_node(i) { - cfs_node_to_cpumask(i, mask); - - while (!cpumask_empty(mask)) { - struct cfs_cpu_partition *part; - int n; - - /* - * Each emulated NUMA node has all allowed CPUs in - * the mask. - * End loop when all partitions have assigned CPUs. 
- */ - if (cpt == ncpt) - break; - - part = &cptab->ctb_parts[cpt]; - - n = num - cpumask_weight(part->cpt_cpumask); - LASSERT(n > 0); - - rc = cfs_cpt_choose_ncpus(cptab, cpt, mask, n); - if (rc < 0) - goto failed_mask; - - LASSERT(num >= cpumask_weight(part->cpt_cpumask)); - if (num == cpumask_weight(part->cpt_cpumask)) - cpt++; - } - } - - if (cpt != ncpt || - num != cpumask_weight(cptab->ctb_parts[ncpt - 1].cpt_cpumask)) { - CERROR("Expect %d(%d) CPU partitions but got %d(%d), CPU hotplug/unplug while setting?\n", - cptab->ctb_nparts, num, cpt, - cpumask_weight(cptab->ctb_parts[ncpt - 1].cpt_cpumask)); - goto failed_mask; - } - - free_cpumask_var(mask); - - return cptab; - - failed_mask: - free_cpumask_var(mask); - failed: - CERROR("Failed to setup CPU-partition-table with %d CPU-partitions, online HW nodes: %d, HW cpus: %d.\n", - ncpt, num_online_nodes(), num_online_cpus()); - - if (cptab) - cfs_cpt_table_free(cptab); - - return NULL; -} - -static struct cfs_cpt_table * -cfs_cpt_table_create_pattern(char *pattern) -{ - struct cfs_cpt_table *cptab; - char *str; - int node = 0; - int high; - int ncpt = 0; - int cpt; - int rc; - int c; - int i; - - str = strim(pattern); - if (*str == 'n' || *str == 'N') { - pattern = str + 1; - if (*pattern != '\0') { - node = 1; - } else { /* shortcut to create CPT from NUMA & CPU topology */ - node = -1; - ncpt = num_online_nodes(); - } - } - - if (!ncpt) { /* scanning bracket which is mark of partition */ - for (str = pattern;; str++, ncpt++) { - str = strchr(str, '['); - if (!str) - break; - } - } - - if (!ncpt || - (node && ncpt > num_online_nodes()) || - (!node && ncpt > num_online_cpus())) { - CERROR("Invalid pattern %s, or too many partitions %d\n", - pattern, ncpt); - return NULL; - } - - cptab = cfs_cpt_table_alloc(ncpt); - if (!cptab) { - CERROR("Failed to allocate cpu partition table\n"); - return NULL; - } - - if (node < 0) { /* shortcut to create CPT from NUMA & CPU topology */ - cpt = 0; - - for_each_online_node(i) 
{ - if (cpt >= ncpt) { - CERROR("CPU changed while setting CPU partition table, %d/%d\n", - cpt, ncpt); - goto failed; - } - - rc = cfs_cpt_set_node(cptab, cpt++, i); - if (!rc) - goto failed; - } - return cptab; - } - - high = node ? MAX_NUMNODES - 1 : nr_cpu_ids - 1; - - for (str = strim(pattern), c = 0;; c++) { - struct cfs_range_expr *range; - struct cfs_expr_list *el; - char *bracket = strchr(str, '['); - int n; - - if (!bracket) { - if (*str) { - CERROR("Invalid pattern %s\n", str); - goto failed; - } - if (c != ncpt) { - CERROR("expect %d partitions but found %d\n", - ncpt, c); - goto failed; - } - break; - } - - if (sscanf(str, "%d%n", &cpt, &n) < 1) { - CERROR("Invalid cpu pattern %s\n", str); - goto failed; - } - - if (cpt < 0 || cpt >= ncpt) { - CERROR("Invalid partition id %d, total partitions %d\n", - cpt, ncpt); - goto failed; - } - - if (cfs_cpt_weight(cptab, cpt)) { - CERROR("Partition %d has already been set.\n", cpt); - goto failed; - } - - str = strim(str + n); - if (str != bracket) { - CERROR("Invalid pattern %s\n", str); - goto failed; - } - - bracket = strchr(str, ']'); - if (!bracket) { - CERROR("missing right bracket for cpt %d, %s\n", - cpt, str); - goto failed; - } - - if (cfs_expr_list_parse(str, (bracket - str) + 1, - 0, high, &el)) { - CERROR("Can't parse number range: %s\n", str); - goto failed; - } - - list_for_each_entry(range, &el->el_exprs, re_link) { - for (i = range->re_lo; i <= range->re_hi; i++) { - if ((i - range->re_lo) % range->re_stride) - continue; - - rc = node ? 
cfs_cpt_set_node(cptab, cpt, i) : - cfs_cpt_set_cpu(cptab, cpt, i); - if (!rc) { - cfs_expr_list_free(el); - goto failed; - } - } - } - - cfs_expr_list_free(el); - - if (!cfs_cpt_online(cptab, cpt)) { - CERROR("No online CPU is found on partition %d\n", cpt); - goto failed; - } - - str = strim(bracket + 1); - } - - return cptab; - - failed: - cfs_cpt_table_free(cptab); - return NULL; -} - -#ifdef CONFIG_HOTPLUG_CPU -static enum cpuhp_state lustre_cpu_online; - -static void cfs_cpu_incr_cpt_version(void) -{ - spin_lock(&cpt_data.cpt_lock); - cpt_data.cpt_version++; - spin_unlock(&cpt_data.cpt_lock); -} - -static int cfs_cpu_online(unsigned int cpu) -{ - cfs_cpu_incr_cpt_version(); - return 0; -} - -static int cfs_cpu_dead(unsigned int cpu) -{ - bool warn; - - cfs_cpu_incr_cpt_version(); - - mutex_lock(&cpt_data.cpt_mutex); - /* if all HTs in a core are offline, it may break affinity */ - cpumask_copy(cpt_data.cpt_cpumask, topology_sibling_cpumask(cpu)); - warn = cpumask_any_and(cpt_data.cpt_cpumask, - cpu_online_mask) >= nr_cpu_ids; - mutex_unlock(&cpt_data.cpt_mutex); - CDEBUG(warn ? 
D_WARNING : D_INFO, - "Lustre: can't support CPU plug-out well now, performance and stability could be impacted [CPU %u]\n", - cpu); - return 0; -} -#endif - -void -cfs_cpu_fini(void) -{ - if (cfs_cpt_table) - cfs_cpt_table_free(cfs_cpt_table); - -#ifdef CONFIG_HOTPLUG_CPU - if (lustre_cpu_online > 0) - cpuhp_remove_state_nocalls(lustre_cpu_online); - cpuhp_remove_state_nocalls(CPUHP_LUSTRE_CFS_DEAD); -#endif - free_cpumask_var(cpt_data.cpt_cpumask); -} - -int -cfs_cpu_init(void) -{ - int ret = 0; - - LASSERT(!cfs_cpt_table); - - memset(&cpt_data, 0, sizeof(cpt_data)); - - if (!zalloc_cpumask_var(&cpt_data.cpt_cpumask, GFP_NOFS)) { - CERROR("Failed to allocate scratch buffer\n"); - return -1; - } - - spin_lock_init(&cpt_data.cpt_lock); - mutex_init(&cpt_data.cpt_mutex); - -#ifdef CONFIG_HOTPLUG_CPU - ret = cpuhp_setup_state_nocalls(CPUHP_LUSTRE_CFS_DEAD, - "staging/lustre/cfe:dead", NULL, - cfs_cpu_dead); - if (ret < 0) - goto failed; - ret = cpuhp_setup_state_nocalls(CPUHP_AP_ONLINE_DYN, - "staging/lustre/cfe:online", - cfs_cpu_online, NULL); - if (ret < 0) - goto failed; - lustre_cpu_online = ret; -#endif - ret = -EINVAL; - - if (*cpu_pattern) { - char *cpu_pattern_dup = kstrdup(cpu_pattern, GFP_KERNEL); - - if (!cpu_pattern_dup) { - CERROR("Failed to duplicate cpu_pattern\n"); - goto failed; - } - - cfs_cpt_table = cfs_cpt_table_create_pattern(cpu_pattern_dup); - kfree(cpu_pattern_dup); - if (!cfs_cpt_table) { - CERROR("Failed to create cptab from pattern %s\n", - cpu_pattern); - goto failed; - } - - } else { - cfs_cpt_table = cfs_cpt_table_create(cpu_npartitions); - if (!cfs_cpt_table) { - CERROR("Failed to create ptable with npartitions %d\n", - cpu_npartitions); - goto failed; - } - } - - spin_lock(&cpt_data.cpt_lock); - if (cfs_cpt_table->ctb_version != cpt_data.cpt_version) { - spin_unlock(&cpt_data.cpt_lock); - CERROR("CPU hotplug/unplug during setup\n"); - goto failed; - } - spin_unlock(&cpt_data.cpt_lock); - - LCONSOLE(0, "HW nodes: %d, HW CPU cores: %d, 
npartitions: %d\n", - num_online_nodes(), num_online_cpus(), - cfs_cpt_number(cfs_cpt_table)); - return 0; - - failed: - cfs_cpu_fini(); - return ret; -} - -#endif diff --git a/drivers/staging/lustre/lnet/libcfs/linux/linux-crypto-adler.c b/drivers/staging/lustre/lnet/libcfs/linux/linux-crypto-adler.c deleted file mode 100644 index db81ed527452..000000000000 --- a/drivers/staging/lustre/lnet/libcfs/linux/linux-crypto-adler.c +++ /dev/null @@ -1,139 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -/* GPL HEADER START - * - * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 only, - * as published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, but - * WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * General Public License version 2 for more details (a copy is included - * in the LICENSE file that accompanied this code). - * - * You should have received a copy of the GNU General Public License - * version 2 along with this program; If not, see http://www.gnu.org/licenses - * - * Please visit http://www.xyratex.com/contact if you need additional - * information or have any questions. - * - * GPL HEADER END - */ - -/* - * Copyright 2012 Xyratex Technology Limited - */ - -/* - * This is crypto api shash wrappers to zlib_adler32. 
- */ - -#include <linux/module.h> -#include <linux/zutil.h> -#include <crypto/internal/hash.h> -#include "linux-crypto.h" - -#define CHKSUM_BLOCK_SIZE 1 -#define CHKSUM_DIGEST_SIZE 4 - -static int adler32_cra_init(struct crypto_tfm *tfm) -{ - u32 *key = crypto_tfm_ctx(tfm); - - *key = 1; - - return 0; -} - -static int adler32_setkey(struct crypto_shash *hash, const u8 *key, - unsigned int keylen) -{ - u32 *mctx = crypto_shash_ctx(hash); - - if (keylen != sizeof(u32)) { - crypto_shash_set_flags(hash, CRYPTO_TFM_RES_BAD_KEY_LEN); - return -EINVAL; - } - *mctx = *(u32 *)key; - return 0; -} - -static int adler32_init(struct shash_desc *desc) -{ - u32 *mctx = crypto_shash_ctx(desc->tfm); - u32 *cksump = shash_desc_ctx(desc); - - *cksump = *mctx; - - return 0; -} - -static int adler32_update(struct shash_desc *desc, const u8 *data, - unsigned int len) -{ - u32 *cksump = shash_desc_ctx(desc); - - *cksump = zlib_adler32(*cksump, data, len); - return 0; -} - -static int __adler32_finup(u32 *cksump, const u8 *data, unsigned int len, - u8 *out) -{ - *(u32 *)out = zlib_adler32(*cksump, data, len); - return 0; -} - -static int adler32_finup(struct shash_desc *desc, const u8 *data, - unsigned int len, u8 *out) -{ - return __adler32_finup(shash_desc_ctx(desc), data, len, out); -} - -static int adler32_final(struct shash_desc *desc, u8 *out) -{ - u32 *cksump = shash_desc_ctx(desc); - - *(u32 *)out = *cksump; - return 0; -} - -static int adler32_digest(struct shash_desc *desc, const u8 *data, - unsigned int len, u8 *out) -{ - return __adler32_finup(crypto_shash_ctx(desc->tfm), data, len, - out); -} - -static struct shash_alg alg = { - .setkey = adler32_setkey, - .init = adler32_init, - .update = adler32_update, - .final = adler32_final, - .finup = adler32_finup, - .digest = adler32_digest, - .descsize = sizeof(u32), - .digestsize = CHKSUM_DIGEST_SIZE, - .base = { - .cra_name = "adler32", - .cra_driver_name = "adler32-zlib", - .cra_priority = 100, - .cra_flags = 
CRYPTO_ALG_OPTIONAL_KEY, - .cra_blocksize = CHKSUM_BLOCK_SIZE, - .cra_ctxsize = sizeof(u32), - .cra_module = THIS_MODULE, - .cra_init = adler32_cra_init, - } -}; - -int cfs_crypto_adler32_register(void) -{ - return crypto_register_shash(&alg); -} - -void cfs_crypto_adler32_unregister(void) -{ - crypto_unregister_shash(&alg); -} diff --git a/drivers/staging/lustre/lnet/libcfs/linux/linux-crypto.c b/drivers/staging/lustre/lnet/libcfs/linux/linux-crypto.c deleted file mode 100644 index b55006264155..000000000000 --- a/drivers/staging/lustre/lnet/libcfs/linux/linux-crypto.c +++ /dev/null @@ -1,443 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -/* GPL HEADER START - * - * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 only, - * as published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, but - * WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * General Public License version 2 for more details (a copy is included - * in the LICENSE file that accompanied this code). - * - * You should have received a copy of the GNU General Public License - * version 2 along with this program; If not, see http://www.gnu.org/licenses - * - * Please visit http://www.xyratex.com/contact if you need additional - * information or have any questions. - * - * GPL HEADER END - */ - -/* - * Copyright 2012 Xyratex Technology Limited - * - * Copyright (c) 2012, Intel Corporation. 
- */ - -#include <crypto/hash.h> -#include <linux/scatterlist.h> -#include <linux/libcfs/libcfs.h> -#include <linux/libcfs/libcfs_crypto.h> -#include "linux-crypto.h" - -/** - * Array of hash algorithm speed in MByte per second - */ -static int cfs_crypto_hash_speeds[CFS_HASH_ALG_MAX]; - -/** - * Initialize the state descriptor for the specified hash algorithm. - * - * An internal routine to allocate the hash-specific state in \a req for - * use with cfs_crypto_hash_digest() to compute the hash of a single message, - * though possibly in multiple chunks. The descriptor internal state should - * be freed with cfs_crypto_hash_final(). - * - * \param[in] hash_alg hash algorithm id (CFS_HASH_ALG_*) - * \param[out] type pointer to the hash description in hash_types[] - * array - * \param[in,out] req hash state descriptor to be initialized - * \param[in] key initial hash value/state, NULL to use default - * value - * \param[in] key_len length of \a key - * - * \retval 0 on success - * \retval negative errno on failure - */ -static int cfs_crypto_hash_alloc(enum cfs_crypto_hash_alg hash_alg, - const struct cfs_crypto_hash_type **type, - struct ahash_request **req, - unsigned char *key, - unsigned int key_len) -{ - struct crypto_ahash *tfm; - int err = 0; - - *type = cfs_crypto_hash_type(hash_alg); - - if (!*type) { - CWARN("Unsupported hash algorithm id = %d, max id is %d\n", - hash_alg, CFS_HASH_ALG_MAX); - return -EINVAL; - } - tfm = crypto_alloc_ahash((*type)->cht_name, 0, CRYPTO_ALG_ASYNC); - - if (IS_ERR(tfm)) { - CDEBUG(D_INFO, "Failed to alloc crypto hash %s\n", - (*type)->cht_name); - return PTR_ERR(tfm); - } - - *req = ahash_request_alloc(tfm, GFP_KERNEL); - if (!*req) { - CDEBUG(D_INFO, "Failed to alloc ahash_request for %s\n", - (*type)->cht_name); - crypto_free_ahash(tfm); - return -ENOMEM; - } - - ahash_request_set_callback(*req, 0, NULL, NULL); - - if (key) - err = crypto_ahash_setkey(tfm, key, key_len); - else if ((*type)->cht_key) - err = 
crypto_ahash_setkey(tfm, - (unsigned char *)&((*type)->cht_key), - (*type)->cht_size); - - if (err) { - ahash_request_free(*req); - crypto_free_ahash(tfm); - return err; - } - - CDEBUG(D_INFO, "Using crypto hash: %s (%s) speed %d MB/s\n", - crypto_ahash_alg_name(tfm), crypto_ahash_driver_name(tfm), - cfs_crypto_hash_speeds[hash_alg]); - - err = crypto_ahash_init(*req); - if (err) { - ahash_request_free(*req); - crypto_free_ahash(tfm); - } - return err; -} - -/** - * Calculate hash digest for the passed buffer. - * - * This should be used when computing the hash on a single contiguous buffer. - * It combines the hash initialization, computation, and cleanup. - * - * \param[in] hash_alg id of hash algorithm (CFS_HASH_ALG_*) - * \param[in] buf data buffer on which to compute hash - * \param[in] buf_len length of \a buf in bytes - * \param[in] key initial value/state for algorithm, - * if \a key = NULL use default initial value - * \param[in] key_len length of \a key in bytes - * \param[out] hash pointer to computed hash value, - * if \a hash = NULL then \a hash_len is to digest - * size in bytes, retval -ENOSPC - * \param[in,out] hash_len size of \a hash buffer - * - * \retval -EINVAL \a buf, \a buf_len, \a hash_len, - * \a hash_alg invalid - * \retval -ENOENT \a hash_alg is unsupported - * \retval -ENOSPC \a hash is NULL, or \a hash_len less than - * digest size - * \retval 0 for success - * \retval negative errno for other errors from lower - * layers. 
- */ -int cfs_crypto_hash_digest(enum cfs_crypto_hash_alg hash_alg, - const void *buf, unsigned int buf_len, - unsigned char *key, unsigned int key_len, - unsigned char *hash, unsigned int *hash_len) -{ - struct scatterlist sl; - struct ahash_request *req; - int err; - const struct cfs_crypto_hash_type *type; - - if (!buf || !buf_len || !hash_len) - return -EINVAL; - - err = cfs_crypto_hash_alloc(hash_alg, &type, &req, key, key_len); - if (err) - return err; - - if (!hash || *hash_len < type->cht_size) { - *hash_len = type->cht_size; - crypto_free_ahash(crypto_ahash_reqtfm(req)); - ahash_request_free(req); - return -ENOSPC; - } - sg_init_one(&sl, buf, buf_len); - - ahash_request_set_crypt(req, &sl, hash, sl.length); - err = crypto_ahash_digest(req); - crypto_free_ahash(crypto_ahash_reqtfm(req)); - ahash_request_free(req); - - return err; -} -EXPORT_SYMBOL(cfs_crypto_hash_digest); - -/** - * Allocate and initialize descriptor for hash algorithm. - * - * This should be used to initialize a hash descriptor for multiple calls - * to a single hash function when computing the hash across multiple - * separate buffers or pages using cfs_crypto_hash_update{,_page}(). - * - * The hash descriptor should be freed with cfs_crypto_hash_final(). 
- * - * \param[in] hash_alg algorithm id (CFS_HASH_ALG_*) - * \param[in] key initial value/state for algorithm, if \a key = NULL - * use default initial value - * \param[in] key_len length of \a key in bytes - * - * \retval pointer to descriptor of hash instance - * \retval ERR_PTR(errno) in case of error - */ -struct ahash_request * -cfs_crypto_hash_init(enum cfs_crypto_hash_alg hash_alg, - unsigned char *key, unsigned int key_len) -{ - struct ahash_request *req; - int err; - const struct cfs_crypto_hash_type *type; - - err = cfs_crypto_hash_alloc(hash_alg, &type, &req, key, key_len); - - if (err) - return ERR_PTR(err); - return req; -} -EXPORT_SYMBOL(cfs_crypto_hash_init); - -/** - * Update hash digest computed on data within the given \a page - * - * \param[in] hreq hash state descriptor - * \param[in] page data page on which to compute the hash - * \param[in] offset offset within \a page at which to start hash - * \param[in] len length of data on which to compute hash - * - * \retval 0 for success - * \retval negative errno on failure - */ -int cfs_crypto_hash_update_page(struct ahash_request *req, - struct page *page, unsigned int offset, - unsigned int len) -{ - struct scatterlist sl; - - sg_init_table(&sl, 1); - sg_set_page(&sl, page, len, offset & ~PAGE_MASK); - - ahash_request_set_crypt(req, &sl, NULL, sl.length); - return crypto_ahash_update(req); -} -EXPORT_SYMBOL(cfs_crypto_hash_update_page); - -/** - * Update hash digest computed on the specified data - * - * \param[in] req hash state descriptor - * \param[in] buf data buffer on which to compute the hash - * \param[in] buf_len length of \buf on which to compute hash - * - * \retval 0 for success - * \retval negative errno on failure - */ -int cfs_crypto_hash_update(struct ahash_request *req, - const void *buf, unsigned int buf_len) -{ - struct scatterlist sl; - - sg_init_one(&sl, buf, buf_len); - - ahash_request_set_crypt(req, &sl, NULL, sl.length); - return crypto_ahash_update(req); -} 
-EXPORT_SYMBOL(cfs_crypto_hash_update); - -/** - * Finish hash calculation, copy hash digest to buffer, clean up hash descriptor - * - * \param[in] req hash descriptor - * \param[out] hash pointer to hash buffer to store hash digest - * \param[in,out] hash_len pointer to hash buffer size, if \a req = NULL - * only free \a req instead of computing the hash - * - * \retval 0 for success - * \retval -EOVERFLOW if hash_len is too small for the hash digest - * \retval negative errno for other errors from lower layers - */ -int cfs_crypto_hash_final(struct ahash_request *req, - unsigned char *hash, unsigned int *hash_len) -{ - int err; - int size = crypto_ahash_digestsize(crypto_ahash_reqtfm(req)); - - if (!hash || !hash_len) { - err = 0; - goto free_ahash; - } - if (*hash_len < size) { - err = -EOVERFLOW; - goto free_ahash; - } - - ahash_request_set_crypt(req, NULL, hash, 0); - err = crypto_ahash_final(req); - if (!err) - *hash_len = size; -free_ahash: - crypto_free_ahash(crypto_ahash_reqtfm(req)); - ahash_request_free(req); - return err; -} -EXPORT_SYMBOL(cfs_crypto_hash_final); - -/** - * Compute the speed of specified hash function - * - * Run a speed test on the given hash algorithm on buffer of the given size. - * The speed is stored internally in the cfs_crypto_hash_speeds[] array, and - * is available through the cfs_crypto_hash_speed() function. 
- * - * \param[in] hash_alg hash algorithm id (CFS_HASH_ALG_*) - * \param[in] buf data buffer on which to compute the hash - * \param[in] buf_len length of \buf on which to compute hash - */ -static void cfs_crypto_performance_test(enum cfs_crypto_hash_alg hash_alg) -{ - int buf_len = max(PAGE_SIZE, 1048576UL); - void *buf; - unsigned long start, end; - int bcount, err = 0; - struct page *page; - unsigned char hash[CFS_CRYPTO_HASH_DIGESTSIZE_MAX]; - unsigned int hash_len = sizeof(hash); - - page = alloc_page(GFP_KERNEL); - if (!page) { - err = -ENOMEM; - goto out_err; - } - - buf = kmap(page); - memset(buf, 0xAD, PAGE_SIZE); - kunmap(page); - - for (start = jiffies, end = start + msecs_to_jiffies(MSEC_PER_SEC), - bcount = 0; time_before(jiffies, end); bcount++) { - struct ahash_request *hdesc; - int i; - - hdesc = cfs_crypto_hash_init(hash_alg, NULL, 0); - if (IS_ERR(hdesc)) { - err = PTR_ERR(hdesc); - break; - } - - for (i = 0; i < buf_len / PAGE_SIZE; i++) { - err = cfs_crypto_hash_update_page(hdesc, page, 0, - PAGE_SIZE); - if (err) - break; - } - - err = cfs_crypto_hash_final(hdesc, hash, &hash_len); - if (err) - break; - } - end = jiffies; - __free_page(page); -out_err: - if (err) { - cfs_crypto_hash_speeds[hash_alg] = err; - CDEBUG(D_INFO, "Crypto hash algorithm %s test error: rc = %d\n", - cfs_crypto_hash_name(hash_alg), err); - } else { - unsigned long tmp; - - tmp = ((bcount * buf_len / jiffies_to_msecs(end - start)) * - 1000) / (1024 * 1024); - cfs_crypto_hash_speeds[hash_alg] = (int)tmp; - CDEBUG(D_CONFIG, "Crypto hash algorithm %s speed = %d MB/s\n", - cfs_crypto_hash_name(hash_alg), - cfs_crypto_hash_speeds[hash_alg]); - } -} - -/** - * hash speed in Mbytes per second for valid hash algorithm - * - * Return the performance of the specified \a hash_alg that was previously - * computed using cfs_crypto_performance_test(). 
- * - * \param[in] hash_alg hash algorithm id (CFS_HASH_ALG_*) - * - * \retval positive speed of the hash function in MB/s - * \retval -ENOENT if \a hash_alg is unsupported - * \retval negative errno if \a hash_alg speed is unavailable - */ -int cfs_crypto_hash_speed(enum cfs_crypto_hash_alg hash_alg) -{ - if (hash_alg < CFS_HASH_ALG_MAX) - return cfs_crypto_hash_speeds[hash_alg]; - return -ENOENT; -} -EXPORT_SYMBOL(cfs_crypto_hash_speed); - -/** - * Run the performance test for all hash algorithms. - * - * Run the cfs_crypto_performance_test() benchmark for all of the available - * hash functions using a 1MB buffer size. This is a reasonable buffer size - * for Lustre RPCs, even if the actual RPC size is larger or smaller. - * - * Since the setup cost and computation speed of various hash algorithms is - * a function of the buffer size (and possibly internal contention of offload - * engines), this speed only represents an estimate of the actual speed under - * actual usage, but is reasonable for comparing available algorithms. - * - * The actual speeds are available via cfs_crypto_hash_speed() for later - * comparison. 
- * - * \retval 0 on success - * \retval -ENOMEM if no memory is available for test buffer - */ -static int cfs_crypto_test_hashes(void) -{ - enum cfs_crypto_hash_alg hash_alg; - - for (hash_alg = 0; hash_alg < CFS_HASH_ALG_MAX; hash_alg++) - cfs_crypto_performance_test(hash_alg); - - return 0; -} - -static int adler32; - -/** - * Register available hash functions - * - * \retval 0 - */ -int cfs_crypto_register(void) -{ - request_module("crc32c"); - - adler32 = cfs_crypto_adler32_register(); - - /* check all algorithms and do performance test */ - cfs_crypto_test_hashes(); - return 0; -} - -/** - * Unregister previously registered hash functions - */ -void cfs_crypto_unregister(void) -{ - if (!adler32) - cfs_crypto_adler32_unregister(); -} diff --git a/drivers/staging/lustre/lnet/libcfs/linux/linux-crypto.h b/drivers/staging/lustre/lnet/libcfs/linux/linux-crypto.h deleted file mode 100644 index 5616e9ea1450..000000000000 --- a/drivers/staging/lustre/lnet/libcfs/linux/linux-crypto.h +++ /dev/null @@ -1,30 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -/* - * GPL HEADER START - * - * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 only, - * as published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, but - * WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * General Public License version 2 for more details (a copy is included - * in the LICENSE file that accompanied this code). - * - * You should have received a copy of the GNU General Public License - * version 2 along with this program; If not, see http://www.gnu.org/licenses - * - * Please visit http://www.xyratex.com/contact if you need additional - * information or have any questions. 
- * - * GPL HEADER END - */ - -/** - * Functions for start/stop shash adler32 algorithm. - */ -int cfs_crypto_adler32_register(void); -void cfs_crypto_adler32_unregister(void); diff --git a/drivers/staging/lustre/lnet/libcfs/linux/linux-debug.c b/drivers/staging/lustre/lnet/libcfs/linux/linux-debug.c deleted file mode 100644 index 0092166af258..000000000000 --- a/drivers/staging/lustre/lnet/libcfs/linux/linux-debug.c +++ /dev/null @@ -1,145 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -/* - * GPL HEADER START - * - * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 only, - * as published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, but - * WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * General Public License version 2 for more details (a copy is included - * in the LICENSE file that accompanied this code). - * - * You should have received a copy of the GNU General Public License - * version 2 along with this program; If not, see - * http://www.gnu.org/licenses/gpl-2.0.html - * - * GPL HEADER END - */ -/* - * Copyright (c) 2008, 2010, Oracle and/or its affiliates. All rights reserved. - * Use is subject to license terms. - * - * Copyright (c) 2012, Intel Corporation. - */ -/* - * This file is part of Lustre, http://www.lustre.org/ - * Lustre is a trademark of Sun Microsystems, Inc. 
- * - * libcfs/libcfs/linux/linux-debug.c - * - * Author: Phil Schwan <phil@clusterfs.com> - */ - -#include <linux/module.h> -#include <linux/kmod.h> -#include <linux/notifier.h> -#include <linux/kernel.h> -#include <linux/mm.h> -#include <linux/string.h> -#include <linux/stat.h> -#include <linux/errno.h> -#include <linux/unistd.h> -#include <linux/interrupt.h> -#include <linux/completion.h> -#include <linux/fs.h> -#include <linux/uaccess.h> -#include <linux/miscdevice.h> - -# define DEBUG_SUBSYSTEM S_LNET - -#include <linux/libcfs/libcfs.h> - -#include "../tracefile.h" - -#include <linux/kallsyms.h> - -char lnet_debug_log_upcall[1024] = "/usr/lib/lustre/lnet_debug_log_upcall"; - -/** - * Upcall function once a Lustre log has been dumped. - * - * \param file path of the dumped log - */ -void libcfs_run_debug_log_upcall(char *file) -{ - char *argv[3]; - int rc; - static const char * const envp[] = { - "HOME=/", - "PATH=/sbin:/bin:/usr/sbin:/usr/bin", - NULL - }; - - argv[0] = lnet_debug_log_upcall; - - LASSERTF(file, "called on a null filename\n"); - argv[1] = file; /* only need to pass the path of the file */ - - argv[2] = NULL; - - rc = call_usermodehelper(argv[0], argv, (char **)envp, 1); - if (rc < 0 && rc != -ENOENT) { - CERROR("Error %d invoking LNET debug log upcall %s %s; check /sys/kernel/debug/lnet/debug_log_upcall\n", - rc, argv[0], argv[1]); - } else { - CDEBUG(D_HA, "Invoked LNET debug log upcall %s %s\n", - argv[0], argv[1]); - } -} - -/* coverity[+kill] */ -void __noreturn lbug_with_loc(struct libcfs_debug_msg_data *msgdata) -{ - libcfs_catastrophe = 1; - libcfs_debug_msg(msgdata, "LBUG\n"); - - if (in_interrupt()) { - panic("LBUG in interrupt.\n"); - /* not reached */ - } - - dump_stack(); - if (!libcfs_panic_on_lbug) - libcfs_debug_dumplog(); - if (libcfs_panic_on_lbug) - panic("LBUG"); - set_current_state(TASK_UNINTERRUPTIBLE); - while (1) - schedule(); -} -EXPORT_SYMBOL(lbug_with_loc); - -static int panic_notifier(struct notifier_block *self, 
unsigned long unused1, - void *unused2) -{ - if (libcfs_panic_in_progress) - return 0; - - libcfs_panic_in_progress = 1; - mb(); - - return 0; -} - -static struct notifier_block libcfs_panic_notifier = { - .notifier_call = panic_notifier, - .next = NULL, - .priority = 10000, -}; - -void libcfs_register_panic_notifier(void) -{ - atomic_notifier_chain_register(&panic_notifier_list, - &libcfs_panic_notifier); -} - -void libcfs_unregister_panic_notifier(void) -{ - atomic_notifier_chain_unregister(&panic_notifier_list, - &libcfs_panic_notifier); -} diff --git a/drivers/staging/lustre/lnet/libcfs/linux/linux-module.c b/drivers/staging/lustre/lnet/libcfs/linux/linux-module.c deleted file mode 100644 index ddf625669bff..000000000000 --- a/drivers/staging/lustre/lnet/libcfs/linux/linux-module.c +++ /dev/null @@ -1,197 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -/* - * GPL HEADER START - * - * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 only, - * as published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, but - * WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * General Public License version 2 for more details (a copy is included - * in the LICENSE file that accompanied this code). - * - * You should have received a copy of the GNU General Public License - * version 2 along with this program; If not, see - * http://www.gnu.org/licenses/gpl-2.0.html - * - * GPL HEADER END - */ -/* - * Copyright (c) 2008, 2010, Oracle and/or its affiliates. All rights reserved. - * Use is subject to license terms. - * - * Copyright (c) 2012, Intel Corporation. - */ -/* - * This file is part of Lustre, http://www.lustre.org/ - * Lustre is a trademark of Sun Microsystems, Inc. 
- */ - -#define DEBUG_SUBSYSTEM S_LNET - -#include <linux/libcfs/libcfs.h> - -#define LNET_MINOR 240 - -static inline size_t libcfs_ioctl_packlen(struct libcfs_ioctl_data *data) -{ - size_t len = sizeof(*data); - - len += cfs_size_round(data->ioc_inllen1); - len += cfs_size_round(data->ioc_inllen2); - return len; -} - -static inline bool libcfs_ioctl_is_invalid(struct libcfs_ioctl_data *data) -{ - if (data->ioc_hdr.ioc_len > BIT(30)) { - CERROR("LIBCFS ioctl: ioc_len larger than 1<<30\n"); - return true; - } - if (data->ioc_inllen1 > BIT(30)) { - CERROR("LIBCFS ioctl: ioc_inllen1 larger than 1<<30\n"); - return true; - } - if (data->ioc_inllen2 > BIT(30)) { - CERROR("LIBCFS ioctl: ioc_inllen2 larger than 1<<30\n"); - return true; - } - if (data->ioc_inlbuf1 && !data->ioc_inllen1) { - CERROR("LIBCFS ioctl: inlbuf1 pointer but 0 length\n"); - return true; - } - if (data->ioc_inlbuf2 && !data->ioc_inllen2) { - CERROR("LIBCFS ioctl: inlbuf2 pointer but 0 length\n"); - return true; - } - if (data->ioc_pbuf1 && !data->ioc_plen1) { - CERROR("LIBCFS ioctl: pbuf1 pointer but 0 length\n"); - return true; - } - if (data->ioc_pbuf2 && !data->ioc_plen2) { - CERROR("LIBCFS ioctl: pbuf2 pointer but 0 length\n"); - return true; - } - if (data->ioc_plen1 && !data->ioc_pbuf1) { - CERROR("LIBCFS ioctl: plen1 nonzero but no pbuf1 pointer\n"); - return true; - } - if (data->ioc_plen2 && !data->ioc_pbuf2) { - CERROR("LIBCFS ioctl: plen2 nonzero but no pbuf2 pointer\n"); - return true; - } - if ((u32)libcfs_ioctl_packlen(data) != data->ioc_hdr.ioc_len) { - CERROR("LIBCFS ioctl: packlen != ioc_len\n"); - return true; - } - if (data->ioc_inllen1 && - data->ioc_bulk[data->ioc_inllen1 - 1] != '\0') { - CERROR("LIBCFS ioctl: inlbuf1 not 0 terminated\n"); - return true; - } - if (data->ioc_inllen2 && - data->ioc_bulk[cfs_size_round(data->ioc_inllen1) + - data->ioc_inllen2 - 1] != '\0') { - CERROR("LIBCFS ioctl: inlbuf2 not 0 terminated\n"); - return true; - } - return false; -} - -int 
libcfs_ioctl_data_adjust(struct libcfs_ioctl_data *data) -{ - if (libcfs_ioctl_is_invalid(data)) { - CERROR("libcfs ioctl: parameter not correctly formatted\n"); - return -EINVAL; - } - - if (data->ioc_inllen1) - data->ioc_inlbuf1 = &data->ioc_bulk[0]; - - if (data->ioc_inllen2) - data->ioc_inlbuf2 = &data->ioc_bulk[0] + - cfs_size_round(data->ioc_inllen1); - - return 0; -} - -int libcfs_ioctl_getdata(struct libcfs_ioctl_hdr **hdr_pp, - const struct libcfs_ioctl_hdr __user *uhdr) -{ - struct libcfs_ioctl_hdr hdr; - int err; - - if (copy_from_user(&hdr, uhdr, sizeof(hdr))) - return -EFAULT; - - if (hdr.ioc_version != LIBCFS_IOCTL_VERSION && - hdr.ioc_version != LIBCFS_IOCTL_VERSION2) { - CERROR("libcfs ioctl: version mismatch expected %#x, got %#x\n", - LIBCFS_IOCTL_VERSION, hdr.ioc_version); - return -EINVAL; - } - - if (hdr.ioc_len < sizeof(hdr)) { - CERROR("libcfs ioctl: user buffer too small for ioctl\n"); - return -EINVAL; - } - - if (hdr.ioc_len > LIBCFS_IOC_DATA_MAX) { - CERROR("libcfs ioctl: user buffer is too large %d/%d\n", - hdr.ioc_len, LIBCFS_IOC_DATA_MAX); - return -EINVAL; - } - - *hdr_pp = kvmalloc(hdr.ioc_len, GFP_KERNEL); - if (!*hdr_pp) - return -ENOMEM; - - if (copy_from_user(*hdr_pp, uhdr, hdr.ioc_len)) { - err = -EFAULT; - goto free; - } - - if ((*hdr_pp)->ioc_version != hdr.ioc_version || - (*hdr_pp)->ioc_len != hdr.ioc_len) { - err = -EINVAL; - goto free; - } - - return 0; - -free: - kvfree(*hdr_pp); - return err; -} - -static long -libcfs_psdev_ioctl(struct file *file, unsigned int cmd, unsigned long arg) -{ - if (!capable(CAP_SYS_ADMIN)) - return -EACCES; - - if (_IOC_TYPE(cmd) != IOC_LIBCFS_TYPE || - _IOC_NR(cmd) < IOC_LIBCFS_MIN_NR || - _IOC_NR(cmd) > IOC_LIBCFS_MAX_NR) { - CDEBUG(D_IOCTL, "invalid ioctl ( type %d, nr %d, size %d )\n", - _IOC_TYPE(cmd), _IOC_NR(cmd), _IOC_SIZE(cmd)); - return -EINVAL; - } - - return libcfs_ioctl(cmd, (void __user *)arg); -} - -static const struct file_operations libcfs_fops = { - .owner = THIS_MODULE, - 
.unlocked_ioctl = libcfs_psdev_ioctl, -}; - -struct miscdevice libcfs_dev = { - .minor = LNET_MINOR, - .name = "lnet", - .fops = &libcfs_fops, -}; diff --git a/drivers/staging/lustre/lnet/libcfs/linux/linux-tracefile.c b/drivers/staging/lustre/lnet/libcfs/linux/linux-tracefile.c deleted file mode 100644 index 7928d7182634..000000000000 --- a/drivers/staging/lustre/lnet/libcfs/linux/linux-tracefile.c +++ /dev/null @@ -1,257 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -/* - * GPL HEADER START - * - * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 only, - * as published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, but - * WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * General Public License version 2 for more details (a copy is included - * in the LICENSE file that accompanied this code). - * - * You should have received a copy of the GNU General Public License - * version 2 along with this program; If not, see - * http://www.gnu.org/licenses/gpl-2.0.html - * - * GPL HEADER END - */ -/* - * Copyright (c) 2008, 2010, Oracle and/or its affiliates. All rights reserved. - * Use is subject to license terms. - * - * Copyright (c) 2012, Intel Corporation. - */ -/* - * This file is part of Lustre, http://www.lustre.org/ - * Lustre is a trademark of Sun Microsystems, Inc. 
- */ - -#define DEBUG_SUBSYSTEM S_LNET -#define LUSTRE_TRACEFILE_PRIVATE - -#include <linux/libcfs/libcfs.h> -#include "../tracefile.h" - -/* percents to share the total debug memory for each type */ -static unsigned int pages_factor[CFS_TCD_TYPE_MAX] = { - 80, /* 80% pages for CFS_TCD_TYPE_PROC */ - 10, /* 10% pages for CFS_TCD_TYPE_SOFTIRQ */ - 10 /* 10% pages for CFS_TCD_TYPE_IRQ */ -}; - -char *cfs_trace_console_buffers[NR_CPUS][CFS_TCD_TYPE_MAX]; - -static DECLARE_RWSEM(cfs_tracefile_sem); - -int cfs_tracefile_init_arch(void) -{ - int i; - int j; - struct cfs_trace_cpu_data *tcd; - - /* initialize trace_data */ - memset(cfs_trace_data, 0, sizeof(cfs_trace_data)); - for (i = 0; i < CFS_TCD_TYPE_MAX; i++) { - cfs_trace_data[i] = - kmalloc_array(num_possible_cpus(), - sizeof(union cfs_trace_data_union), - GFP_KERNEL); - if (!cfs_trace_data[i]) - goto out; - } - - /* arch related info initialized */ - cfs_tcd_for_each(tcd, i, j) { - spin_lock_init(&tcd->tcd_lock); - tcd->tcd_pages_factor = pages_factor[i]; - tcd->tcd_type = i; - tcd->tcd_cpu = j; - } - - for (i = 0; i < num_possible_cpus(); i++) - for (j = 0; j < 3; j++) { - cfs_trace_console_buffers[i][j] = - kmalloc(CFS_TRACE_CONSOLE_BUFFER_SIZE, - GFP_KERNEL); - - if (!cfs_trace_console_buffers[i][j]) - goto out; - } - - return 0; - -out: - cfs_tracefile_fini_arch(); - pr_err("lnet: Not enough memory\n"); - return -ENOMEM; -} - -void cfs_tracefile_fini_arch(void) -{ - int i; - int j; - - for (i = 0; i < num_possible_cpus(); i++) - for (j = 0; j < 3; j++) { - kfree(cfs_trace_console_buffers[i][j]); - cfs_trace_console_buffers[i][j] = NULL; - } - - for (i = 0; cfs_trace_data[i]; i++) { - kfree(cfs_trace_data[i]); - cfs_trace_data[i] = NULL; - } -} - -void cfs_tracefile_read_lock(void) -{ - down_read(&cfs_tracefile_sem); -} - -void cfs_tracefile_read_unlock(void) -{ - up_read(&cfs_tracefile_sem); -} - -void cfs_tracefile_write_lock(void) -{ - down_write(&cfs_tracefile_sem); -} - -void 
cfs_tracefile_write_unlock(void) -{ - up_write(&cfs_tracefile_sem); -} - -enum cfs_trace_buf_type cfs_trace_buf_idx_get(void) -{ - if (in_irq()) - return CFS_TCD_TYPE_IRQ; - if (in_softirq()) - return CFS_TCD_TYPE_SOFTIRQ; - return CFS_TCD_TYPE_PROC; -} - -/* - * The walking argument indicates the locking comes from all tcd types - * iterator and we must lock it and dissable local irqs to avoid deadlocks - * with other interrupt locks that might be happening. See LU-1311 - * for details. - */ -int cfs_trace_lock_tcd(struct cfs_trace_cpu_data *tcd, int walking) - __acquires(&tcd->tc_lock) -{ - __LASSERT(tcd->tcd_type < CFS_TCD_TYPE_MAX); - if (tcd->tcd_type == CFS_TCD_TYPE_IRQ) - spin_lock_irqsave(&tcd->tcd_lock, tcd->tcd_lock_flags); - else if (tcd->tcd_type == CFS_TCD_TYPE_SOFTIRQ) - spin_lock_bh(&tcd->tcd_lock); - else if (unlikely(walking)) - spin_lock_irq(&tcd->tcd_lock); - else - spin_lock(&tcd->tcd_lock); - return 1; -} - -void cfs_trace_unlock_tcd(struct cfs_trace_cpu_data *tcd, int walking) - __releases(&tcd->tcd_lock) -{ - __LASSERT(tcd->tcd_type < CFS_TCD_TYPE_MAX); - if (tcd->tcd_type == CFS_TCD_TYPE_IRQ) - spin_unlock_irqrestore(&tcd->tcd_lock, tcd->tcd_lock_flags); - else if (tcd->tcd_type == CFS_TCD_TYPE_SOFTIRQ) - spin_unlock_bh(&tcd->tcd_lock); - else if (unlikely(walking)) - spin_unlock_irq(&tcd->tcd_lock); - else - spin_unlock(&tcd->tcd_lock); -} - -void -cfs_set_ptldebug_header(struct ptldebug_header *header, - struct libcfs_debug_msg_data *msgdata, - unsigned long stack) -{ - struct timespec64 ts; - - ktime_get_real_ts64(&ts); - - header->ph_subsys = msgdata->msg_subsys; - header->ph_mask = msgdata->msg_mask; - header->ph_cpu_id = smp_processor_id(); - header->ph_type = cfs_trace_buf_idx_get(); - /* y2038 safe since all user space treats this as unsigned, but - * will overflow in 2106 - */ - header->ph_sec = (u32)ts.tv_sec; - header->ph_usec = ts.tv_nsec / NSEC_PER_USEC; - header->ph_stack = stack; - header->ph_pid = current->pid; - 
header->ph_line_num = msgdata->msg_line; - header->ph_extern_pid = 0; -} - -static char * -dbghdr_to_err_string(struct ptldebug_header *hdr) -{ - switch (hdr->ph_subsys) { - case S_LND: - case S_LNET: - return "LNetError"; - default: - return "LustreError"; - } -} - -static char * -dbghdr_to_info_string(struct ptldebug_header *hdr) -{ - switch (hdr->ph_subsys) { - case S_LND: - case S_LNET: - return "LNet"; - default: - return "Lustre"; - } -} - -void cfs_print_to_console(struct ptldebug_header *hdr, int mask, - const char *buf, int len, const char *file, - const char *fn) -{ - char *prefix = "Lustre", *ptype = NULL; - - if (mask & D_EMERG) { - prefix = dbghdr_to_err_string(hdr); - ptype = KERN_EMERG; - } else if (mask & D_ERROR) { - prefix = dbghdr_to_err_string(hdr); - ptype = KERN_ERR; - } else if (mask & D_WARNING) { - prefix = dbghdr_to_info_string(hdr); - ptype = KERN_WARNING; - } else if (mask & (D_CONSOLE | libcfs_printk)) { - prefix = dbghdr_to_info_string(hdr); - ptype = KERN_INFO; - } - - if (mask & D_CONSOLE) { - pr_info("%s%s: %.*s", ptype, prefix, len, buf); - } else { - pr_info("%s%s: %d:%d:(%s:%d:%s()) %.*s", ptype, prefix, - hdr->ph_pid, hdr->ph_extern_pid, file, - hdr->ph_line_num, fn, len, buf); - } -} - -int cfs_trace_max_debug_mb(void) -{ - int total_mb = (totalram_pages >> (20 - PAGE_SHIFT)); - - return max(512, (total_mb * 80) / 100); -} diff --git a/drivers/staging/lustre/lnet/libcfs/module.c b/drivers/staging/lustre/lnet/libcfs/module.c deleted file mode 100644 index a03f924f1d7c..000000000000 --- a/drivers/staging/lustre/lnet/libcfs/module.c +++ /dev/null @@ -1,604 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -/* - * GPL HEADER START - * - * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 only, - * as published by the Free Software Foundation. 
- * - * This program is distributed in the hope that it will be useful, but - * WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * General Public License version 2 for more details (a copy is included - * in the LICENSE file that accompanied this code). - * - * You should have received a copy of the GNU General Public License - * version 2 along with this program; If not, see - * http://www.gnu.org/licenses/gpl-2.0.html - * - * GPL HEADER END - */ -/* - * Copyright (c) 2008, 2010, Oracle and/or its affiliates. All rights reserved. - * Use is subject to license terms. - * - * Copyright (c) 2012, 2015 Intel Corporation. - */ -/* - * This file is part of Lustre, http://www.lustre.org/ - * Lustre is a trademark of Sun Microsystems, Inc. - */ -#include <linux/module.h> -#include <linux/kernel.h> -#include <linux/mm.h> -#include <linux/string.h> -#include <linux/stat.h> -#include <linux/errno.h> -#include <linux/unistd.h> -#include <net/sock.h> -#include <linux/uio.h> - -#include <linux/uaccess.h> - -#include <linux/fs.h> -#include <linux/file.h> -#include <linux/list.h> - -#include <linux/sysctl.h> -#include <linux/debugfs.h> - -# define DEBUG_SUBSYSTEM S_LNET - -#include <linux/libcfs/libcfs.h> -#include <asm/div64.h> - -#include <linux/libcfs/libcfs_crypto.h> -#include <linux/lnet/lib-lnet.h> -#include <uapi/linux/lnet/lnet-dlc.h> -#include "tracefile.h" - -static struct dentry *lnet_debugfs_root; - -static DECLARE_RWSEM(ioctl_list_sem); -static LIST_HEAD(ioctl_list); - -int libcfs_register_ioctl(struct libcfs_ioctl_handler *hand) -{ - int rc = 0; - - down_write(&ioctl_list_sem); - if (!list_empty(&hand->item)) - rc = -EBUSY; - else - list_add_tail(&hand->item, &ioctl_list); - up_write(&ioctl_list_sem); - - return rc; -} -EXPORT_SYMBOL(libcfs_register_ioctl); - -int libcfs_deregister_ioctl(struct libcfs_ioctl_handler *hand) -{ - int rc = 0; - - down_write(&ioctl_list_sem); - if 
(list_empty(&hand->item)) - rc = -ENOENT; - else - list_del_init(&hand->item); - up_write(&ioctl_list_sem); - - return rc; -} -EXPORT_SYMBOL(libcfs_deregister_ioctl); - -int libcfs_ioctl(unsigned long cmd, void __user *uparam) -{ - struct libcfs_ioctl_data *data = NULL; - struct libcfs_ioctl_hdr *hdr; - int err; - - /* 'cmd' and permissions get checked in our arch-specific caller */ - err = libcfs_ioctl_getdata(&hdr, uparam); - if (err) { - CDEBUG_LIMIT(D_ERROR, - "libcfs ioctl: data header error %d\n", err); - return err; - } - - if (hdr->ioc_version == LIBCFS_IOCTL_VERSION) { - /* - * The libcfs_ioctl_data_adjust() function performs adjustment - * operations on the libcfs_ioctl_data structure to make - * it usable by the code. This doesn't need to be called - * for new data structures added. - */ - data = container_of(hdr, struct libcfs_ioctl_data, ioc_hdr); - err = libcfs_ioctl_data_adjust(data); - if (err) - goto out; - } - - CDEBUG(D_IOCTL, "libcfs ioctl cmd %lu\n", cmd); - switch (cmd) { - case IOC_LIBCFS_CLEAR_DEBUG: - libcfs_debug_clear_buffer(); - break; - - case IOC_LIBCFS_MARK_DEBUG: - if (!data || !data->ioc_inlbuf1 || - data->ioc_inlbuf1[data->ioc_inllen1 - 1] != '\0') { - err = -EINVAL; - goto out; - } - libcfs_debug_mark_buffer(data->ioc_inlbuf1); - break; - - default: { - struct libcfs_ioctl_handler *hand; - - err = -EINVAL; - down_read(&ioctl_list_sem); - list_for_each_entry(hand, &ioctl_list, item) { - err = hand->handle_ioctl(cmd, hdr); - if (err == -EINVAL) - continue; - - if (!err) { - if (copy_to_user(uparam, hdr, hdr->ioc_len)) - err = -EFAULT; - } - break; - } - up_read(&ioctl_list_sem); - break; } - } -out: - kvfree(hdr); - return err; -} - -int lprocfs_call_handler(void *data, int write, loff_t *ppos, - void __user *buffer, size_t *lenp, - int (*handler)(void *data, int write, loff_t pos, - void __user *buffer, int len)) -{ - int rc = handler(data, write, *ppos, buffer, *lenp); - - if (rc < 0) - return rc; - - if (write) { - *ppos += 
*lenp; - } else { - *lenp = rc; - *ppos += rc; - } - return 0; -} -EXPORT_SYMBOL(lprocfs_call_handler); - -static int __proc_dobitmasks(void *data, int write, - loff_t pos, void __user *buffer, int nob) -{ - const int tmpstrlen = 512; - char *tmpstr; - int rc; - unsigned int *mask = data; - int is_subsys = (mask == &libcfs_subsystem_debug) ? 1 : 0; - int is_printk = (mask == &libcfs_printk) ? 1 : 0; - - rc = cfs_trace_allocate_string_buffer(&tmpstr, tmpstrlen); - if (rc < 0) - return rc; - - if (!write) { - libcfs_debug_mask2str(tmpstr, tmpstrlen, *mask, is_subsys); - rc = strlen(tmpstr); - - if (pos >= rc) { - rc = 0; - } else { - rc = cfs_trace_copyout_string(buffer, nob, - tmpstr + pos, "\n"); - } - } else { - rc = cfs_trace_copyin_string(tmpstr, tmpstrlen, buffer, nob); - if (rc < 0) { - kfree(tmpstr); - return rc; - } - - rc = libcfs_debug_str2mask(mask, tmpstr, is_subsys); - /* Always print LBUG/LASSERT to console, so keep this mask */ - if (is_printk) - *mask |= D_EMERG; - } - - kfree(tmpstr); - return rc; -} - -static int proc_dobitmasks(struct ctl_table *table, int write, - void __user *buffer, size_t *lenp, loff_t *ppos) -{ - return lprocfs_call_handler(table->data, write, ppos, buffer, lenp, - __proc_dobitmasks); -} - -static int __proc_dump_kernel(void *data, int write, - loff_t pos, void __user *buffer, int nob) -{ - if (!write) - return 0; - - return cfs_trace_dump_debug_buffer_usrstr(buffer, nob); -} - -static int proc_dump_kernel(struct ctl_table *table, int write, - void __user *buffer, size_t *lenp, loff_t *ppos) -{ - return lprocfs_call_handler(table->data, write, ppos, buffer, lenp, - __proc_dump_kernel); -} - -static int __proc_daemon_file(void *data, int write, - loff_t pos, void __user *buffer, int nob) -{ - if (!write) { - int len = strlen(cfs_tracefile); - - if (pos >= len) - return 0; - - return cfs_trace_copyout_string(buffer, nob, - cfs_tracefile + pos, "\n"); - } - - return cfs_trace_daemon_command_usrstr(buffer, nob); -} - -static int 
proc_daemon_file(struct ctl_table *table, int write, - void __user *buffer, size_t *lenp, loff_t *ppos) -{ - return lprocfs_call_handler(table->data, write, ppos, buffer, lenp, - __proc_daemon_file); -} - -static int libcfs_force_lbug(struct ctl_table *table, int write, - void __user *buffer, - size_t *lenp, loff_t *ppos) -{ - if (write) - LBUG(); - return 0; -} - -static int proc_fail_loc(struct ctl_table *table, int write, - void __user *buffer, - size_t *lenp, loff_t *ppos) -{ - int rc; - long old_fail_loc = cfs_fail_loc; - - rc = proc_doulongvec_minmax(table, write, buffer, lenp, ppos); - if (old_fail_loc != cfs_fail_loc) - wake_up(&cfs_race_waitq); - return rc; -} - -static int __proc_cpt_table(void *data, int write, - loff_t pos, void __user *buffer, int nob) -{ - char *buf = NULL; - int len = 4096; - int rc = 0; - - if (write) - return -EPERM; - - LASSERT(cfs_cpt_table); - - while (1) { - buf = kzalloc(len, GFP_KERNEL); - if (!buf) - return -ENOMEM; - - rc = cfs_cpt_table_print(cfs_cpt_table, buf, len); - if (rc >= 0) - break; - - if (rc == -EFBIG) { - kfree(buf); - len <<= 1; - continue; - } - goto out; - } - - if (pos >= rc) { - rc = 0; - goto out; - } - - rc = cfs_trace_copyout_string(buffer, nob, buf + pos, NULL); - out: - kfree(buf); - return rc; -} - -static int proc_cpt_table(struct ctl_table *table, int write, - void __user *buffer, size_t *lenp, loff_t *ppos) -{ - return lprocfs_call_handler(table->data, write, ppos, buffer, lenp, - __proc_cpt_table); -} - -static struct ctl_table lnet_table[] = { - { - .procname = "debug", - .data = &libcfs_debug, - .maxlen = sizeof(int), - .mode = 0644, - .proc_handler = &proc_dobitmasks, - }, - { - .procname = "subsystem_debug", - .data = &libcfs_subsystem_debug, - .maxlen = sizeof(int), - .mode = 0644, - .proc_handler = &proc_dobitmasks, - }, - { - .procname = "printk", - .data = &libcfs_printk, - .maxlen = sizeof(int), - .mode = 0644, - .proc_handler = &proc_dobitmasks, - }, - { - .procname = 
"cpu_partition_table", - .maxlen = 128, - .mode = 0444, - .proc_handler = &proc_cpt_table, - }, - { - .procname = "debug_log_upcall", - .data = lnet_debug_log_upcall, - .maxlen = sizeof(lnet_debug_log_upcall), - .mode = 0644, - .proc_handler = &proc_dostring, - }, - { - .procname = "catastrophe", - .data = &libcfs_catastrophe, - .maxlen = sizeof(int), - .mode = 0444, - .proc_handler = &proc_dointvec, - }, - { - .procname = "dump_kernel", - .maxlen = 256, - .mode = 0200, - .proc_handler = &proc_dump_kernel, - }, - { - .procname = "daemon_file", - .mode = 0644, - .maxlen = 256, - .proc_handler = &proc_daemon_file, - }, - { - .procname = "force_lbug", - .data = NULL, - .maxlen = 0, - .mode = 0200, - .proc_handler = &libcfs_force_lbug - }, - { - .procname = "fail_loc", - .data = &cfs_fail_loc, - .maxlen = sizeof(cfs_fail_loc), - .mode = 0644, - .proc_handler = &proc_fail_loc - }, - { - .procname = "fail_val", - .data = &cfs_fail_val, - .maxlen = sizeof(int), - .mode = 0644, - .proc_handler = &proc_dointvec - }, - { - .procname = "fail_err", - .data = &cfs_fail_err, - .maxlen = sizeof(cfs_fail_err), - .mode = 0644, - .proc_handler = &proc_dointvec, - }, - { - } -}; - -static const struct lnet_debugfs_symlink_def lnet_debugfs_symlinks[] = { - { "console_ratelimit", - "/sys/module/libcfs/parameters/libcfs_console_ratelimit"}, - { "debug_path", - "/sys/module/libcfs/parameters/libcfs_debug_file_path"}, - { "panic_on_lbug", - "/sys/module/libcfs/parameters/libcfs_panic_on_lbug"}, - { "libcfs_console_backoff", - "/sys/module/libcfs/parameters/libcfs_console_backoff"}, - { "debug_mb", - "/sys/module/libcfs/parameters/libcfs_debug_mb"}, - { "console_min_delay_centisecs", - "/sys/module/libcfs/parameters/libcfs_console_min_delay"}, - { "console_max_delay_centisecs", - "/sys/module/libcfs/parameters/libcfs_console_max_delay"}, - {}, -}; - -static ssize_t lnet_debugfs_read(struct file *filp, char __user *buf, - size_t count, loff_t *ppos) -{ - struct ctl_table *table = 
filp->private_data; - int error; - - error = table->proc_handler(table, 0, (void __user *)buf, &count, ppos); - if (!error) - error = count; - - return error; -} - -static ssize_t lnet_debugfs_write(struct file *filp, const char __user *buf, - size_t count, loff_t *ppos) -{ - struct ctl_table *table = filp->private_data; - int error; - - error = table->proc_handler(table, 1, (void __user *)buf, &count, ppos); - if (!error) - error = count; - - return error; -} - -static const struct file_operations lnet_debugfs_file_operations_rw = { - .open = simple_open, - .read = lnet_debugfs_read, - .write = lnet_debugfs_write, - .llseek = default_llseek, -}; - -static const struct file_operations lnet_debugfs_file_operations_ro = { - .open = simple_open, - .read = lnet_debugfs_read, - .llseek = default_llseek, -}; - -static const struct file_operations lnet_debugfs_file_operations_wo = { - .open = simple_open, - .write = lnet_debugfs_write, - .llseek = default_llseek, -}; - -static const struct file_operations *lnet_debugfs_fops_select(umode_t mode) -{ - if (!(mode & 0222)) - return &lnet_debugfs_file_operations_ro; - - if (!(mode & 0444)) - return &lnet_debugfs_file_operations_wo; - - return &lnet_debugfs_file_operations_rw; -} - -void lustre_insert_debugfs(struct ctl_table *table, - const struct lnet_debugfs_symlink_def *symlinks) -{ - if (!lnet_debugfs_root) - lnet_debugfs_root = debugfs_create_dir("lnet", NULL); - - /* Even if we cannot create, just ignore it altogether) */ - if (IS_ERR_OR_NULL(lnet_debugfs_root)) - return; - - /* We don't save the dentry returned in next two calls, because - * we don't call debugfs_remove() but rather remove_recursive() - */ - for (; table->procname; table++) - debugfs_create_file(table->procname, table->mode, - lnet_debugfs_root, table, - lnet_debugfs_fops_select(table->mode)); - - for (; symlinks && symlinks->name; symlinks++) - debugfs_create_symlink(symlinks->name, lnet_debugfs_root, - symlinks->target); -} 
-EXPORT_SYMBOL_GPL(lustre_insert_debugfs); - -static void lustre_remove_debugfs(void) -{ - debugfs_remove_recursive(lnet_debugfs_root); - - lnet_debugfs_root = NULL; -} - -static int libcfs_init(void) -{ - int rc; - - rc = libcfs_debug_init(5 * 1024 * 1024); - if (rc < 0) { - pr_err("LustreError: libcfs_debug_init: %d\n", rc); - return rc; - } - - rc = cfs_cpu_init(); - if (rc) - goto cleanup_debug; - - rc = misc_register(&libcfs_dev); - if (rc) { - CERROR("misc_register: error %d\n", rc); - goto cleanup_cpu; - } - - cfs_rehash_wq = alloc_workqueue("cfs_rh", WQ_SYSFS, 4); - if (!cfs_rehash_wq) { - CERROR("Failed to start rehash workqueue.\n"); - rc = -ENOMEM; - goto cleanup_deregister; - } - - rc = cfs_crypto_register(); - if (rc) { - CERROR("cfs_crypto_register: error %d\n", rc); - goto cleanup_deregister; - } - - lustre_insert_debugfs(lnet_table, lnet_debugfs_symlinks); - - CDEBUG(D_OTHER, "portals setup OK\n"); - return 0; - cleanup_deregister: - misc_deregister(&libcfs_dev); -cleanup_cpu: - cfs_cpu_fini(); - cleanup_debug: - libcfs_debug_cleanup(); - return rc; -} - -static void libcfs_exit(void) -{ - int rc; - - lustre_remove_debugfs(); - - if (cfs_rehash_wq) { - destroy_workqueue(cfs_rehash_wq); - cfs_rehash_wq = NULL; - } - - cfs_crypto_unregister(); - - misc_deregister(&libcfs_dev); - - cfs_cpu_fini(); - - rc = libcfs_debug_cleanup(); - if (rc) - pr_err("LustreError: libcfs_debug_cleanup: %d\n", rc); -} - -MODULE_AUTHOR("OpenSFS, Inc. 
<http://www.lustre.org/>"); -MODULE_DESCRIPTION("Lustre helper library"); -MODULE_VERSION(LIBCFS_VERSION); -MODULE_LICENSE("GPL"); - -module_init(libcfs_init); -module_exit(libcfs_exit); diff --git a/drivers/staging/lustre/lnet/libcfs/tracefile.c b/drivers/staging/lustre/lnet/libcfs/tracefile.c deleted file mode 100644 index 4affca750bc5..000000000000 --- a/drivers/staging/lustre/lnet/libcfs/tracefile.c +++ /dev/null @@ -1,1191 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -/* - * GPL HEADER START - * - * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 only, - * as published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, but - * WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * General Public License version 2 for more details (a copy is included - * in the LICENSE file that accompanied this code). - * - * You should have received a copy of the GNU General Public License - * version 2 along with this program; If not, see - * http://www.gnu.org/licenses/gpl-2.0.html - * - * GPL HEADER END - */ -/* - * Copyright (c) 2008, 2010, Oracle and/or its affiliates. All rights reserved. - * Use is subject to license terms. - * - * Copyright (c) 2012, Intel Corporation. - */ -/* - * This file is part of Lustre, http://www.lustre.org/ - * Lustre is a trademark of Sun Microsystems, Inc. 
- * - * libcfs/libcfs/tracefile.c - * - * Author: Zach Brown <zab@clusterfs.com> - * Author: Phil Schwan <phil@clusterfs.com> - */ - -#define DEBUG_SUBSYSTEM S_LNET -#define LUSTRE_TRACEFILE_PRIVATE -#define pr_fmt(fmt) "Lustre: " fmt -#include "tracefile.h" - -#include <linux/libcfs/libcfs.h> - -/* XXX move things up to the top, comment */ -union cfs_trace_data_union (*cfs_trace_data[TCD_MAX_TYPES])[NR_CPUS] __cacheline_aligned; - -char cfs_tracefile[TRACEFILE_NAME_SIZE]; -long long cfs_tracefile_size = CFS_TRACEFILE_SIZE; -static struct tracefiled_ctl trace_tctl; -static DEFINE_MUTEX(cfs_trace_thread_mutex); -static int thread_running; - -static atomic_t cfs_tage_allocated = ATOMIC_INIT(0); - -struct page_collection { - struct list_head pc_pages; - /* - * if this flag is set, collect_pages() will spill both - * ->tcd_daemon_pages and ->tcd_pages to the ->pc_pages. Otherwise, - * only ->tcd_pages are spilled. - */ - int pc_want_daemon_pages; -}; - -struct tracefiled_ctl { - struct completion tctl_start; - struct completion tctl_stop; - wait_queue_head_t tctl_waitq; - pid_t tctl_pid; - atomic_t tctl_shutdown; -}; - -/* - * small data-structure for each page owned by tracefiled. 
- */ -struct cfs_trace_page { - /* - * page itself - */ - struct page *page; - /* - * linkage into one of the lists in trace_data_union or - * page_collection - */ - struct list_head linkage; - /* - * number of bytes used within this page - */ - unsigned int used; - /* - * cpu that owns this page - */ - unsigned short cpu; - /* - * type(context) of this page - */ - unsigned short type; -}; - -static void put_pages_on_tcd_daemon_list(struct page_collection *pc, - struct cfs_trace_cpu_data *tcd); - -static inline struct cfs_trace_page * -cfs_tage_from_list(struct list_head *list) -{ - return list_entry(list, struct cfs_trace_page, linkage); -} - -static struct cfs_trace_page *cfs_tage_alloc(gfp_t gfp) -{ - struct page *page; - struct cfs_trace_page *tage; - - /* My caller is trying to free memory */ - if (!in_interrupt() && memory_pressure_get()) - return NULL; - - /* - * Don't spam console with allocation failures: they will be reported - * by upper layer anyway. - */ - gfp |= __GFP_NOWARN; - page = alloc_page(gfp); - if (!page) - return NULL; - - tage = kmalloc(sizeof(*tage), gfp); - if (!tage) { - __free_page(page); - return NULL; - } - - tage->page = page; - atomic_inc(&cfs_tage_allocated); - return tage; -} - -static void cfs_tage_free(struct cfs_trace_page *tage) -{ - __free_page(tage->page); - kfree(tage); - atomic_dec(&cfs_tage_allocated); -} - -static void cfs_tage_to_tail(struct cfs_trace_page *tage, - struct list_head *queue) -{ - list_move_tail(&tage->linkage, queue); -} - -int cfs_trace_refill_stock(struct cfs_trace_cpu_data *tcd, gfp_t gfp, - struct list_head *stock) -{ - int i; - - /* - * XXX nikita: do NOT call portals_debug_msg() (CDEBUG/ENTRY/EXIT) - * from here: this will lead to infinite recursion. 
- */ - - for (i = 0; i + tcd->tcd_cur_stock_pages < TCD_STOCK_PAGES ; ++i) { - struct cfs_trace_page *tage; - - tage = cfs_tage_alloc(gfp); - if (!tage) - break; - list_add_tail(&tage->linkage, stock); - } - return i; -} - -/* return a page that has 'len' bytes left at the end */ -static struct cfs_trace_page * -cfs_trace_get_tage_try(struct cfs_trace_cpu_data *tcd, unsigned long len) -{ - struct cfs_trace_page *tage; - - if (tcd->tcd_cur_pages > 0) { - __LASSERT(!list_empty(&tcd->tcd_pages)); - tage = cfs_tage_from_list(tcd->tcd_pages.prev); - if (tage->used + len <= PAGE_SIZE) - return tage; - } - - if (tcd->tcd_cur_pages < tcd->tcd_max_pages) { - if (tcd->tcd_cur_stock_pages > 0) { - tage = cfs_tage_from_list(tcd->tcd_stock_pages.prev); - --tcd->tcd_cur_stock_pages; - list_del_init(&tage->linkage); - } else { - tage = cfs_tage_alloc(GFP_ATOMIC); - if (unlikely(!tage)) { - if (!memory_pressure_get() || in_interrupt()) - pr_warn_ratelimited("cannot allocate a tage (%ld)\n", - tcd->tcd_cur_pages); - return NULL; - } - } - - tage->used = 0; - tage->cpu = smp_processor_id(); - tage->type = tcd->tcd_type; - list_add_tail(&tage->linkage, &tcd->tcd_pages); - tcd->tcd_cur_pages++; - - if (tcd->tcd_cur_pages > 8 && thread_running) { - struct tracefiled_ctl *tctl = &trace_tctl; - /* - * wake up tracefiled to process some pages. - */ - wake_up(&tctl->tctl_waitq); - } - return tage; - } - return NULL; -} - -static void cfs_tcd_shrink(struct cfs_trace_cpu_data *tcd) -{ - int pgcount = tcd->tcd_cur_pages / 10; - struct page_collection pc; - struct cfs_trace_page *tage; - struct cfs_trace_page *tmp; - - /* - * XXX nikita: do NOT call portals_debug_msg() (CDEBUG/ENTRY/EXIT) - * from here: this will lead to infinite recursion. 
- */ - - pr_warn_ratelimited("debug daemon buffer overflowed; discarding 10%% of pages (%d of %ld)\n", - pgcount + 1, tcd->tcd_cur_pages); - - INIT_LIST_HEAD(&pc.pc_pages); - - list_for_each_entry_safe(tage, tmp, &tcd->tcd_pages, linkage) { - if (!pgcount--) - break; - - list_move_tail(&tage->linkage, &pc.pc_pages); - tcd->tcd_cur_pages--; - } - put_pages_on_tcd_daemon_list(&pc, tcd); -} - -/* return a page that has 'len' bytes left at the end */ -static struct cfs_trace_page *cfs_trace_get_tage(struct cfs_trace_cpu_data *tcd, - unsigned long len) -{ - struct cfs_trace_page *tage; - - /* - * XXX nikita: do NOT call portals_debug_msg() (CDEBUG/ENTRY/EXIT) - * from here: this will lead to infinite recursion. - */ - - if (len > PAGE_SIZE) { - pr_err("cowardly refusing to write %lu bytes in a page\n", len); - return NULL; - } - - tage = cfs_trace_get_tage_try(tcd, len); - if (tage) - return tage; - if (thread_running) - cfs_tcd_shrink(tcd); - if (tcd->tcd_cur_pages > 0) { - tage = cfs_tage_from_list(tcd->tcd_pages.next); - tage->used = 0; - cfs_tage_to_tail(tage, &tcd->tcd_pages); - } - return tage; -} - -int libcfs_debug_msg(struct libcfs_debug_msg_data *msgdata, - const char *format, ...) -{ - va_list args; - int rc; - - va_start(args, format); - rc = libcfs_debug_vmsg2(msgdata, format, args, NULL); - va_end(args); - - return rc; -} -EXPORT_SYMBOL(libcfs_debug_msg); - -int libcfs_debug_vmsg2(struct libcfs_debug_msg_data *msgdata, - const char *format1, va_list args, - const char *format2, ...) 
-{ - struct cfs_trace_cpu_data *tcd = NULL; - struct ptldebug_header header = { 0 }; - struct cfs_trace_page *tage; - /* string_buf is used only if tcd != NULL, and is always set then */ - char *string_buf = NULL; - char *debug_buf; - int known_size; - int needed = 85; /* average message length */ - int max_nob; - va_list ap; - int depth; - int i; - int remain; - int mask = msgdata->msg_mask; - const char *file = kbasename(msgdata->msg_file); - struct cfs_debug_limit_state *cdls = msgdata->msg_cdls; - - tcd = cfs_trace_get_tcd(); - - /* cfs_trace_get_tcd() grabs a lock, which disables preemption and - * pins us to a particular CPU. This avoids an smp_processor_id() - * warning on Linux when debugging is enabled. - */ - cfs_set_ptldebug_header(&header, msgdata, CDEBUG_STACK()); - - if (!tcd) /* arch may not log in IRQ context */ - goto console; - - if (!tcd->tcd_cur_pages) - header.ph_flags |= PH_FLAG_FIRST_RECORD; - - if (tcd->tcd_shutting_down) { - cfs_trace_put_tcd(tcd); - tcd = NULL; - goto console; - } - - depth = __current_nesting_level(); - known_size = strlen(file) + 1 + depth; - if (msgdata->msg_fn) - known_size += strlen(msgdata->msg_fn) + 1; - - if (libcfs_debug_binary) - known_size += sizeof(header); - - /* - * '2' used because vsnprintf return real size required for output - * _without_ terminating NULL. - * if needed is to small for this format. 
- */ - for (i = 0; i < 2; i++) { - tage = cfs_trace_get_tage(tcd, needed + known_size + 1); - if (!tage) { - if (needed + known_size > PAGE_SIZE) - mask |= D_ERROR; - - cfs_trace_put_tcd(tcd); - tcd = NULL; - goto console; - } - - string_buf = (char *)page_address(tage->page) + - tage->used + known_size; - - max_nob = PAGE_SIZE - tage->used - known_size; - if (max_nob <= 0) { - pr_emerg("negative max_nob: %d\n", max_nob); - mask |= D_ERROR; - cfs_trace_put_tcd(tcd); - tcd = NULL; - goto console; - } - - needed = 0; - if (format1) { - va_copy(ap, args); - needed = vsnprintf(string_buf, max_nob, format1, ap); - va_end(ap); - } - - if (format2) { - remain = max_nob - needed; - if (remain < 0) - remain = 0; - - va_start(ap, format2); - needed += vsnprintf(string_buf + needed, remain, - format2, ap); - va_end(ap); - } - - if (needed < max_nob) /* well. printing ok.. */ - break; - } - - if (*(string_buf + needed - 1) != '\n') - pr_info("format at %s:%d:%s doesn't end in newline\n", file, - msgdata->msg_line, msgdata->msg_fn); - - header.ph_len = known_size + needed; - debug_buf = (char *)page_address(tage->page) + tage->used; - - if (libcfs_debug_binary) { - memcpy(debug_buf, &header, sizeof(header)); - tage->used += sizeof(header); - debug_buf += sizeof(header); - } - - /* indent message according to the nesting level */ - while (depth-- > 0) { - *(debug_buf++) = '.'; - ++tage->used; - } - - strcpy(debug_buf, file); - tage->used += strlen(file) + 1; - debug_buf += strlen(file) + 1; - - if (msgdata->msg_fn) { - strcpy(debug_buf, msgdata->msg_fn); - tage->used += strlen(msgdata->msg_fn) + 1; - debug_buf += strlen(msgdata->msg_fn) + 1; - } - - __LASSERT(debug_buf == string_buf); - - tage->used += needed; - __LASSERT(tage->used <= PAGE_SIZE); - -console: - if (!(mask & libcfs_printk)) { - /* no console output requested */ - if (tcd) - cfs_trace_put_tcd(tcd); - return 1; - } - - if (cdls) { - if (libcfs_console_ratelimit && - cdls->cdls_next && /* not first time ever */ - 
!cfs_time_after(cfs_time_current(), cdls->cdls_next)) { - /* skipping a console message */ - cdls->cdls_count++; - if (tcd) - cfs_trace_put_tcd(tcd); - return 1; - } - - if (cfs_time_after(cfs_time_current(), - cdls->cdls_next + libcfs_console_max_delay + - 10 * HZ)) { - /* last timeout was a long time ago */ - cdls->cdls_delay /= libcfs_console_backoff * 4; - } else { - cdls->cdls_delay *= libcfs_console_backoff; - } - - if (cdls->cdls_delay < libcfs_console_min_delay) - cdls->cdls_delay = libcfs_console_min_delay; - else if (cdls->cdls_delay > libcfs_console_max_delay) - cdls->cdls_delay = libcfs_console_max_delay; - - /* ensure cdls_next is never zero after it's been seen */ - cdls->cdls_next = (cfs_time_current() + cdls->cdls_delay) | 1; - } - - if (tcd) { - cfs_print_to_console(&header, mask, string_buf, needed, file, - msgdata->msg_fn); - cfs_trace_put_tcd(tcd); - } else { - string_buf = cfs_trace_get_console_buffer(); - - needed = 0; - if (format1) { - va_copy(ap, args); - needed = vsnprintf(string_buf, - CFS_TRACE_CONSOLE_BUFFER_SIZE, - format1, ap); - va_end(ap); - } - if (format2) { - remain = CFS_TRACE_CONSOLE_BUFFER_SIZE - needed; - if (remain > 0) { - va_start(ap, format2); - needed += vsnprintf(string_buf + needed, remain, - format2, ap); - va_end(ap); - } - } - cfs_print_to_console(&header, mask, - string_buf, needed, file, msgdata->msg_fn); - - put_cpu(); - } - - if (cdls && cdls->cdls_count) { - string_buf = cfs_trace_get_console_buffer(); - - needed = snprintf(string_buf, CFS_TRACE_CONSOLE_BUFFER_SIZE, - "Skipped %d previous similar message%s\n", - cdls->cdls_count, - (cdls->cdls_count > 1) ? 
"s" : ""); - - cfs_print_to_console(&header, mask, - string_buf, needed, file, msgdata->msg_fn); - - put_cpu(); - cdls->cdls_count = 0; - } - - return 0; -} -EXPORT_SYMBOL(libcfs_debug_vmsg2); - -void -cfs_trace_assertion_failed(const char *str, - struct libcfs_debug_msg_data *msgdata) -{ - struct ptldebug_header hdr; - - libcfs_panic_in_progress = 1; - libcfs_catastrophe = 1; - mb(); - - cfs_set_ptldebug_header(&hdr, msgdata, CDEBUG_STACK()); - - cfs_print_to_console(&hdr, D_EMERG, str, strlen(str), - msgdata->msg_file, msgdata->msg_fn); - - panic("Lustre debug assertion failure\n"); - - /* not reached */ -} - -static void -panic_collect_pages(struct page_collection *pc) -{ - /* Do the collect_pages job on a single CPU: assumes that all other - * CPUs have been stopped during a panic. If this isn't true for some - * arch, this will have to be implemented separately in each arch. - */ - struct cfs_trace_cpu_data *tcd; - int i; - int j; - - INIT_LIST_HEAD(&pc->pc_pages); - - cfs_tcd_for_each(tcd, i, j) { - list_splice_init(&tcd->tcd_pages, &pc->pc_pages); - tcd->tcd_cur_pages = 0; - - if (pc->pc_want_daemon_pages) { - list_splice_init(&tcd->tcd_daemon_pages, &pc->pc_pages); - tcd->tcd_cur_daemon_pages = 0; - } - } -} - -static void collect_pages_on_all_cpus(struct page_collection *pc) -{ - struct cfs_trace_cpu_data *tcd; - int i, cpu; - - for_each_possible_cpu(cpu) { - cfs_tcd_for_each_type_lock(tcd, i, cpu) { - list_splice_init(&tcd->tcd_pages, &pc->pc_pages); - tcd->tcd_cur_pages = 0; - if (pc->pc_want_daemon_pages) { - list_splice_init(&tcd->tcd_daemon_pages, - &pc->pc_pages); - tcd->tcd_cur_daemon_pages = 0; - } - } - } -} - -static void collect_pages(struct page_collection *pc) -{ - INIT_LIST_HEAD(&pc->pc_pages); - - if (libcfs_panic_in_progress) - panic_collect_pages(pc); - else - collect_pages_on_all_cpus(pc); -} - -static void put_pages_back_on_all_cpus(struct page_collection *pc) -{ - struct cfs_trace_cpu_data *tcd; - struct list_head *cur_head; - struct 
cfs_trace_page *tage; - struct cfs_trace_page *tmp; - int i, cpu; - - for_each_possible_cpu(cpu) { - cfs_tcd_for_each_type_lock(tcd, i, cpu) { - cur_head = tcd->tcd_pages.next; - - list_for_each_entry_safe(tage, tmp, &pc->pc_pages, - linkage) { - __LASSERT_TAGE_INVARIANT(tage); - - if (tage->cpu != cpu || tage->type != i) - continue; - - cfs_tage_to_tail(tage, cur_head); - tcd->tcd_cur_pages++; - } - } - } -} - -static void put_pages_back(struct page_collection *pc) -{ - if (!libcfs_panic_in_progress) - put_pages_back_on_all_cpus(pc); -} - -/* Add pages to a per-cpu debug daemon ringbuffer. This buffer makes sure that - * we have a good amount of data at all times for dumping during an LBUG, even - * if we have been steadily writing (and otherwise discarding) pages via the - * debug daemon. - */ -static void put_pages_on_tcd_daemon_list(struct page_collection *pc, - struct cfs_trace_cpu_data *tcd) -{ - struct cfs_trace_page *tage; - struct cfs_trace_page *tmp; - - list_for_each_entry_safe(tage, tmp, &pc->pc_pages, linkage) { - __LASSERT_TAGE_INVARIANT(tage); - - if (tage->cpu != tcd->tcd_cpu || tage->type != tcd->tcd_type) - continue; - - cfs_tage_to_tail(tage, &tcd->tcd_daemon_pages); - tcd->tcd_cur_daemon_pages++; - - if (tcd->tcd_cur_daemon_pages > tcd->tcd_max_pages) { - struct cfs_trace_page *victim; - - __LASSERT(!list_empty(&tcd->tcd_daemon_pages)); - victim = cfs_tage_from_list(tcd->tcd_daemon_pages.next); - - __LASSERT_TAGE_INVARIANT(victim); - - list_del(&victim->linkage); - cfs_tage_free(victim); - tcd->tcd_cur_daemon_pages--; - } - } -} - -static void put_pages_on_daemon_list(struct page_collection *pc) -{ - struct cfs_trace_cpu_data *tcd; - int i, cpu; - - for_each_possible_cpu(cpu) { - cfs_tcd_for_each_type_lock(tcd, i, cpu) - put_pages_on_tcd_daemon_list(pc, tcd); - } -} - -void cfs_trace_debug_print(void) -{ - struct page_collection pc; - struct cfs_trace_page *tage; - struct cfs_trace_page *tmp; - - pc.pc_want_daemon_pages = 1; - 
collect_pages(&pc); - list_for_each_entry_safe(tage, tmp, &pc.pc_pages, linkage) { - char *p, *file, *fn; - struct page *page; - - __LASSERT_TAGE_INVARIANT(tage); - - page = tage->page; - p = page_address(page); - while (p < ((char *)page_address(page) + tage->used)) { - struct ptldebug_header *hdr; - int len; - - hdr = (void *)p; - p += sizeof(*hdr); - file = p; - p += strlen(file) + 1; - fn = p; - p += strlen(fn) + 1; - len = hdr->ph_len - (int)(p - (char *)hdr); - - cfs_print_to_console(hdr, D_EMERG, p, len, file, fn); - - p += len; - } - - list_del(&tage->linkage); - cfs_tage_free(tage); - } -} - -int cfs_tracefile_dump_all_pages(char *filename) -{ - struct page_collection pc; - struct file *filp; - struct cfs_trace_page *tage; - struct cfs_trace_page *tmp; - char *buf; - mm_segment_t __oldfs; - int rc; - - cfs_tracefile_write_lock(); - - filp = filp_open(filename, O_CREAT | O_EXCL | O_WRONLY | O_LARGEFILE, - 0600); - if (IS_ERR(filp)) { - rc = PTR_ERR(filp); - filp = NULL; - pr_err("LustreError: can't open %s for dump: rc %d\n", - filename, rc); - goto out; - } - - pc.pc_want_daemon_pages = 1; - collect_pages(&pc); - if (list_empty(&pc.pc_pages)) { - rc = 0; - goto close; - } - __oldfs = get_fs(); - set_fs(get_ds()); - - /* ok, for now, just write the pages. 
in the future we'll be building - * iobufs with the pages and calling generic_direct_IO - */ - list_for_each_entry_safe(tage, tmp, &pc.pc_pages, linkage) { - __LASSERT_TAGE_INVARIANT(tage); - - buf = kmap(tage->page); - rc = kernel_write(filp, buf, tage->used, &filp->f_pos); - kunmap(tage->page); - - if (rc != (int)tage->used) { - pr_warn("wanted to write %u but wrote %d\n", tage->used, - rc); - put_pages_back(&pc); - __LASSERT(list_empty(&pc.pc_pages)); - break; - } - list_del(&tage->linkage); - cfs_tage_free(tage); - } - set_fs(__oldfs); - rc = vfs_fsync(filp, 1); - if (rc) - pr_err("sync returns %d\n", rc); -close: - filp_close(filp, NULL); -out: - cfs_tracefile_write_unlock(); - return rc; -} - -void cfs_trace_flush_pages(void) -{ - struct page_collection pc; - struct cfs_trace_page *tage; - struct cfs_trace_page *tmp; - - pc.pc_want_daemon_pages = 1; - collect_pages(&pc); - list_for_each_entry_safe(tage, tmp, &pc.pc_pages, linkage) { - __LASSERT_TAGE_INVARIANT(tage); - - list_del(&tage->linkage); - cfs_tage_free(tage); - } -} - -int cfs_trace_copyin_string(char *knl_buffer, int knl_buffer_nob, - const char __user *usr_buffer, int usr_buffer_nob) -{ - int nob; - - if (usr_buffer_nob > knl_buffer_nob) - return -EOVERFLOW; - - if (copy_from_user((void *)knl_buffer, - usr_buffer, usr_buffer_nob)) - return -EFAULT; - - nob = strnlen(knl_buffer, usr_buffer_nob); - while (--nob >= 0) /* strip trailing whitespace */ - if (!isspace(knl_buffer[nob])) - break; - - if (nob < 0) /* empty string */ - return -EINVAL; - - if (nob == knl_buffer_nob) /* no space to terminate */ - return -EOVERFLOW; - - knl_buffer[nob + 1] = 0; /* terminate */ - return 0; -} -EXPORT_SYMBOL(cfs_trace_copyin_string); - -int cfs_trace_copyout_string(char __user *usr_buffer, int usr_buffer_nob, - const char *knl_buffer, char *append) -{ - /* - * NB if 'append' != NULL, it's a single character to append to the - * copied out string - usually "\n" or "" (i.e. 
a terminating zero byte) - */ - int nob = strlen(knl_buffer); - - if (nob > usr_buffer_nob) - nob = usr_buffer_nob; - - if (copy_to_user(usr_buffer, knl_buffer, nob)) - return -EFAULT; - - if (append && nob < usr_buffer_nob) { - if (copy_to_user(usr_buffer + nob, append, 1)) - return -EFAULT; - - nob++; - } - - return nob; -} -EXPORT_SYMBOL(cfs_trace_copyout_string); - -int cfs_trace_allocate_string_buffer(char **str, int nob) -{ - if (nob > 2 * PAGE_SIZE) /* string must be "sensible" */ - return -EINVAL; - - *str = kmalloc(nob, GFP_KERNEL | __GFP_ZERO); - if (!*str) - return -ENOMEM; - - return 0; -} - -int cfs_trace_dump_debug_buffer_usrstr(void __user *usr_str, int usr_str_nob) -{ - char *str; - int rc; - - rc = cfs_trace_allocate_string_buffer(&str, usr_str_nob + 1); - if (rc) - return rc; - - rc = cfs_trace_copyin_string(str, usr_str_nob + 1, - usr_str, usr_str_nob); - if (rc) - goto out; - - if (str[0] != '/') { - rc = -EINVAL; - goto out; - } - rc = cfs_tracefile_dump_all_pages(str); -out: - kfree(str); - return rc; -} - -int cfs_trace_daemon_command(char *str) -{ - int rc = 0; - - cfs_tracefile_write_lock(); - - if (!strcmp(str, "stop")) { - cfs_tracefile_write_unlock(); - cfs_trace_stop_thread(); - cfs_tracefile_write_lock(); - memset(cfs_tracefile, 0, sizeof(cfs_tracefile)); - - } else if (!strncmp(str, "size=", 5)) { - unsigned long tmp; - - rc = kstrtoul(str + 5, 10, &tmp); - if (!rc) { - if (tmp < 10 || tmp > 20480) - cfs_tracefile_size = CFS_TRACEFILE_SIZE; - else - cfs_tracefile_size = tmp << 20; - } - } else if (strlen(str) >= sizeof(cfs_tracefile)) { - rc = -ENAMETOOLONG; - } else if (str[0] != '/') { - rc = -EINVAL; - } else { - strcpy(cfs_tracefile, str); - - pr_info("debug daemon will attempt to start writing to %s (%lukB max)\n", - cfs_tracefile, - (long)(cfs_tracefile_size >> 10)); - - cfs_trace_start_thread(); - } - - cfs_tracefile_write_unlock(); - return rc; -} - -int cfs_trace_daemon_command_usrstr(void __user *usr_str, int usr_str_nob) -{ 
- char *str; - int rc; - - rc = cfs_trace_allocate_string_buffer(&str, usr_str_nob + 1); - if (rc) - return rc; - - rc = cfs_trace_copyin_string(str, usr_str_nob + 1, - usr_str, usr_str_nob); - if (!rc) - rc = cfs_trace_daemon_command(str); - - kfree(str); - return rc; -} - -int cfs_trace_set_debug_mb(int mb) -{ - int i; - int j; - int pages; - int limit = cfs_trace_max_debug_mb(); - struct cfs_trace_cpu_data *tcd; - - if (mb < num_possible_cpus()) { - pr_warn("%d MB is too small for debug buffer size, setting it to %d MB.\n", - mb, num_possible_cpus()); - mb = num_possible_cpus(); - } - - if (mb > limit) { - pr_warn("%d MB is too large for debug buffer size, setting it to %d MB.\n", - mb, limit); - mb = limit; - } - - mb /= num_possible_cpus(); - pages = mb << (20 - PAGE_SHIFT); - - cfs_tracefile_write_lock(); - - cfs_tcd_for_each(tcd, i, j) - tcd->tcd_max_pages = (pages * tcd->tcd_pages_factor) / 100; - - cfs_tracefile_write_unlock(); - - return 0; -} - -int cfs_trace_get_debug_mb(void) -{ - int i; - int j; - struct cfs_trace_cpu_data *tcd; - int total_pages = 0; - - cfs_tracefile_read_lock(); - - cfs_tcd_for_each(tcd, i, j) - total_pages += tcd->tcd_max_pages; - - cfs_tracefile_read_unlock(); - - return (total_pages >> (20 - PAGE_SHIFT)) + 1; -} - -static int tracefiled(void *arg) -{ - struct page_collection pc; - struct tracefiled_ctl *tctl = arg; - struct cfs_trace_page *tage; - struct cfs_trace_page *tmp; - struct file *filp; - char *buf; - int last_loop = 0; - int rc; - - /* we're started late enough that we pick up init's fs context */ - /* this is so broken in uml? what on earth is going on? 
*/ - - complete(&tctl->tctl_start); - - while (1) { - wait_queue_entry_t __wait; - - pc.pc_want_daemon_pages = 0; - collect_pages(&pc); - if (list_empty(&pc.pc_pages)) - goto end_loop; - - filp = NULL; - cfs_tracefile_read_lock(); - if (cfs_tracefile[0]) { - filp = filp_open(cfs_tracefile, - O_CREAT | O_RDWR | O_LARGEFILE, - 0600); - if (IS_ERR(filp)) { - rc = PTR_ERR(filp); - filp = NULL; - pr_warn("couldn't open %s: %d\n", cfs_tracefile, - rc); - } - } - cfs_tracefile_read_unlock(); - if (!filp) { - put_pages_on_daemon_list(&pc); - __LASSERT(list_empty(&pc.pc_pages)); - goto end_loop; - } - - list_for_each_entry_safe(tage, tmp, &pc.pc_pages, linkage) { - static loff_t f_pos; - - __LASSERT_TAGE_INVARIANT(tage); - - if (f_pos >= (off_t)cfs_tracefile_size) - f_pos = 0; - else if (f_pos > i_size_read(file_inode(filp))) - f_pos = i_size_read(file_inode(filp)); - - buf = kmap(tage->page); - rc = kernel_write(filp, buf, tage->used, &f_pos); - kunmap(tage->page); - - if (rc != (int)tage->used) { - pr_warn("wanted to write %u but wrote %d\n", - tage->used, rc); - put_pages_back(&pc); - __LASSERT(list_empty(&pc.pc_pages)); - break; - } - } - - filp_close(filp, NULL); - put_pages_on_daemon_list(&pc); - if (!list_empty(&pc.pc_pages)) { - int i; - - pr_alert("trace pages aren't empty\n"); - pr_err("total cpus(%d): ", num_possible_cpus()); - for (i = 0; i < num_possible_cpus(); i++) - if (cpu_online(i)) - pr_cont("%d(on) ", i); - else - pr_cont("%d(off) ", i); - pr_cont("\n"); - - i = 0; - list_for_each_entry_safe(tage, tmp, &pc.pc_pages, - linkage) - pr_err("page %d belongs to cpu %d\n", - ++i, tage->cpu); - pr_err("There are %d pages unwritten\n", i); - } - __LASSERT(list_empty(&pc.pc_pages)); -end_loop: - if (atomic_read(&tctl->tctl_shutdown)) { - if (!last_loop) { - last_loop = 1; - continue; - } else { - break; - } - } - init_waitqueue_entry(&__wait, current); - add_wait_queue(&tctl->tctl_waitq, &__wait); - set_current_state(TASK_INTERRUPTIBLE); - schedule_timeout(HZ); - 
remove_wait_queue(&tctl->tctl_waitq, &__wait); - } - complete(&tctl->tctl_stop); - return 0; -} - -int cfs_trace_start_thread(void) -{ - struct tracefiled_ctl *tctl = &trace_tctl; - struct task_struct *task; - int rc = 0; - - mutex_lock(&cfs_trace_thread_mutex); - if (thread_running) - goto out; - - init_completion(&tctl->tctl_start); - init_completion(&tctl->tctl_stop); - init_waitqueue_head(&tctl->tctl_waitq); - atomic_set(&tctl->tctl_shutdown, 0); - - task = kthread_run(tracefiled, tctl, "ktracefiled"); - if (IS_ERR(task)) { - rc = PTR_ERR(task); - goto out; - } - - wait_for_completion(&tctl->tctl_start); - thread_running = 1; -out: - mutex_unlock(&cfs_trace_thread_mutex); - return rc; -} - -void cfs_trace_stop_thread(void) -{ - struct tracefiled_ctl *tctl = &trace_tctl; - - mutex_lock(&cfs_trace_thread_mutex); - if (thread_running) { - pr_info("shutting down debug daemon thread...\n"); - atomic_set(&tctl->tctl_shutdown, 1); - wait_for_completion(&tctl->tctl_stop); - thread_running = 0; - } - mutex_unlock(&cfs_trace_thread_mutex); -} - -int cfs_tracefile_init(int max_pages) -{ - struct cfs_trace_cpu_data *tcd; - int i; - int j; - int rc; - int factor; - - rc = cfs_tracefile_init_arch(); - if (rc) - return rc; - - cfs_tcd_for_each(tcd, i, j) { - /* tcd_pages_factor is initialized int tracefile_init_arch. 
/*
 * Release all trace state: free the pages still queued on every CPU's
 * trace data, then tear down the architecture-specific part.
 */
static void cfs_trace_cleanup(void)
{
	trace_cleanup_on_all_cpus();

	cfs_tracefile_fini_arch();
}
See the GNU - * General Public License version 2 for more details (a copy is included - * in the LICENSE file that accompanied this code). - * - * You should have received a copy of the GNU General Public License - * version 2 along with this program; If not, see - * http://www.gnu.org/licenses/gpl-2.0.html - * - * GPL HEADER END - */ -/* - * Copyright (c) 2008, 2010, Oracle and/or its affiliates. All rights reserved. - * Use is subject to license terms. - * - * Copyright (c) 2012, Intel Corporation. - */ -/* - * This file is part of Lustre, http://www.lustre.org/ - * Lustre is a trademark of Sun Microsystems, Inc. - */ - -#ifndef __LIBCFS_TRACEFILE_H__ -#define __LIBCFS_TRACEFILE_H__ - -#include <linux/libcfs/libcfs.h> - -enum cfs_trace_buf_type { - CFS_TCD_TYPE_PROC = 0, - CFS_TCD_TYPE_SOFTIRQ, - CFS_TCD_TYPE_IRQ, - CFS_TCD_TYPE_MAX -}; - -/* trace file lock routines */ - -#define TRACEFILE_NAME_SIZE 1024 -extern char cfs_tracefile[TRACEFILE_NAME_SIZE]; -extern long long cfs_tracefile_size; - -void libcfs_run_debug_log_upcall(char *file); - -int cfs_tracefile_init_arch(void); -void cfs_tracefile_fini_arch(void); - -void cfs_tracefile_read_lock(void); -void cfs_tracefile_read_unlock(void); -void cfs_tracefile_write_lock(void); -void cfs_tracefile_write_unlock(void); - -int cfs_tracefile_dump_all_pages(char *filename); -void cfs_trace_debug_print(void); -void cfs_trace_flush_pages(void); -int cfs_trace_start_thread(void); -void cfs_trace_stop_thread(void); -int cfs_tracefile_init(int max_pages); -void cfs_tracefile_exit(void); - -int cfs_trace_copyin_string(char *knl_buffer, int knl_buffer_nob, - const char __user *usr_buffer, int usr_buffer_nob); -int cfs_trace_copyout_string(char __user *usr_buffer, int usr_buffer_nob, - const char *knl_str, char *append); -int cfs_trace_allocate_string_buffer(char **str, int nob); -int cfs_trace_dump_debug_buffer_usrstr(void __user *usr_str, int usr_str_nob); -int cfs_trace_daemon_command(char *str); -int 
cfs_trace_daemon_command_usrstr(void __user *usr_str, int usr_str_nob); -int cfs_trace_set_debug_mb(int mb); -int cfs_trace_get_debug_mb(void); - -void libcfs_debug_dumplog_internal(void *arg); -void libcfs_register_panic_notifier(void); -void libcfs_unregister_panic_notifier(void); -extern int libcfs_panic_in_progress; -int cfs_trace_max_debug_mb(void); - -#define TCD_MAX_PAGES (5 << (20 - PAGE_SHIFT)) -#define TCD_STOCK_PAGES (TCD_MAX_PAGES) -#define CFS_TRACEFILE_SIZE (500 << 20) - -#ifdef LUSTRE_TRACEFILE_PRIVATE - -/* - * Private declare for tracefile - */ -#define TCD_MAX_PAGES (5 << (20 - PAGE_SHIFT)) -#define TCD_STOCK_PAGES (TCD_MAX_PAGES) - -#define CFS_TRACEFILE_SIZE (500 << 20) - -/* - * Size of a buffer for sprinting console messages if we can't get a page - * from system - */ -#define CFS_TRACE_CONSOLE_BUFFER_SIZE 1024 - -union cfs_trace_data_union { - struct cfs_trace_cpu_data { - /* - * Even though this structure is meant to be per-CPU, locking - * is needed because in some places the data may be accessed - * from other CPUs. This lock is directly used in trace_get_tcd - * and trace_put_tcd, which are called in libcfs_debug_vmsg2 and - * tcd_for_each_type_lock - */ - spinlock_t tcd_lock; - unsigned long tcd_lock_flags; - - /* - * pages with trace records not yet processed by tracefiled. - */ - struct list_head tcd_pages; - /* number of pages on ->tcd_pages */ - unsigned long tcd_cur_pages; - - /* - * pages with trace records already processed by - * tracefiled. These pages are kept in memory, so that some - * portion of log can be written in the event of LBUG. This - * list is maintained in LRU order. - * - * Pages are moved to ->tcd_daemon_pages by tracefiled() - * (put_pages_on_daemon_list()). LRU pages from this list are - * discarded when list grows too large. 
- */ - struct list_head tcd_daemon_pages; - /* number of pages on ->tcd_daemon_pages */ - unsigned long tcd_cur_daemon_pages; - - /* - * Maximal number of pages allowed on ->tcd_pages and - * ->tcd_daemon_pages each. - * Always TCD_MAX_PAGES * tcd_pages_factor / 100 in current - * implementation. - */ - unsigned long tcd_max_pages; - - /* - * preallocated pages to write trace records into. Pages from - * ->tcd_stock_pages are moved to ->tcd_pages by - * portals_debug_msg(). - * - * This list is necessary, because on some platforms it's - * impossible to perform efficient atomic page allocation in a - * non-blockable context. - * - * Such platforms fill ->tcd_stock_pages "on occasion", when - * tracing code is entered in blockable context. - * - * trace_get_tage_try() tries to get a page from - * ->tcd_stock_pages first and resorts to atomic page - * allocation only if this queue is empty. ->tcd_stock_pages - * is replenished when tracing code is entered in blocking - * context (darwin-tracefile.c:trace_get_tcd()). We try to - * maintain TCD_STOCK_PAGES (40 by default) pages in this - * queue. Atomic allocation is only required if more than - * TCD_STOCK_PAGES pagesful are consumed by trace records all - * emitted in non-blocking contexts. Which is quite unlikely. - */ - struct list_head tcd_stock_pages; - /* number of pages on ->tcd_stock_pages */ - unsigned long tcd_cur_stock_pages; - - unsigned short tcd_shutting_down; - unsigned short tcd_cpu; - unsigned short tcd_type; - /* The factors to share debug memory. 
*/ - unsigned short tcd_pages_factor; - } tcd; - char __pad[L1_CACHE_ALIGN(sizeof(struct cfs_trace_cpu_data))]; -}; - -#define TCD_MAX_TYPES 8 -extern union cfs_trace_data_union (*cfs_trace_data[TCD_MAX_TYPES])[NR_CPUS]; - -#define cfs_tcd_for_each(tcd, i, j) \ - for (i = 0; cfs_trace_data[i]; i++) \ - for (j = 0, ((tcd) = &(*cfs_trace_data[i])[j].tcd); \ - j < num_possible_cpus(); \ - j++, (tcd) = &(*cfs_trace_data[i])[j].tcd) - -#define cfs_tcd_for_each_type_lock(tcd, i, cpu) \ - for (i = 0; cfs_trace_data[i] && \ - (tcd = &(*cfs_trace_data[i])[cpu].tcd) && \ - cfs_trace_lock_tcd(tcd, 1); cfs_trace_unlock_tcd(tcd, 1), i++) - -void cfs_set_ptldebug_header(struct ptldebug_header *header, - struct libcfs_debug_msg_data *m, - unsigned long stack); -void cfs_print_to_console(struct ptldebug_header *hdr, int mask, - const char *buf, int len, const char *file, - const char *fn); - -int cfs_trace_lock_tcd(struct cfs_trace_cpu_data *tcd, int walking); -void cfs_trace_unlock_tcd(struct cfs_trace_cpu_data *tcd, int walking); - -extern char *cfs_trace_console_buffers[NR_CPUS][CFS_TCD_TYPE_MAX]; -enum cfs_trace_buf_type cfs_trace_buf_idx_get(void); - -static inline char * -cfs_trace_get_console_buffer(void) -{ - unsigned int i = get_cpu(); - unsigned int j = cfs_trace_buf_idx_get(); - - return cfs_trace_console_buffers[i][j]; -} - -static inline struct cfs_trace_cpu_data * -cfs_trace_get_tcd(void) -{ - struct cfs_trace_cpu_data *tcd = - &(*cfs_trace_data[cfs_trace_buf_idx_get()])[get_cpu()].tcd; - - cfs_trace_lock_tcd(tcd, 0); - - return tcd; -} - -static inline void cfs_trace_put_tcd(struct cfs_trace_cpu_data *tcd) -{ - cfs_trace_unlock_tcd(tcd, 0); - - put_cpu(); -} - -int cfs_trace_refill_stock(struct cfs_trace_cpu_data *tcd, gfp_t gfp, - struct list_head *stock); - -void cfs_trace_assertion_failed(const char *str, - struct libcfs_debug_msg_data *m); - -/* ASSERTION that is safe to use within the debug system */ -#define __LASSERT(cond) \ -do { \ - if (unlikely(!(cond))) { 
\ - LIBCFS_DEBUG_MSG_DATA_DECL(msgdata, D_EMERG, NULL); \ - cfs_trace_assertion_failed("ASSERTION("#cond") failed", \ - &msgdata); \ - } \ -} while (0) - -#define __LASSERT_TAGE_INVARIANT(tage) \ -do { \ - __LASSERT(tage); \ - __LASSERT(tage->page); \ - __LASSERT(tage->used <= PAGE_SIZE); \ - __LASSERT(page_count(tage->page) > 0); \ -} while (0) - -#endif /* LUSTRE_TRACEFILE_PRIVATE */ - -#endif /* __LIBCFS_TRACEFILE_H__ */ diff --git a/drivers/staging/lustre/lnet/lnet/Makefile b/drivers/staging/lustre/lnet/lnet/Makefile deleted file mode 100644 index 0a9d70924fe0..000000000000 --- a/drivers/staging/lustre/lnet/lnet/Makefile +++ /dev/null @@ -1,10 +0,0 @@ -# SPDX-License-Identifier: GPL-2.0 -subdir-ccflags-y += -I$(srctree)/drivers/staging/lustre/include -subdir-ccflags-y += -I$(srctree)/drivers/staging/lustre/lustre/include - -obj-$(CONFIG_LNET) += lnet.o - -lnet-y := api-ni.o config.o nidstrings.o net_fault.o \ - lib-me.o lib-msg.o lib-eq.o lib-md.o lib-ptl.o \ - lib-socket.o lib-move.o module.o lo.o \ - router.o router_proc.o acceptor.o peer.o diff --git a/drivers/staging/lustre/lnet/lnet/acceptor.c b/drivers/staging/lustre/lnet/lnet/acceptor.c deleted file mode 100644 index 5648f17eddc0..000000000000 --- a/drivers/staging/lustre/lnet/lnet/acceptor.c +++ /dev/null @@ -1,501 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -/* - * GPL HEADER START - * - * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 only, - * as published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, but - * WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * General Public License version 2 for more details (a copy is included - * in the LICENSE file that accompanied this code). 
- * - * You should have received a copy of the GNU General Public License - * version 2 along with this program; If not, see - * http://www.gnu.org/licenses/gpl-2.0.html - * - * GPL HEADER END - */ -/* - * Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved. - * Use is subject to license terms. - * - * Copyright (c) 2011, 2015, Intel Corporation. - */ -/* - * This file is part of Lustre, http://www.lustre.org/ - * Lustre is a trademark of Sun Microsystems, Inc. - */ - -#define DEBUG_SUBSYSTEM S_LNET -#include <linux/completion.h> -#include <net/sock.h> -#include <linux/lnet/lib-lnet.h> - -static int accept_port = 988; -static int accept_backlog = 127; -static int accept_timeout = 5; - -static struct { - int pta_shutdown; - struct socket *pta_sock; - struct completion pta_signal; -} lnet_acceptor_state = { - .pta_shutdown = 1 -}; - -int -lnet_acceptor_port(void) -{ - return accept_port; -} -EXPORT_SYMBOL(lnet_acceptor_port); - -static inline int -lnet_accept_magic(__u32 magic, __u32 constant) -{ - return (magic == constant || - magic == __swab32(constant)); -} - -static char *accept = "secure"; - -module_param(accept, charp, 0444); -MODULE_PARM_DESC(accept, "Accept connections (secure|all|none)"); -module_param(accept_port, int, 0444); -MODULE_PARM_DESC(accept_port, "Acceptor's port (same on all nodes)"); -module_param(accept_backlog, int, 0444); -MODULE_PARM_DESC(accept_backlog, "Acceptor's listen backlog"); -module_param(accept_timeout, int, 0644); -MODULE_PARM_DESC(accept_timeout, "Acceptor's timeout (seconds)"); - -static char *accept_type; - -static int -lnet_acceptor_get_tunables(void) -{ - /* - * Userland acceptor uses 'accept_type' instead of 'accept', due to - * conflict with 'accept(2)', but kernel acceptor still uses 'accept' - * for compatibility. Hence the trick. 
- */ - accept_type = accept; - return 0; -} - -int -lnet_acceptor_timeout(void) -{ - return accept_timeout; -} -EXPORT_SYMBOL(lnet_acceptor_timeout); - -void -lnet_connect_console_error(int rc, lnet_nid_t peer_nid, - __u32 peer_ip, int peer_port) -{ - switch (rc) { - /* "normal" errors */ - case -ECONNREFUSED: - CNETERR("Connection to %s at host %pI4h on port %d was refused: check that Lustre is running on that node.\n", - libcfs_nid2str(peer_nid), - &peer_ip, peer_port); - break; - case -EHOSTUNREACH: - case -ENETUNREACH: - CNETERR("Connection to %s at host %pI4h was unreachable: the network or that node may be down, or Lustre may be misconfigured.\n", - libcfs_nid2str(peer_nid), &peer_ip); - break; - case -ETIMEDOUT: - CNETERR("Connection to %s at host %pI4h on port %d took too long: that node may be hung or experiencing high load.\n", - libcfs_nid2str(peer_nid), - &peer_ip, peer_port); - break; - case -ECONNRESET: - LCONSOLE_ERROR_MSG(0x11b, "Connection to %s at host %pI4h on port %d was reset: is it running a compatible version of Lustre and is %s one of its NIDs?\n", - libcfs_nid2str(peer_nid), - &peer_ip, peer_port, - libcfs_nid2str(peer_nid)); - break; - case -EPROTO: - LCONSOLE_ERROR_MSG(0x11c, "Protocol error connecting to %s at host %pI4h on port %d: is it running a compatible version of Lustre?\n", - libcfs_nid2str(peer_nid), - &peer_ip, peer_port); - break; - case -EADDRINUSE: - LCONSOLE_ERROR_MSG(0x11d, "No privileged ports available to connect to %s at host %pI4h on port %d\n", - libcfs_nid2str(peer_nid), - &peer_ip, peer_port); - break; - default: - LCONSOLE_ERROR_MSG(0x11e, "Unexpected error %d connecting to %s at host %pI4h on port %d\n", - rc, libcfs_nid2str(peer_nid), - &peer_ip, peer_port); - break; - } -} -EXPORT_SYMBOL(lnet_connect_console_error); - -int -lnet_connect(struct socket **sockp, lnet_nid_t peer_nid, - __u32 local_ip, __u32 peer_ip, int peer_port) -{ - struct lnet_acceptor_connreq cr; - struct socket *sock; - int rc; - int port; - 
int fatal; - - BUILD_BUG_ON(sizeof(cr) > 16); /* too big to be on the stack */ - - for (port = LNET_ACCEPTOR_MAX_RESERVED_PORT; - port >= LNET_ACCEPTOR_MIN_RESERVED_PORT; - --port) { - /* Iterate through reserved ports. */ - - rc = lnet_sock_connect(&sock, &fatal, local_ip, port, peer_ip, - peer_port); - if (rc) { - if (fatal) - goto failed; - continue; - } - - BUILD_BUG_ON(LNET_PROTO_ACCEPTOR_VERSION != 1); - - cr.acr_magic = LNET_PROTO_ACCEPTOR_MAGIC; - cr.acr_version = LNET_PROTO_ACCEPTOR_VERSION; - cr.acr_nid = peer_nid; - - if (the_lnet.ln_testprotocompat) { - /* single-shot proto check */ - lnet_net_lock(LNET_LOCK_EX); - if (the_lnet.ln_testprotocompat & 4) { - cr.acr_version++; - the_lnet.ln_testprotocompat &= ~4; - } - if (the_lnet.ln_testprotocompat & 8) { - cr.acr_magic = LNET_PROTO_MAGIC; - the_lnet.ln_testprotocompat &= ~8; - } - lnet_net_unlock(LNET_LOCK_EX); - } - - rc = lnet_sock_write(sock, &cr, sizeof(cr), accept_timeout); - if (rc) - goto failed_sock; - - *sockp = sock; - return 0; - } - - rc = -EADDRINUSE; - goto failed; - - failed_sock: - sock_release(sock); - failed: - lnet_connect_console_error(rc, peer_nid, peer_ip, peer_port); - return rc; -} -EXPORT_SYMBOL(lnet_connect); - -static int -lnet_accept(struct socket *sock, __u32 magic) -{ - struct lnet_acceptor_connreq cr; - __u32 peer_ip; - int peer_port; - int rc; - int flip; - struct lnet_ni *ni; - char *str; - - LASSERT(sizeof(cr) <= 16); /* not too big for the stack */ - - rc = lnet_sock_getaddr(sock, 1, &peer_ip, &peer_port); - LASSERT(!rc); /* we succeeded before */ - - if (!lnet_accept_magic(magic, LNET_PROTO_ACCEPTOR_MAGIC)) { - if (lnet_accept_magic(magic, LNET_PROTO_MAGIC)) { - /* - * future version compatibility! - * When LNET unifies protocols over all LNDs, the first - * thing sent will be a version query. 
I send back - * LNET_PROTO_ACCEPTOR_MAGIC to tell her I'm "old" - */ - memset(&cr, 0, sizeof(cr)); - cr.acr_magic = LNET_PROTO_ACCEPTOR_MAGIC; - cr.acr_version = LNET_PROTO_ACCEPTOR_VERSION; - rc = lnet_sock_write(sock, &cr, sizeof(cr), - accept_timeout); - - if (rc) - CERROR("Error sending magic+version in response to LNET magic from %pI4h: %d\n", - &peer_ip, rc); - return -EPROTO; - } - - if (lnet_accept_magic(magic, LNET_PROTO_TCP_MAGIC)) - str = "'old' socknal/tcpnal"; - else - str = "unrecognised"; - - LCONSOLE_ERROR_MSG(0x11f, "Refusing connection from %pI4h magic %08x: %s acceptor protocol\n", - &peer_ip, magic, str); - return -EPROTO; - } - - flip = (magic != LNET_PROTO_ACCEPTOR_MAGIC); - - rc = lnet_sock_read(sock, &cr.acr_version, sizeof(cr.acr_version), - accept_timeout); - if (rc) { - CERROR("Error %d reading connection request version from %pI4h\n", - rc, &peer_ip); - return -EIO; - } - - if (flip) - __swab32s(&cr.acr_version); - - if (cr.acr_version != LNET_PROTO_ACCEPTOR_VERSION) { - /* - * future version compatibility! - * An acceptor-specific protocol rev will first send a version - * query. I send back my current version to tell her I'm - * "old". - */ - int peer_version = cr.acr_version; - - memset(&cr, 0, sizeof(cr)); - cr.acr_magic = LNET_PROTO_ACCEPTOR_MAGIC; - cr.acr_version = LNET_PROTO_ACCEPTOR_VERSION; - - rc = lnet_sock_write(sock, &cr, sizeof(cr), accept_timeout); - if (rc) - CERROR("Error sending magic+version in response to version %d from %pI4h: %d\n", - peer_version, &peer_ip, rc); - return -EPROTO; - } - - rc = lnet_sock_read(sock, &cr.acr_nid, - sizeof(cr) - - offsetof(struct lnet_acceptor_connreq, acr_nid), - accept_timeout); - if (rc) { - CERROR("Error %d reading connection request from %pI4h\n", - rc, &peer_ip); - return -EIO; - } - - if (flip) - __swab64s(&cr.acr_nid); - - ni = lnet_net2ni(LNET_NIDNET(cr.acr_nid)); - if (!ni || /* no matching net */ - ni->ni_nid != cr.acr_nid) { /* right NET, wrong NID! 
*/ - if (ni) - lnet_ni_decref(ni); - LCONSOLE_ERROR_MSG(0x120, "Refusing connection from %pI4h for %s: No matching NI\n", - &peer_ip, libcfs_nid2str(cr.acr_nid)); - return -EPERM; - } - - if (!ni->ni_lnd->lnd_accept) { - /* This catches a request for the loopback LND */ - lnet_ni_decref(ni); - LCONSOLE_ERROR_MSG(0x121, "Refusing connection from %pI4h for %s: NI doesn not accept IP connections\n", - &peer_ip, libcfs_nid2str(cr.acr_nid)); - return -EPERM; - } - - CDEBUG(D_NET, "Accept %s from %pI4h\n", - libcfs_nid2str(cr.acr_nid), &peer_ip); - - rc = ni->ni_lnd->lnd_accept(ni, sock); - - lnet_ni_decref(ni); - return rc; -} - -static int -lnet_acceptor(void *arg) -{ - struct socket *newsock; - int rc; - __u32 magic; - __u32 peer_ip; - int peer_port; - int secure = (int)((long)arg); - - LASSERT(!lnet_acceptor_state.pta_sock); - - rc = lnet_sock_listen(&lnet_acceptor_state.pta_sock, 0, accept_port, - accept_backlog); - if (rc) { - if (rc == -EADDRINUSE) - LCONSOLE_ERROR_MSG(0x122, "Can't start acceptor on port %d: port already in use\n", - accept_port); - else - LCONSOLE_ERROR_MSG(0x123, "Can't start acceptor on port %d: unexpected error %d\n", - accept_port, rc); - - lnet_acceptor_state.pta_sock = NULL; - } else { - LCONSOLE(0, "Accept %s, port %d\n", accept_type, accept_port); - } - - /* set init status and unblock parent */ - lnet_acceptor_state.pta_shutdown = rc; - complete(&lnet_acceptor_state.pta_signal); - - if (rc) - return rc; - - while (!lnet_acceptor_state.pta_shutdown) { - rc = lnet_sock_accept(&newsock, lnet_acceptor_state.pta_sock); - if (rc) { - if (rc != -EAGAIN) { - CWARN("Accept error %d: pausing...\n", rc); - set_current_state(TASK_UNINTERRUPTIBLE); - schedule_timeout(HZ); - } - continue; - } - - /* maybe the LNet acceptor thread has been waken */ - if (lnet_acceptor_state.pta_shutdown) { - sock_release(newsock); - break; - } - - rc = lnet_sock_getaddr(newsock, 1, &peer_ip, &peer_port); - if (rc) { - CERROR("Can't determine new connection's 
/*
 * Translate the 'accept' setting into a "secure" flag.
 *
 * Returns 1 and stores 1 in *sec for "secure", returns 1 and stores 0
 * for "all", returns 0 (leaving *sec untouched) for "none", and returns
 * -EINVAL after a console error for anything else.
 */
static inline int
accept2secure(const char *acc, long *sec)
{
	if (strcmp(acc, "secure") == 0) {
		*sec = 1;
		return 1;
	}

	if (strcmp(acc, "all") == 0) {
		*sec = 0;
		return 1;
	}

	if (strcmp(acc, "none") == 0)
		return 0;

	LCONSOLE_ERROR_MSG(0x124, "Can't parse 'accept=\"%s\"'\n",
			   acc);
	return -EINVAL;
}
0; - } - - LASSERT(!lnet_acceptor_state.pta_sock); - - return -ENETDOWN; -} - -void -lnet_acceptor_stop(void) -{ - struct sock *sk; - - if (lnet_acceptor_state.pta_shutdown) /* not running */ - return; - - lnet_acceptor_state.pta_shutdown = 1; - - sk = lnet_acceptor_state.pta_sock->sk; - - /* awake any sleepers using safe method */ - sk->sk_state_change(sk); - - /* block until acceptor signals exit */ - wait_for_completion(&lnet_acceptor_state.pta_signal); -} diff --git a/drivers/staging/lustre/lnet/lnet/api-ni.c b/drivers/staging/lustre/lnet/lnet/api-ni.c deleted file mode 100644 index 90266be0132d..000000000000 --- a/drivers/staging/lustre/lnet/lnet/api-ni.c +++ /dev/null @@ -1,2307 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -/* - * GPL HEADER START - * - * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 only, - * as published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, but - * WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * General Public License version 2 for more details (a copy is included - * in the LICENSE file that accompanied this code). - * - * You should have received a copy of the GNU General Public License - * version 2 along with this program; If not, see - * http://www.gnu.org/licenses/gpl-2.0.html - * - * GPL HEADER END - */ -/* - * Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved. - * Use is subject to license terms. - * - * Copyright (c) 2011, 2015, Intel Corporation. - */ -/* - * This file is part of Lustre, http://www.lustre.org/ - * Lustre is a trademark of Sun Microsystems, Inc. 
- */ - -#define DEBUG_SUBSYSTEM S_LNET -#include <linux/log2.h> -#include <linux/ktime.h> - -#include <linux/lnet/lib-lnet.h> -#include <uapi/linux/lnet/lnet-dlc.h> - -#define D_LNI D_CONSOLE - -struct lnet the_lnet; /* THE state of the network */ -EXPORT_SYMBOL(the_lnet); - -static char *ip2nets = ""; -module_param(ip2nets, charp, 0444); -MODULE_PARM_DESC(ip2nets, "LNET network <- IP table"); - -static char *networks = ""; -module_param(networks, charp, 0444); -MODULE_PARM_DESC(networks, "local networks"); - -static char *routes = ""; -module_param(routes, charp, 0444); -MODULE_PARM_DESC(routes, "routes to non-local networks"); - -static int rnet_htable_size = LNET_REMOTE_NETS_HASH_DEFAULT; -module_param(rnet_htable_size, int, 0444); -MODULE_PARM_DESC(rnet_htable_size, "size of remote network hash table"); - -static int lnet_ping(struct lnet_process_id id, int timeout_ms, - struct lnet_process_id __user *ids, int n_ids); - -static char * -lnet_get_routes(void) -{ - return routes; -} - -static char * -lnet_get_networks(void) -{ - char *nets; - int rc; - - if (*networks && *ip2nets) { - LCONSOLE_ERROR_MSG(0x101, "Please specify EITHER 'networks' or 'ip2nets' but not both at once\n"); - return NULL; - } - - if (*ip2nets) { - rc = lnet_parse_ip2nets(&nets, ip2nets); - return !rc ? 
nets : NULL; - } - - if (*networks) - return networks; - - return "tcp"; -} - -static void -lnet_init_locks(void) -{ - spin_lock_init(&the_lnet.ln_eq_wait_lock); - init_waitqueue_head(&the_lnet.ln_eq_waitq); - init_waitqueue_head(&the_lnet.ln_rc_waitq); - mutex_init(&the_lnet.ln_lnd_mutex); - mutex_init(&the_lnet.ln_api_mutex); -} - -static int -lnet_create_remote_nets_table(void) -{ - int i; - struct list_head *hash; - - LASSERT(!the_lnet.ln_remote_nets_hash); - LASSERT(the_lnet.ln_remote_nets_hbits > 0); - hash = kvmalloc_array(LNET_REMOTE_NETS_HASH_SIZE, sizeof(*hash), - GFP_KERNEL); - if (!hash) { - CERROR("Failed to create remote nets hash table\n"); - return -ENOMEM; - } - - for (i = 0; i < LNET_REMOTE_NETS_HASH_SIZE; i++) - INIT_LIST_HEAD(&hash[i]); - the_lnet.ln_remote_nets_hash = hash; - return 0; -} - -static void -lnet_destroy_remote_nets_table(void) -{ - int i; - - if (!the_lnet.ln_remote_nets_hash) - return; - - for (i = 0; i < LNET_REMOTE_NETS_HASH_SIZE; i++) - LASSERT(list_empty(&the_lnet.ln_remote_nets_hash[i])); - - kvfree(the_lnet.ln_remote_nets_hash); - the_lnet.ln_remote_nets_hash = NULL; -} - -static void -lnet_destroy_locks(void) -{ - if (the_lnet.ln_res_lock) { - cfs_percpt_lock_free(the_lnet.ln_res_lock); - the_lnet.ln_res_lock = NULL; - } - - if (the_lnet.ln_net_lock) { - cfs_percpt_lock_free(the_lnet.ln_net_lock); - the_lnet.ln_net_lock = NULL; - } -} - -static int -lnet_create_locks(void) -{ - lnet_init_locks(); - - the_lnet.ln_res_lock = cfs_percpt_lock_alloc(lnet_cpt_table()); - if (!the_lnet.ln_res_lock) - goto failed; - - the_lnet.ln_net_lock = cfs_percpt_lock_alloc(lnet_cpt_table()); - if (!the_lnet.ln_net_lock) - goto failed; - - return 0; - - failed: - lnet_destroy_locks(); - return -ENOMEM; -} - -static void lnet_assert_wire_constants(void) -{ - /* - * Wire protocol assertions generated by 'wirecheck' - * running on Linux robert.bartonsoftware.com 2.6.8-1.521 - * #1 Mon Aug 16 09:01:18 EDT 2004 i686 athlon i386 GNU/Linux - * with 
gcc version 3.3.3 20040412 (Red Hat Linux 3.3.3-7) - */ - - /* Constants... */ - BUILD_BUG_ON(LNET_PROTO_TCP_MAGIC != 0xeebc0ded); - BUILD_BUG_ON(LNET_PROTO_TCP_VERSION_MAJOR != 1); - BUILD_BUG_ON(LNET_PROTO_TCP_VERSION_MINOR != 0); - BUILD_BUG_ON(LNET_MSG_ACK != 0); - BUILD_BUG_ON(LNET_MSG_PUT != 1); - BUILD_BUG_ON(LNET_MSG_GET != 2); - BUILD_BUG_ON(LNET_MSG_REPLY != 3); - BUILD_BUG_ON(LNET_MSG_HELLO != 4); - - /* Checks for struct ptl_handle_wire_t */ - BUILD_BUG_ON((int)sizeof(struct lnet_handle_wire) != 16); - BUILD_BUG_ON((int)offsetof(struct lnet_handle_wire, wh_interface_cookie) != 0); - BUILD_BUG_ON((int)sizeof(((struct lnet_handle_wire *)0)->wh_interface_cookie) != 8); - BUILD_BUG_ON((int)offsetof(struct lnet_handle_wire, wh_object_cookie) != 8); - BUILD_BUG_ON((int)sizeof(((struct lnet_handle_wire *)0)->wh_object_cookie) != 8); - - /* Checks for struct struct lnet_magicversion */ - BUILD_BUG_ON((int)sizeof(struct lnet_magicversion) != 8); - BUILD_BUG_ON((int)offsetof(struct lnet_magicversion, magic) != 0); - BUILD_BUG_ON((int)sizeof(((struct lnet_magicversion *)0)->magic) != 4); - BUILD_BUG_ON((int)offsetof(struct lnet_magicversion, version_major) != 4); - BUILD_BUG_ON((int)sizeof(((struct lnet_magicversion *)0)->version_major) != 2); - BUILD_BUG_ON((int)offsetof(struct lnet_magicversion, version_minor) != 6); - BUILD_BUG_ON((int)sizeof(((struct lnet_magicversion *)0)->version_minor) != 2); - - /* Checks for struct struct lnet_hdr */ - BUILD_BUG_ON((int)sizeof(struct lnet_hdr) != 72); - BUILD_BUG_ON((int)offsetof(struct lnet_hdr, dest_nid) != 0); - BUILD_BUG_ON((int)sizeof(((struct lnet_hdr *)0)->dest_nid) != 8); - BUILD_BUG_ON((int)offsetof(struct lnet_hdr, src_nid) != 8); - BUILD_BUG_ON((int)sizeof(((struct lnet_hdr *)0)->src_nid) != 8); - BUILD_BUG_ON((int)offsetof(struct lnet_hdr, dest_pid) != 16); - BUILD_BUG_ON((int)sizeof(((struct lnet_hdr *)0)->dest_pid) != 4); - BUILD_BUG_ON((int)offsetof(struct lnet_hdr, src_pid) != 20); - 
BUILD_BUG_ON((int)sizeof(((struct lnet_hdr *)0)->src_pid) != 4); - BUILD_BUG_ON((int)offsetof(struct lnet_hdr, type) != 24); - BUILD_BUG_ON((int)sizeof(((struct lnet_hdr *)0)->type) != 4); - BUILD_BUG_ON((int)offsetof(struct lnet_hdr, payload_length) != 28); - BUILD_BUG_ON((int)sizeof(((struct lnet_hdr *)0)->payload_length) != 4); - BUILD_BUG_ON((int)offsetof(struct lnet_hdr, msg) != 32); - BUILD_BUG_ON((int)sizeof(((struct lnet_hdr *)0)->msg) != 40); - - /* Ack */ - BUILD_BUG_ON((int)offsetof(struct lnet_hdr, msg.ack.dst_wmd) != 32); - BUILD_BUG_ON((int)sizeof(((struct lnet_hdr *)0)->msg.ack.dst_wmd) != 16); - BUILD_BUG_ON((int)offsetof(struct lnet_hdr, msg.ack.match_bits) != 48); - BUILD_BUG_ON((int)sizeof(((struct lnet_hdr *)0)->msg.ack.match_bits) != 8); - BUILD_BUG_ON((int)offsetof(struct lnet_hdr, msg.ack.mlength) != 56); - BUILD_BUG_ON((int)sizeof(((struct lnet_hdr *)0)->msg.ack.mlength) != 4); - - /* Put */ - BUILD_BUG_ON((int)offsetof(struct lnet_hdr, msg.put.ack_wmd) != 32); - BUILD_BUG_ON((int)sizeof(((struct lnet_hdr *)0)->msg.put.ack_wmd) != 16); - BUILD_BUG_ON((int)offsetof(struct lnet_hdr, msg.put.match_bits) != 48); - BUILD_BUG_ON((int)sizeof(((struct lnet_hdr *)0)->msg.put.match_bits) != 8); - BUILD_BUG_ON((int)offsetof(struct lnet_hdr, msg.put.hdr_data) != 56); - BUILD_BUG_ON((int)sizeof(((struct lnet_hdr *)0)->msg.put.hdr_data) != 8); - BUILD_BUG_ON((int)offsetof(struct lnet_hdr, msg.put.ptl_index) != 64); - BUILD_BUG_ON((int)sizeof(((struct lnet_hdr *)0)->msg.put.ptl_index) != 4); - BUILD_BUG_ON((int)offsetof(struct lnet_hdr, msg.put.offset) != 68); - BUILD_BUG_ON((int)sizeof(((struct lnet_hdr *)0)->msg.put.offset) != 4); - - /* Get */ - BUILD_BUG_ON((int)offsetof(struct lnet_hdr, msg.get.return_wmd) != 32); - BUILD_BUG_ON((int)sizeof(((struct lnet_hdr *)0)->msg.get.return_wmd) != 16); - BUILD_BUG_ON((int)offsetof(struct lnet_hdr, msg.get.match_bits) != 48); - BUILD_BUG_ON((int)sizeof(((struct lnet_hdr *)0)->msg.get.match_bits) != 8); - 
BUILD_BUG_ON((int)offsetof(struct lnet_hdr, msg.get.ptl_index) != 56); - BUILD_BUG_ON((int)sizeof(((struct lnet_hdr *)0)->msg.get.ptl_index) != 4); - BUILD_BUG_ON((int)offsetof(struct lnet_hdr, msg.get.src_offset) != 60); - BUILD_BUG_ON((int)sizeof(((struct lnet_hdr *)0)->msg.get.src_offset) != 4); - BUILD_BUG_ON((int)offsetof(struct lnet_hdr, msg.get.sink_length) != 64); - BUILD_BUG_ON((int)sizeof(((struct lnet_hdr *)0)->msg.get.sink_length) != 4); - - /* Reply */ - BUILD_BUG_ON((int)offsetof(struct lnet_hdr, msg.reply.dst_wmd) != 32); - BUILD_BUG_ON((int)sizeof(((struct lnet_hdr *)0)->msg.reply.dst_wmd) != 16); - - /* Hello */ - BUILD_BUG_ON((int)offsetof(struct lnet_hdr, msg.hello.incarnation) != 32); - BUILD_BUG_ON((int)sizeof(((struct lnet_hdr *)0)->msg.hello.incarnation) != 8); - BUILD_BUG_ON((int)offsetof(struct lnet_hdr, msg.hello.type) != 40); - BUILD_BUG_ON((int)sizeof(((struct lnet_hdr *)0)->msg.hello.type) != 4); -} - -static struct lnet_lnd * -lnet_find_lnd_by_type(__u32 type) -{ - struct lnet_lnd *lnd; - struct list_head *tmp; - - /* holding lnd mutex */ - list_for_each(tmp, &the_lnet.ln_lnds) { - lnd = list_entry(tmp, struct lnet_lnd, lnd_list); - - if (lnd->lnd_type == type) - return lnd; - } - - return NULL; -} - -void -lnet_register_lnd(struct lnet_lnd *lnd) -{ - mutex_lock(&the_lnet.ln_lnd_mutex); - - LASSERT(libcfs_isknown_lnd(lnd->lnd_type)); - LASSERT(!lnet_find_lnd_by_type(lnd->lnd_type)); - - list_add_tail(&lnd->lnd_list, &the_lnet.ln_lnds); - lnd->lnd_refcount = 0; - - CDEBUG(D_NET, "%s LND registered\n", libcfs_lnd2str(lnd->lnd_type)); - - mutex_unlock(&the_lnet.ln_lnd_mutex); -} -EXPORT_SYMBOL(lnet_register_lnd); - -void -lnet_unregister_lnd(struct lnet_lnd *lnd) -{ - mutex_lock(&the_lnet.ln_lnd_mutex); - - LASSERT(lnet_find_lnd_by_type(lnd->lnd_type) == lnd); - LASSERT(!lnd->lnd_refcount); - - list_del(&lnd->lnd_list); - CDEBUG(D_NET, "%s LND unregistered\n", libcfs_lnd2str(lnd->lnd_type)); - - mutex_unlock(&the_lnet.ln_lnd_mutex); -} 
-EXPORT_SYMBOL(lnet_unregister_lnd); - -void -lnet_counters_get(struct lnet_counters *counters) -{ - struct lnet_counters *ctr; - int i; - - memset(counters, 0, sizeof(*counters)); - - lnet_net_lock(LNET_LOCK_EX); - - cfs_percpt_for_each(ctr, i, the_lnet.ln_counters) { - counters->msgs_max += ctr->msgs_max; - counters->msgs_alloc += ctr->msgs_alloc; - counters->errors += ctr->errors; - counters->send_count += ctr->send_count; - counters->recv_count += ctr->recv_count; - counters->route_count += ctr->route_count; - counters->drop_count += ctr->drop_count; - counters->send_length += ctr->send_length; - counters->recv_length += ctr->recv_length; - counters->route_length += ctr->route_length; - counters->drop_length += ctr->drop_length; - } - lnet_net_unlock(LNET_LOCK_EX); -} -EXPORT_SYMBOL(lnet_counters_get); - -void -lnet_counters_reset(void) -{ - struct lnet_counters *counters; - int i; - - lnet_net_lock(LNET_LOCK_EX); - - cfs_percpt_for_each(counters, i, the_lnet.ln_counters) - memset(counters, 0, sizeof(struct lnet_counters)); - - lnet_net_unlock(LNET_LOCK_EX); -} - -static char * -lnet_res_type2str(int type) -{ - switch (type) { - default: - LBUG(); - case LNET_COOKIE_TYPE_MD: - return "MD"; - case LNET_COOKIE_TYPE_ME: - return "ME"; - case LNET_COOKIE_TYPE_EQ: - return "EQ"; - } -} - -static void -lnet_res_container_cleanup(struct lnet_res_container *rec) -{ - int count = 0; - - if (!rec->rec_type) /* not set yet, it's uninitialized */ - return; - - while (!list_empty(&rec->rec_active)) { - struct list_head *e = rec->rec_active.next; - - list_del_init(e); - if (rec->rec_type == LNET_COOKIE_TYPE_EQ) { - kfree(list_entry(e, struct lnet_eq, eq_list)); - - } else if (rec->rec_type == LNET_COOKIE_TYPE_MD) { - kfree(list_entry(e, struct lnet_libmd, md_list)); - - } else { /* NB: Active MEs should be attached on portals */ - LBUG(); - } - count++; - } - - if (count > 0) { - /* - * Found alive MD/ME/EQ, user really should unlink/free - * all of them before finalize 
LNet, but if someone didn't, - * we have to recycle garbage for him - */ - CERROR("%d active elements on exit of %s container\n", - count, lnet_res_type2str(rec->rec_type)); - } - - kfree(rec->rec_lh_hash); - rec->rec_lh_hash = NULL; - - rec->rec_type = 0; /* mark it as finalized */ -} - -static int -lnet_res_container_setup(struct lnet_res_container *rec, int cpt, int type) -{ - int rc = 0; - int i; - - LASSERT(!rec->rec_type); - - rec->rec_type = type; - INIT_LIST_HEAD(&rec->rec_active); - rec->rec_lh_cookie = (cpt << LNET_COOKIE_TYPE_BITS) | type; - - /* Arbitrary choice of hash table size */ - rec->rec_lh_hash = kvmalloc_cpt(LNET_LH_HASH_SIZE * sizeof(rec->rec_lh_hash[0]), - GFP_KERNEL, cpt); - if (!rec->rec_lh_hash) { - rc = -ENOMEM; - goto out; - } - - for (i = 0; i < LNET_LH_HASH_SIZE; i++) - INIT_LIST_HEAD(&rec->rec_lh_hash[i]); - - return 0; - -out: - CERROR("Failed to setup %s resource container\n", - lnet_res_type2str(type)); - lnet_res_container_cleanup(rec); - return rc; -} - -static void -lnet_res_containers_destroy(struct lnet_res_container **recs) -{ - struct lnet_res_container *rec; - int i; - - cfs_percpt_for_each(rec, i, recs) - lnet_res_container_cleanup(rec); - - cfs_percpt_free(recs); -} - -static struct lnet_res_container ** -lnet_res_containers_create(int type) -{ - struct lnet_res_container **recs; - struct lnet_res_container *rec; - int rc; - int i; - - recs = cfs_percpt_alloc(lnet_cpt_table(), sizeof(*rec)); - if (!recs) { - CERROR("Failed to allocate %s resource containers\n", - lnet_res_type2str(type)); - return NULL; - } - - cfs_percpt_for_each(rec, i, recs) { - rc = lnet_res_container_setup(rec, i, type); - if (rc) { - lnet_res_containers_destroy(recs); - return NULL; - } - } - - return recs; -} - -struct lnet_libhandle * -lnet_res_lh_lookup(struct lnet_res_container *rec, __u64 cookie) -{ - /* ALWAYS called with lnet_res_lock held */ - struct list_head *head; - struct lnet_libhandle *lh; - unsigned int hash; - - if ((cookie & 
LNET_COOKIE_MASK) != rec->rec_type) - return NULL; - - hash = cookie >> (LNET_COOKIE_TYPE_BITS + LNET_CPT_BITS); - head = &rec->rec_lh_hash[hash & LNET_LH_HASH_MASK]; - - list_for_each_entry(lh, head, lh_hash_chain) { - if (lh->lh_cookie == cookie) - return lh; - } - - return NULL; -} - -void -lnet_res_lh_initialize(struct lnet_res_container *rec, - struct lnet_libhandle *lh) -{ - /* ALWAYS called with lnet_res_lock held */ - unsigned int ibits = LNET_COOKIE_TYPE_BITS + LNET_CPT_BITS; - unsigned int hash; - - lh->lh_cookie = rec->rec_lh_cookie; - rec->rec_lh_cookie += 1 << ibits; - - hash = (lh->lh_cookie >> ibits) & LNET_LH_HASH_MASK; - - list_add(&lh->lh_hash_chain, &rec->rec_lh_hash[hash]); -} - -static int lnet_unprepare(void); - -static int -lnet_prepare(lnet_pid_t requested_pid) -{ - /* Prepare to bring up the network */ - struct lnet_res_container **recs; - int rc = 0; - - if (requested_pid == LNET_PID_ANY) { - /* Don't instantiate LNET just for me */ - return -ENETDOWN; - } - - LASSERT(!the_lnet.ln_refcount); - - the_lnet.ln_routing = 0; - - LASSERT(!(requested_pid & LNET_PID_USERFLAG)); - the_lnet.ln_pid = requested_pid; - - INIT_LIST_HEAD(&the_lnet.ln_test_peers); - INIT_LIST_HEAD(&the_lnet.ln_nis); - INIT_LIST_HEAD(&the_lnet.ln_nis_cpt); - INIT_LIST_HEAD(&the_lnet.ln_nis_zombie); - INIT_LIST_HEAD(&the_lnet.ln_routers); - INIT_LIST_HEAD(&the_lnet.ln_drop_rules); - INIT_LIST_HEAD(&the_lnet.ln_delay_rules); - - rc = lnet_create_remote_nets_table(); - if (rc) - goto failed; - /* - * NB the interface cookie in wire handles guards against delayed - * replies and ACKs appearing valid after reboot. 
- */ - the_lnet.ln_interface_cookie = ktime_get_ns(); - - the_lnet.ln_counters = cfs_percpt_alloc(lnet_cpt_table(), - sizeof(struct lnet_counters)); - if (!the_lnet.ln_counters) { - CERROR("Failed to allocate counters for LNet\n"); - rc = -ENOMEM; - goto failed; - } - - rc = lnet_peer_tables_create(); - if (rc) - goto failed; - - rc = lnet_msg_containers_create(); - if (rc) - goto failed; - - rc = lnet_res_container_setup(&the_lnet.ln_eq_container, 0, - LNET_COOKIE_TYPE_EQ); - if (rc) - goto failed; - - recs = lnet_res_containers_create(LNET_COOKIE_TYPE_ME); - if (!recs) { - rc = -ENOMEM; - goto failed; - } - - the_lnet.ln_me_containers = recs; - - recs = lnet_res_containers_create(LNET_COOKIE_TYPE_MD); - if (!recs) { - rc = -ENOMEM; - goto failed; - } - - the_lnet.ln_md_containers = recs; - - rc = lnet_portals_create(); - if (rc) { - CERROR("Failed to create portals for LNet: %d\n", rc); - goto failed; - } - - return 0; - - failed: - lnet_unprepare(); - return rc; -} - -static int -lnet_unprepare(void) -{ - /* - * NB no LNET_LOCK since this is the last reference. 
All LND instances - * have shut down already, so it is safe to unlink and free all - * descriptors, even those that appear committed to a network op (eg MD - * with non-zero pending count) - */ - lnet_fail_nid(LNET_NID_ANY, 0); - - LASSERT(!the_lnet.ln_refcount); - LASSERT(list_empty(&the_lnet.ln_test_peers)); - LASSERT(list_empty(&the_lnet.ln_nis)); - LASSERT(list_empty(&the_lnet.ln_nis_cpt)); - LASSERT(list_empty(&the_lnet.ln_nis_zombie)); - - lnet_portals_destroy(); - - if (the_lnet.ln_md_containers) { - lnet_res_containers_destroy(the_lnet.ln_md_containers); - the_lnet.ln_md_containers = NULL; - } - - if (the_lnet.ln_me_containers) { - lnet_res_containers_destroy(the_lnet.ln_me_containers); - the_lnet.ln_me_containers = NULL; - } - - lnet_res_container_cleanup(&the_lnet.ln_eq_container); - - lnet_msg_containers_destroy(); - lnet_peer_tables_destroy(); - lnet_rtrpools_free(0); - - if (the_lnet.ln_counters) { - cfs_percpt_free(the_lnet.ln_counters); - the_lnet.ln_counters = NULL; - } - lnet_destroy_remote_nets_table(); - - return 0; -} - -struct lnet_ni * -lnet_net2ni_locked(__u32 net, int cpt) -{ - struct list_head *tmp; - struct lnet_ni *ni; - - LASSERT(cpt != LNET_LOCK_EX); - - list_for_each(tmp, &the_lnet.ln_nis) { - ni = list_entry(tmp, struct lnet_ni, ni_list); - - if (LNET_NIDNET(ni->ni_nid) == net) { - lnet_ni_addref_locked(ni, cpt); - return ni; - } - } - - return NULL; -} - -struct lnet_ni * -lnet_net2ni(__u32 net) -{ - struct lnet_ni *ni; - - lnet_net_lock(0); - ni = lnet_net2ni_locked(net, 0); - lnet_net_unlock(0); - - return ni; -} -EXPORT_SYMBOL(lnet_net2ni); - -static unsigned int -lnet_nid_cpt_hash(lnet_nid_t nid, unsigned int number) -{ - __u64 key = nid; - unsigned int val; - - LASSERT(number >= 1 && number <= LNET_CPT_NUMBER); - - if (number == 1) - return 0; - - val = hash_long(key, LNET_CPT_BITS); - /* NB: LNET_CP_NUMBER doesn't have to be PO2 */ - if (val < number) - return val; - - return (unsigned int)(key + val + (val >> 1)) % number; -} 
- -int -lnet_cpt_of_nid_locked(lnet_nid_t nid) -{ - struct lnet_ni *ni; - - /* must called with hold of lnet_net_lock */ - if (LNET_CPT_NUMBER == 1) - return 0; /* the only one */ - - /* take lnet_net_lock(any) would be OK */ - if (!list_empty(&the_lnet.ln_nis_cpt)) { - list_for_each_entry(ni, &the_lnet.ln_nis_cpt, ni_cptlist) { - if (LNET_NIDNET(ni->ni_nid) != LNET_NIDNET(nid)) - continue; - - LASSERT(ni->ni_cpts); - return ni->ni_cpts[lnet_nid_cpt_hash - (nid, ni->ni_ncpts)]; - } - } - - return lnet_nid_cpt_hash(nid, LNET_CPT_NUMBER); -} - -int -lnet_cpt_of_nid(lnet_nid_t nid) -{ - int cpt; - int cpt2; - - if (LNET_CPT_NUMBER == 1) - return 0; /* the only one */ - - if (list_empty(&the_lnet.ln_nis_cpt)) - return lnet_nid_cpt_hash(nid, LNET_CPT_NUMBER); - - cpt = lnet_net_lock_current(); - cpt2 = lnet_cpt_of_nid_locked(nid); - lnet_net_unlock(cpt); - - return cpt2; -} -EXPORT_SYMBOL(lnet_cpt_of_nid); - -int -lnet_islocalnet(__u32 net) -{ - struct lnet_ni *ni; - int cpt; - - cpt = lnet_net_lock_current(); - - ni = lnet_net2ni_locked(net, cpt); - if (ni) - lnet_ni_decref_locked(ni, cpt); - - lnet_net_unlock(cpt); - - return !!ni; -} - -struct lnet_ni * -lnet_nid2ni_locked(lnet_nid_t nid, int cpt) -{ - struct lnet_ni *ni; - struct list_head *tmp; - - LASSERT(cpt != LNET_LOCK_EX); - - list_for_each(tmp, &the_lnet.ln_nis) { - ni = list_entry(tmp, struct lnet_ni, ni_list); - - if (ni->ni_nid == nid) { - lnet_ni_addref_locked(ni, cpt); - return ni; - } - } - - return NULL; -} - -int -lnet_islocalnid(lnet_nid_t nid) -{ - struct lnet_ni *ni; - int cpt; - - cpt = lnet_net_lock_current(); - ni = lnet_nid2ni_locked(nid, cpt); - if (ni) - lnet_ni_decref_locked(ni, cpt); - lnet_net_unlock(cpt); - - return !!ni; -} - -int -lnet_count_acceptor_nis(void) -{ - /* Return the # of NIs that need the acceptor. 
*/ - int count = 0; - struct list_head *tmp; - struct lnet_ni *ni; - int cpt; - - cpt = lnet_net_lock_current(); - list_for_each(tmp, &the_lnet.ln_nis) { - ni = list_entry(tmp, struct lnet_ni, ni_list); - - if (ni->ni_lnd->lnd_accept) - count++; - } - - lnet_net_unlock(cpt); - - return count; -} - -static struct lnet_ping_info * -lnet_ping_info_create(int num_ni) -{ - struct lnet_ping_info *ping_info; - unsigned int infosz; - - infosz = offsetof(struct lnet_ping_info, pi_ni[num_ni]); - ping_info = kvzalloc(infosz, GFP_KERNEL); - if (!ping_info) { - CERROR("Can't allocate ping info[%d]\n", num_ni); - return NULL; - } - - ping_info->pi_nnis = num_ni; - ping_info->pi_pid = the_lnet.ln_pid; - ping_info->pi_magic = LNET_PROTO_PING_MAGIC; - ping_info->pi_features = LNET_PING_FEAT_NI_STATUS; - - return ping_info; -} - -static inline int -lnet_get_ni_count(void) -{ - struct lnet_ni *ni; - int count = 0; - - lnet_net_lock(0); - - list_for_each_entry(ni, &the_lnet.ln_nis, ni_list) - count++; - - lnet_net_unlock(0); - - return count; -} - -static inline void -lnet_ping_info_free(struct lnet_ping_info *pinfo) -{ - kvfree(pinfo); -} - -static void -lnet_ping_info_destroy(void) -{ - struct lnet_ni *ni; - - lnet_net_lock(LNET_LOCK_EX); - - list_for_each_entry(ni, &the_lnet.ln_nis, ni_list) { - lnet_ni_lock(ni); - ni->ni_status = NULL; - lnet_ni_unlock(ni); - } - - lnet_ping_info_free(the_lnet.ln_ping_info); - the_lnet.ln_ping_info = NULL; - - lnet_net_unlock(LNET_LOCK_EX); -} - -static void -lnet_ping_event_handler(struct lnet_event *event) -{ - struct lnet_ping_info *pinfo = event->md.user_ptr; - - if (event->unlinked) - pinfo->pi_features = LNET_PING_FEAT_INVAL; -} - -static int -lnet_ping_info_setup(struct lnet_ping_info **ppinfo, - struct lnet_handle_md *md_handle, - int ni_count, bool set_eq) -{ - struct lnet_process_id id = {LNET_NID_ANY, LNET_PID_ANY}; - struct lnet_handle_me me_handle; - struct lnet_md md = { NULL }; - int rc, rc2; - - if (set_eq) { - rc = LNetEQAlloc(0, 
lnet_ping_event_handler, - &the_lnet.ln_ping_target_eq); - if (rc) { - CERROR("Can't allocate ping EQ: %d\n", rc); - return rc; - } - } - - *ppinfo = lnet_ping_info_create(ni_count); - if (!*ppinfo) { - rc = -ENOMEM; - goto failed_0; - } - - rc = LNetMEAttach(LNET_RESERVED_PORTAL, id, - LNET_PROTO_PING_MATCHBITS, 0, - LNET_UNLINK, LNET_INS_AFTER, - &me_handle); - if (rc) { - CERROR("Can't create ping ME: %d\n", rc); - goto failed_1; - } - - /* initialize md content */ - md.start = *ppinfo; - md.length = offsetof(struct lnet_ping_info, - pi_ni[(*ppinfo)->pi_nnis]); - md.threshold = LNET_MD_THRESH_INF; - md.max_size = 0; - md.options = LNET_MD_OP_GET | LNET_MD_TRUNCATE | - LNET_MD_MANAGE_REMOTE; - md.user_ptr = NULL; - md.eq_handle = the_lnet.ln_ping_target_eq; - md.user_ptr = *ppinfo; - - rc = LNetMDAttach(me_handle, md, LNET_RETAIN, md_handle); - if (rc) { - CERROR("Can't attach ping MD: %d\n", rc); - goto failed_2; - } - - return 0; - -failed_2: - rc2 = LNetMEUnlink(me_handle); - LASSERT(!rc2); -failed_1: - lnet_ping_info_free(*ppinfo); - *ppinfo = NULL; -failed_0: - if (set_eq) - LNetEQFree(the_lnet.ln_ping_target_eq); - return rc; -} - -static void -lnet_ping_md_unlink(struct lnet_ping_info *pinfo, - struct lnet_handle_md *md_handle) -{ - LNetMDUnlink(*md_handle); - LNetInvalidateMDHandle(md_handle); - - /* NB md could be busy; this just starts the unlink */ - while (pinfo->pi_features != LNET_PING_FEAT_INVAL) { - CDEBUG(D_NET, "Still waiting for ping MD to unlink\n"); - set_current_state(TASK_NOLOAD); - schedule_timeout(HZ); - } -} - -static void -lnet_ping_info_install_locked(struct lnet_ping_info *ping_info) -{ - struct lnet_ni_status *ns; - struct lnet_ni *ni; - int i = 0; - - list_for_each_entry(ni, &the_lnet.ln_nis, ni_list) { - LASSERT(i < ping_info->pi_nnis); - - ns = &ping_info->pi_ni[i]; - - ns->ns_nid = ni->ni_nid; - - lnet_ni_lock(ni); - ns->ns_status = (ni->ni_status) ? 
- ni->ni_status->ns_status : LNET_NI_STATUS_UP; - ni->ni_status = ns; - lnet_ni_unlock(ni); - - i++; - } -} - -static void -lnet_ping_target_update(struct lnet_ping_info *pinfo, - struct lnet_handle_md md_handle) -{ - struct lnet_ping_info *old_pinfo = NULL; - struct lnet_handle_md old_md; - - /* switch the NIs to point to the new ping info created */ - lnet_net_lock(LNET_LOCK_EX); - - if (!the_lnet.ln_routing) - pinfo->pi_features |= LNET_PING_FEAT_RTE_DISABLED; - lnet_ping_info_install_locked(pinfo); - - if (the_lnet.ln_ping_info) { - old_pinfo = the_lnet.ln_ping_info; - old_md = the_lnet.ln_ping_target_md; - } - the_lnet.ln_ping_target_md = md_handle; - the_lnet.ln_ping_info = pinfo; - - lnet_net_unlock(LNET_LOCK_EX); - - if (old_pinfo) { - /* unlink the old ping info */ - lnet_ping_md_unlink(old_pinfo, &old_md); - lnet_ping_info_free(old_pinfo); - } -} - -static void -lnet_ping_target_fini(void) -{ - int rc; - - lnet_ping_md_unlink(the_lnet.ln_ping_info, - &the_lnet.ln_ping_target_md); - - rc = LNetEQFree(the_lnet.ln_ping_target_eq); - LASSERT(!rc); - - lnet_ping_info_destroy(); -} - -static int -lnet_ni_tq_credits(struct lnet_ni *ni) -{ - int credits; - - LASSERT(ni->ni_ncpts >= 1); - - if (ni->ni_ncpts == 1) - return ni->ni_maxtxcredits; - - credits = ni->ni_maxtxcredits / ni->ni_ncpts; - credits = max(credits, 8 * ni->ni_peertxcredits); - credits = min(credits, ni->ni_maxtxcredits); - - return credits; -} - -static void -lnet_ni_unlink_locked(struct lnet_ni *ni) -{ - if (!list_empty(&ni->ni_cptlist)) { - list_del_init(&ni->ni_cptlist); - lnet_ni_decref_locked(ni, 0); - } - - /* move it to zombie list and nobody can find it anymore */ - LASSERT(!list_empty(&ni->ni_list)); - list_move(&ni->ni_list, &the_lnet.ln_nis_zombie); - lnet_ni_decref_locked(ni, 0); /* drop ln_nis' ref */ -} - -static void -lnet_clear_zombies_nis_locked(void) -{ - int i; - int islo; - struct lnet_ni *ni; - struct lnet_ni *temp; - - /* - * Now wait for the NI's I just nuked to show up on 
ln_zombie_nis - * and shut them down in guaranteed thread context - */ - i = 2; - list_for_each_entry_safe(ni, temp, &the_lnet.ln_nis_zombie, ni_list) { - int *ref; - int j; - - list_del_init(&ni->ni_list); - cfs_percpt_for_each(ref, j, ni->ni_refs) { - if (!*ref) - continue; - /* still busy, add it back to zombie list */ - list_add(&ni->ni_list, &the_lnet.ln_nis_zombie); - break; - } - - if (!list_empty(&ni->ni_list)) { - lnet_net_unlock(LNET_LOCK_EX); - ++i; - if ((i & (-i)) == i) { - CDEBUG(D_WARNING, "Waiting for zombie LNI %s\n", - libcfs_nid2str(ni->ni_nid)); - } - set_current_state(TASK_UNINTERRUPTIBLE); - schedule_timeout(HZ); - lnet_net_lock(LNET_LOCK_EX); - continue; - } - - ni->ni_lnd->lnd_refcount--; - lnet_net_unlock(LNET_LOCK_EX); - - islo = ni->ni_lnd->lnd_type == LOLND; - - LASSERT(!in_interrupt()); - ni->ni_lnd->lnd_shutdown(ni); - - /* - * can't deref lnd anymore now; it might have unregistered - * itself... - */ - if (!islo) - CDEBUG(D_LNI, "Removed LNI %s\n", - libcfs_nid2str(ni->ni_nid)); - - lnet_ni_free(ni); - i = 2; - - lnet_net_lock(LNET_LOCK_EX); - } -} - -static void -lnet_shutdown_lndnis(void) -{ - struct lnet_ni *ni; - struct lnet_ni *temp; - int i; - - /* NB called holding the global mutex */ - - /* All quiet on the API front */ - LASSERT(!the_lnet.ln_shutdown); - LASSERT(!the_lnet.ln_refcount); - LASSERT(list_empty(&the_lnet.ln_nis_zombie)); - - lnet_net_lock(LNET_LOCK_EX); - the_lnet.ln_shutdown = 1; /* flag shutdown */ - - /* Unlink NIs from the global table */ - list_for_each_entry_safe(ni, temp, &the_lnet.ln_nis, ni_list) { - lnet_ni_unlink_locked(ni); - } - - /* Drop the cached loopback NI. 
*/ - if (the_lnet.ln_loni) { - lnet_ni_decref_locked(the_lnet.ln_loni, 0); - the_lnet.ln_loni = NULL; - } - - lnet_net_unlock(LNET_LOCK_EX); - - /* - * Clear lazy portals and drop delayed messages which hold refs - * on their lnet_msg::msg_rxpeer - */ - for (i = 0; i < the_lnet.ln_nportals; i++) - LNetClearLazyPortal(i); - - /* - * Clear the peer table and wait for all peers to go (they hold refs on - * their NIs) - */ - lnet_peer_tables_cleanup(NULL); - - lnet_net_lock(LNET_LOCK_EX); - - lnet_clear_zombies_nis_locked(); - the_lnet.ln_shutdown = 0; - lnet_net_unlock(LNET_LOCK_EX); -} - -/* shutdown down the NI and release refcount */ -static void -lnet_shutdown_lndni(struct lnet_ni *ni) -{ - int i; - - lnet_net_lock(LNET_LOCK_EX); - lnet_ni_unlink_locked(ni); - lnet_net_unlock(LNET_LOCK_EX); - - /* clear messages for this NI on the lazy portal */ - for (i = 0; i < the_lnet.ln_nportals; i++) - lnet_clear_lazy_portal(ni, i, "Shutting down NI"); - - /* Do peer table cleanup for this ni */ - lnet_peer_tables_cleanup(ni); - - lnet_net_lock(LNET_LOCK_EX); - lnet_clear_zombies_nis_locked(); - lnet_net_unlock(LNET_LOCK_EX); -} - -static int -lnet_startup_lndni(struct lnet_ni *ni, struct lnet_ioctl_config_data *conf) -{ - struct lnet_ioctl_config_lnd_tunables *lnd_tunables = NULL; - int rc = -EINVAL; - int lnd_type; - struct lnet_lnd *lnd; - struct lnet_tx_queue *tq; - int i; - u32 seed; - - lnd_type = LNET_NETTYP(LNET_NIDNET(ni->ni_nid)); - - LASSERT(libcfs_isknown_lnd(lnd_type)); - - if (lnd_type == CIBLND || lnd_type == OPENIBLND || - lnd_type == IIBLND || lnd_type == VIBLND) { - CERROR("LND %s obsoleted\n", libcfs_lnd2str(lnd_type)); - goto failed0; - } - - /* Make sure this new NI is unique. 
*/ - lnet_net_lock(LNET_LOCK_EX); - rc = lnet_net_unique(LNET_NIDNET(ni->ni_nid), &the_lnet.ln_nis); - lnet_net_unlock(LNET_LOCK_EX); - if (!rc) { - if (lnd_type == LOLND) { - lnet_ni_free(ni); - return 0; - } - - CERROR("Net %s is not unique\n", - libcfs_net2str(LNET_NIDNET(ni->ni_nid))); - rc = -EEXIST; - goto failed0; - } - - mutex_lock(&the_lnet.ln_lnd_mutex); - lnd = lnet_find_lnd_by_type(lnd_type); - - if (!lnd) { - mutex_unlock(&the_lnet.ln_lnd_mutex); - rc = request_module("%s", libcfs_lnd2modname(lnd_type)); - mutex_lock(&the_lnet.ln_lnd_mutex); - - lnd = lnet_find_lnd_by_type(lnd_type); - if (!lnd) { - mutex_unlock(&the_lnet.ln_lnd_mutex); - CERROR("Can't load LND %s, module %s, rc=%d\n", - libcfs_lnd2str(lnd_type), - libcfs_lnd2modname(lnd_type), rc); - rc = -EINVAL; - goto failed0; - } - } - - lnet_net_lock(LNET_LOCK_EX); - lnd->lnd_refcount++; - lnet_net_unlock(LNET_LOCK_EX); - - ni->ni_lnd = lnd; - - if (conf && conf->cfg_hdr.ioc_len > sizeof(*conf)) - lnd_tunables = (struct lnet_ioctl_config_lnd_tunables *)conf->cfg_bulk; - - if (lnd_tunables) { - ni->ni_lnd_tunables = kzalloc(sizeof(*ni->ni_lnd_tunables), - GFP_NOFS); - if (!ni->ni_lnd_tunables) { - mutex_unlock(&the_lnet.ln_lnd_mutex); - rc = -ENOMEM; - goto failed0; - } - memcpy(ni->ni_lnd_tunables, lnd_tunables, - sizeof(*ni->ni_lnd_tunables)); - } - - /* - * If given some LND tunable parameters, parse those now to - * override the values in the NI structure. 
- */ - if (conf) { - if (conf->cfg_config_u.cfg_net.net_peer_rtr_credits >= 0) - ni->ni_peerrtrcredits = - conf->cfg_config_u.cfg_net.net_peer_rtr_credits; - if (conf->cfg_config_u.cfg_net.net_peer_timeout >= 0) - ni->ni_peertimeout = - conf->cfg_config_u.cfg_net.net_peer_timeout; - if (conf->cfg_config_u.cfg_net.net_peer_tx_credits != -1) - ni->ni_peertxcredits = - conf->cfg_config_u.cfg_net.net_peer_tx_credits; - if (conf->cfg_config_u.cfg_net.net_max_tx_credits >= 0) - ni->ni_maxtxcredits = - conf->cfg_config_u.cfg_net.net_max_tx_credits; - } - - rc = lnd->lnd_startup(ni); - - mutex_unlock(&the_lnet.ln_lnd_mutex); - - if (rc) { - LCONSOLE_ERROR_MSG(0x105, "Error %d starting up LNI %s\n", - rc, libcfs_lnd2str(lnd->lnd_type)); - lnet_net_lock(LNET_LOCK_EX); - lnd->lnd_refcount--; - lnet_net_unlock(LNET_LOCK_EX); - goto failed0; - } - - LASSERT(ni->ni_peertimeout <= 0 || lnd->lnd_query); - - lnet_net_lock(LNET_LOCK_EX); - /* refcount for ln_nis */ - lnet_ni_addref_locked(ni, 0); - list_add_tail(&ni->ni_list, &the_lnet.ln_nis); - if (ni->ni_cpts) { - lnet_ni_addref_locked(ni, 0); - list_add_tail(&ni->ni_cptlist, &the_lnet.ln_nis_cpt); - } - - lnet_net_unlock(LNET_LOCK_EX); - - if (lnd->lnd_type == LOLND) { - lnet_ni_addref(ni); - LASSERT(!the_lnet.ln_loni); - the_lnet.ln_loni = ni; - return 0; - } - - if (!ni->ni_peertxcredits || !ni->ni_maxtxcredits) { - LCONSOLE_ERROR_MSG(0x107, "LNI %s has no %scredits\n", - libcfs_lnd2str(lnd->lnd_type), - !ni->ni_peertxcredits ? - "" : "per-peer "); - /* - * shutdown the NI since if we get here then it must've already - * been started - */ - lnet_shutdown_lndni(ni); - return -EINVAL; - } - - cfs_percpt_for_each(tq, i, ni->ni_tx_queues) { - tq->tq_credits_min = - tq->tq_credits_max = - tq->tq_credits = lnet_ni_tq_credits(ni); - } - - /* Nodes with small feet have little entropy. The NID for this - * node gives the most entropy in the low bits. 
- */ - seed = LNET_NIDADDR(ni->ni_nid); - add_device_randomness(&seed, sizeof(seed)); - - CDEBUG(D_LNI, "Added LNI %s [%d/%d/%d/%d]\n", - libcfs_nid2str(ni->ni_nid), ni->ni_peertxcredits, - lnet_ni_tq_credits(ni) * LNET_CPT_NUMBER, - ni->ni_peerrtrcredits, ni->ni_peertimeout); - - return 0; -failed0: - lnet_ni_free(ni); - return rc; -} - -static int -lnet_startup_lndnis(struct list_head *nilist) -{ - struct lnet_ni *ni; - int rc; - int ni_count = 0; - - while (!list_empty(nilist)) { - ni = list_entry(nilist->next, struct lnet_ni, ni_list); - list_del(&ni->ni_list); - rc = lnet_startup_lndni(ni, NULL); - - if (rc < 0) - goto failed; - - ni_count++; - } - - return ni_count; -failed: - lnet_shutdown_lndnis(); - - return rc; -} - -/** - * Initialize LNet library. - * - * Automatically called at module loading time. Caller has to call - * lnet_lib_exit() after a call to lnet_lib_init(), if and only if the - * latter returned 0. It must be called exactly once. - * - * \retval 0 on success - * \retval -ve on failures. 
- */ -int lnet_lib_init(void) -{ - int rc; - - lnet_assert_wire_constants(); - - memset(&the_lnet, 0, sizeof(the_lnet)); - - /* refer to global cfs_cpt_table for now */ - the_lnet.ln_cpt_table = cfs_cpt_table; - the_lnet.ln_cpt_number = cfs_cpt_number(cfs_cpt_table); - - LASSERT(the_lnet.ln_cpt_number > 0); - if (the_lnet.ln_cpt_number > LNET_CPT_MAX) { - /* we are under risk of consuming all lh_cookie */ - CERROR("Can't have %d CPTs for LNet (max allowed is %d), please change setting of CPT-table and retry\n", - the_lnet.ln_cpt_number, LNET_CPT_MAX); - return -E2BIG; - } - - while ((1 << the_lnet.ln_cpt_bits) < the_lnet.ln_cpt_number) - the_lnet.ln_cpt_bits++; - - rc = lnet_create_locks(); - if (rc) { - CERROR("Can't create LNet global locks: %d\n", rc); - return rc; - } - - the_lnet.ln_refcount = 0; - LNetInvalidateEQHandle(&the_lnet.ln_rc_eqh); - INIT_LIST_HEAD(&the_lnet.ln_lnds); - INIT_LIST_HEAD(&the_lnet.ln_rcd_zombie); - INIT_LIST_HEAD(&the_lnet.ln_rcd_deathrow); - - /* - * The hash table size is the number of bits it takes to express the set - * ln_num_routes, minus 1 (better to under estimate than over so we - * don't waste memory). - */ - if (rnet_htable_size <= 0) - rnet_htable_size = LNET_REMOTE_NETS_HASH_DEFAULT; - else if (rnet_htable_size > LNET_REMOTE_NETS_HASH_MAX) - rnet_htable_size = LNET_REMOTE_NETS_HASH_MAX; - the_lnet.ln_remote_nets_hbits = max_t(int, 1, - order_base_2(rnet_htable_size) - 1); - - /* - * All LNDs apart from the LOLND are in separate modules. They - * register themselves when their module loads, and unregister - * themselves when their module is unloaded. - */ - lnet_register_lnd(&the_lolnd); - return 0; -} - -/** - * Finalize LNet library. - * - * \pre lnet_lib_init() called with success. - * \pre All LNet users called LNetNIFini() for matching LNetNIInit() calls. 
- */ -void lnet_lib_exit(void) -{ - LASSERT(!the_lnet.ln_refcount); - - while (!list_empty(&the_lnet.ln_lnds)) - lnet_unregister_lnd(list_entry(the_lnet.ln_lnds.next, - struct lnet_lnd, lnd_list)); - lnet_destroy_locks(); -} - -/** - * Set LNet PID and start LNet interfaces, routing, and forwarding. - * - * Users must call this function at least once before any other functions. - * For each successful call there must be a corresponding call to - * LNetNIFini(). For subsequent calls to LNetNIInit(), \a requested_pid is - * ignored. - * - * The PID used by LNet may be different from the one requested. - * See LNetGetId(). - * - * \param requested_pid PID requested by the caller. - * - * \return >= 0 on success, and < 0 error code on failures. - */ -int -LNetNIInit(lnet_pid_t requested_pid) -{ - int im_a_router = 0; - int rc; - int ni_count; - struct lnet_ping_info *pinfo; - struct lnet_handle_md md_handle; - struct list_head net_head; - - INIT_LIST_HEAD(&net_head); - - mutex_lock(&the_lnet.ln_api_mutex); - - CDEBUG(D_OTHER, "refs %d\n", the_lnet.ln_refcount); - - if (the_lnet.ln_refcount > 0) { - rc = the_lnet.ln_refcount++; - mutex_unlock(&the_lnet.ln_api_mutex); - return rc; - } - - rc = lnet_prepare(requested_pid); - if (rc) { - mutex_unlock(&the_lnet.ln_api_mutex); - return rc; - } - - /* Add in the loopback network */ - if (!lnet_ni_alloc(LNET_MKNET(LOLND, 0), NULL, &net_head)) { - rc = -ENOMEM; - goto err_empty_list; - } - - /* - * If LNet is being initialized via DLC it is possible - * that the user requests not to load module parameters (ones which - * are supported by DLC) on initialization. Therefore, make sure not - * to load networks, routes and forwarding from module parameters - * in this case. 
On cleanup in case of failure only clean up - * routes if it has been loaded - */ - if (!the_lnet.ln_nis_from_mod_params) { - rc = lnet_parse_networks(&net_head, lnet_get_networks()); - if (rc < 0) - goto err_empty_list; - } - - ni_count = lnet_startup_lndnis(&net_head); - if (ni_count < 0) { - rc = ni_count; - goto err_empty_list; - } - - if (!the_lnet.ln_nis_from_mod_params) { - rc = lnet_parse_routes(lnet_get_routes(), &im_a_router); - if (rc) - goto err_shutdown_lndnis; - - rc = lnet_check_routes(); - if (rc) - goto err_destroy_routes; - - rc = lnet_rtrpools_alloc(im_a_router); - if (rc) - goto err_destroy_routes; - } - - rc = lnet_acceptor_start(); - if (rc) - goto err_destroy_routes; - - the_lnet.ln_refcount = 1; - /* Now I may use my own API functions... */ - - rc = lnet_ping_info_setup(&pinfo, &md_handle, ni_count, true); - if (rc) - goto err_acceptor_stop; - - lnet_ping_target_update(pinfo, md_handle); - - rc = lnet_router_checker_start(); - if (rc) - goto err_stop_ping; - - lnet_fault_init(); - lnet_router_debugfs_init(); - - mutex_unlock(&the_lnet.ln_api_mutex); - - return 0; - -err_stop_ping: - lnet_ping_target_fini(); -err_acceptor_stop: - the_lnet.ln_refcount = 0; - lnet_acceptor_stop(); -err_destroy_routes: - if (!the_lnet.ln_nis_from_mod_params) - lnet_destroy_routes(); -err_shutdown_lndnis: - lnet_shutdown_lndnis(); -err_empty_list: - lnet_unprepare(); - LASSERT(rc < 0); - mutex_unlock(&the_lnet.ln_api_mutex); - while (!list_empty(&net_head)) { - struct lnet_ni *ni; - - ni = list_entry(net_head.next, struct lnet_ni, ni_list); - list_del_init(&ni->ni_list); - lnet_ni_free(ni); - } - return rc; -} -EXPORT_SYMBOL(LNetNIInit); - -/** - * Stop LNet interfaces, routing, and forwarding. - * - * Users must call this function once for each successful call to LNetNIInit(). - * Once the LNetNIFini() operation has been started, the results of pending - * API operations are undefined. - * - * \return always 0 for current implementation. 
- */ -int -LNetNIFini(void) -{ - mutex_lock(&the_lnet.ln_api_mutex); - - LASSERT(the_lnet.ln_refcount > 0); - - if (the_lnet.ln_refcount != 1) { - the_lnet.ln_refcount--; - } else { - LASSERT(!the_lnet.ln_niinit_self); - - lnet_fault_fini(); - lnet_router_debugfs_fini(); - lnet_router_checker_stop(); - lnet_ping_target_fini(); - - /* Teardown fns that use my own API functions BEFORE here */ - the_lnet.ln_refcount = 0; - - lnet_acceptor_stop(); - lnet_destroy_routes(); - lnet_shutdown_lndnis(); - lnet_unprepare(); - } - - mutex_unlock(&the_lnet.ln_api_mutex); - return 0; -} -EXPORT_SYMBOL(LNetNIFini); - -/** - * Grabs the ni data from the ni structure and fills the out - * parameters - * - * \param[in] ni network interface structure - * \param[out] config NI configuration - */ -static void -lnet_fill_ni_info(struct lnet_ni *ni, struct lnet_ioctl_config_data *config) -{ - struct lnet_ioctl_config_lnd_tunables *lnd_cfg = NULL; - struct lnet_ioctl_net_config *net_config; - size_t min_size, tunable_size = 0; - int i; - - if (!ni || !config) - return; - - net_config = (struct lnet_ioctl_net_config *)config->cfg_bulk; - if (!net_config) - return; - - BUILD_BUG_ON(ARRAY_SIZE(ni->ni_interfaces) != - ARRAY_SIZE(net_config->ni_interfaces)); - - for (i = 0; i < ARRAY_SIZE(ni->ni_interfaces); i++) { - if (!ni->ni_interfaces[i]) - break; - - strncpy(net_config->ni_interfaces[i], - ni->ni_interfaces[i], - sizeof(net_config->ni_interfaces[i])); - } - - config->cfg_nid = ni->ni_nid; - config->cfg_config_u.cfg_net.net_peer_timeout = ni->ni_peertimeout; - config->cfg_config_u.cfg_net.net_max_tx_credits = ni->ni_maxtxcredits; - config->cfg_config_u.cfg_net.net_peer_tx_credits = ni->ni_peertxcredits; - config->cfg_config_u.cfg_net.net_peer_rtr_credits = ni->ni_peerrtrcredits; - - net_config->ni_status = ni->ni_status->ns_status; - - if (ni->ni_cpts) { - int num_cpts = min(ni->ni_ncpts, LNET_MAX_SHOW_NUM_CPT); - - for (i = 0; i < num_cpts; i++) - net_config->ni_cpts[i] = ni->ni_cpts[i]; 
- - config->cfg_ncpts = num_cpts; - } - - /* - * See if user land tools sent in a newer and larger version - * of struct lnet_tunables than what the kernel uses. - */ - min_size = sizeof(*config) + sizeof(*net_config); - - if (config->cfg_hdr.ioc_len > min_size) - tunable_size = config->cfg_hdr.ioc_len - min_size; - - /* Don't copy to much data to user space */ - min_size = min(tunable_size, sizeof(*ni->ni_lnd_tunables)); - lnd_cfg = (struct lnet_ioctl_config_lnd_tunables *)net_config->cfg_bulk; - - if (ni->ni_lnd_tunables && lnd_cfg && min_size) { - memcpy(lnd_cfg, ni->ni_lnd_tunables, min_size); - config->cfg_config_u.cfg_net.net_interface_count = 1; - - /* Tell user land that kernel side has less data */ - if (tunable_size > sizeof(*ni->ni_lnd_tunables)) { - min_size = tunable_size - sizeof(ni->ni_lnd_tunables); - config->cfg_hdr.ioc_len -= min_size; - } - } -} - -static int -lnet_get_net_config(struct lnet_ioctl_config_data *config) -{ - struct lnet_ni *ni; - struct list_head *tmp; - int idx = config->cfg_count; - int cpt, i = 0; - int rc = -ENOENT; - - cpt = lnet_net_lock_current(); - - list_for_each(tmp, &the_lnet.ln_nis) { - if (i++ != idx) - continue; - - ni = list_entry(tmp, struct lnet_ni, ni_list); - lnet_ni_lock(ni); - lnet_fill_ni_info(ni, config); - lnet_ni_unlock(ni); - rc = 0; - break; - } - - lnet_net_unlock(cpt); - return rc; -} - -int -lnet_dyn_add_ni(lnet_pid_t requested_pid, struct lnet_ioctl_config_data *conf) -{ - char *nets = conf->cfg_config_u.cfg_net.net_intf; - struct lnet_ping_info *pinfo; - struct lnet_handle_md md_handle; - struct lnet_ni *ni; - struct list_head net_head; - struct lnet_remotenet *rnet; - int rc; - - INIT_LIST_HEAD(&net_head); - - /* Create a ni structure for the network string */ - rc = lnet_parse_networks(&net_head, nets); - if (rc <= 0) - return !rc ? 
-EINVAL : rc; - - mutex_lock(&the_lnet.ln_api_mutex); - - if (rc > 1) { - rc = -EINVAL; /* only add one interface per call */ - goto failed0; - } - - ni = list_entry(net_head.next, struct lnet_ni, ni_list); - - lnet_net_lock(LNET_LOCK_EX); - rnet = lnet_find_net_locked(LNET_NIDNET(ni->ni_nid)); - lnet_net_unlock(LNET_LOCK_EX); - /* - * make sure that the net added doesn't invalidate the current - * configuration LNet is keeping - */ - if (rnet) { - CERROR("Adding net %s will invalidate routing configuration\n", - nets); - rc = -EUSERS; - goto failed0; - } - - rc = lnet_ping_info_setup(&pinfo, &md_handle, 1 + lnet_get_ni_count(), - false); - if (rc) - goto failed0; - - list_del_init(&ni->ni_list); - - rc = lnet_startup_lndni(ni, conf); - if (rc) - goto failed1; - - if (ni->ni_lnd->lnd_accept) { - rc = lnet_acceptor_start(); - if (rc < 0) { - /* shutdown the ni that we just started */ - CERROR("Failed to start up acceptor thread\n"); - lnet_shutdown_lndni(ni); - goto failed1; - } - } - - lnet_ping_target_update(pinfo, md_handle); - mutex_unlock(&the_lnet.ln_api_mutex); - - return 0; - -failed1: - lnet_ping_md_unlink(pinfo, &md_handle); - lnet_ping_info_free(pinfo); -failed0: - mutex_unlock(&the_lnet.ln_api_mutex); - while (!list_empty(&net_head)) { - ni = list_entry(net_head.next, struct lnet_ni, ni_list); - list_del_init(&ni->ni_list); - lnet_ni_free(ni); - } - return rc; -} - -int -lnet_dyn_del_ni(__u32 net) -{ - struct lnet_ni *ni; - struct lnet_ping_info *pinfo; - struct lnet_handle_md md_handle; - int rc; - - /* don't allow userspace to shutdown the LOLND */ - if (LNET_NETTYP(net) == LOLND) - return -EINVAL; - - mutex_lock(&the_lnet.ln_api_mutex); - /* create and link a new ping info, before removing the old one */ - rc = lnet_ping_info_setup(&pinfo, &md_handle, - lnet_get_ni_count() - 1, false); - if (rc) - goto out; - - ni = lnet_net2ni(net); - if (!ni) { - rc = -EINVAL; - goto failed; - } - - /* decrement the reference counter taken by lnet_net2ni() */ - 
lnet_ni_decref_locked(ni, 0); - - lnet_shutdown_lndni(ni); - - if (!lnet_count_acceptor_nis()) - lnet_acceptor_stop(); - - lnet_ping_target_update(pinfo, md_handle); - goto out; -failed: - lnet_ping_md_unlink(pinfo, &md_handle); - lnet_ping_info_free(pinfo); -out: - mutex_unlock(&the_lnet.ln_api_mutex); - - return rc; -} - -/** - * LNet ioctl handler. - * - */ -int -LNetCtl(unsigned int cmd, void *arg) -{ - struct libcfs_ioctl_data *data = arg; - struct lnet_ioctl_config_data *config; - struct lnet_process_id id = {0}; - struct lnet_ni *ni; - int rc; - unsigned long secs_passed; - - BUILD_BUG_ON(LIBCFS_IOC_DATA_MAX < - sizeof(struct lnet_ioctl_net_config) + - sizeof(struct lnet_ioctl_config_data)); - - switch (cmd) { - case IOC_LIBCFS_GET_NI: - rc = LNetGetId(data->ioc_count, &id); - data->ioc_nid = id.nid; - return rc; - - case IOC_LIBCFS_FAIL_NID: - return lnet_fail_nid(data->ioc_nid, data->ioc_count); - - case IOC_LIBCFS_ADD_ROUTE: - config = arg; - - if (config->cfg_hdr.ioc_len < sizeof(*config)) - return -EINVAL; - - mutex_lock(&the_lnet.ln_api_mutex); - rc = lnet_add_route(config->cfg_net, - config->cfg_config_u.cfg_route.rtr_hop, - config->cfg_nid, - config->cfg_config_u.cfg_route.rtr_priority); - if (!rc) { - rc = lnet_check_routes(); - if (rc) - lnet_del_route(config->cfg_net, - config->cfg_nid); - } - mutex_unlock(&the_lnet.ln_api_mutex); - return rc; - - case IOC_LIBCFS_DEL_ROUTE: - config = arg; - - if (config->cfg_hdr.ioc_len < sizeof(*config)) - return -EINVAL; - - mutex_lock(&the_lnet.ln_api_mutex); - rc = lnet_del_route(config->cfg_net, config->cfg_nid); - mutex_unlock(&the_lnet.ln_api_mutex); - return rc; - - case IOC_LIBCFS_GET_ROUTE: - config = arg; - - if (config->cfg_hdr.ioc_len < sizeof(*config)) - return -EINVAL; - - return lnet_get_route(config->cfg_count, - &config->cfg_net, - &config->cfg_config_u.cfg_route.rtr_hop, - &config->cfg_nid, - &config->cfg_config_u.cfg_route.rtr_flags, - &config->cfg_config_u.cfg_route.rtr_priority); - - case 
IOC_LIBCFS_GET_NET: { - size_t total = sizeof(*config) + - sizeof(struct lnet_ioctl_net_config); - config = arg; - - if (config->cfg_hdr.ioc_len < total) - return -EINVAL; - - return lnet_get_net_config(config); - } - - case IOC_LIBCFS_GET_LNET_STATS: { - struct lnet_ioctl_lnet_stats *lnet_stats = arg; - - if (lnet_stats->st_hdr.ioc_len < sizeof(*lnet_stats)) - return -EINVAL; - - lnet_counters_get(&lnet_stats->st_cntrs); - return 0; - } - - case IOC_LIBCFS_CONFIG_RTR: - config = arg; - - if (config->cfg_hdr.ioc_len < sizeof(*config)) - return -EINVAL; - - mutex_lock(&the_lnet.ln_api_mutex); - if (config->cfg_config_u.cfg_buffers.buf_enable) { - rc = lnet_rtrpools_enable(); - mutex_unlock(&the_lnet.ln_api_mutex); - return rc; - } - lnet_rtrpools_disable(); - mutex_unlock(&the_lnet.ln_api_mutex); - return 0; - - case IOC_LIBCFS_ADD_BUF: - config = arg; - - if (config->cfg_hdr.ioc_len < sizeof(*config)) - return -EINVAL; - - mutex_lock(&the_lnet.ln_api_mutex); - rc = lnet_rtrpools_adjust(config->cfg_config_u.cfg_buffers.buf_tiny, - config->cfg_config_u.cfg_buffers.buf_small, - config->cfg_config_u.cfg_buffers.buf_large); - mutex_unlock(&the_lnet.ln_api_mutex); - return rc; - - case IOC_LIBCFS_GET_BUF: { - struct lnet_ioctl_pool_cfg *pool_cfg; - size_t total = sizeof(*config) + sizeof(*pool_cfg); - - config = arg; - - if (config->cfg_hdr.ioc_len < total) - return -EINVAL; - - pool_cfg = (struct lnet_ioctl_pool_cfg *)config->cfg_bulk; - return lnet_get_rtr_pool_cfg(config->cfg_count, pool_cfg); - } - - case IOC_LIBCFS_GET_PEER_INFO: { - struct lnet_ioctl_peer *peer_info = arg; - - if (peer_info->pr_hdr.ioc_len < sizeof(*peer_info)) - return -EINVAL; - - return lnet_get_peer_info(peer_info->pr_count, - &peer_info->pr_nid, - peer_info->pr_lnd_u.pr_peer_credits.cr_aliveness, - &peer_info->pr_lnd_u.pr_peer_credits.cr_ncpt, - &peer_info->pr_lnd_u.pr_peer_credits.cr_refcount, - &peer_info->pr_lnd_u.pr_peer_credits.cr_ni_peer_tx_credits, - 
&peer_info->pr_lnd_u.pr_peer_credits.cr_peer_tx_credits, - &peer_info->pr_lnd_u.pr_peer_credits.cr_peer_rtr_credits, - &peer_info->pr_lnd_u.pr_peer_credits.cr_peer_min_rtr_credits, - &peer_info->pr_lnd_u.pr_peer_credits.cr_peer_tx_qnob); - } - - case IOC_LIBCFS_NOTIFY_ROUTER: - secs_passed = (ktime_get_real_seconds() - data->ioc_u64[0]); - secs_passed *= msecs_to_jiffies(MSEC_PER_SEC); - - return lnet_notify(NULL, data->ioc_nid, data->ioc_flags, - jiffies - secs_passed); - - case IOC_LIBCFS_LNET_DIST: - rc = LNetDist(data->ioc_nid, &data->ioc_nid, &data->ioc_u32[1]); - if (rc < 0 && rc != -EHOSTUNREACH) - return rc; - - data->ioc_u32[0] = rc; - return 0; - - case IOC_LIBCFS_TESTPROTOCOMPAT: - lnet_net_lock(LNET_LOCK_EX); - the_lnet.ln_testprotocompat = data->ioc_flags; - lnet_net_unlock(LNET_LOCK_EX); - return 0; - - case IOC_LIBCFS_LNET_FAULT: - return lnet_fault_ctl(data->ioc_flags, data); - - case IOC_LIBCFS_PING: - id.nid = data->ioc_nid; - id.pid = data->ioc_u32[0]; - rc = lnet_ping(id, data->ioc_u32[1], /* timeout */ - data->ioc_pbuf1, - data->ioc_plen1 / sizeof(struct lnet_process_id)); - if (rc < 0) - return rc; - data->ioc_count = rc; - return 0; - - default: - ni = lnet_net2ni(data->ioc_net); - if (!ni) - return -EINVAL; - - if (!ni->ni_lnd->lnd_ctl) - rc = -EINVAL; - else - rc = ni->ni_lnd->lnd_ctl(ni, cmd, arg); - - lnet_ni_decref(ni); - return rc; - } - /* not reached */ -} -EXPORT_SYMBOL(LNetCtl); - -void LNetDebugPeer(struct lnet_process_id id) -{ - lnet_debug_peer(id.nid); -} -EXPORT_SYMBOL(LNetDebugPeer); - -/** - * Retrieve the lnet_process_id ID of LNet interface at \a index. Note that - * all interfaces share a same PID, as requested by LNetNIInit(). - * - * \param index Index of the interface to look up. - * \param id On successful return, this location will hold the - * lnet_process_id ID of the interface. - * - * \retval 0 If an interface exists at \a index. - * \retval -ENOENT If no interface has been found. 
- */ -int -LNetGetId(unsigned int index, struct lnet_process_id *id) -{ - struct lnet_ni *ni; - struct list_head *tmp; - int cpt; - int rc = -ENOENT; - - LASSERT(the_lnet.ln_refcount > 0); - - cpt = lnet_net_lock_current(); - - list_for_each(tmp, &the_lnet.ln_nis) { - if (index--) - continue; - - ni = list_entry(tmp, struct lnet_ni, ni_list); - - id->nid = ni->ni_nid; - id->pid = the_lnet.ln_pid; - rc = 0; - break; - } - - lnet_net_unlock(cpt); - return rc; -} -EXPORT_SYMBOL(LNetGetId); - -static int lnet_ping(struct lnet_process_id id, int timeout_ms, - struct lnet_process_id __user *ids, int n_ids) -{ - struct lnet_handle_eq eqh; - struct lnet_handle_md mdh; - struct lnet_event event; - struct lnet_md md = { NULL }; - int which; - int unlinked = 0; - int replied = 0; - const int a_long_time = 60000; /* mS */ - int infosz; - struct lnet_ping_info *info; - struct lnet_process_id tmpid; - int i; - int nob; - int rc; - int rc2; - - infosz = offsetof(struct lnet_ping_info, pi_ni[n_ids]); - - if (n_ids <= 0 || - id.nid == LNET_NID_ANY || - timeout_ms > 500000 || /* arbitrary limit! */ - n_ids > 20) /* arbitrary limit! */ - return -EINVAL; - - if (id.pid == LNET_PID_ANY) - id.pid = LNET_PID_LUSTRE; - - info = kzalloc(infosz, GFP_KERNEL); - if (!info) - return -ENOMEM; - - /* NB 2 events max (including any unlink event) */ - rc = LNetEQAlloc(2, LNET_EQ_HANDLER_NONE, &eqh); - if (rc) { - CERROR("Can't allocate EQ: %d\n", rc); - goto out_0; - } - - /* initialize md content */ - md.start = info; - md.length = infosz; - md.threshold = 2; /*GET/REPLY*/ - md.max_size = 0; - md.options = LNET_MD_TRUNCATE; - md.user_ptr = NULL; - md.eq_handle = eqh; - - rc = LNetMDBind(md, LNET_UNLINK, &mdh); - if (rc) { - CERROR("Can't bind MD: %d\n", rc); - goto out_1; - } - - rc = LNetGet(LNET_NID_ANY, mdh, id, - LNET_RESERVED_PORTAL, - LNET_PROTO_PING_MATCHBITS, 0); - - if (rc) { - /* Don't CERROR; this could be deliberate! 
*/ - - rc2 = LNetMDUnlink(mdh); - LASSERT(!rc2); - - /* NB must wait for the UNLINK event below... */ - unlinked = 1; - timeout_ms = a_long_time; - } - - do { - /* MUST block for unlink to complete */ - - rc2 = LNetEQPoll(&eqh, 1, timeout_ms, !unlinked, - &event, &which); - - CDEBUG(D_NET, "poll %d(%d %d)%s\n", rc2, - (rc2 <= 0) ? -1 : event.type, - (rc2 <= 0) ? -1 : event.status, - (rc2 > 0 && event.unlinked) ? " unlinked" : ""); - - LASSERT(rc2 != -EOVERFLOW); /* can't miss anything */ - - if (rc2 <= 0 || event.status) { - /* timeout or error */ - if (!replied && !rc) - rc = (rc2 < 0) ? rc2 : - !rc2 ? -ETIMEDOUT : - event.status; - - if (!unlinked) { - /* Ensure completion in finite time... */ - LNetMDUnlink(mdh); - /* No assertion (racing with network) */ - unlinked = 1; - timeout_ms = a_long_time; - } else if (!rc2) { - /* timed out waiting for unlink */ - CWARN("ping %s: late network completion\n", - libcfs_id2str(id)); - } - } else if (event.type == LNET_EVENT_REPLY) { - replied = 1; - rc = event.mlength; - } - - } while (rc2 <= 0 || !event.unlinked); - - if (!replied) { - if (rc >= 0) - CWARN("%s: Unexpected rc >= 0 but no reply!\n", - libcfs_id2str(id)); - rc = -EIO; - goto out_1; - } - - nob = rc; - LASSERT(nob >= 0 && nob <= infosz); - - rc = -EPROTO; /* if I can't parse... 
*/ - - if (nob < 8) { - /* can't check magic/version */ - CERROR("%s: ping info too short %d\n", - libcfs_id2str(id), nob); - goto out_1; - } - - if (info->pi_magic == __swab32(LNET_PROTO_PING_MAGIC)) { - lnet_swap_pinginfo(info); - } else if (info->pi_magic != LNET_PROTO_PING_MAGIC) { - CERROR("%s: Unexpected magic %08x\n", - libcfs_id2str(id), info->pi_magic); - goto out_1; - } - - if (!(info->pi_features & LNET_PING_FEAT_NI_STATUS)) { - CERROR("%s: ping w/o NI status: 0x%x\n", - libcfs_id2str(id), info->pi_features); - goto out_1; - } - - if (nob < offsetof(struct lnet_ping_info, pi_ni[0])) { - CERROR("%s: Short reply %d(%d min)\n", libcfs_id2str(id), - nob, (int)offsetof(struct lnet_ping_info, pi_ni[0])); - goto out_1; - } - - if (info->pi_nnis < n_ids) - n_ids = info->pi_nnis; - - if (nob < offsetof(struct lnet_ping_info, pi_ni[n_ids])) { - CERROR("%s: Short reply %d(%d expected)\n", libcfs_id2str(id), - nob, (int)offsetof(struct lnet_ping_info, pi_ni[n_ids])); - goto out_1; - } - - rc = -EFAULT; /* If I SEGV... */ - - memset(&tmpid, 0, sizeof(tmpid)); - for (i = 0; i < n_ids; i++) { - tmpid.pid = info->pi_pid; - tmpid.nid = info->pi_ni[i].ns_nid; - if (copy_to_user(&ids[i], &tmpid, sizeof(tmpid))) - goto out_1; - } - rc = info->pi_nnis; - - out_1: - rc2 = LNetEQFree(eqh); - if (rc2) - CERROR("rc2 %d\n", rc2); - LASSERT(!rc2); - - out_0: - kfree(info); - return rc; -} diff --git a/drivers/staging/lustre/lnet/lnet/config.c b/drivers/staging/lustre/lnet/lnet/config.c deleted file mode 100644 index 0aea268a4f1c..000000000000 --- a/drivers/staging/lustre/lnet/lnet/config.c +++ /dev/null @@ -1,1234 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -/* - * GPL HEADER START - * - * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 only, - * as published by the Free Software Foundation. 
- * - * This program is distributed in the hope that it will be useful, but - * WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * General Public License version 2 for more details (a copy is included - * in the LICENSE file that accompanied this code). - * - * You should have received a copy of the GNU General Public License - * version 2 along with this program; If not, see - * http://www.gnu.org/licenses/gpl-2.0.html - * - * GPL HEADER END - */ -/* - * Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved. - * Use is subject to license terms. - * - * Copyright (c) 2012, 2015, Intel Corporation. - */ -/* - * This file is part of Lustre, http://www.lustre.org/ - * Lustre is a trademark of Sun Microsystems, Inc. - */ - -#define DEBUG_SUBSYSTEM S_LNET -#include <linux/nsproxy.h> -#include <net/net_namespace.h> -#include <linux/lnet/lib-lnet.h> - -struct lnet_text_buf { /* tmp struct for parsing routes */ - struct list_head ltb_list; /* stash on lists */ - int ltb_size; /* allocated size */ - char ltb_text[0]; /* text buffer */ -}; - -static int lnet_tbnob; /* track text buf allocation */ -#define LNET_MAX_TEXTBUF_NOB (64 << 10) /* bound allocation */ -#define LNET_SINGLE_TEXTBUF_NOB (4 << 10) - -static void -lnet_syntax(char *name, char *str, int offset, int width) -{ - static char dots[LNET_SINGLE_TEXTBUF_NOB]; - static char dashes[LNET_SINGLE_TEXTBUF_NOB]; - - memset(dots, '.', sizeof(dots)); - dots[sizeof(dots) - 1] = 0; - memset(dashes, '-', sizeof(dashes)); - dashes[sizeof(dashes) - 1] = 0; - - LCONSOLE_ERROR_MSG(0x10f, "Error parsing '%s=\"%s\"'\n", name, str); - LCONSOLE_ERROR_MSG(0x110, "here...........%.*s..%.*s|%.*s|\n", - (int)strlen(name), dots, offset, dots, - (width < 1) ? 
0 : width - 1, dashes); -} - -static int -lnet_issep(char c) -{ - switch (c) { - case '\n': - case '\r': - case ';': - return 1; - default: - return 0; - } -} - -int -lnet_net_unique(__u32 net, struct list_head *nilist) -{ - struct list_head *tmp; - struct lnet_ni *ni; - - list_for_each(tmp, nilist) { - ni = list_entry(tmp, struct lnet_ni, ni_list); - - if (LNET_NIDNET(ni->ni_nid) == net) - return 0; - } - - return 1; -} - -void -lnet_ni_free(struct lnet_ni *ni) -{ - int i; - - if (ni->ni_refs) - cfs_percpt_free(ni->ni_refs); - - if (ni->ni_tx_queues) - cfs_percpt_free(ni->ni_tx_queues); - - if (ni->ni_cpts) - cfs_expr_list_values_free(ni->ni_cpts, ni->ni_ncpts); - - kfree(ni->ni_lnd_tunables); - - for (i = 0; i < LNET_MAX_INTERFACES && ni->ni_interfaces[i]; i++) - kfree(ni->ni_interfaces[i]); - - /* release reference to net namespace */ - if (ni->ni_net_ns) - put_net(ni->ni_net_ns); - - kfree(ni); -} - -struct lnet_ni * -lnet_ni_alloc(__u32 net, struct cfs_expr_list *el, struct list_head *nilist) -{ - struct lnet_tx_queue *tq; - struct lnet_ni *ni; - int rc; - int i; - - if (!lnet_net_unique(net, nilist)) { - LCONSOLE_ERROR_MSG(0x111, "Duplicate network specified: %s\n", - libcfs_net2str(net)); - return NULL; - } - - ni = kzalloc(sizeof(*ni), GFP_NOFS); - if (!ni) { - CERROR("Out of memory creating network %s\n", - libcfs_net2str(net)); - return NULL; - } - - spin_lock_init(&ni->ni_lock); - INIT_LIST_HEAD(&ni->ni_cptlist); - ni->ni_refs = cfs_percpt_alloc(lnet_cpt_table(), - sizeof(*ni->ni_refs[0])); - if (!ni->ni_refs) - goto failed; - - ni->ni_tx_queues = cfs_percpt_alloc(lnet_cpt_table(), - sizeof(*ni->ni_tx_queues[0])); - if (!ni->ni_tx_queues) - goto failed; - - cfs_percpt_for_each(tq, i, ni->ni_tx_queues) - INIT_LIST_HEAD(&tq->tq_delayed); - - if (!el) { - ni->ni_cpts = NULL; - ni->ni_ncpts = LNET_CPT_NUMBER; - } else { - rc = cfs_expr_list_values(el, LNET_CPT_NUMBER, &ni->ni_cpts); - if (rc <= 0) { - CERROR("Failed to set CPTs for NI %s: %d\n", - 
libcfs_net2str(net), rc); - goto failed; - } - - LASSERT(rc <= LNET_CPT_NUMBER); - if (rc == LNET_CPT_NUMBER) { - cfs_expr_list_values_free(ni->ni_cpts, LNET_CPT_NUMBER); - ni->ni_cpts = NULL; - } - - ni->ni_ncpts = rc; - } - - /* LND will fill in the address part of the NID */ - ni->ni_nid = LNET_MKNID(net, 0); - - /* Store net namespace in which current ni is being created */ - if (current->nsproxy->net_ns) - ni->ni_net_ns = get_net(current->nsproxy->net_ns); - else - ni->ni_net_ns = NULL; - - ni->ni_last_alive = ktime_get_real_seconds(); - list_add_tail(&ni->ni_list, nilist); - return ni; - failed: - lnet_ni_free(ni); - return NULL; -} - -int -lnet_parse_networks(struct list_head *nilist, char *networks) -{ - struct cfs_expr_list *el = NULL; - char *tokens; - char *str; - char *tmp; - struct lnet_ni *ni; - __u32 net; - int nnets = 0; - struct list_head *temp_node; - - if (!networks) { - CERROR("networks string is undefined\n"); - return -EINVAL; - } - - if (strlen(networks) > LNET_SINGLE_TEXTBUF_NOB) { - /* _WAY_ conservative */ - LCONSOLE_ERROR_MSG(0x112, - "Can't parse networks: string too long\n"); - return -EINVAL; - } - - tokens = kstrdup(networks, GFP_KERNEL); - if (!tokens) { - CERROR("Can't allocate net tokens\n"); - return -ENOMEM; - } - - tmp = tokens; - str = tokens; - - while (str && *str) { - char *comma = strchr(str, ','); - char *bracket = strchr(str, '('); - char *square = strchr(str, '['); - char *iface; - int niface; - int rc; - - /* - * NB we don't check interface conflicts here; it's the LNDs - * responsibility (if it cares at all) - */ - if (square && (!comma || square < comma)) { - /* - * i.e: o2ib0(ib0)[1,2], number between square - * brackets are CPTs this NI needs to be bond - */ - if (bracket && bracket > square) { - tmp = square; - goto failed_syntax; - } - - tmp = strchr(square, ']'); - if (!tmp) { - tmp = square; - goto failed_syntax; - } - - rc = cfs_expr_list_parse(square, tmp - square + 1, - 0, LNET_CPT_NUMBER - 1, &el); - if (rc) 
{ - tmp = square; - goto failed_syntax; - } - - while (square <= tmp) - *square++ = ' '; - } - - if (!bracket || (comma && comma < bracket)) { - /* no interface list specified */ - - if (comma) - *comma++ = 0; - net = libcfs_str2net(strim(str)); - - if (net == LNET_NIDNET(LNET_NID_ANY)) { - LCONSOLE_ERROR_MSG(0x113, - "Unrecognised network type\n"); - tmp = str; - goto failed_syntax; - } - - if (LNET_NETTYP(net) != LOLND && /* LO is implicit */ - !lnet_ni_alloc(net, el, nilist)) - goto failed; - - if (el) { - cfs_expr_list_free(el); - el = NULL; - } - - str = comma; - continue; - } - - *bracket = 0; - net = libcfs_str2net(strim(str)); - if (net == LNET_NIDNET(LNET_NID_ANY)) { - tmp = str; - goto failed_syntax; - } - - ni = lnet_ni_alloc(net, el, nilist); - if (!ni) - goto failed; - - if (el) { - cfs_expr_list_free(el); - el = NULL; - } - - niface = 0; - iface = bracket + 1; - - bracket = strchr(iface, ')'); - if (!bracket) { - tmp = iface; - goto failed_syntax; - } - - *bracket = 0; - do { - comma = strchr(iface, ','); - if (comma) - *comma++ = 0; - - iface = strim(iface); - if (!*iface) { - tmp = iface; - goto failed_syntax; - } - - if (niface == LNET_MAX_INTERFACES) { - LCONSOLE_ERROR_MSG(0x115, - "Too many interfaces for net %s\n", - libcfs_net2str(net)); - goto failed; - } - - /* - * Allocate a separate piece of memory and copy - * into it the string, so we don't have - * a depencency on the tokens string. This way we - * can free the tokens at the end of the function. 
- * The newly allocated ni_interfaces[] can be - * freed when freeing the NI - */ - ni->ni_interfaces[niface] = kstrdup(iface, GFP_KERNEL); - if (!ni->ni_interfaces[niface]) { - CERROR("Can't allocate net interface name\n"); - goto failed; - } - niface++; - iface = comma; - } while (iface); - - str = bracket + 1; - comma = strchr(bracket + 1, ','); - if (comma) { - *comma = 0; - str = strim(str); - if (*str) { - tmp = str; - goto failed_syntax; - } - str = comma + 1; - continue; - } - - str = strim(str); - if (*str) { - tmp = str; - goto failed_syntax; - } - } - - list_for_each(temp_node, nilist) - nnets++; - - kfree(tokens); - return nnets; - - failed_syntax: - lnet_syntax("networks", networks, (int)(tmp - tokens), strlen(tmp)); - failed: - while (!list_empty(nilist)) { - ni = list_entry(nilist->next, struct lnet_ni, ni_list); - - list_del(&ni->ni_list); - lnet_ni_free(ni); - } - - if (el) - cfs_expr_list_free(el); - - kfree(tokens); - - return -EINVAL; -} - -static struct lnet_text_buf * -lnet_new_text_buf(int str_len) -{ - struct lnet_text_buf *ltb; - int nob; - - /* NB allocate space for the terminating 0 */ - nob = offsetof(struct lnet_text_buf, ltb_text[str_len + 1]); - if (nob > LNET_SINGLE_TEXTBUF_NOB) { - /* _way_ conservative for "route net gateway..." 
*/ - CERROR("text buffer too big\n"); - return NULL; - } - - if (lnet_tbnob + nob > LNET_MAX_TEXTBUF_NOB) { - CERROR("Too many text buffers\n"); - return NULL; - } - - ltb = kzalloc(nob, GFP_KERNEL); - if (!ltb) - return NULL; - - ltb->ltb_size = nob; - ltb->ltb_text[0] = 0; - lnet_tbnob += nob; - return ltb; -} - -static void -lnet_free_text_buf(struct lnet_text_buf *ltb) -{ - lnet_tbnob -= ltb->ltb_size; - kfree(ltb); -} - -static void -lnet_free_text_bufs(struct list_head *tbs) -{ - struct lnet_text_buf *ltb; - - while (!list_empty(tbs)) { - ltb = list_entry(tbs->next, struct lnet_text_buf, ltb_list); - - list_del(&ltb->ltb_list); - lnet_free_text_buf(ltb); - } -} - -static int -lnet_str2tbs_sep(struct list_head *tbs, char *str) -{ - struct list_head pending; - char *sep; - int nob; - int i; - struct lnet_text_buf *ltb; - - INIT_LIST_HEAD(&pending); - - /* Split 'str' into separate commands */ - for (;;) { - /* skip leading whitespace */ - while (isspace(*str)) - str++; - - /* scan for separator or comment */ - for (sep = str; *sep; sep++) - if (lnet_issep(*sep) || *sep == '#') - break; - - nob = (int)(sep - str); - if (nob > 0) { - ltb = lnet_new_text_buf(nob); - if (!ltb) { - lnet_free_text_bufs(&pending); - return -ENOMEM; - } - - for (i = 0; i < nob; i++) - if (isspace(str[i])) - ltb->ltb_text[i] = ' '; - else - ltb->ltb_text[i] = str[i]; - - ltb->ltb_text[nob] = 0; - - list_add_tail(&ltb->ltb_list, &pending); - } - - if (*sep == '#') { - /* scan for separator */ - do { - sep++; - } while (*sep && !lnet_issep(*sep)); - } - - if (!*sep) - break; - - str = sep + 1; - } - - list_splice(&pending, tbs->prev); - return 0; -} - -static int -lnet_expand1tb(struct list_head *list, - char *str, char *sep1, char *sep2, - char *item, int itemlen) -{ - int len1 = (int)(sep1 - str); - int len2 = strlen(sep2 + 1); - struct lnet_text_buf *ltb; - - LASSERT(*sep1 == '['); - LASSERT(*sep2 == ']'); - - ltb = lnet_new_text_buf(len1 + itemlen + len2); - if (!ltb) - return -ENOMEM; - 
- - memcpy(ltb->ltb_text, str, len1); - memcpy(&ltb->ltb_text[len1], item, itemlen); - memcpy(&ltb->ltb_text[len1 + itemlen], sep2 + 1, len2); - ltb->ltb_text[len1 + itemlen + len2] = 0; - - list_add_tail(&ltb->ltb_list, list); - return 0; -} - -static int -lnet_str2tbs_expand(struct list_head *tbs, char *str) -{ - char num[16]; - struct list_head pending; - char *sep; - char *sep2; - char *parsed; - char *enditem; - int lo; - int hi; - int stride; - int i; - int nob; - int scanned; - - INIT_LIST_HEAD(&pending); - - sep = strchr(str, '['); - if (!sep) /* nothing to expand */ - return 0; - - sep2 = strchr(sep, ']'); - if (!sep2) - goto failed; - - for (parsed = sep; parsed < sep2; parsed = enditem) { - enditem = ++parsed; - while (enditem < sep2 && *enditem != ',') - enditem++; - - if (enditem == parsed) /* no empty items */ - goto failed; - - if (sscanf(parsed, "%d-%d/%d%n", &lo, &hi, - &stride, &scanned) < 3) { - if (sscanf(parsed, "%d-%d%n", &lo, &hi, &scanned) < 2) { - /* simple string enumeration */ - if (lnet_expand1tb(&pending, str, sep, sep2, - parsed, - (int)(enditem - parsed))) { - goto failed; - } - continue; - } - - stride = 1; - } - - /* range expansion */ - - if (enditem != parsed + scanned) /* no trailing junk */ - goto failed; - - if (hi < 0 || lo < 0 || stride < 0 || hi < lo || - (hi - lo) % stride) - goto failed; - - for (i = lo; i <= hi; i += stride) { - snprintf(num, sizeof(num), "%d", i); - nob = strlen(num); - if (nob + 1 == sizeof(num)) - goto failed; - - if (lnet_expand1tb(&pending, str, sep, sep2, - num, nob)) - goto failed; - } - } - - list_splice(&pending, tbs->prev); - return 1; - - failed: - lnet_free_text_bufs(&pending); - return -EINVAL; -} - -static int -lnet_parse_hops(char *str, unsigned int *hops) -{ - int len = strlen(str); - int nob = len; - - return (sscanf(str, "%u%n", hops, &nob) >= 1 && - nob == len && - *hops > 0 && *hops < 256); -} - -#define LNET_PRIORITY_SEPARATOR (':') - -static int -lnet_parse_priority(char *str, unsigned int 
*priority, char **token) -{ - int nob; - char *sep; - int len; - - sep = strchr(str, LNET_PRIORITY_SEPARATOR); - if (!sep) { - *priority = 0; - return 0; - } - len = strlen(sep + 1); - - if ((sscanf((sep + 1), "%u%n", priority, &nob) < 1) || (len != nob)) { - /* - * Update the caller's token pointer so it treats the found - * priority as the token to report in the error message. - */ - *token += sep - str + 1; - return -EINVAL; - } - - CDEBUG(D_NET, "gateway %s, priority %d, nob %d\n", str, *priority, nob); - - /* - * Change priority separator to \0 to be able to parse NID - */ - *sep = '\0'; - return 0; -} - -static int -lnet_parse_route(char *str, int *im_a_router) -{ - /* static scratch buffer OK (single threaded) */ - static char cmd[LNET_SINGLE_TEXTBUF_NOB]; - - struct list_head nets; - struct list_head gateways; - struct list_head *tmp1; - struct list_head *tmp2; - __u32 net; - lnet_nid_t nid; - struct lnet_text_buf *ltb; - int rc; - char *sep; - char *token = str; - int ntokens = 0; - int myrc = -1; - __u32 hops; - int got_hops = 0; - unsigned int priority = 0; - - INIT_LIST_HEAD(&gateways); - INIT_LIST_HEAD(&nets); - - /* save a copy of the string for error messages */ - strncpy(cmd, str, sizeof(cmd)); - cmd[sizeof(cmd) - 1] = '\0'; - - sep = str; - for (;;) { - /* scan for token start */ - while (isspace(*sep)) - sep++; - if (!*sep) { - if (ntokens < (got_hops ? 
3 : 2)) - goto token_error; - break; - } - - ntokens++; - token = sep++; - - /* scan for token end */ - while (*sep && !isspace(*sep)) - sep++; - if (*sep) - *sep++ = 0; - - if (ntokens == 1) { - tmp2 = &nets; /* expanding nets */ - } else if (ntokens == 2 && - lnet_parse_hops(token, &hops)) { - got_hops = 1; /* got a hop count */ - continue; - } else { - tmp2 = &gateways; /* expanding gateways */ - } - - ltb = lnet_new_text_buf(strlen(token)); - if (!ltb) - goto out; - - strcpy(ltb->ltb_text, token); - tmp1 = &ltb->ltb_list; - list_add_tail(tmp1, tmp2); - - while (tmp1 != tmp2) { - ltb = list_entry(tmp1, struct lnet_text_buf, ltb_list); - - rc = lnet_str2tbs_expand(tmp1->next, ltb->ltb_text); - if (rc < 0) - goto token_error; - - tmp1 = tmp1->next; - - if (rc > 0) { /* expanded! */ - list_del(&ltb->ltb_list); - lnet_free_text_buf(ltb); - continue; - } - - if (ntokens == 1) { - net = libcfs_str2net(ltb->ltb_text); - if (net == LNET_NIDNET(LNET_NID_ANY) || - LNET_NETTYP(net) == LOLND) - goto token_error; - } else { - rc = lnet_parse_priority(ltb->ltb_text, - &priority, &token); - if (rc < 0) - goto token_error; - - nid = libcfs_str2nid(ltb->ltb_text); - if (nid == LNET_NID_ANY || - LNET_NETTYP(LNET_NIDNET(nid)) == LOLND) - goto token_error; - } - } - } - - /** - * if there are no hops set then we want to flag this value as - * unset since hops is an optional parameter - */ - if (!got_hops) - hops = LNET_UNDEFINED_HOPS; - - LASSERT(!list_empty(&nets)); - LASSERT(!list_empty(&gateways)); - - list_for_each(tmp1, &nets) { - ltb = list_entry(tmp1, struct lnet_text_buf, ltb_list); - net = libcfs_str2net(ltb->ltb_text); - LASSERT(net != LNET_NIDNET(LNET_NID_ANY)); - - list_for_each(tmp2, &gateways) { - ltb = list_entry(tmp2, struct lnet_text_buf, ltb_list); - nid = libcfs_str2nid(ltb->ltb_text); - LASSERT(nid != LNET_NID_ANY); - - if (lnet_islocalnid(nid)) { - *im_a_router = 1; - continue; - } - - rc = lnet_add_route(net, hops, nid, priority); - if (rc && rc != -EEXIST && rc 
!= -EHOSTUNREACH) { - CERROR("Can't create route to %s via %s\n", - libcfs_net2str(net), - libcfs_nid2str(nid)); - goto out; - } - } - } - - myrc = 0; - goto out; - - token_error: - lnet_syntax("routes", cmd, (int)(token - str), strlen(token)); - out: - lnet_free_text_bufs(&nets); - lnet_free_text_bufs(&gateways); - return myrc; -} - -static int -lnet_parse_route_tbs(struct list_head *tbs, int *im_a_router) -{ - struct lnet_text_buf *ltb; - - while (!list_empty(tbs)) { - ltb = list_entry(tbs->next, struct lnet_text_buf, ltb_list); - - if (lnet_parse_route(ltb->ltb_text, im_a_router) < 0) { - lnet_free_text_bufs(tbs); - return -EINVAL; - } - - list_del(&ltb->ltb_list); - lnet_free_text_buf(ltb); - } - - return 0; -} - -int -lnet_parse_routes(char *routes, int *im_a_router) -{ - struct list_head tbs; - int rc = 0; - - *im_a_router = 0; - - INIT_LIST_HEAD(&tbs); - - if (lnet_str2tbs_sep(&tbs, routes) < 0) { - CERROR("Error parsing routes\n"); - rc = -EINVAL; - } else { - rc = lnet_parse_route_tbs(&tbs, im_a_router); - } - - LASSERT(!lnet_tbnob); - return rc; -} - -static int -lnet_match_network_token(char *token, int len, __u32 *ipaddrs, int nip) -{ - LIST_HEAD(list); - int rc; - int i; - - rc = cfs_ip_addr_parse(token, len, &list); - if (rc) - return rc; - - for (rc = i = 0; !rc && i < nip; i++) - rc = cfs_ip_addr_match(ipaddrs[i], &list); - - cfs_expr_list_free_list(&list); - - return rc; -} - -static int -lnet_match_network_tokens(char *net_entry, __u32 *ipaddrs, int nip) -{ - static char tokens[LNET_SINGLE_TEXTBUF_NOB]; - - int matched = 0; - int ntokens = 0; - int len; - char *net = NULL; - char *sep; - char *token; - int rc; - - LASSERT(strlen(net_entry) < sizeof(tokens)); - - /* work on a copy of the string */ - strcpy(tokens, net_entry); - sep = tokens; - for (;;) { - /* scan for token start */ - while (isspace(*sep)) - sep++; - if (!*sep) - break; - - token = sep++; - - /* scan for token end */ - while (*sep && !isspace(*sep)) - sep++; - if (*sep) - *sep++ = 0; 
- - if (!ntokens++) { - net = token; - continue; - } - - len = strlen(token); - - rc = lnet_match_network_token(token, len, ipaddrs, nip); - if (rc < 0) { - lnet_syntax("ip2nets", net_entry, - (int)(token - tokens), len); - return rc; - } - - if (rc) - matched |= 1; - } - - if (!matched) - return 0; - - strcpy(net_entry, net); /* replace with matched net */ - return 1; -} - -static __u32 -lnet_netspec2net(char *netspec) -{ - char *bracket = strchr(netspec, '('); - __u32 net; - - if (bracket) - *bracket = 0; - - net = libcfs_str2net(netspec); - - if (bracket) - *bracket = '('; - - return net; -} - -static int -lnet_splitnets(char *source, struct list_head *nets) -{ - int offset = 0; - int offset2; - int len; - struct lnet_text_buf *tb; - struct lnet_text_buf *tb2; - struct list_head *t; - char *sep; - char *bracket; - __u32 net; - - LASSERT(!list_empty(nets)); - LASSERT(nets->next == nets->prev); /* single entry */ - - tb = list_entry(nets->next, struct lnet_text_buf, ltb_list); - - for (;;) { - sep = strchr(tb->ltb_text, ','); - bracket = strchr(tb->ltb_text, '('); - - if (sep && bracket && bracket < sep) { - /* netspec lists interfaces... */ - - offset2 = offset + (int)(bracket - tb->ltb_text); - len = strlen(bracket); - - bracket = strchr(bracket + 1, ')'); - - if (!bracket || - !(bracket[1] == ',' || !bracket[1])) { - lnet_syntax("ip2nets", source, offset2, len); - return -EINVAL; - } - - sep = !bracket[1] ? 
NULL : bracket + 1; - } - - if (sep) - *sep++ = 0; - - net = lnet_netspec2net(tb->ltb_text); - if (net == LNET_NIDNET(LNET_NID_ANY)) { - lnet_syntax("ip2nets", source, offset, - strlen(tb->ltb_text)); - return -EINVAL; - } - - list_for_each(t, nets) { - tb2 = list_entry(t, struct lnet_text_buf, ltb_list); - - if (tb2 == tb) - continue; - - if (net == lnet_netspec2net(tb2->ltb_text)) { - /* duplicate network */ - lnet_syntax("ip2nets", source, offset, - strlen(tb->ltb_text)); - return -EINVAL; - } - } - - if (!sep) - return 0; - - offset += (int)(sep - tb->ltb_text); - len = strlen(sep); - tb2 = lnet_new_text_buf(len); - if (!tb2) - return -ENOMEM; - - strncpy(tb2->ltb_text, sep, len); - tb2->ltb_text[len] = '\0'; - list_add_tail(&tb2->ltb_list, nets); - - tb = tb2; - } -} - -static int -lnet_match_networks(char **networksp, char *ip2nets, __u32 *ipaddrs, int nip) -{ - static char networks[LNET_SINGLE_TEXTBUF_NOB]; - static char source[LNET_SINGLE_TEXTBUF_NOB]; - - struct list_head raw_entries; - struct list_head matched_nets; - struct list_head current_nets; - struct list_head *t; - struct list_head *t2; - struct lnet_text_buf *tb; - struct lnet_text_buf *temp; - struct lnet_text_buf *tb2; - __u32 net1; - __u32 net2; - int len; - int count; - int dup; - int rc; - - INIT_LIST_HEAD(&raw_entries); - if (lnet_str2tbs_sep(&raw_entries, ip2nets) < 0) { - CERROR("Error parsing ip2nets\n"); - LASSERT(!lnet_tbnob); - return -EINVAL; - } - - INIT_LIST_HEAD(&matched_nets); - INIT_LIST_HEAD(&current_nets); - networks[0] = 0; - count = 0; - len = 0; - rc = 0; - - list_for_each_entry_safe(tb, temp, &raw_entries, ltb_list) { - strncpy(source, tb->ltb_text, sizeof(source)); - source[sizeof(source) - 1] = '\0'; - - /* replace ltb_text with the network(s) add on match */ - rc = lnet_match_network_tokens(tb->ltb_text, ipaddrs, nip); - if (rc < 0) - break; - - list_del(&tb->ltb_list); - - if (!rc) { /* no match */ - lnet_free_text_buf(tb); - continue; - } - - /* split into separate networks 
*/ - INIT_LIST_HEAD(&current_nets); - list_add(&tb->ltb_list, &current_nets); - rc = lnet_splitnets(source, &current_nets); - if (rc < 0) - break; - - dup = 0; - list_for_each(t, &current_nets) { - tb = list_entry(t, struct lnet_text_buf, ltb_list); - net1 = lnet_netspec2net(tb->ltb_text); - LASSERT(net1 != LNET_NIDNET(LNET_NID_ANY)); - - list_for_each(t2, &matched_nets) { - tb2 = list_entry(t2, struct lnet_text_buf, - ltb_list); - net2 = lnet_netspec2net(tb2->ltb_text); - LASSERT(net2 != LNET_NIDNET(LNET_NID_ANY)); - - if (net1 == net2) { - dup = 1; - break; - } - } - - if (dup) - break; - } - - if (dup) { - lnet_free_text_bufs(&current_nets); - continue; - } - - list_for_each_safe(t, t2, &current_nets) { - tb = list_entry(t, struct lnet_text_buf, ltb_list); - - list_del(&tb->ltb_list); - list_add_tail(&tb->ltb_list, &matched_nets); - - len += snprintf(networks + len, sizeof(networks) - len, - "%s%s", !len ? "" : ",", - tb->ltb_text); - - if (len >= sizeof(networks)) { - CERROR("Too many matched networks\n"); - rc = -E2BIG; - goto out; - } - } - - count++; - } - - out: - lnet_free_text_bufs(&raw_entries); - lnet_free_text_bufs(&matched_nets); - lnet_free_text_bufs(&current_nets); - LASSERT(!lnet_tbnob); - - if (rc < 0) - return rc; - - *networksp = networks; - return count; -} - -static int -lnet_ipaddr_enumerate(__u32 **ipaddrsp) -{ - int up; - __u32 netmask; - __u32 *ipaddrs; - __u32 *ipaddrs2; - int nip; - char **ifnames; - int nif = lnet_ipif_enumerate(&ifnames); - int i; - int rc; - - if (nif <= 0) - return nif; - - ipaddrs = kcalloc(nif, sizeof(*ipaddrs), GFP_KERNEL); - if (!ipaddrs) { - CERROR("Can't allocate ipaddrs[%d]\n", nif); - lnet_ipif_free_enumeration(ifnames, nif); - return -ENOMEM; - } - - for (i = nip = 0; i < nif; i++) { - if (!strcmp(ifnames[i], "lo")) - continue; - - rc = lnet_ipif_query(ifnames[i], &up, &ipaddrs[nip], &netmask); - if (rc) { - CWARN("Can't query interface %s: %d\n", - ifnames[i], rc); - continue; - } - - if (!up) { - CWARN("Ignoring interface %s: it's down\n", - 
ifnames[i]); - continue; - } - - nip++; - } - - lnet_ipif_free_enumeration(ifnames, nif); - - if (nip == nif) { - *ipaddrsp = ipaddrs; - } else { - if (nip > 0) { - ipaddrs2 = kcalloc(nip, sizeof(*ipaddrs2), - GFP_KERNEL); - if (!ipaddrs2) { - CERROR("Can't allocate ipaddrs[%d]\n", nip); - nip = -ENOMEM; - } else { - memcpy(ipaddrs2, ipaddrs, - nip * sizeof(*ipaddrs)); - *ipaddrsp = ipaddrs2; - rc = nip; - } - } - kfree(ipaddrs); - } - return nip; -} - -int -lnet_parse_ip2nets(char **networksp, char *ip2nets) -{ - __u32 *ipaddrs = NULL; - int nip = lnet_ipaddr_enumerate(&ipaddrs); - int rc; - - if (nip < 0) { - LCONSOLE_ERROR_MSG(0x117, - "Error %d enumerating local IP interfaces for ip2nets to match\n", - nip); - return nip; - } - - if (!nip) { - LCONSOLE_ERROR_MSG(0x118, - "No local IP interfaces for ip2nets to match\n"); - return -ENOENT; - } - - rc = lnet_match_networks(networksp, ip2nets, ipaddrs, nip); - kfree(ipaddrs); - - if (rc < 0) { - LCONSOLE_ERROR_MSG(0x119, "Error %d parsing ip2nets\n", rc); - return rc; - } - - if (!rc) { - LCONSOLE_ERROR_MSG(0x11a, - "ip2nets does not match any local IP interfaces\n"); - return -ENOENT; - } - - return 0; -} diff --git a/drivers/staging/lustre/lnet/lnet/lib-eq.c b/drivers/staging/lustre/lnet/lnet/lib-eq.c deleted file mode 100644 index ea53b5cb3f72..000000000000 --- a/drivers/staging/lustre/lnet/lnet/lib-eq.c +++ /dev/null @@ -1,426 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -/* - * GPL HEADER START - * - * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 only, - * as published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, but - * WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the GNU - * General Public License version 2 for more details (a copy is included - * in the LICENSE file that accompanied this code). - * - * You should have received a copy of the GNU General Public License - * version 2 along with this program; If not, see - * http://www.gnu.org/licenses/gpl-2.0.html - * - * GPL HEADER END - */ -/* - * Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved. - * Use is subject to license terms. - * - * Copyright (c) 2012, Intel Corporation. - */ -/* - * This file is part of Lustre, http://www.lustre.org/ - * Lustre is a trademark of Sun Microsystems, Inc. - * - * lnet/lnet/lib-eq.c - * - * Library level Event queue management routines - */ - -#define DEBUG_SUBSYSTEM S_LNET - -#include <linux/lnet/lib-lnet.h> - -/** - * Create an event queue that has room for \a count number of events. - * - * The event queue is circular and older events will be overwritten by new - * ones if they are not removed in time by the user using the functions - * LNetEQGet(), LNetEQWait(), or LNetEQPoll(). It is up to the user to - * determine the appropriate size of the event queue to prevent this loss - * of events. Note that when EQ handler is specified in \a callback, no - * event loss can happen, since the handler is run for each event deposited - * into the EQ. - * - * \param count The number of events to be stored in the event queue. It - * will be rounded up to the next power of two. - * \param callback A handler function that runs when an event is deposited - * into the EQ. The constant value LNET_EQ_HANDLER_NONE can be used to - * indicate that no event handler is desired. - * \param handle On successful return, this location will hold a handle for - * the newly created EQ. - * - * \retval 0 On success. - * \retval -EINVAL If an parameter is not valid. - * \retval -ENOMEM If memory for the EQ can't be allocated. - * - * \see lnet_eq_handler_t for the discussion on EQ handler semantics. 
- */ -int -LNetEQAlloc(unsigned int count, lnet_eq_handler_t callback, - struct lnet_handle_eq *handle) -{ - struct lnet_eq *eq; - - LASSERT(the_lnet.ln_refcount > 0); - - /* - * We need count to be a power of 2 so that when eq_{enq,deq}_seq - * overflow, they don't skip entries, so the queue has the same - * apparent capacity at all times - */ - if (count) - count = roundup_pow_of_two(count); - - if (callback != LNET_EQ_HANDLER_NONE && count) - CWARN("EQ callback is guaranteed to get every event, do you still want to set eqcount %d for polling event which will have locking overhead? Please contact with developer to confirm\n", count); - - /* - * count can be 0 if only need callback, we can eliminate - * overhead of enqueue event - */ - if (!count && callback == LNET_EQ_HANDLER_NONE) - return -EINVAL; - - eq = kzalloc(sizeof(*eq), GFP_NOFS); - if (!eq) - return -ENOMEM; - - if (count) { - eq->eq_events = kvmalloc_array(count, sizeof(struct lnet_event), - GFP_KERNEL | __GFP_ZERO); - if (!eq->eq_events) - goto failed; - /* - * NB allocator has set all event sequence numbers to 0, - * so all them should be earlier than eq_deq_seq - */ - } - - eq->eq_deq_seq = 1; - eq->eq_enq_seq = 1; - eq->eq_size = count; - eq->eq_callback = callback; - - eq->eq_refs = cfs_percpt_alloc(lnet_cpt_table(), - sizeof(*eq->eq_refs[0])); - if (!eq->eq_refs) - goto failed; - - /* MUST hold both exclusive lnet_res_lock */ - lnet_res_lock(LNET_LOCK_EX); - /* - * NB: hold lnet_eq_wait_lock for EQ link/unlink, so we can do - * both EQ lookup and poll event with only lnet_eq_wait_lock - */ - lnet_eq_wait_lock(); - - lnet_res_lh_initialize(&the_lnet.ln_eq_container, &eq->eq_lh); - list_add(&eq->eq_list, &the_lnet.ln_eq_container.rec_active); - - lnet_eq_wait_unlock(); - lnet_res_unlock(LNET_LOCK_EX); - - lnet_eq2handle(handle, eq); - return 0; - -failed: - kvfree(eq->eq_events); - - if (eq->eq_refs) - cfs_percpt_free(eq->eq_refs); - - kfree(eq); - return -ENOMEM; -} -EXPORT_SYMBOL(LNetEQAlloc); - 
-/** - * Release the resources associated with an event queue if it's idle; - * otherwise do nothing and it's up to the user to try again. - * - * \param eqh A handle for the event queue to be released. - * - * \retval 0 If the EQ is not in use and freed. - * \retval -ENOENT If \a eqh does not point to a valid EQ. - * \retval -EBUSY If the EQ is still in use by some MDs. - */ -int -LNetEQFree(struct lnet_handle_eq eqh) -{ - struct lnet_eq *eq; - struct lnet_event *events = NULL; - int **refs = NULL; - int *ref; - int rc = 0; - int size = 0; - int i; - - LASSERT(the_lnet.ln_refcount > 0); - - lnet_res_lock(LNET_LOCK_EX); - /* - * NB: hold lnet_eq_wait_lock for EQ link/unlink, so we can do - * both EQ lookup and poll event with only lnet_eq_wait_lock - */ - lnet_eq_wait_lock(); - - eq = lnet_handle2eq(&eqh); - if (!eq) { - rc = -ENOENT; - goto out; - } - - cfs_percpt_for_each(ref, i, eq->eq_refs) { - LASSERT(*ref >= 0); - if (!*ref) - continue; - - CDEBUG(D_NET, "Event equeue (%d: %d) busy on destroy.\n", - i, *ref); - rc = -EBUSY; - goto out; - } - - /* stash for free after lock dropped */ - events = eq->eq_events; - size = eq->eq_size; - refs = eq->eq_refs; - - lnet_res_lh_invalidate(&eq->eq_lh); - list_del(&eq->eq_list); - kfree(eq); - out: - lnet_eq_wait_unlock(); - lnet_res_unlock(LNET_LOCK_EX); - - kvfree(events); - if (refs) - cfs_percpt_free(refs); - - return rc; -} -EXPORT_SYMBOL(LNetEQFree); - -void -lnet_eq_enqueue_event(struct lnet_eq *eq, struct lnet_event *ev) -{ - /* MUST called with resource lock hold but w/o lnet_eq_wait_lock */ - int index; - - if (!eq->eq_size) { - LASSERT(eq->eq_callback != LNET_EQ_HANDLER_NONE); - eq->eq_callback(ev); - return; - } - - lnet_eq_wait_lock(); - ev->sequence = eq->eq_enq_seq++; - - LASSERT(eq->eq_size == LOWEST_BIT_SET(eq->eq_size)); - index = ev->sequence & (eq->eq_size - 1); - - eq->eq_events[index] = *ev; - - if (eq->eq_callback != LNET_EQ_HANDLER_NONE) - eq->eq_callback(ev); - - /* Wake anyone waiting in 
LNetEQPoll() */ - if (waitqueue_active(&the_lnet.ln_eq_waitq)) - wake_up_all(&the_lnet.ln_eq_waitq); - lnet_eq_wait_unlock(); -} - -static int -lnet_eq_dequeue_event(struct lnet_eq *eq, struct lnet_event *ev) -{ - int new_index = eq->eq_deq_seq & (eq->eq_size - 1); - struct lnet_event *new_event = &eq->eq_events[new_index]; - int rc; - - /* must called with lnet_eq_wait_lock hold */ - if (LNET_SEQ_GT(eq->eq_deq_seq, new_event->sequence)) - return 0; - - /* We've got a new event... */ - *ev = *new_event; - - CDEBUG(D_INFO, "event: %p, sequence: %lu, eq->size: %u\n", - new_event, eq->eq_deq_seq, eq->eq_size); - - /* ...but did it overwrite an event we've not seen yet? */ - if (eq->eq_deq_seq == new_event->sequence) { - rc = 1; - } else { - /* - * don't complain with CERROR: some EQs are sized small - * anyway; if it's important, the caller should complain - */ - CDEBUG(D_NET, "Event Queue Overflow: eq seq %lu ev seq %lu\n", - eq->eq_deq_seq, new_event->sequence); - rc = -EOVERFLOW; - } - - eq->eq_deq_seq = new_event->sequence + 1; - return rc; -} - -/** - * A nonblocking function that can be used to get the next event in an EQ. - * If an event handler is associated with the EQ, the handler will run before - * this function returns successfully. The event is removed from the queue. - * - * \param eventq A handle for the event queue. - * \param event On successful return (1 or -EOVERFLOW), this location will - * hold the next event in the EQ. - * - * \retval 0 No pending event in the EQ. - * \retval 1 Indicates success. - * \retval -ENOENT If \a eventq does not point to a valid EQ. - * \retval -EOVERFLOW Indicates success (i.e., an event is returned) and that - * at least one event between this event and the last event obtained from the - * EQ has been dropped due to limited space in the EQ. - */ - -/** - * Block the calling process until there is an event in the EQ. 
- * If an event handler is associated with the EQ, the handler will run before - * this function returns successfully. This function returns the next event - * in the EQ and removes it from the EQ. - * - * \param eventq A handle for the event queue. - * \param event On successful return (1 or -EOVERFLOW), this location will - * hold the next event in the EQ. - * - * \retval 1 Indicates success. - * \retval -ENOENT If \a eventq does not point to a valid EQ. - * \retval -EOVERFLOW Indicates success (i.e., an event is returned) and that - * at least one event between this event and the last event obtained from the - * EQ has been dropped due to limited space in the EQ. - */ - -static int -lnet_eq_wait_locked(int *timeout_ms, long state) -__must_hold(&the_lnet.ln_eq_wait_lock) -{ - int tms = *timeout_ms; - int wait; - wait_queue_entry_t wl; - unsigned long now; - - if (!tms) - return -ENXIO; /* don't want to wait and no new event */ - - init_waitqueue_entry(&wl, current); - set_current_state(state); - add_wait_queue(&the_lnet.ln_eq_waitq, &wl); - - lnet_eq_wait_unlock(); - - if (tms < 0) { - schedule(); - } else { - now = jiffies; - schedule_timeout(msecs_to_jiffies(tms)); - tms -= jiffies_to_msecs(jiffies - now); - if (tms < 0) /* no more wait but may have new event */ - tms = 0; - } - - wait = tms; /* might need to call here again */ - *timeout_ms = tms; - - lnet_eq_wait_lock(); - remove_wait_queue(&the_lnet.ln_eq_waitq, &wl); - - return wait; -} - -/** - * Block the calling process until there's an event from a set of EQs or - * timeout happens. - * - * If an event handler is associated with the EQ, the handler will run before - * this function returns successfully, in which case the corresponding event - * is consumed. - * - * LNetEQPoll() provides a timeout to allow applications to poll, block for a - * fixed period, or block indefinitely. - * - * \param eventqs,neq An array of EQ handles, and size of the array. 
- * \param timeout_ms Time in milliseconds to wait for an event to occur on - * one of the EQs. The constant LNET_TIME_FOREVER can be used to indicate an - * infinite timeout. - * \param interruptible, if true, use TASK_INTERRUPTIBLE, else TASK_NOLOAD - * \param event,which On successful return (1 or -EOVERFLOW), \a event will - * hold the next event in the EQs, and \a which will contain the index of the - * EQ from which the event was taken. - * - * \retval 0 No pending event in the EQs after timeout. - * \retval 1 Indicates success. - * \retval -EOVERFLOW Indicates success (i.e., an event is returned) and that - * at least one event between this event and the last event obtained from the - * EQ indicated by \a which has been dropped due to limited space in the EQ. - * \retval -ENOENT If there's an invalid handle in \a eventqs. - */ -int -LNetEQPoll(struct lnet_handle_eq *eventqs, int neq, int timeout_ms, - int interruptible, - struct lnet_event *event, int *which) -{ - int wait = 1; - int rc; - int i; - - LASSERT(the_lnet.ln_refcount > 0); - - if (neq < 1) - return -ENOENT; - - lnet_eq_wait_lock(); - - for (;;) { - for (i = 0; i < neq; i++) { - struct lnet_eq *eq = lnet_handle2eq(&eventqs[i]); - - if (!eq) { - lnet_eq_wait_unlock(); - return -ENOENT; - } - - rc = lnet_eq_dequeue_event(eq, event); - if (rc) { - lnet_eq_wait_unlock(); - *which = i; - return rc; - } - } - - if (!wait) - break; - - /* - * return value of lnet_eq_wait_locked: - * -1 : did nothing and it's sure no new event - * 1 : sleep inside and wait until new event - * 0 : don't want to wait anymore, but might have new event - * so need to call dequeue again - */ - wait = lnet_eq_wait_locked(&timeout_ms, - interruptible ? 
TASK_INTERRUPTIBLE - : TASK_NOLOAD); - if (wait < 0) /* no new event */ - break; - } - - lnet_eq_wait_unlock(); - return 0; -} diff --git a/drivers/staging/lustre/lnet/lnet/lib-md.c b/drivers/staging/lustre/lnet/lnet/lib-md.c deleted file mode 100644 index 8a22514aaf71..000000000000 --- a/drivers/staging/lustre/lnet/lnet/lib-md.c +++ /dev/null @@ -1,463 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -/* - * GPL HEADER START - * - * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 only, - * as published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, but - * WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * General Public License version 2 for more details (a copy is included - * in the LICENSE file that accompanied this code). - * - * You should have received a copy of the GNU General Public License - * version 2 along with this program; If not, see - * http://www.gnu.org/licenses/gpl-2.0.html - * - * GPL HEADER END - */ -/* - * Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved. - * Use is subject to license terms. - * - * Copyright (c) 2012, Intel Corporation. - */ -/* - * This file is part of Lustre, http://www.lustre.org/ - * Lustre is a trademark of Sun Microsystems, Inc. - * - * lnet/lnet/lib-md.c - * - * Memory Descriptor management routines - */ - -#define DEBUG_SUBSYSTEM S_LNET - -#include <linux/lnet/lib-lnet.h> - -/* must be called with lnet_res_lock held */ -void -lnet_md_unlink(struct lnet_libmd *md) -{ - if (!(md->md_flags & LNET_MD_FLAG_ZOMBIE)) { - /* first unlink attempt... 
*/ - struct lnet_me *me = md->md_me; - - md->md_flags |= LNET_MD_FLAG_ZOMBIE; - - /* - * Disassociate from ME (if any), - * and unlink it if it was created - * with LNET_UNLINK - */ - if (me) { - /* detach MD from portal */ - lnet_ptl_detach_md(me, md); - if (me->me_unlink == LNET_UNLINK) - lnet_me_unlink(me); - } - - /* ensure all future handle lookups fail */ - lnet_res_lh_invalidate(&md->md_lh); - } - - if (md->md_refcount) { - CDEBUG(D_NET, "Queueing unlink of md %p\n", md); - return; - } - - CDEBUG(D_NET, "Unlinking md %p\n", md); - - if (md->md_eq) { - int cpt = lnet_cpt_of_cookie(md->md_lh.lh_cookie); - - LASSERT(*md->md_eq->eq_refs[cpt] > 0); - (*md->md_eq->eq_refs[cpt])--; - } - - LASSERT(!list_empty(&md->md_list)); - list_del_init(&md->md_list); - kfree(md); -} - -static int -lnet_md_build(struct lnet_libmd *lmd, struct lnet_md *umd, int unlink) -{ - int i; - unsigned int niov; - int total_length = 0; - - lmd->md_me = NULL; - lmd->md_start = umd->start; - lmd->md_offset = 0; - lmd->md_max_size = umd->max_size; - lmd->md_options = umd->options; - lmd->md_user_ptr = umd->user_ptr; - lmd->md_eq = NULL; - lmd->md_threshold = umd->threshold; - lmd->md_refcount = 0; - lmd->md_flags = (unlink == LNET_UNLINK) ? 
LNET_MD_FLAG_AUTO_UNLINK : 0; - - if (umd->options & LNET_MD_IOVEC) { - if (umd->options & LNET_MD_KIOV) /* Can't specify both */ - return -EINVAL; - - niov = umd->length; - lmd->md_niov = umd->length; - memcpy(lmd->md_iov.iov, umd->start, - niov * sizeof(lmd->md_iov.iov[0])); - - for (i = 0; i < (int)niov; i++) { - /* We take the base address on trust */ - /* invalid length */ - if (lmd->md_iov.iov[i].iov_len <= 0) - return -EINVAL; - - total_length += lmd->md_iov.iov[i].iov_len; - } - - lmd->md_length = total_length; - - if ((umd->options & LNET_MD_MAX_SIZE) && /* use max size */ - (umd->max_size < 0 || - umd->max_size > total_length)) /* illegal max_size */ - return -EINVAL; - - } else if (umd->options & LNET_MD_KIOV) { - niov = umd->length; - lmd->md_niov = umd->length; - memcpy(lmd->md_iov.kiov, umd->start, - niov * sizeof(lmd->md_iov.kiov[0])); - - for (i = 0; i < (int)niov; i++) { - /* We take the page pointer on trust */ - if (lmd->md_iov.kiov[i].bv_offset + - lmd->md_iov.kiov[i].bv_len > PAGE_SIZE) - return -EINVAL; /* invalid length */ - - total_length += lmd->md_iov.kiov[i].bv_len; - } - - lmd->md_length = total_length; - - if ((umd->options & LNET_MD_MAX_SIZE) && /* max size used */ - (umd->max_size < 0 || - umd->max_size > total_length)) /* illegal max_size */ - return -EINVAL; - } else { /* contiguous */ - lmd->md_length = umd->length; - niov = 1; - lmd->md_niov = 1; - lmd->md_iov.iov[0].iov_base = umd->start; - lmd->md_iov.iov[0].iov_len = umd->length; - - if ((umd->options & LNET_MD_MAX_SIZE) && /* max size used */ - (umd->max_size < 0 || - umd->max_size > (int)umd->length)) /* illegal max_size */ - return -EINVAL; - } - - return 0; -} - -/* must be called with resource lock held */ -static int -lnet_md_link(struct lnet_libmd *md, struct lnet_handle_eq eq_handle, int cpt) -{ - struct lnet_res_container *container = the_lnet.ln_md_containers[cpt]; - - /* - * NB we are passed an allocated, but inactive md. 
- * if we return success, caller may lnet_md_unlink() it. - * otherwise caller may only kfree() it. - */ - /* - * This implementation doesn't know how to create START events or - * disable END events. Best to LASSERT our caller is compliant so - * we find out quickly... - */ - /* - * TODO - reevaluate what should be here in light of - * the removal of the start and end events - * maybe there we shouldn't even allow LNET_EQ_NONE!) - * LASSERT(!eq); - */ - if (!LNetEQHandleIsInvalid(eq_handle)) { - md->md_eq = lnet_handle2eq(&eq_handle); - - if (!md->md_eq) - return -ENOENT; - - (*md->md_eq->eq_refs[cpt])++; - } - - lnet_res_lh_initialize(container, &md->md_lh); - - LASSERT(list_empty(&md->md_list)); - list_add(&md->md_list, &container->rec_active); - - return 0; -} - -/* must be called with lnet_res_lock held */ -void -lnet_md_deconstruct(struct lnet_libmd *lmd, struct lnet_md *umd) -{ - /* NB this doesn't copy out all the iov entries so when a - * discontiguous MD is copied out, the target gets to know the - * original iov pointer (in start) and the number of entries it had - * and that's all. - */ - umd->start = lmd->md_start; - umd->length = !(lmd->md_options & - (LNET_MD_IOVEC | LNET_MD_KIOV)) ? - lmd->md_length : lmd->md_niov; - umd->threshold = lmd->md_threshold; - umd->max_size = lmd->md_max_size; - umd->options = lmd->md_options; - umd->user_ptr = lmd->md_user_ptr; - lnet_eq2handle(&umd->eq_handle, lmd->md_eq); -} - -static int -lnet_md_validate(struct lnet_md *umd) -{ - if (!umd->start && umd->length) { - CERROR("MD start pointer can not be NULL with length %u\n", - umd->length); - return -EINVAL; - } - - if ((umd->options & (LNET_MD_KIOV | LNET_MD_IOVEC)) && - umd->length > LNET_MAX_IOV) { - CERROR("Invalid option: too many fragments %u, %d max\n", - umd->length, LNET_MAX_IOV); - return -EINVAL; - } - - return 0; -} - -/** - * Create a memory descriptor and attach it to a ME - * - * \param meh A handle for a ME to associate the new MD with. 
- * \param umd Provides initial values for the user-visible parts of a MD. - * Other than its use for initialization, there is no linkage between this - * structure and the MD maintained by the LNet. - * \param unlink A flag to indicate whether the MD is automatically unlinked - * when it becomes inactive, either because the operation threshold drops to - * zero or because the available memory becomes less than \a umd.max_size. - * (Note that the check for unlinking a MD only occurs after the completion - * of a successful operation on the MD.) The value LNET_UNLINK enables auto - * unlinking; the value LNET_RETAIN disables it. - * \param handle On successful returns, a handle to the newly created MD is - * saved here. This handle can be used later in LNetMDUnlink(). - * - * \retval 0 On success. - * \retval -EINVAL If \a umd is not valid. - * \retval -ENOMEM If new MD cannot be allocated. - * \retval -ENOENT Either \a meh or \a umd.eq_handle does not point to a - * valid object. Note that it's OK to supply a NULL \a umd.eq_handle by - * calling LNetInvalidateHandle() on it. - * \retval -EBUSY If the ME pointed to by \a meh is already associated with - * a MD. 
- */ -int -LNetMDAttach(struct lnet_handle_me meh, struct lnet_md umd, - enum lnet_unlink unlink, struct lnet_handle_md *handle) -{ - LIST_HEAD(matches); - LIST_HEAD(drops); - struct lnet_me *me; - struct lnet_libmd *md; - int cpt; - int rc; - - LASSERT(the_lnet.ln_refcount > 0); - - if (lnet_md_validate(&umd)) - return -EINVAL; - - if (!(umd.options & (LNET_MD_OP_GET | LNET_MD_OP_PUT))) { - CERROR("Invalid option: no MD_OP set\n"); - return -EINVAL; - } - - md = lnet_md_alloc(&umd); - if (!md) - return -ENOMEM; - - rc = lnet_md_build(md, &umd, unlink); - if (rc) - goto out_free; - - cpt = lnet_cpt_of_cookie(meh.cookie); - - lnet_res_lock(cpt); - - me = lnet_handle2me(&meh); - if (!me) - rc = -ENOENT; - else if (me->me_md) - rc = -EBUSY; - else - rc = lnet_md_link(md, umd.eq_handle, cpt); - - if (rc) - goto out_unlock; - - /* - * attach this MD to portal of ME and check if it matches any - * blocked msgs on this portal - */ - lnet_ptl_attach_md(me, md, &matches, &drops); - - lnet_md2handle(handle, md); - - lnet_res_unlock(cpt); - - lnet_drop_delayed_msg_list(&drops, "Bad match"); - lnet_recv_delayed_msg_list(&matches); - - return 0; - -out_unlock: - lnet_res_unlock(cpt); -out_free: - kfree(md); - return rc; -} -EXPORT_SYMBOL(LNetMDAttach); - -/** - * Create a "free floating" memory descriptor - a MD that is not associated - * with a ME. Such MDs are usually used in LNetPut() and LNetGet() operations. - * - * \param umd,unlink See the discussion for LNetMDAttach(). - * \param handle On successful returns, a handle to the newly created MD is - * saved here. This handle can be used later in LNetMDUnlink(), LNetPut(), - * and LNetGet() operations. - * - * \retval 0 On success. - * \retval -EINVAL If \a umd is not valid. - * \retval -ENOMEM If new MD cannot be allocated. - * \retval -ENOENT \a umd.eq_handle does not point to a valid EQ. Note that - * it's OK to supply a NULL \a umd.eq_handle by calling - * LNetInvalidateHandle() on it. 
- */ -int -LNetMDBind(struct lnet_md umd, enum lnet_unlink unlink, - struct lnet_handle_md *handle) -{ - struct lnet_libmd *md; - int cpt; - int rc; - - LASSERT(the_lnet.ln_refcount > 0); - - if (lnet_md_validate(&umd)) - return -EINVAL; - - if ((umd.options & (LNET_MD_OP_GET | LNET_MD_OP_PUT))) { - CERROR("Invalid option: GET|PUT illegal on active MDs\n"); - return -EINVAL; - } - - md = lnet_md_alloc(&umd); - if (!md) - return -ENOMEM; - - rc = lnet_md_build(md, &umd, unlink); - if (rc) - goto out_free; - - cpt = lnet_res_lock_current(); - - rc = lnet_md_link(md, umd.eq_handle, cpt); - if (rc) - goto out_unlock; - - lnet_md2handle(handle, md); - - lnet_res_unlock(cpt); - return 0; - -out_unlock: - lnet_res_unlock(cpt); -out_free: - kfree(md); - - return rc; -} -EXPORT_SYMBOL(LNetMDBind); - -/** - * Unlink the memory descriptor from any ME it may be linked to and release - * the internal resources associated with it. As a result, active messages - * associated with the MD may get aborted. - * - * This function does not free the memory region associated with the MD; - * i.e., the memory the user allocated for this MD. If the ME associated with - * this MD is not NULL and was created with auto unlink enabled, the ME is - * unlinked as well (see LNetMEAttach()). - * - * Explicitly unlinking a MD via this function call has the same behavior as - * a MD that has been automatically unlinked, except that no LNET_EVENT_UNLINK - * is generated in the latter case. - * - * An unlinked event can be reported in two ways: - * - If there's no pending operations on the MD, it's unlinked immediately - * and an LNET_EVENT_UNLINK event is logged before this function returns. - * - Otherwise, the MD is only marked for deletion when this function - * returns, and the unlinked event will be piggybacked on the event of - * the completion of the last operation by setting the unlinked field of - * the event. No dedicated LNET_EVENT_UNLINK event is generated. 
- * - * Note that in both cases the unlinked field of the event is always set; no - * more event will happen on the MD after such an event is logged. - * - * \param mdh A handle for the MD to be unlinked. - * - * \retval 0 On success. - * \retval -ENOENT If \a mdh does not point to a valid MD object. - */ -int -LNetMDUnlink(struct lnet_handle_md mdh) -{ - struct lnet_event ev; - struct lnet_libmd *md; - int cpt; - - LASSERT(the_lnet.ln_refcount > 0); - - cpt = lnet_cpt_of_cookie(mdh.cookie); - lnet_res_lock(cpt); - - md = lnet_handle2md(&mdh); - if (!md) { - lnet_res_unlock(cpt); - return -ENOENT; - } - - md->md_flags |= LNET_MD_FLAG_ABORTED; - /* - * If the MD is busy, lnet_md_unlink just marks it for deletion, and - * when the LND is done, the completion event flags that the MD was - * unlinked. Otherwise, we enqueue an event now... - */ - if (md->md_eq && !md->md_refcount) { - lnet_build_unlink_event(md, &ev); - lnet_eq_enqueue_event(md->md_eq, &ev); - } - - lnet_md_unlink(md); - - lnet_res_unlock(cpt); - return 0; -} -EXPORT_SYMBOL(LNetMDUnlink); diff --git a/drivers/staging/lustre/lnet/lnet/lib-me.c b/drivers/staging/lustre/lnet/lnet/lib-me.c deleted file mode 100644 index 672e37bdd045..000000000000 --- a/drivers/staging/lustre/lnet/lnet/lib-me.c +++ /dev/null @@ -1,274 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -/* - * GPL HEADER START - * - * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 only, - * as published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, but - * WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * General Public License version 2 for more details (a copy is included - * in the LICENSE file that accompanied this code). 
- * - * You should have received a copy of the GNU General Public License - * version 2 along with this program; If not, see - * http://www.gnu.org/licenses/gpl-2.0.html - * - * GPL HEADER END - */ -/* - * Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved. - * Use is subject to license terms. - * - * Copyright (c) 2012, Intel Corporation. - */ -/* - * This file is part of Lustre, http://www.lustre.org/ - * Lustre is a trademark of Sun Microsystems, Inc. - * - * lnet/lnet/lib-me.c - * - * Match Entry management routines - */ - -#define DEBUG_SUBSYSTEM S_LNET - -#include <linux/lnet/lib-lnet.h> - -/** - * Create and attach a match entry to the match list of \a portal. The new - * ME is empty, i.e. not associated with a memory descriptor. LNetMDAttach() - * can be used to attach a MD to an empty ME. - * - * \param portal The portal table index where the ME should be attached. - * \param match_id Specifies the match criteria for the process ID of - * the requester. The constants LNET_PID_ANY and LNET_NID_ANY can be - * used to wildcard either of the identifiers in the lnet_process_id - * structure. - * \param match_bits,ignore_bits Specify the match criteria to apply - * to the match bits in the incoming request. The ignore bits are used - * to mask out insignificant bits in the incoming match bits. The resulting - * bits are then compared to the ME's match bits to determine if the - * incoming request meets the match criteria. - * \param unlink Indicates whether the ME should be unlinked when the memory - * descriptor associated with it is unlinked (Note that the check for - * unlinking a ME only occurs when the memory descriptor is unlinked.). - * Valid values are LNET_RETAIN and LNET_UNLINK. - * \param pos Indicates whether the new ME should be prepended or - * appended to the match list. Allowed constants: LNET_INS_BEFORE, - * LNET_INS_AFTER. 
- * \param handle On successful returns, a handle to the newly created ME - * object is saved here. This handle can be used later in LNetMEInsert(), - * LNetMEUnlink(), or LNetMDAttach() functions. - * - * \retval 0 On success. - * \retval -EINVAL If \a portal is invalid. - * \retval -ENOMEM If new ME object cannot be allocated. - */ -int -LNetMEAttach(unsigned int portal, - struct lnet_process_id match_id, - __u64 match_bits, __u64 ignore_bits, - enum lnet_unlink unlink, enum lnet_ins_pos pos, - struct lnet_handle_me *handle) -{ - struct lnet_match_table *mtable; - struct lnet_me *me; - struct list_head *head; - - LASSERT(the_lnet.ln_refcount > 0); - - if ((int)portal >= the_lnet.ln_nportals) - return -EINVAL; - - mtable = lnet_mt_of_attach(portal, match_id, - match_bits, ignore_bits, pos); - if (!mtable) /* can't match portal type */ - return -EPERM; - - me = kzalloc(sizeof(*me), GFP_NOFS); - if (!me) - return -ENOMEM; - - lnet_res_lock(mtable->mt_cpt); - - me->me_portal = portal; - me->me_match_id = match_id; - me->me_match_bits = match_bits; - me->me_ignore_bits = ignore_bits; - me->me_unlink = unlink; - me->me_md = NULL; - - lnet_res_lh_initialize(the_lnet.ln_me_containers[mtable->mt_cpt], - &me->me_lh); - if (ignore_bits) - head = &mtable->mt_mhash[LNET_MT_HASH_IGNORE]; - else - head = lnet_mt_match_head(mtable, match_id, match_bits); - - me->me_pos = head - &mtable->mt_mhash[0]; - if (pos == LNET_INS_AFTER || pos == LNET_INS_LOCAL) - list_add_tail(&me->me_list, head); - else - list_add(&me->me_list, head); - - lnet_me2handle(handle, me); - - lnet_res_unlock(mtable->mt_cpt); - return 0; -} -EXPORT_SYMBOL(LNetMEAttach); - -/** - * Create and a match entry and insert it before or after the ME pointed to by - * \a current_meh. The new ME is empty, i.e. not associated with a memory - * descriptor. LNetMDAttach() can be used to attach a MD to an empty ME. - * - * This function is identical to LNetMEAttach() except for the position - * where the new ME is inserted. 
- * - * \param current_meh A handle for a ME. The new ME will be inserted - * immediately before or immediately after this ME. - * \param match_id,match_bits,ignore_bits,unlink,pos,handle See the discussion - * for LNetMEAttach(). - * - * \retval 0 On success. - * \retval -ENOMEM If new ME object cannot be allocated. - * \retval -ENOENT If \a current_meh does not point to a valid match entry. - */ -int -LNetMEInsert(struct lnet_handle_me current_meh, - struct lnet_process_id match_id, - __u64 match_bits, __u64 ignore_bits, - enum lnet_unlink unlink, enum lnet_ins_pos pos, - struct lnet_handle_me *handle) -{ - struct lnet_me *current_me; - struct lnet_me *new_me; - struct lnet_portal *ptl; - int cpt; - - LASSERT(the_lnet.ln_refcount > 0); - - if (pos == LNET_INS_LOCAL) - return -EPERM; - - new_me = kzalloc(sizeof(*new_me), GFP_NOFS); - if (!new_me) - return -ENOMEM; - - cpt = lnet_cpt_of_cookie(current_meh.cookie); - - lnet_res_lock(cpt); - - current_me = lnet_handle2me(&current_meh); - if (!current_me) { - kfree(new_me); - - lnet_res_unlock(cpt); - return -ENOENT; - } - - LASSERT(current_me->me_portal < the_lnet.ln_nportals); - - ptl = the_lnet.ln_portals[current_me->me_portal]; - if (lnet_ptl_is_unique(ptl)) { - /* nosense to insertion on unique portal */ - kfree(new_me); - lnet_res_unlock(cpt); - return -EPERM; - } - - new_me->me_pos = current_me->me_pos; - new_me->me_portal = current_me->me_portal; - new_me->me_match_id = match_id; - new_me->me_match_bits = match_bits; - new_me->me_ignore_bits = ignore_bits; - new_me->me_unlink = unlink; - new_me->me_md = NULL; - - lnet_res_lh_initialize(the_lnet.ln_me_containers[cpt], &new_me->me_lh); - - if (pos == LNET_INS_AFTER) - list_add(&new_me->me_list, &current_me->me_list); - else - list_add_tail(&new_me->me_list, &current_me->me_list); - - lnet_me2handle(handle, new_me); - - lnet_res_unlock(cpt); - - return 0; -} -EXPORT_SYMBOL(LNetMEInsert); - -/** - * Unlink a match entry from its match list. 
- * - * This operation also releases any resources associated with the ME. If a - * memory descriptor is attached to the ME, then it will be unlinked as well - * and an unlink event will be generated. It is an error to use the ME handle - * after calling LNetMEUnlink(). - * - * \param meh A handle for the ME to be unlinked. - * - * \retval 0 On success. - * \retval -ENOENT If \a meh does not point to a valid ME. - * \see LNetMDUnlink() for the discussion on delivering unlink event. - */ -int -LNetMEUnlink(struct lnet_handle_me meh) -{ - struct lnet_me *me; - struct lnet_libmd *md; - struct lnet_event ev; - int cpt; - - LASSERT(the_lnet.ln_refcount > 0); - - cpt = lnet_cpt_of_cookie(meh.cookie); - lnet_res_lock(cpt); - - me = lnet_handle2me(&meh); - if (!me) { - lnet_res_unlock(cpt); - return -ENOENT; - } - - md = me->me_md; - if (md) { - md->md_flags |= LNET_MD_FLAG_ABORTED; - if (md->md_eq && !md->md_refcount) { - lnet_build_unlink_event(md, &ev); - lnet_eq_enqueue_event(md->md_eq, &ev); - } - } - - lnet_me_unlink(me); - - lnet_res_unlock(cpt); - return 0; -} -EXPORT_SYMBOL(LNetMEUnlink); - -/* call with lnet_res_lock please */ -void -lnet_me_unlink(struct lnet_me *me) -{ - list_del(&me->me_list); - - if (me->me_md) { - struct lnet_libmd *md = me->me_md; - - /* detach MD from portal of this ME */ - lnet_ptl_detach_md(me, md); - lnet_md_unlink(md); - } - - lnet_res_lh_invalidate(&me->me_lh); - kfree(me); -} diff --git a/drivers/staging/lustre/lnet/lnet/lib-move.c b/drivers/staging/lustre/lnet/lnet/lib-move.c deleted file mode 100644 index ed43b3f4b114..000000000000 --- a/drivers/staging/lustre/lnet/lnet/lib-move.c +++ /dev/null @@ -1,2388 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -/* - * GPL HEADER START - * - * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
- * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 only, - * as published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, but - * WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * General Public License version 2 for more details (a copy is included - * in the LICENSE file that accompanied this code). - * - * You should have received a copy of the GNU General Public License - * version 2 along with this program; If not, see - * http://www.gnu.org/licenses/gpl-2.0.html - * - * GPL HEADER END - */ -/* - * Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved. - * Use is subject to license terms. - * - * Copyright (c) 2011, 2015, Intel Corporation. - */ -/* - * This file is part of Lustre, http://www.lustre.org/ - * Lustre is a trademark of Sun Microsystems, Inc. 
- * - * lnet/lnet/lib-move.c - * - * Data movement routines - */ - -#define DEBUG_SUBSYSTEM S_LNET - -#include <linux/lnet/lib-lnet.h> -#include <linux/nsproxy.h> -#include <net/net_namespace.h> - -static int local_nid_dist_zero = 1; -module_param(local_nid_dist_zero, int, 0444); -MODULE_PARM_DESC(local_nid_dist_zero, "Reserved"); - -int -lnet_fail_nid(lnet_nid_t nid, unsigned int threshold) -{ - struct lnet_test_peer *tp; - struct lnet_test_peer *temp; - struct list_head *el; - struct list_head *next; - struct list_head cull; - - /* NB: use lnet_net_lock(0) to serialize operations on test peers */ - if (threshold) { - /* Adding a new entry */ - tp = kzalloc(sizeof(*tp), GFP_NOFS); - if (!tp) - return -ENOMEM; - - tp->tp_nid = nid; - tp->tp_threshold = threshold; - - lnet_net_lock(0); - list_add_tail(&tp->tp_list, &the_lnet.ln_test_peers); - lnet_net_unlock(0); - return 0; - } - - /* removing entries */ - INIT_LIST_HEAD(&cull); - - lnet_net_lock(0); - - list_for_each_safe(el, next, &the_lnet.ln_test_peers) { - tp = list_entry(el, struct lnet_test_peer, tp_list); - - if (!tp->tp_threshold || /* needs culling anyway */ - nid == LNET_NID_ANY || /* removing all entries */ - tp->tp_nid == nid) { /* matched this one */ - list_del(&tp->tp_list); - list_add(&tp->tp_list, &cull); - } - } - - lnet_net_unlock(0); - - list_for_each_entry_safe(tp, temp, &cull, tp_list) { - list_del(&tp->tp_list); - kfree(tp); - } - return 0; -} - -static int -fail_peer(lnet_nid_t nid, int outgoing) -{ - struct lnet_test_peer *tp; - struct lnet_test_peer *temp; - struct list_head *el; - struct list_head *next; - struct list_head cull; - int fail = 0; - - INIT_LIST_HEAD(&cull); - - /* NB: use lnet_net_lock(0) to serialize operations on test peers */ - lnet_net_lock(0); - - list_for_each_safe(el, next, &the_lnet.ln_test_peers) { - tp = list_entry(el, struct lnet_test_peer, tp_list); - - if (!tp->tp_threshold) { - /* zombie entry */ - if (outgoing) { - /* - * only cull zombies on outgoing tests, - 
* since we may be at interrupt priority on - * incoming messages. - */ - list_del(&tp->tp_list); - list_add(&tp->tp_list, &cull); - } - continue; - } - - if (tp->tp_nid == LNET_NID_ANY || /* fail every peer */ - nid == tp->tp_nid) { /* fail this peer */ - fail = 1; - - if (tp->tp_threshold != LNET_MD_THRESH_INF) { - tp->tp_threshold--; - if (outgoing && - !tp->tp_threshold) { - /* see above */ - list_del(&tp->tp_list); - list_add(&tp->tp_list, &cull); - } - } - break; - } - } - - lnet_net_unlock(0); - - list_for_each_entry_safe(tp, temp, &cull, tp_list) { - list_del(&tp->tp_list); - - kfree(tp); - } - - return fail; -} - -unsigned int -lnet_iov_nob(unsigned int niov, struct kvec *iov) -{ - unsigned int nob = 0; - - LASSERT(!niov || iov); - while (niov-- > 0) - nob += (iov++)->iov_len; - - return nob; -} -EXPORT_SYMBOL(lnet_iov_nob); - -void -lnet_copy_iov2iter(struct iov_iter *to, - unsigned int nsiov, const struct kvec *siov, - unsigned int soffset, unsigned int nob) -{ - /* NB diov, siov are READ-ONLY */ - const char *s; - size_t left; - - if (!nob) - return; - - /* skip complete frags before 'soffset' */ - LASSERT(nsiov > 0); - while (soffset >= siov->iov_len) { - soffset -= siov->iov_len; - siov++; - nsiov--; - LASSERT(nsiov > 0); - } - - s = (char *)siov->iov_base + soffset; - left = siov->iov_len - soffset; - do { - size_t n, copy = left; - - LASSERT(nsiov > 0); - - if (copy > nob) - copy = nob; - n = copy_to_iter(s, copy, to); - if (n != copy) - return; - nob -= n; - - siov++; - s = (char *)siov->iov_base; - left = siov->iov_len; - nsiov--; - } while (nob > 0); -} -EXPORT_SYMBOL(lnet_copy_iov2iter); - -void -lnet_copy_kiov2iter(struct iov_iter *to, - unsigned int nsiov, const struct bio_vec *siov, - unsigned int soffset, unsigned int nob) -{ - if (!nob) - return; - - LASSERT(!in_interrupt()); - - LASSERT(nsiov > 0); - while (soffset >= siov->bv_len) { - soffset -= siov->bv_len; - siov++; - nsiov--; - LASSERT(nsiov > 0); - } - - do { - size_t copy = 
siov->bv_len - soffset, n; - - LASSERT(nsiov > 0); - - if (copy > nob) - copy = nob; - n = copy_page_to_iter(siov->bv_page, - siov->bv_offset + soffset, - copy, to); - if (n != copy) - return; - nob -= n; - siov++; - nsiov--; - soffset = 0; - } while (nob > 0); -} -EXPORT_SYMBOL(lnet_copy_kiov2iter); - -int -lnet_extract_iov(int dst_niov, struct kvec *dst, - int src_niov, const struct kvec *src, - unsigned int offset, unsigned int len) -{ - /* - * Initialise 'dst' to the subset of 'src' starting at 'offset', - * for exactly 'len' bytes, and return the number of entries. - * NB not destructive to 'src' - */ - unsigned int frag_len; - unsigned int niov; - - if (!len) /* no data => */ - return 0; /* no frags */ - - LASSERT(src_niov > 0); - while (offset >= src->iov_len) { /* skip initial frags */ - offset -= src->iov_len; - src_niov--; - src++; - LASSERT(src_niov > 0); - } - - niov = 1; - for (;;) { - LASSERT(src_niov > 0); - LASSERT((int)niov <= dst_niov); - - frag_len = src->iov_len - offset; - dst->iov_base = ((char *)src->iov_base) + offset; - - if (len <= frag_len) { - dst->iov_len = len; - return niov; - } - - dst->iov_len = frag_len; - - len -= frag_len; - dst++; - src++; - niov++; - src_niov--; - offset = 0; - } -} -EXPORT_SYMBOL(lnet_extract_iov); - -unsigned int -lnet_kiov_nob(unsigned int niov, struct bio_vec *kiov) -{ - unsigned int nob = 0; - - LASSERT(!niov || kiov); - while (niov-- > 0) - nob += (kiov++)->bv_len; - - return nob; -} -EXPORT_SYMBOL(lnet_kiov_nob); - -int -lnet_extract_kiov(int dst_niov, struct bio_vec *dst, - int src_niov, const struct bio_vec *src, - unsigned int offset, unsigned int len) -{ - /* - * Initialise 'dst' to the subset of 'src' starting at 'offset', - * for exactly 'len' bytes, and return the number of entries. 
- * NB not destructive to 'src' - */ - unsigned int frag_len; - unsigned int niov; - - if (!len) /* no data => */ - return 0; /* no frags */ - - LASSERT(src_niov > 0); - while (offset >= src->bv_len) { /* skip initial frags */ - offset -= src->bv_len; - src_niov--; - src++; - LASSERT(src_niov > 0); - } - - niov = 1; - for (;;) { - LASSERT(src_niov > 0); - LASSERT((int)niov <= dst_niov); - - frag_len = src->bv_len - offset; - dst->bv_page = src->bv_page; - dst->bv_offset = src->bv_offset + offset; - - if (len <= frag_len) { - dst->bv_len = len; - LASSERT(dst->bv_offset + dst->bv_len - <= PAGE_SIZE); - return niov; - } - - dst->bv_len = frag_len; - LASSERT(dst->bv_offset + dst->bv_len <= PAGE_SIZE); - - len -= frag_len; - dst++; - src++; - niov++; - src_niov--; - offset = 0; - } -} -EXPORT_SYMBOL(lnet_extract_kiov); - -void -lnet_ni_recv(struct lnet_ni *ni, void *private, struct lnet_msg *msg, - int delayed, unsigned int offset, unsigned int mlen, - unsigned int rlen) -{ - unsigned int niov = 0; - struct kvec *iov = NULL; - struct bio_vec *kiov = NULL; - struct iov_iter to; - int rc; - - LASSERT(!in_interrupt()); - LASSERT(!mlen || msg); - - if (msg) { - LASSERT(msg->msg_receiving); - LASSERT(!msg->msg_sending); - LASSERT(rlen == msg->msg_len); - LASSERT(mlen <= msg->msg_len); - LASSERT(msg->msg_offset == offset); - LASSERT(msg->msg_wanted == mlen); - - msg->msg_receiving = 0; - - if (mlen) { - niov = msg->msg_niov; - iov = msg->msg_iov; - kiov = msg->msg_kiov; - - LASSERT(niov > 0); - LASSERT(!iov != !kiov); - } - } - - if (iov) { - iov_iter_kvec(&to, ITER_KVEC | READ, iov, niov, mlen + offset); - iov_iter_advance(&to, offset); - } else { - iov_iter_bvec(&to, ITER_BVEC | READ, kiov, niov, mlen + offset); - iov_iter_advance(&to, offset); - } - rc = ni->ni_lnd->lnd_recv(ni, private, msg, delayed, &to, rlen); - if (rc < 0) - lnet_finalize(ni, msg, rc); -} - -static void -lnet_setpayloadbuffer(struct lnet_msg *msg) -{ - struct lnet_libmd *md = msg->msg_md; - - 
LASSERT(msg->msg_len > 0); - LASSERT(!msg->msg_routing); - LASSERT(md); - LASSERT(!msg->msg_niov); - LASSERT(!msg->msg_iov); - LASSERT(!msg->msg_kiov); - - msg->msg_niov = md->md_niov; - if (md->md_options & LNET_MD_KIOV) - msg->msg_kiov = md->md_iov.kiov; - else - msg->msg_iov = md->md_iov.iov; -} - -void -lnet_prep_send(struct lnet_msg *msg, int type, struct lnet_process_id target, - unsigned int offset, unsigned int len) -{ - msg->msg_type = type; - msg->msg_target = target; - msg->msg_len = len; - msg->msg_offset = offset; - - if (len) - lnet_setpayloadbuffer(msg); - - memset(&msg->msg_hdr, 0, sizeof(msg->msg_hdr)); - msg->msg_hdr.type = cpu_to_le32(type); - msg->msg_hdr.dest_nid = cpu_to_le64(target.nid); - msg->msg_hdr.dest_pid = cpu_to_le32(target.pid); - /* src_nid will be set later */ - msg->msg_hdr.src_pid = cpu_to_le32(the_lnet.ln_pid); - msg->msg_hdr.payload_length = cpu_to_le32(len); -} - -static void -lnet_ni_send(struct lnet_ni *ni, struct lnet_msg *msg) -{ - void *priv = msg->msg_private; - int rc; - - LASSERT(!in_interrupt()); - LASSERT(LNET_NETTYP(LNET_NIDNET(ni->ni_nid)) == LOLND || - (msg->msg_txcredit && msg->msg_peertxcredit)); - - rc = ni->ni_lnd->lnd_send(ni, priv, msg); - if (rc < 0) - lnet_finalize(ni, msg, rc); -} - -static int -lnet_ni_eager_recv(struct lnet_ni *ni, struct lnet_msg *msg) -{ - int rc; - - LASSERT(!msg->msg_sending); - LASSERT(msg->msg_receiving); - LASSERT(!msg->msg_rx_ready_delay); - LASSERT(ni->ni_lnd->lnd_eager_recv); - - msg->msg_rx_ready_delay = 1; - rc = ni->ni_lnd->lnd_eager_recv(ni, msg->msg_private, msg, - &msg->msg_private); - if (rc) { - CERROR("recv from %s / send to %s aborted: eager_recv failed %d\n", - libcfs_nid2str(msg->msg_rxpeer->lp_nid), - libcfs_id2str(msg->msg_target), rc); - LASSERT(rc < 0); /* required by my callers */ - } - - return rc; -} - -/* NB: caller shall hold a ref on 'lp' as I'd drop lnet_net_lock */ -static void -lnet_ni_query_locked(struct lnet_ni *ni, struct lnet_peer *lp) -{ - 
unsigned long last_alive = 0; - - LASSERT(lnet_peer_aliveness_enabled(lp)); - LASSERT(ni->ni_lnd->lnd_query); - - lnet_net_unlock(lp->lp_cpt); - ni->ni_lnd->lnd_query(ni, lp->lp_nid, &last_alive); - lnet_net_lock(lp->lp_cpt); - - lp->lp_last_query = cfs_time_current(); - - if (last_alive) /* NI has updated timestamp */ - lp->lp_last_alive = last_alive; -} - -/* NB: always called with lnet_net_lock held */ -static inline int -lnet_peer_is_alive(struct lnet_peer *lp, unsigned long now) -{ - int alive; - unsigned long deadline; - - LASSERT(lnet_peer_aliveness_enabled(lp)); - - /* Trust lnet_notify() if it has more recent aliveness news, but - * ignore the initial assumed death (see lnet_peers_start_down()). - */ - if (!lp->lp_alive && lp->lp_alive_count > 0 && - cfs_time_aftereq(lp->lp_timestamp, lp->lp_last_alive)) - return 0; - - deadline = cfs_time_add(lp->lp_last_alive, - lp->lp_ni->ni_peertimeout * HZ); - alive = cfs_time_after(deadline, now); - - /* Update obsolete lp_alive except for routers assumed to be dead - * initially, because router checker would update aliveness in this - * case, and moreover lp_last_alive at peer creation is assumed. - */ - if (alive && !lp->lp_alive && - !(lnet_isrouter(lp) && !lp->lp_alive_count)) - lnet_notify_locked(lp, 0, 1, lp->lp_last_alive); - - return alive; -} - -/* - * NB: returns 1 when alive, 0 when dead, negative when error; - * may drop the lnet_net_lock - */ -static int -lnet_peer_alive_locked(struct lnet_peer *lp) -{ - unsigned long now = cfs_time_current(); - - if (!lnet_peer_aliveness_enabled(lp)) - return -ENODEV; - - if (lnet_peer_is_alive(lp, now)) - return 1; - - /* - * Peer appears dead, but we should avoid frequent NI queries (at - * most once per lnet_queryinterval seconds). 
- */ - if (lp->lp_last_query) { - static const int lnet_queryinterval = 1; - - unsigned long next_query = - cfs_time_add(lp->lp_last_query, - lnet_queryinterval * HZ); - - if (time_before(now, next_query)) { - if (lp->lp_alive) - CWARN("Unexpected aliveness of peer %s: %d < %d (%d/%d)\n", - libcfs_nid2str(lp->lp_nid), - (int)now, (int)next_query, - lnet_queryinterval, - lp->lp_ni->ni_peertimeout); - return 0; - } - } - - /* query NI for latest aliveness news */ - lnet_ni_query_locked(lp->lp_ni, lp); - - if (lnet_peer_is_alive(lp, now)) - return 1; - - lnet_notify_locked(lp, 0, 0, lp->lp_last_alive); - return 0; -} - -/** - * \param msg The message to be sent. - * \param do_send True if lnet_ni_send() should be called in this function. - * lnet_send() is going to lnet_net_unlock immediately after this, so - * it sets do_send FALSE and I don't do the unlock/send/lock bit. - * - * \retval LNET_CREDIT_OK If \a msg sent or OK to send. - * \retval LNET_CREDIT_WAIT If \a msg blocked for credit. - * \retval -EHOSTUNREACH If the next hop of the message appears dead. - * \retval -ECANCELED If the MD of the message has been unlinked. 
- */ -static int -lnet_post_send_locked(struct lnet_msg *msg, int do_send) -{ - struct lnet_peer *lp = msg->msg_txpeer; - struct lnet_ni *ni = lp->lp_ni; - int cpt = msg->msg_tx_cpt; - struct lnet_tx_queue *tq = ni->ni_tx_queues[cpt]; - - /* non-lnet_send() callers have checked before */ - LASSERT(!do_send || msg->msg_tx_delayed); - LASSERT(!msg->msg_receiving); - LASSERT(msg->msg_tx_committed); - - /* NB 'lp' is always the next hop */ - if (!(msg->msg_target.pid & LNET_PID_USERFLAG) && - !lnet_peer_alive_locked(lp)) { - the_lnet.ln_counters[cpt]->drop_count++; - the_lnet.ln_counters[cpt]->drop_length += msg->msg_len; - lnet_net_unlock(cpt); - - CNETERR("Dropping message for %s: peer not alive\n", - libcfs_id2str(msg->msg_target)); - if (do_send) - lnet_finalize(ni, msg, -EHOSTUNREACH); - - lnet_net_lock(cpt); - return -EHOSTUNREACH; - } - - if (msg->msg_md && - (msg->msg_md->md_flags & LNET_MD_FLAG_ABORTED)) { - lnet_net_unlock(cpt); - - CNETERR("Aborting message for %s: LNetM[DE]Unlink() already called on the MD/ME.\n", - libcfs_id2str(msg->msg_target)); - if (do_send) - lnet_finalize(ni, msg, -ECANCELED); - - lnet_net_lock(cpt); - return -ECANCELED; - } - - if (!msg->msg_peertxcredit) { - LASSERT((lp->lp_txcredits < 0) == - !list_empty(&lp->lp_txq)); - - msg->msg_peertxcredit = 1; - lp->lp_txqnob += msg->msg_len + sizeof(struct lnet_hdr); - lp->lp_txcredits--; - - if (lp->lp_txcredits < lp->lp_mintxcredits) - lp->lp_mintxcredits = lp->lp_txcredits; - - if (lp->lp_txcredits < 0) { - msg->msg_tx_delayed = 1; - list_add_tail(&msg->msg_list, &lp->lp_txq); - return LNET_CREDIT_WAIT; - } - } - - if (!msg->msg_txcredit) { - LASSERT((tq->tq_credits < 0) == - !list_empty(&tq->tq_delayed)); - - msg->msg_txcredit = 1; - tq->tq_credits--; - - if (tq->tq_credits < tq->tq_credits_min) - tq->tq_credits_min = tq->tq_credits; - - if (tq->tq_credits < 0) { - msg->msg_tx_delayed = 1; - list_add_tail(&msg->msg_list, &tq->tq_delayed); - return LNET_CREDIT_WAIT; - } - } - - if 
(do_send) { - lnet_net_unlock(cpt); - lnet_ni_send(ni, msg); - lnet_net_lock(cpt); - } - return LNET_CREDIT_OK; -} - -static struct lnet_rtrbufpool * -lnet_msg2bufpool(struct lnet_msg *msg) -{ - struct lnet_rtrbufpool *rbp; - int cpt; - - LASSERT(msg->msg_rx_committed); - - cpt = msg->msg_rx_cpt; - rbp = &the_lnet.ln_rtrpools[cpt][0]; - - LASSERT(msg->msg_len <= LNET_MTU); - while (msg->msg_len > (unsigned int)rbp->rbp_npages * PAGE_SIZE) { - rbp++; - LASSERT(rbp < &the_lnet.ln_rtrpools[cpt][LNET_NRBPOOLS]); - } - - return rbp; -} - -static int -lnet_post_routed_recv_locked(struct lnet_msg *msg, int do_recv) -{ - /* - * lnet_parse is going to lnet_net_unlock immediately after this, so it - * sets do_recv FALSE and I don't do the unlock/send/lock bit. - * I return LNET_CREDIT_WAIT if msg blocked and LNET_CREDIT_OK if - * received or OK to receive - */ - struct lnet_peer *lp = msg->msg_rxpeer; - struct lnet_rtrbufpool *rbp; - struct lnet_rtrbuf *rb; - - LASSERT(!msg->msg_iov); - LASSERT(!msg->msg_kiov); - LASSERT(!msg->msg_niov); - LASSERT(msg->msg_routing); - LASSERT(msg->msg_receiving); - LASSERT(!msg->msg_sending); - - /* non-lnet_parse callers only receive delayed messages */ - LASSERT(!do_recv || msg->msg_rx_delayed); - - if (!msg->msg_peerrtrcredit) { - LASSERT((lp->lp_rtrcredits < 0) == - !list_empty(&lp->lp_rtrq)); - - msg->msg_peerrtrcredit = 1; - lp->lp_rtrcredits--; - if (lp->lp_rtrcredits < lp->lp_minrtrcredits) - lp->lp_minrtrcredits = lp->lp_rtrcredits; - - if (lp->lp_rtrcredits < 0) { - /* must have checked eager_recv before here */ - LASSERT(msg->msg_rx_ready_delay); - msg->msg_rx_delayed = 1; - list_add_tail(&msg->msg_list, &lp->lp_rtrq); - return LNET_CREDIT_WAIT; - } - } - - rbp = lnet_msg2bufpool(msg); - - if (!msg->msg_rtrcredit) { - msg->msg_rtrcredit = 1; - rbp->rbp_credits--; - if (rbp->rbp_credits < rbp->rbp_mincredits) - rbp->rbp_mincredits = rbp->rbp_credits; - - if (rbp->rbp_credits < 0) { - /* must have checked eager_recv before here */ - 
LASSERT(msg->msg_rx_ready_delay); - msg->msg_rx_delayed = 1; - list_add_tail(&msg->msg_list, &rbp->rbp_msgs); - return LNET_CREDIT_WAIT; - } - } - - LASSERT(!list_empty(&rbp->rbp_bufs)); - rb = list_entry(rbp->rbp_bufs.next, struct lnet_rtrbuf, rb_list); - list_del(&rb->rb_list); - - msg->msg_niov = rbp->rbp_npages; - msg->msg_kiov = &rb->rb_kiov[0]; - - if (do_recv) { - int cpt = msg->msg_rx_cpt; - - lnet_net_unlock(cpt); - lnet_ni_recv(lp->lp_ni, msg->msg_private, msg, 1, - 0, msg->msg_len, msg->msg_len); - lnet_net_lock(cpt); - } - return LNET_CREDIT_OK; -} - -void -lnet_return_tx_credits_locked(struct lnet_msg *msg) -{ - struct lnet_peer *txpeer = msg->msg_txpeer; - struct lnet_msg *msg2; - - if (msg->msg_txcredit) { - struct lnet_ni *ni = txpeer->lp_ni; - struct lnet_tx_queue *tq = ni->ni_tx_queues[msg->msg_tx_cpt]; - - /* give back NI txcredits */ - msg->msg_txcredit = 0; - - LASSERT((tq->tq_credits < 0) == - !list_empty(&tq->tq_delayed)); - - tq->tq_credits++; - if (tq->tq_credits <= 0) { - msg2 = list_entry(tq->tq_delayed.next, - struct lnet_msg, msg_list); - list_del(&msg2->msg_list); - - LASSERT(msg2->msg_txpeer->lp_ni == ni); - LASSERT(msg2->msg_tx_delayed); - - (void)lnet_post_send_locked(msg2, 1); - } - } - - if (msg->msg_peertxcredit) { - /* give back peer txcredits */ - msg->msg_peertxcredit = 0; - - LASSERT((txpeer->lp_txcredits < 0) == - !list_empty(&txpeer->lp_txq)); - - txpeer->lp_txqnob -= msg->msg_len + sizeof(struct lnet_hdr); - LASSERT(txpeer->lp_txqnob >= 0); - - txpeer->lp_txcredits++; - if (txpeer->lp_txcredits <= 0) { - msg2 = list_entry(txpeer->lp_txq.next, - struct lnet_msg, msg_list); - list_del(&msg2->msg_list); - - LASSERT(msg2->msg_txpeer == txpeer); - LASSERT(msg2->msg_tx_delayed); - - (void)lnet_post_send_locked(msg2, 1); - } - } - - if (txpeer) { - msg->msg_txpeer = NULL; - lnet_peer_decref_locked(txpeer); - } -} - -void -lnet_schedule_blocked_locked(struct lnet_rtrbufpool *rbp) -{ - struct lnet_msg *msg; - - if 
(list_empty(&rbp->rbp_msgs)) - return; - msg = list_entry(rbp->rbp_msgs.next, - struct lnet_msg, msg_list); - list_del(&msg->msg_list); - - (void)lnet_post_routed_recv_locked(msg, 1); -} - -void -lnet_drop_routed_msgs_locked(struct list_head *list, int cpt) -{ - struct list_head drop; - struct lnet_msg *msg; - struct lnet_msg *tmp; - - INIT_LIST_HEAD(&drop); - - list_splice_init(list, &drop); - - lnet_net_unlock(cpt); - - list_for_each_entry_safe(msg, tmp, &drop, msg_list) { - lnet_ni_recv(msg->msg_rxpeer->lp_ni, msg->msg_private, NULL, - 0, 0, 0, msg->msg_hdr.payload_length); - list_del_init(&msg->msg_list); - lnet_finalize(NULL, msg, -ECANCELED); - } - - lnet_net_lock(cpt); -} - -void -lnet_return_rx_credits_locked(struct lnet_msg *msg) -{ - struct lnet_peer *rxpeer = msg->msg_rxpeer; - struct lnet_msg *msg2; - - if (msg->msg_rtrcredit) { - /* give back global router credits */ - struct lnet_rtrbuf *rb; - struct lnet_rtrbufpool *rbp; - - /* - * NB If a msg ever blocks for a buffer in rbp_msgs, it stays - * there until it gets one allocated, or aborts the wait - * itself - */ - LASSERT(msg->msg_kiov); - - rb = container_of(msg->msg_kiov, struct lnet_rtrbuf, rb_kiov[0]); - rbp = rb->rb_pool; - - msg->msg_kiov = NULL; - msg->msg_rtrcredit = 0; - - LASSERT(rbp == lnet_msg2bufpool(msg)); - - LASSERT((rbp->rbp_credits > 0) == - !list_empty(&rbp->rbp_bufs)); - - /* - * If routing is now turned off, we just drop this buffer and - * don't bother trying to return credits. - */ - if (!the_lnet.ln_routing) { - lnet_destroy_rtrbuf(rb, rbp->rbp_npages); - goto routing_off; - } - - /* - * It is possible that a user has lowered the desired number of - * buffers in this pool. Make sure we never put back - * more buffers than the stated number. - */ - if (unlikely(rbp->rbp_credits >= rbp->rbp_req_nbuffers)) { - /* Discard this buffer so we don't have too many. 
*/ - lnet_destroy_rtrbuf(rb, rbp->rbp_npages); - rbp->rbp_nbuffers--; - } else { - list_add(&rb->rb_list, &rbp->rbp_bufs); - rbp->rbp_credits++; - if (rbp->rbp_credits <= 0) - lnet_schedule_blocked_locked(rbp); - } - } - -routing_off: - if (msg->msg_peerrtrcredit) { - /* give back peer router credits */ - msg->msg_peerrtrcredit = 0; - - LASSERT((rxpeer->lp_rtrcredits < 0) == - !list_empty(&rxpeer->lp_rtrq)); - - rxpeer->lp_rtrcredits++; - /* - * drop all messages which are queued to be routed on that - * peer. - */ - if (!the_lnet.ln_routing) { - lnet_drop_routed_msgs_locked(&rxpeer->lp_rtrq, - msg->msg_rx_cpt); - } else if (rxpeer->lp_rtrcredits <= 0) { - msg2 = list_entry(rxpeer->lp_rtrq.next, - struct lnet_msg, msg_list); - list_del(&msg2->msg_list); - - (void)lnet_post_routed_recv_locked(msg2, 1); - } - } - if (rxpeer) { - msg->msg_rxpeer = NULL; - lnet_peer_decref_locked(rxpeer); - } -} - -static int -lnet_compare_routes(struct lnet_route *r1, struct lnet_route *r2) -{ - struct lnet_peer *p1 = r1->lr_gateway; - struct lnet_peer *p2 = r2->lr_gateway; - int r1_hops = (r1->lr_hops == LNET_UNDEFINED_HOPS) ? 1 : r1->lr_hops; - int r2_hops = (r2->lr_hops == LNET_UNDEFINED_HOPS) ? 
1 : r2->lr_hops; - - if (r1->lr_priority < r2->lr_priority) - return 1; - - if (r1->lr_priority > r2->lr_priority) - return -ERANGE; - - if (r1_hops < r2_hops) - return 1; - - if (r1_hops > r2_hops) - return -ERANGE; - - if (p1->lp_txqnob < p2->lp_txqnob) - return 1; - - if (p1->lp_txqnob > p2->lp_txqnob) - return -ERANGE; - - if (p1->lp_txcredits > p2->lp_txcredits) - return 1; - - if (p1->lp_txcredits < p2->lp_txcredits) - return -ERANGE; - - if (r1->lr_seq - r2->lr_seq <= 0) - return 1; - - return -ERANGE; -} - -static struct lnet_peer * -lnet_find_route_locked(struct lnet_ni *ni, lnet_nid_t target, - lnet_nid_t rtr_nid) -{ - struct lnet_remotenet *rnet; - struct lnet_route *route; - struct lnet_route *best_route; - struct lnet_route *last_route; - struct lnet_peer *lp_best; - struct lnet_peer *lp; - int rc; - - /* - * If @rtr_nid is not LNET_NID_ANY, return the gateway with - * rtr_nid nid, otherwise find the best gateway I can use - */ - rnet = lnet_find_net_locked(LNET_NIDNET(target)); - if (!rnet) - return NULL; - - lp_best = NULL; - best_route = NULL; - last_route = NULL; - list_for_each_entry(route, &rnet->lrn_routes, lr_list) { - lp = route->lr_gateway; - - if (!lnet_is_route_alive(route)) - continue; - - if (ni && lp->lp_ni != ni) - continue; - - if (lp->lp_nid == rtr_nid) /* it's pre-determined router */ - return lp; - - if (!lp_best) { - best_route = route; - last_route = route; - lp_best = lp; - continue; - } - - /* no protection on below fields, but it's harmless */ - if (last_route->lr_seq - route->lr_seq < 0) - last_route = route; - - rc = lnet_compare_routes(route, best_route); - if (rc < 0) - continue; - - best_route = route; - lp_best = lp; - } - - /* - * set sequence number on the best router to the latest sequence + 1 - * so we can round-robin all routers, it's race and inaccurate but - * harmless and functional - */ - if (best_route) - best_route->lr_seq = last_route->lr_seq + 1; - return lp_best; -} - -int -lnet_send(lnet_nid_t src_nid, 
struct lnet_msg *msg, lnet_nid_t rtr_nid) -{ - lnet_nid_t dst_nid = msg->msg_target.nid; - struct lnet_ni *src_ni; - struct lnet_ni *local_ni; - struct lnet_peer *lp; - int cpt; - int cpt2; - int rc; - - /* - * NB: rtr_nid is set to LNET_NID_ANY for all current use-cases, - * but we might want to use pre-determined router for ACK/REPLY - * in the future - */ - /* NB: ni == interface pre-determined (ACK/REPLY) */ - LASSERT(!msg->msg_txpeer); - LASSERT(!msg->msg_sending); - LASSERT(!msg->msg_target_is_router); - LASSERT(!msg->msg_receiving); - - msg->msg_sending = 1; - - LASSERT(!msg->msg_tx_committed); - cpt = lnet_cpt_of_nid(rtr_nid == LNET_NID_ANY ? dst_nid : rtr_nid); - again: - lnet_net_lock(cpt); - - if (the_lnet.ln_shutdown) { - lnet_net_unlock(cpt); - return -ESHUTDOWN; - } - - if (src_nid == LNET_NID_ANY) { - src_ni = NULL; - } else { - src_ni = lnet_nid2ni_locked(src_nid, cpt); - if (!src_ni) { - lnet_net_unlock(cpt); - LCONSOLE_WARN("Can't send to %s: src %s is not a local nid\n", - libcfs_nid2str(dst_nid), - libcfs_nid2str(src_nid)); - return -EINVAL; - } - LASSERT(!msg->msg_routing); - } - - /* Is this for someone on a local network? 
*/ - local_ni = lnet_net2ni_locked(LNET_NIDNET(dst_nid), cpt); - - if (local_ni) { - if (!src_ni) { - src_ni = local_ni; - src_nid = src_ni->ni_nid; - } else if (src_ni == local_ni) { - lnet_ni_decref_locked(local_ni, cpt); - } else { - lnet_ni_decref_locked(local_ni, cpt); - lnet_ni_decref_locked(src_ni, cpt); - lnet_net_unlock(cpt); - LCONSOLE_WARN("No route to %s via from %s\n", - libcfs_nid2str(dst_nid), - libcfs_nid2str(src_nid)); - return -EINVAL; - } - - LASSERT(src_nid != LNET_NID_ANY); - lnet_msg_commit(msg, cpt); - - if (!msg->msg_routing) - msg->msg_hdr.src_nid = cpu_to_le64(src_nid); - - if (src_ni == the_lnet.ln_loni) { - /* No send credit hassles with LOLND */ - lnet_net_unlock(cpt); - lnet_ni_send(src_ni, msg); - - lnet_net_lock(cpt); - lnet_ni_decref_locked(src_ni, cpt); - lnet_net_unlock(cpt); - return 0; - } - - rc = lnet_nid2peer_locked(&lp, dst_nid, cpt); - /* lp has ref on src_ni; lose mine */ - lnet_ni_decref_locked(src_ni, cpt); - if (rc) { - lnet_net_unlock(cpt); - LCONSOLE_WARN("Error %d finding peer %s\n", rc, - libcfs_nid2str(dst_nid)); - /* ENOMEM or shutting down */ - return rc; - } - LASSERT(lp->lp_ni == src_ni); - } else { - /* sending to a remote network */ - lp = lnet_find_route_locked(src_ni, dst_nid, rtr_nid); - if (!lp) { - if (src_ni) - lnet_ni_decref_locked(src_ni, cpt); - lnet_net_unlock(cpt); - - LCONSOLE_WARN("No route to %s via %s (all routers down)\n", - libcfs_id2str(msg->msg_target), - libcfs_nid2str(src_nid)); - return -EHOSTUNREACH; - } - - /* - * rtr_nid is LNET_NID_ANY or NID of pre-determined router, - * it's possible that rtr_nid isn't LNET_NID_ANY and lp isn't - * pre-determined router, this can happen if router table - * was changed when we release the lock - */ - if (rtr_nid != lp->lp_nid) { - cpt2 = lnet_cpt_of_nid_locked(lp->lp_nid); - if (cpt2 != cpt) { - if (src_ni) - lnet_ni_decref_locked(src_ni, cpt); - lnet_net_unlock(cpt); - - rtr_nid = lp->lp_nid; - cpt = cpt2; - goto again; - } - } - - CDEBUG(D_NET, 
"Best route to %s via %s for %s %d\n", - libcfs_nid2str(dst_nid), libcfs_nid2str(lp->lp_nid), - lnet_msgtyp2str(msg->msg_type), msg->msg_len); - - if (!src_ni) { - src_ni = lp->lp_ni; - src_nid = src_ni->ni_nid; - } else { - LASSERT(src_ni == lp->lp_ni); - lnet_ni_decref_locked(src_ni, cpt); - } - - lnet_peer_addref_locked(lp); - - LASSERT(src_nid != LNET_NID_ANY); - lnet_msg_commit(msg, cpt); - - if (!msg->msg_routing) { - /* I'm the source and now I know which NI to send on */ - msg->msg_hdr.src_nid = cpu_to_le64(src_nid); - } - - msg->msg_target_is_router = 1; - msg->msg_target.nid = lp->lp_nid; - msg->msg_target.pid = LNET_PID_LUSTRE; - } - - /* 'lp' is our best choice of peer */ - - LASSERT(!msg->msg_peertxcredit); - LASSERT(!msg->msg_txcredit); - LASSERT(!msg->msg_txpeer); - - msg->msg_txpeer = lp; /* msg takes my ref on lp */ - - rc = lnet_post_send_locked(msg, 0); - lnet_net_unlock(cpt); - - if (rc < 0) - return rc; - - if (rc == LNET_CREDIT_OK) - lnet_ni_send(src_ni, msg); - - return 0; /* rc == LNET_CREDIT_OK or LNET_CREDIT_WAIT */ -} - -void -lnet_drop_message(struct lnet_ni *ni, int cpt, void *private, unsigned int nob) -{ - lnet_net_lock(cpt); - the_lnet.ln_counters[cpt]->drop_count++; - the_lnet.ln_counters[cpt]->drop_length += nob; - lnet_net_unlock(cpt); - - lnet_ni_recv(ni, private, NULL, 0, 0, 0, nob); -} - -static void -lnet_recv_put(struct lnet_ni *ni, struct lnet_msg *msg) -{ - struct lnet_hdr *hdr = &msg->msg_hdr; - - if (msg->msg_wanted) - lnet_setpayloadbuffer(msg); - - lnet_build_msg_event(msg, LNET_EVENT_PUT); - - /* - * Must I ACK? 
If so I'll grab the ack_wmd out of the header and put - * it back into the ACK during lnet_finalize() - */ - msg->msg_ack = !lnet_is_wire_handle_none(&hdr->msg.put.ack_wmd) && - !(msg->msg_md->md_options & LNET_MD_ACK_DISABLE); - - lnet_ni_recv(ni, msg->msg_private, msg, msg->msg_rx_delayed, - msg->msg_offset, msg->msg_wanted, hdr->payload_length); -} - -static int -lnet_parse_put(struct lnet_ni *ni, struct lnet_msg *msg) -{ - struct lnet_hdr *hdr = &msg->msg_hdr; - struct lnet_match_info info; - bool ready_delay; - int rc; - - /* Convert put fields to host byte order */ - le64_to_cpus(&hdr->msg.put.match_bits); - le32_to_cpus(&hdr->msg.put.ptl_index); - le32_to_cpus(&hdr->msg.put.offset); - - info.mi_id.nid = hdr->src_nid; - info.mi_id.pid = hdr->src_pid; - info.mi_opc = LNET_MD_OP_PUT; - info.mi_portal = hdr->msg.put.ptl_index; - info.mi_rlength = hdr->payload_length; - info.mi_roffset = hdr->msg.put.offset; - info.mi_mbits = hdr->msg.put.match_bits; - - msg->msg_rx_ready_delay = !ni->ni_lnd->lnd_eager_recv; - ready_delay = msg->msg_rx_ready_delay; - - again: - rc = lnet_ptl_match_md(&info, msg); - switch (rc) { - default: - LBUG(); - - case LNET_MATCHMD_OK: - lnet_recv_put(ni, msg); - return 0; - - case LNET_MATCHMD_NONE: - /** - * no eager_recv or has already called it, should - * have been attached on delayed list - */ - if (ready_delay) - return 0; - - rc = lnet_ni_eager_recv(ni, msg); - if (!rc) { - ready_delay = true; - goto again; - } - /* fall through */ - - case LNET_MATCHMD_DROP: - CNETERR("Dropping PUT from %s portal %d match %llu offset %d length %d: %d\n", - libcfs_id2str(info.mi_id), info.mi_portal, - info.mi_mbits, info.mi_roffset, info.mi_rlength, rc); - - return -ENOENT; /* -ve: OK but no match */ - } -} - -static int -lnet_parse_get(struct lnet_ni *ni, struct lnet_msg *msg, int rdma_get) -{ - struct lnet_match_info info; - struct lnet_hdr *hdr = &msg->msg_hdr; - struct lnet_handle_wire reply_wmd; - int rc; - - /* Convert get fields to host byte 
order */ - le64_to_cpus(&hdr->msg.get.match_bits); - le32_to_cpus(&hdr->msg.get.ptl_index); - le32_to_cpus(&hdr->msg.get.sink_length); - le32_to_cpus(&hdr->msg.get.src_offset); - - info.mi_id.nid = hdr->src_nid; - info.mi_id.pid = hdr->src_pid; - info.mi_opc = LNET_MD_OP_GET; - info.mi_portal = hdr->msg.get.ptl_index; - info.mi_rlength = hdr->msg.get.sink_length; - info.mi_roffset = hdr->msg.get.src_offset; - info.mi_mbits = hdr->msg.get.match_bits; - - rc = lnet_ptl_match_md(&info, msg); - if (rc == LNET_MATCHMD_DROP) { - CNETERR("Dropping GET from %s portal %d match %llu offset %d length %d\n", - libcfs_id2str(info.mi_id), info.mi_portal, - info.mi_mbits, info.mi_roffset, info.mi_rlength); - return -ENOENT; /* -ve: OK but no match */ - } - - LASSERT(rc == LNET_MATCHMD_OK); - - lnet_build_msg_event(msg, LNET_EVENT_GET); - - reply_wmd = hdr->msg.get.return_wmd; - - lnet_prep_send(msg, LNET_MSG_REPLY, info.mi_id, - msg->msg_offset, msg->msg_wanted); - - msg->msg_hdr.msg.reply.dst_wmd = reply_wmd; - - if (rdma_get) { - /* The LND completes the REPLY from her recv procedure */ - lnet_ni_recv(ni, msg->msg_private, msg, 0, - msg->msg_offset, msg->msg_len, msg->msg_len); - return 0; - } - - lnet_ni_recv(ni, msg->msg_private, NULL, 0, 0, 0, 0); - msg->msg_receiving = 0; - - rc = lnet_send(ni->ni_nid, msg, LNET_NID_ANY); - if (rc < 0) { - /* didn't get as far as lnet_ni_send() */ - CERROR("%s: Unable to send REPLY for GET from %s: %d\n", - libcfs_nid2str(ni->ni_nid), - libcfs_id2str(info.mi_id), rc); - - lnet_finalize(ni, msg, rc); - } - - return 0; -} - -static int -lnet_parse_reply(struct lnet_ni *ni, struct lnet_msg *msg) -{ - void *private = msg->msg_private; - struct lnet_hdr *hdr = &msg->msg_hdr; - struct lnet_process_id src = {0}; - struct lnet_libmd *md; - int rlength; - int mlength; - int cpt; - - cpt = lnet_cpt_of_cookie(hdr->msg.reply.dst_wmd.wh_object_cookie); - lnet_res_lock(cpt); - - src.nid = hdr->src_nid; - src.pid = hdr->src_pid; - - /* NB handles only 
looked up by creator (no flips) */ - md = lnet_wire_handle2md(&hdr->msg.reply.dst_wmd); - if (!md || !md->md_threshold || md->md_me) { - CNETERR("%s: Dropping REPLY from %s for %s MD %#llx.%#llx\n", - libcfs_nid2str(ni->ni_nid), libcfs_id2str(src), - !md ? "invalid" : "inactive", - hdr->msg.reply.dst_wmd.wh_interface_cookie, - hdr->msg.reply.dst_wmd.wh_object_cookie); - if (md && md->md_me) - CERROR("REPLY MD also attached to portal %d\n", - md->md_me->me_portal); - - lnet_res_unlock(cpt); - return -ENOENT; /* -ve: OK but no match */ - } - - LASSERT(!md->md_offset); - - rlength = hdr->payload_length; - mlength = min_t(uint, rlength, md->md_length); - - if (mlength < rlength && - !(md->md_options & LNET_MD_TRUNCATE)) { - CNETERR("%s: Dropping REPLY from %s length %d for MD %#llx would overflow (%d)\n", - libcfs_nid2str(ni->ni_nid), libcfs_id2str(src), - rlength, hdr->msg.reply.dst_wmd.wh_object_cookie, - mlength); - lnet_res_unlock(cpt); - return -ENOENT; /* -ve: OK but no match */ - } - - CDEBUG(D_NET, "%s: Reply from %s of length %d/%d into md %#llx\n", - libcfs_nid2str(ni->ni_nid), libcfs_id2str(src), - mlength, rlength, hdr->msg.reply.dst_wmd.wh_object_cookie); - - lnet_msg_attach_md(msg, md, 0, mlength); - - if (mlength) - lnet_setpayloadbuffer(msg); - - lnet_res_unlock(cpt); - - lnet_build_msg_event(msg, LNET_EVENT_REPLY); - - lnet_ni_recv(ni, private, msg, 0, 0, mlength, rlength); - return 0; -} - -static int -lnet_parse_ack(struct lnet_ni *ni, struct lnet_msg *msg) -{ - struct lnet_hdr *hdr = &msg->msg_hdr; - struct lnet_process_id src = {0}; - struct lnet_libmd *md; - int cpt; - - src.nid = hdr->src_nid; - src.pid = hdr->src_pid; - - /* Convert ack fields to host byte order */ - le64_to_cpus(&hdr->msg.ack.match_bits); - le32_to_cpus(&hdr->msg.ack.mlength); - - cpt = lnet_cpt_of_cookie(hdr->msg.ack.dst_wmd.wh_object_cookie); - lnet_res_lock(cpt); - - /* NB handles only looked up by creator (no flips) */ - md = lnet_wire_handle2md(&hdr->msg.ack.dst_wmd); - if 
(!md || !md->md_threshold || md->md_me) { - /* Don't moan; this is expected */ - CDEBUG(D_NET, - "%s: Dropping ACK from %s to %s MD %#llx.%#llx\n", - libcfs_nid2str(ni->ni_nid), libcfs_id2str(src), - !md ? "invalid" : "inactive", - hdr->msg.ack.dst_wmd.wh_interface_cookie, - hdr->msg.ack.dst_wmd.wh_object_cookie); - if (md && md->md_me) - CERROR("Source MD also attached to portal %d\n", - md->md_me->me_portal); - - lnet_res_unlock(cpt); - return -ENOENT; /* -ve! */ - } - - CDEBUG(D_NET, "%s: ACK from %s into md %#llx\n", - libcfs_nid2str(ni->ni_nid), libcfs_id2str(src), - hdr->msg.ack.dst_wmd.wh_object_cookie); - - lnet_msg_attach_md(msg, md, 0, 0); - - lnet_res_unlock(cpt); - - lnet_build_msg_event(msg, LNET_EVENT_ACK); - - lnet_ni_recv(ni, msg->msg_private, msg, 0, 0, 0, msg->msg_len); - return 0; -} - -/** - * \retval LNET_CREDIT_OK If \a msg is forwarded - * \retval LNET_CREDIT_WAIT If \a msg is blocked because w/o buffer - * \retval -ve error code - */ -int -lnet_parse_forward_locked(struct lnet_ni *ni, struct lnet_msg *msg) -{ - int rc = 0; - - if (!the_lnet.ln_routing) - return -ECANCELED; - - if (msg->msg_rxpeer->lp_rtrcredits <= 0 || - lnet_msg2bufpool(msg)->rbp_credits <= 0) { - if (!ni->ni_lnd->lnd_eager_recv) { - msg->msg_rx_ready_delay = 1; - } else { - lnet_net_unlock(msg->msg_rx_cpt); - rc = lnet_ni_eager_recv(ni, msg); - lnet_net_lock(msg->msg_rx_cpt); - } - } - - if (!rc) - rc = lnet_post_routed_recv_locked(msg, 0); - return rc; -} - -int -lnet_parse_local(struct lnet_ni *ni, struct lnet_msg *msg) -{ - int rc; - - switch (msg->msg_type) { - case LNET_MSG_ACK: - rc = lnet_parse_ack(ni, msg); - break; - case LNET_MSG_PUT: - rc = lnet_parse_put(ni, msg); - break; - case LNET_MSG_GET: - rc = lnet_parse_get(ni, msg, msg->msg_rdma_get); - break; - case LNET_MSG_REPLY: - rc = lnet_parse_reply(ni, msg); - break; - default: /* prevent an unused label if !kernel */ - LASSERT(0); - return -EPROTO; - } - - LASSERT(!rc || rc == -ENOENT); - return rc; -} - -char 
* -lnet_msgtyp2str(int type) -{ - switch (type) { - case LNET_MSG_ACK: - return "ACK"; - case LNET_MSG_PUT: - return "PUT"; - case LNET_MSG_GET: - return "GET"; - case LNET_MSG_REPLY: - return "REPLY"; - case LNET_MSG_HELLO: - return "HELLO"; - default: - return "<UNKNOWN>"; - } -} - -void -lnet_print_hdr(struct lnet_hdr *hdr) -{ - struct lnet_process_id src = {0}; - struct lnet_process_id dst = {0}; - char *type_str = lnet_msgtyp2str(hdr->type); - - src.nid = hdr->src_nid; - src.pid = hdr->src_pid; - - dst.nid = hdr->dest_nid; - dst.pid = hdr->dest_pid; - - CWARN("P3 Header at %p of type %s\n", hdr, type_str); - CWARN(" From %s\n", libcfs_id2str(src)); - CWARN(" To %s\n", libcfs_id2str(dst)); - - switch (hdr->type) { - default: - break; - - case LNET_MSG_PUT: - CWARN(" Ptl index %d, ack md %#llx.%#llx, match bits %llu\n", - hdr->msg.put.ptl_index, - hdr->msg.put.ack_wmd.wh_interface_cookie, - hdr->msg.put.ack_wmd.wh_object_cookie, - hdr->msg.put.match_bits); - CWARN(" Length %d, offset %d, hdr data %#llx\n", - hdr->payload_length, hdr->msg.put.offset, - hdr->msg.put.hdr_data); - break; - - case LNET_MSG_GET: - CWARN(" Ptl index %d, return md %#llx.%#llx, match bits %llu\n", - hdr->msg.get.ptl_index, - hdr->msg.get.return_wmd.wh_interface_cookie, - hdr->msg.get.return_wmd.wh_object_cookie, - hdr->msg.get.match_bits); - CWARN(" Length %d, src offset %d\n", - hdr->msg.get.sink_length, - hdr->msg.get.src_offset); - break; - - case LNET_MSG_ACK: - CWARN(" dst md %#llx.%#llx, manipulated length %d\n", - hdr->msg.ack.dst_wmd.wh_interface_cookie, - hdr->msg.ack.dst_wmd.wh_object_cookie, - hdr->msg.ack.mlength); - break; - - case LNET_MSG_REPLY: - CWARN(" dst md %#llx.%#llx, length %d\n", - hdr->msg.reply.dst_wmd.wh_interface_cookie, - hdr->msg.reply.dst_wmd.wh_object_cookie, - hdr->payload_length); - } -} - -int -lnet_parse(struct lnet_ni *ni, struct lnet_hdr *hdr, lnet_nid_t from_nid, - void *private, int rdma_req) -{ - int rc = 0; - int cpt; - int for_me; - struct 
lnet_msg *msg; - lnet_pid_t dest_pid; - lnet_nid_t dest_nid; - lnet_nid_t src_nid; - __u32 payload_length; - __u32 type; - - LASSERT(!in_interrupt()); - - type = le32_to_cpu(hdr->type); - src_nid = le64_to_cpu(hdr->src_nid); - dest_nid = le64_to_cpu(hdr->dest_nid); - dest_pid = le32_to_cpu(hdr->dest_pid); - payload_length = le32_to_cpu(hdr->payload_length); - - for_me = (ni->ni_nid == dest_nid); - cpt = lnet_cpt_of_nid(from_nid); - - switch (type) { - case LNET_MSG_ACK: - case LNET_MSG_GET: - if (payload_length > 0) { - CERROR("%s, src %s: bad %s payload %d (0 expected)\n", - libcfs_nid2str(from_nid), - libcfs_nid2str(src_nid), - lnet_msgtyp2str(type), payload_length); - return -EPROTO; - } - break; - - case LNET_MSG_PUT: - case LNET_MSG_REPLY: - if (payload_length > - (__u32)(for_me ? LNET_MAX_PAYLOAD : LNET_MTU)) { - CERROR("%s, src %s: bad %s payload %d (%d max expected)\n", - libcfs_nid2str(from_nid), - libcfs_nid2str(src_nid), - lnet_msgtyp2str(type), - payload_length, - for_me ? LNET_MAX_PAYLOAD : LNET_MTU); - return -EPROTO; - } - break; - - default: - CERROR("%s, src %s: Bad message type 0x%x\n", - libcfs_nid2str(from_nid), - libcfs_nid2str(src_nid), type); - return -EPROTO; - } - - if (the_lnet.ln_routing && - ni->ni_last_alive != ktime_get_real_seconds()) { - /* NB: so far here is the only place to set NI status to "up */ - lnet_ni_lock(ni); - ni->ni_last_alive = ktime_get_real_seconds(); - if (ni->ni_status && - ni->ni_status->ns_status == LNET_NI_STATUS_DOWN) - ni->ni_status->ns_status = LNET_NI_STATUS_UP; - lnet_ni_unlock(ni); - } - - /* - * Regard a bad destination NID as a protocol error. 
Senders should - * know what they're doing; if they don't they're misconfigured, buggy - * or malicious so we chop them off at the knees :) - */ - if (!for_me) { - if (LNET_NIDNET(dest_nid) == LNET_NIDNET(ni->ni_nid)) { - /* should have gone direct */ - CERROR("%s, src %s: Bad dest nid %s (should have been sent direct)\n", - libcfs_nid2str(from_nid), - libcfs_nid2str(src_nid), - libcfs_nid2str(dest_nid)); - return -EPROTO; - } - - if (lnet_islocalnid(dest_nid)) { - /* - * dest is another local NI; sender should have used - * this node's NID on its own network - */ - CERROR("%s, src %s: Bad dest nid %s (it's my nid but on a different network)\n", - libcfs_nid2str(from_nid), - libcfs_nid2str(src_nid), - libcfs_nid2str(dest_nid)); - return -EPROTO; - } - - if (rdma_req && type == LNET_MSG_GET) { - CERROR("%s, src %s: Bad optimized GET for %s (final destination must be me)\n", - libcfs_nid2str(from_nid), - libcfs_nid2str(src_nid), - libcfs_nid2str(dest_nid)); - return -EPROTO; - } - - if (!the_lnet.ln_routing) { - CERROR("%s, src %s: Dropping message for %s (routing not enabled)\n", - libcfs_nid2str(from_nid), - libcfs_nid2str(src_nid), - libcfs_nid2str(dest_nid)); - goto drop; - } - } - - /* - * Message looks OK; we're not going to return an error, so we MUST - * call back lnd_recv() come what may... - */ - if (!list_empty(&the_lnet.ln_test_peers) && /* normally we don't */ - fail_peer(src_nid, 0)) { /* shall we now? 
*/ - CERROR("%s, src %s: Dropping %s to simulate failure\n", - libcfs_nid2str(from_nid), libcfs_nid2str(src_nid), - lnet_msgtyp2str(type)); - goto drop; - } - - if (!list_empty(&the_lnet.ln_drop_rules) && - lnet_drop_rule_match(hdr)) { - CDEBUG(D_NET, "%s, src %s, dst %s: Dropping %s to simulate silent message loss\n", - libcfs_nid2str(from_nid), libcfs_nid2str(src_nid), - libcfs_nid2str(dest_nid), lnet_msgtyp2str(type)); - goto drop; - } - - msg = kzalloc(sizeof(*msg), GFP_NOFS); - if (!msg) { - CERROR("%s, src %s: Dropping %s (out of memory)\n", - libcfs_nid2str(from_nid), libcfs_nid2str(src_nid), - lnet_msgtyp2str(type)); - goto drop; - } - - /* msg zeroed by kzalloc() - * i.e. flags all clear, pointers NULL etc - */ - msg->msg_type = type; - msg->msg_private = private; - msg->msg_receiving = 1; - msg->msg_rdma_get = rdma_req; - msg->msg_wanted = payload_length; - msg->msg_len = payload_length; - msg->msg_offset = 0; - msg->msg_hdr = *hdr; - /* for building message event */ - msg->msg_from = from_nid; - if (!for_me) { - msg->msg_target.pid = dest_pid; - msg->msg_target.nid = dest_nid; - msg->msg_routing = 1; - - } else { - /* convert common msg->hdr fields to host byteorder */ - msg->msg_hdr.type = type; - msg->msg_hdr.src_nid = src_nid; - le32_to_cpus(&msg->msg_hdr.src_pid); - msg->msg_hdr.dest_nid = dest_nid; - msg->msg_hdr.dest_pid = dest_pid; - msg->msg_hdr.payload_length = payload_length; - } - - lnet_net_lock(cpt); - rc = lnet_nid2peer_locked(&msg->msg_rxpeer, from_nid, cpt); - if (rc) { - lnet_net_unlock(cpt); - CERROR("%s, src %s: Dropping %s (error %d looking up sender)\n", - libcfs_nid2str(from_nid), libcfs_nid2str(src_nid), - lnet_msgtyp2str(type), rc); - kfree(msg); - if (rc == -ESHUTDOWN) - /* We are shutting down. 
Don't do anything more */ - return 0; - goto drop; - } - - if (lnet_isrouter(msg->msg_rxpeer)) { - lnet_peer_set_alive(msg->msg_rxpeer); - if (avoid_asym_router_failure && - LNET_NIDNET(src_nid) != LNET_NIDNET(from_nid)) { - /* received a remote message from router, update - * remote NI status on this router. - * NB: multi-hop routed message will be ignored. - */ - lnet_router_ni_update_locked(msg->msg_rxpeer, - LNET_NIDNET(src_nid)); - } - } - - lnet_msg_commit(msg, cpt); - - /* message delay simulation */ - if (unlikely(!list_empty(&the_lnet.ln_delay_rules) && - lnet_delay_rule_match_locked(hdr, msg))) { - lnet_net_unlock(cpt); - return 0; - } - - if (!for_me) { - rc = lnet_parse_forward_locked(ni, msg); - lnet_net_unlock(cpt); - - if (rc < 0) - goto free_drop; - - if (rc == LNET_CREDIT_OK) { - lnet_ni_recv(ni, msg->msg_private, msg, 0, - 0, payload_length, payload_length); - } - return 0; - } - - lnet_net_unlock(cpt); - - rc = lnet_parse_local(ni, msg); - if (rc) - goto free_drop; - return 0; - - free_drop: - LASSERT(!msg->msg_md); - lnet_finalize(ni, msg, rc); - - drop: - lnet_drop_message(ni, cpt, private, payload_length); - return 0; -} -EXPORT_SYMBOL(lnet_parse); - -void -lnet_drop_delayed_msg_list(struct list_head *head, char *reason) -{ - while (!list_empty(head)) { - struct lnet_process_id id = {0}; - struct lnet_msg *msg; - - msg = list_entry(head->next, struct lnet_msg, msg_list); - list_del(&msg->msg_list); - - id.nid = msg->msg_hdr.src_nid; - id.pid = msg->msg_hdr.src_pid; - - LASSERT(!msg->msg_md); - LASSERT(msg->msg_rx_delayed); - LASSERT(msg->msg_rxpeer); - LASSERT(msg->msg_hdr.type == LNET_MSG_PUT); - - CWARN("Dropping delayed PUT from %s portal %d match %llu offset %d length %d: %s\n", - libcfs_id2str(id), - msg->msg_hdr.msg.put.ptl_index, - msg->msg_hdr.msg.put.match_bits, - msg->msg_hdr.msg.put.offset, - msg->msg_hdr.payload_length, reason); - - /* - * NB I can't drop msg's ref on msg_rxpeer until after I've - * called lnet_drop_message(), so I 
just hang onto msg as well - * until that's done - */ - lnet_drop_message(msg->msg_rxpeer->lp_ni, - msg->msg_rxpeer->lp_cpt, - msg->msg_private, msg->msg_len); - /* - * NB: message will not generate event because w/o attached MD, - * but we still should give error code so lnet_msg_decommit() - * can skip counters operations and other checks. - */ - lnet_finalize(msg->msg_rxpeer->lp_ni, msg, -ENOENT); - } -} - -void -lnet_recv_delayed_msg_list(struct list_head *head) -{ - while (!list_empty(head)) { - struct lnet_msg *msg; - struct lnet_process_id id; - - msg = list_entry(head->next, struct lnet_msg, msg_list); - list_del(&msg->msg_list); - - /* - * md won't disappear under me, since each msg - * holds a ref on it - */ - id.nid = msg->msg_hdr.src_nid; - id.pid = msg->msg_hdr.src_pid; - - LASSERT(msg->msg_rx_delayed); - LASSERT(msg->msg_md); - LASSERT(msg->msg_rxpeer); - LASSERT(msg->msg_hdr.type == LNET_MSG_PUT); - - CDEBUG(D_NET, "Resuming delayed PUT from %s portal %d match %llu offset %d length %d.\n", - libcfs_id2str(id), msg->msg_hdr.msg.put.ptl_index, - msg->msg_hdr.msg.put.match_bits, - msg->msg_hdr.msg.put.offset, - msg->msg_hdr.payload_length); - - lnet_recv_put(msg->msg_rxpeer->lp_ni, msg); - } -} - -/** - * Initiate an asynchronous PUT operation. - * - * There are several events associated with a PUT: completion of the send on - * the initiator node (LNET_EVENT_SEND), and when the send completes - * successfully, the receipt of an acknowledgment (LNET_EVENT_ACK) indicating - * that the operation was accepted by the target. The event LNET_EVENT_PUT is - * used at the target node to indicate the completion of incoming data - * delivery. - * - * The local events will be logged in the EQ associated with the MD pointed to - * by \a mdh handle. Using a MD without an associated EQ results in these - * events being discarded. 
In this case, the caller must have another - * mechanism (e.g., a higher level protocol) for determining when it is safe - * to modify the memory region associated with the MD. - * - * Note that LNet does not guarantee the order of LNET_EVENT_SEND and - * LNET_EVENT_ACK, though intuitively ACK should happen after SEND. - * - * \param self Indicates the NID of a local interface through which to send - * the PUT request. Use LNET_NID_ANY to let LNet choose one by itself. - * \param mdh A handle for the MD that describes the memory to be sent. The MD - * must be "free floating" (See LNetMDBind()). - * \param ack Controls whether an acknowledgment is requested. - * Acknowledgments are only sent when they are requested by the initiating - * process and the target MD enables them. - * \param target A process identifier for the target process. - * \param portal The index in the \a target's portal table. - * \param match_bits The match bits to use for MD selection at the target - * process. - * \param offset The offset into the target MD (only used when the target - * MD has the LNET_MD_MANAGE_REMOTE option set). - * \param hdr_data 64 bits of user data that can be included in the message - * header. This data is written to an event queue entry at the target if an - * EQ is present on the matching MD. - * - * \retval 0 Success, and only in this case events will be generated - * and logged to EQ (if it exists). - * \retval -EIO Simulated failure. - * \retval -ENOMEM Memory allocation failure. - * \retval -ENOENT Invalid MD object. - * - * \see lnet_event::hdr_data and lnet_event_kind. 
- */ -int -LNetPut(lnet_nid_t self, struct lnet_handle_md mdh, enum lnet_ack_req ack, - struct lnet_process_id target, unsigned int portal, - __u64 match_bits, unsigned int offset, - __u64 hdr_data) -{ - struct lnet_msg *msg; - struct lnet_libmd *md; - int cpt; - int rc; - - LASSERT(the_lnet.ln_refcount > 0); - - if (!list_empty(&the_lnet.ln_test_peers) && /* normally we don't */ - fail_peer(target.nid, 1)) { /* shall we now? */ - CERROR("Dropping PUT to %s: simulated failure\n", - libcfs_id2str(target)); - return -EIO; - } - - msg = kzalloc(sizeof(*msg), GFP_NOFS); - if (!msg) { - CERROR("Dropping PUT to %s: ENOMEM on struct lnet_msg\n", - libcfs_id2str(target)); - return -ENOMEM; - } - msg->msg_vmflush = !!memory_pressure_get(); - - cpt = lnet_cpt_of_cookie(mdh.cookie); - lnet_res_lock(cpt); - - md = lnet_handle2md(&mdh); - if (!md || !md->md_threshold || md->md_me) { - CERROR("Dropping PUT (%llu:%d:%s): MD (%d) invalid\n", - match_bits, portal, libcfs_id2str(target), - !md ? -1 : md->md_threshold); - if (md && md->md_me) - CERROR("Source MD also attached to portal %d\n", - md->md_me->me_portal); - lnet_res_unlock(cpt); - - kfree(msg); - return -ENOENT; - } - - CDEBUG(D_NET, "%s -> %s\n", __func__, libcfs_id2str(target)); - - lnet_msg_attach_md(msg, md, 0, 0); - - lnet_prep_send(msg, LNET_MSG_PUT, target, 0, md->md_length); - - msg->msg_hdr.msg.put.match_bits = cpu_to_le64(match_bits); - msg->msg_hdr.msg.put.ptl_index = cpu_to_le32(portal); - msg->msg_hdr.msg.put.offset = cpu_to_le32(offset); - msg->msg_hdr.msg.put.hdr_data = hdr_data; - - /* NB handles only looked up by creator (no flips) */ - if (ack == LNET_ACK_REQ) { - msg->msg_hdr.msg.put.ack_wmd.wh_interface_cookie = - the_lnet.ln_interface_cookie; - msg->msg_hdr.msg.put.ack_wmd.wh_object_cookie = - md->md_lh.lh_cookie; - } else { - msg->msg_hdr.msg.put.ack_wmd.wh_interface_cookie = - LNET_WIRE_HANDLE_COOKIE_NONE; - msg->msg_hdr.msg.put.ack_wmd.wh_object_cookie = - LNET_WIRE_HANDLE_COOKIE_NONE; - } - - 
lnet_res_unlock(cpt); - - lnet_build_msg_event(msg, LNET_EVENT_SEND); - - rc = lnet_send(self, msg, LNET_NID_ANY); - if (rc) { - CNETERR("Error sending PUT to %s: %d\n", - libcfs_id2str(target), rc); - lnet_finalize(NULL, msg, rc); - } - - /* completion will be signalled by an event */ - return 0; -} -EXPORT_SYMBOL(LNetPut); - -struct lnet_msg * -lnet_create_reply_msg(struct lnet_ni *ni, struct lnet_msg *getmsg) -{ - /* - * The LND can DMA direct to the GET md (i.e. no REPLY msg). This - * returns a msg for the LND to pass to lnet_finalize() when the sink - * data has been received. - * - * CAVEAT EMPTOR: 'getmsg' is the original GET, which is freed when - * lnet_finalize() is called on it, so the LND must call this first - */ - struct lnet_msg *msg = kzalloc(sizeof(*msg), GFP_NOFS); - struct lnet_libmd *getmd = getmsg->msg_md; - struct lnet_process_id peer_id = getmsg->msg_target; - int cpt; - - LASSERT(!getmsg->msg_target_is_router); - LASSERT(!getmsg->msg_routing); - - if (!msg) { - CERROR("%s: Dropping REPLY from %s: can't allocate msg\n", - libcfs_nid2str(ni->ni_nid), libcfs_id2str(peer_id)); - goto drop; - } - - cpt = lnet_cpt_of_cookie(getmd->md_lh.lh_cookie); - lnet_res_lock(cpt); - - LASSERT(getmd->md_refcount > 0); - - if (!getmd->md_threshold) { - CERROR("%s: Dropping REPLY from %s for inactive MD %p\n", - libcfs_nid2str(ni->ni_nid), libcfs_id2str(peer_id), - getmd); - lnet_res_unlock(cpt); - goto drop; - } - - LASSERT(!getmd->md_offset); - - CDEBUG(D_NET, "%s: Reply from %s md %p\n", - libcfs_nid2str(ni->ni_nid), libcfs_id2str(peer_id), getmd); - - /* setup information for lnet_build_msg_event */ - msg->msg_from = peer_id.nid; - msg->msg_type = LNET_MSG_GET; /* flag this msg as an "optimized" GET */ - msg->msg_hdr.src_nid = peer_id.nid; - msg->msg_hdr.payload_length = getmd->md_length; - msg->msg_receiving = 1; /* required by lnet_msg_attach_md */ - - lnet_msg_attach_md(msg, getmd, getmd->md_offset, getmd->md_length); - lnet_res_unlock(cpt); - - cpt = 
lnet_cpt_of_nid(peer_id.nid); - - lnet_net_lock(cpt); - lnet_msg_commit(msg, cpt); - lnet_net_unlock(cpt); - - lnet_build_msg_event(msg, LNET_EVENT_REPLY); - - return msg; - - drop: - cpt = lnet_cpt_of_nid(peer_id.nid); - - lnet_net_lock(cpt); - the_lnet.ln_counters[cpt]->drop_count++; - the_lnet.ln_counters[cpt]->drop_length += getmd->md_length; - lnet_net_unlock(cpt); - - kfree(msg); - - return NULL; -} -EXPORT_SYMBOL(lnet_create_reply_msg); - -void -lnet_set_reply_msg_len(struct lnet_ni *ni, struct lnet_msg *reply, - unsigned int len) -{ - /* - * Set the REPLY length, now the RDMA that elides the REPLY message has - * completed and I know it. - */ - LASSERT(reply); - LASSERT(reply->msg_type == LNET_MSG_GET); - LASSERT(reply->msg_ev.type == LNET_EVENT_REPLY); - - /* - * NB I trusted my peer to RDMA. If she tells me she's written beyond - * the end of my buffer, I might as well be dead. - */ - LASSERT(len <= reply->msg_ev.mlength); - - reply->msg_ev.mlength = len; -} -EXPORT_SYMBOL(lnet_set_reply_msg_len); - -/** - * Initiate an asynchronous GET operation. - * - * On the initiator node, an LNET_EVENT_SEND is logged when the GET request - * is sent, and an LNET_EVENT_REPLY is logged when the data returned from - * the target node in the REPLY has been written to local MD. - * - * On the target node, an LNET_EVENT_GET is logged when the GET request - * arrives and is accepted into a MD. - * - * \param self,target,portal,match_bits,offset See the discussion in LNetPut(). - * \param mdh A handle for the MD that describes the memory into which the - * requested data will be received. The MD must be "free floating" - * (See LNetMDBind()). - * - * \retval 0 Success, and only in this case events will be generated - * and logged to EQ (if it exists) of the MD. - * \retval -EIO Simulated failure. - * \retval -ENOMEM Memory allocation failure. - * \retval -ENOENT Invalid MD object. 
- */ -int -LNetGet(lnet_nid_t self, struct lnet_handle_md mdh, - struct lnet_process_id target, unsigned int portal, - __u64 match_bits, unsigned int offset) -{ - struct lnet_msg *msg; - struct lnet_libmd *md; - int cpt; - int rc; - - LASSERT(the_lnet.ln_refcount > 0); - - if (!list_empty(&the_lnet.ln_test_peers) && /* normally we don't */ - fail_peer(target.nid, 1)) { /* shall we now? */ - CERROR("Dropping GET to %s: simulated failure\n", - libcfs_id2str(target)); - return -EIO; - } - - msg = kzalloc(sizeof(*msg), GFP_NOFS); - if (!msg) { - CERROR("Dropping GET to %s: ENOMEM on struct lnet_msg\n", - libcfs_id2str(target)); - return -ENOMEM; - } - - cpt = lnet_cpt_of_cookie(mdh.cookie); - lnet_res_lock(cpt); - - md = lnet_handle2md(&mdh); - if (!md || !md->md_threshold || md->md_me) { - CERROR("Dropping GET (%llu:%d:%s): MD (%d) invalid\n", - match_bits, portal, libcfs_id2str(target), - !md ? -1 : md->md_threshold); - if (md && md->md_me) - CERROR("REPLY MD also attached to portal %d\n", - md->md_me->me_portal); - - lnet_res_unlock(cpt); - - kfree(msg); - return -ENOENT; - } - - CDEBUG(D_NET, "%s -> %s\n", __func__, libcfs_id2str(target)); - - lnet_msg_attach_md(msg, md, 0, 0); - - lnet_prep_send(msg, LNET_MSG_GET, target, 0, 0); - - msg->msg_hdr.msg.get.match_bits = cpu_to_le64(match_bits); - msg->msg_hdr.msg.get.ptl_index = cpu_to_le32(portal); - msg->msg_hdr.msg.get.src_offset = cpu_to_le32(offset); - msg->msg_hdr.msg.get.sink_length = cpu_to_le32(md->md_length); - - /* NB handles only looked up by creator (no flips) */ - msg->msg_hdr.msg.get.return_wmd.wh_interface_cookie = - the_lnet.ln_interface_cookie; - msg->msg_hdr.msg.get.return_wmd.wh_object_cookie = - md->md_lh.lh_cookie; - - lnet_res_unlock(cpt); - - lnet_build_msg_event(msg, LNET_EVENT_SEND); - - rc = lnet_send(self, msg, LNET_NID_ANY); - if (rc < 0) { - CNETERR("Error sending GET to %s: %d\n", - libcfs_id2str(target), rc); - lnet_finalize(NULL, msg, rc); - } - - /* completion will be signalled by an 
event */ - return 0; -} -EXPORT_SYMBOL(LNetGet); - -/** - * Calculate distance to node at \a dstnid. - * - * \param dstnid Target NID. - * \param srcnidp If not NULL, NID of the local interface to reach \a dstnid - * is saved here. - * \param orderp If not NULL, order of the route to reach \a dstnid is saved - * here. - * - * \retval 0 If \a dstnid belongs to a local interface, and reserved option - * local_nid_dist_zero is set, which is the default. - * \retval positives Distance to target NID, i.e. number of hops plus one. - * \retval -EHOSTUNREACH If \a dstnid is not reachable. - */ -int -LNetDist(lnet_nid_t dstnid, lnet_nid_t *srcnidp, __u32 *orderp) -{ - struct list_head *e; - struct lnet_ni *ni; - struct lnet_remotenet *rnet; - __u32 dstnet = LNET_NIDNET(dstnid); - int hops; - int cpt; - __u32 order = 2; - struct list_head *rn_list; - - /* - * if !local_nid_dist_zero, I don't return a distance of 0 ever - * (when lustre sees a distance of 0, it substitutes 0@lo), so I - * keep order 0 free for 0@lo and order 1 free for a local NID - * match - */ - LASSERT(the_lnet.ln_refcount > 0); - - cpt = lnet_net_lock_current(); - - list_for_each(e, &the_lnet.ln_nis) { - ni = list_entry(e, struct lnet_ni, ni_list); - - if (ni->ni_nid == dstnid) { - if (srcnidp) - *srcnidp = dstnid; - if (orderp) { - if (LNET_NETTYP(LNET_NIDNET(dstnid)) == LOLND) - *orderp = 0; - else - *orderp = 1; - } - lnet_net_unlock(cpt); - - return local_nid_dist_zero ? 0 : 1; - } - - if (LNET_NIDNET(ni->ni_nid) == dstnet) { - /* - * Check if ni was originally created in - * current net namespace. - * If not, assign order above 0xffff0000, - * to make this ni not a priority. 
- */ - if (!net_eq(ni->ni_net_ns, current->nsproxy->net_ns)) - order += 0xffff0000; - - if (srcnidp) - *srcnidp = ni->ni_nid; - if (orderp) - *orderp = order; - lnet_net_unlock(cpt); - return 1; - } - - order++; - } - - rn_list = lnet_net2rnethash(dstnet); - list_for_each(e, rn_list) { - rnet = list_entry(e, struct lnet_remotenet, lrn_list); - - if (rnet->lrn_net == dstnet) { - struct lnet_route *route; - struct lnet_route *shortest = NULL; - __u32 shortest_hops = LNET_UNDEFINED_HOPS; - __u32 route_hops; - - LASSERT(!list_empty(&rnet->lrn_routes)); - - list_for_each_entry(route, &rnet->lrn_routes, - lr_list) { - route_hops = route->lr_hops; - if (route_hops == LNET_UNDEFINED_HOPS) - route_hops = 1; - if (!shortest || - route_hops < shortest_hops) { - shortest = route; - shortest_hops = route_hops; - } - } - - LASSERT(shortest); - hops = shortest_hops; - if (srcnidp) - *srcnidp = shortest->lr_gateway->lp_ni->ni_nid; - if (orderp) - *orderp = order; - lnet_net_unlock(cpt); - return hops + 1; - } - order++; - } - - lnet_net_unlock(cpt); - return -EHOSTUNREACH; -} -EXPORT_SYMBOL(LNetDist); diff --git a/drivers/staging/lustre/lnet/lnet/lib-msg.c b/drivers/staging/lustre/lnet/lnet/lib-msg.c deleted file mode 100644 index 0091273c04b9..000000000000 --- a/drivers/staging/lustre/lnet/lnet/lib-msg.c +++ /dev/null @@ -1,625 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -/* - * GPL HEADER START - * - * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 only, - * as published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, but - * WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the GNU - * General Public License version 2 for more details (a copy is included - * in the LICENSE file that accompanied this code). - * - * You should have received a copy of the GNU General Public License - * version 2 along with this program; If not, see - * http://www.gnu.org/licenses/gpl-2.0.html - * - * GPL HEADER END - */ -/* - * Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved. - * Use is subject to license terms. - * - * Copyright (c) 2012, Intel Corporation. - */ -/* - * This file is part of Lustre, http://www.lustre.org/ - * Lustre is a trademark of Sun Microsystems, Inc. - * - * lnet/lnet/lib-msg.c - * - * Message decoding, parsing and finalizing routines - */ - -#define DEBUG_SUBSYSTEM S_LNET - -#include <linux/lnet/lib-lnet.h> - -void -lnet_build_unlink_event(struct lnet_libmd *md, struct lnet_event *ev) -{ - memset(ev, 0, sizeof(*ev)); - - ev->status = 0; - ev->unlinked = 1; - ev->type = LNET_EVENT_UNLINK; - lnet_md_deconstruct(md, &ev->md); - lnet_md2handle(&ev->md_handle, md); -} - -/* - * Don't need any lock, must be called after lnet_commit_md - */ -void -lnet_build_msg_event(struct lnet_msg *msg, enum lnet_event_kind ev_type) -{ - struct lnet_hdr *hdr = &msg->msg_hdr; - struct lnet_event *ev = &msg->msg_ev; - - LASSERT(!msg->msg_routing); - - ev->type = ev_type; - - if (ev_type == LNET_EVENT_SEND) { - /* event for active message */ - ev->target.nid = le64_to_cpu(hdr->dest_nid); - ev->target.pid = le32_to_cpu(hdr->dest_pid); - ev->initiator.nid = LNET_NID_ANY; - ev->initiator.pid = the_lnet.ln_pid; - ev->sender = LNET_NID_ANY; - } else { - /* event for passive message */ - ev->target.pid = hdr->dest_pid; - ev->target.nid = hdr->dest_nid; - ev->initiator.pid = hdr->src_pid; - ev->initiator.nid = hdr->src_nid; - ev->rlength = hdr->payload_length; - ev->sender = msg->msg_from; - ev->mlength = msg->msg_wanted; - ev->offset = msg->msg_offset; - } - - switch (ev_type) { - default: - LBUG(); - - case LNET_EVENT_PUT: /* 
passive PUT */ - ev->pt_index = hdr->msg.put.ptl_index; - ev->match_bits = hdr->msg.put.match_bits; - ev->hdr_data = hdr->msg.put.hdr_data; - return; - - case LNET_EVENT_GET: /* passive GET */ - ev->pt_index = hdr->msg.get.ptl_index; - ev->match_bits = hdr->msg.get.match_bits; - ev->hdr_data = 0; - return; - - case LNET_EVENT_ACK: /* ACK */ - ev->match_bits = hdr->msg.ack.match_bits; - ev->mlength = hdr->msg.ack.mlength; - return; - - case LNET_EVENT_REPLY: /* REPLY */ - return; - - case LNET_EVENT_SEND: /* active message */ - if (msg->msg_type == LNET_MSG_PUT) { - ev->pt_index = le32_to_cpu(hdr->msg.put.ptl_index); - ev->match_bits = le64_to_cpu(hdr->msg.put.match_bits); - ev->offset = le32_to_cpu(hdr->msg.put.offset); - ev->mlength = - ev->rlength = le32_to_cpu(hdr->payload_length); - ev->hdr_data = le64_to_cpu(hdr->msg.put.hdr_data); - - } else { - LASSERT(msg->msg_type == LNET_MSG_GET); - ev->pt_index = le32_to_cpu(hdr->msg.get.ptl_index); - ev->match_bits = le64_to_cpu(hdr->msg.get.match_bits); - ev->mlength = - ev->rlength = le32_to_cpu(hdr->msg.get.sink_length); - ev->offset = le32_to_cpu(hdr->msg.get.src_offset); - ev->hdr_data = 0; - } - return; - } -} - -void -lnet_msg_commit(struct lnet_msg *msg, int cpt) -{ - struct lnet_msg_container *container = the_lnet.ln_msg_containers[cpt]; - struct lnet_counters *counters = the_lnet.ln_counters[cpt]; - - /* routed message can be committed for both receiving and sending */ - LASSERT(!msg->msg_tx_committed); - - if (msg->msg_sending) { - LASSERT(!msg->msg_receiving); - - msg->msg_tx_cpt = cpt; - msg->msg_tx_committed = 1; - if (msg->msg_rx_committed) { /* routed message REPLY */ - LASSERT(msg->msg_onactivelist); - return; - } - } else { - LASSERT(!msg->msg_sending); - msg->msg_rx_cpt = cpt; - msg->msg_rx_committed = 1; - } - - LASSERT(!msg->msg_onactivelist); - msg->msg_onactivelist = 1; - list_add(&msg->msg_activelist, &container->msc_active); - - counters->msgs_alloc++; - if (counters->msgs_alloc > 
counters->msgs_max) - counters->msgs_max = counters->msgs_alloc; -} - -static void -lnet_msg_decommit_tx(struct lnet_msg *msg, int status) -{ - struct lnet_counters *counters; - struct lnet_event *ev = &msg->msg_ev; - - LASSERT(msg->msg_tx_committed); - if (status) - goto out; - - counters = the_lnet.ln_counters[msg->msg_tx_cpt]; - switch (ev->type) { - default: /* routed message */ - LASSERT(msg->msg_routing); - LASSERT(msg->msg_rx_committed); - LASSERT(!ev->type); - - counters->route_length += msg->msg_len; - counters->route_count++; - goto out; - - case LNET_EVENT_PUT: - /* should have been decommitted */ - LASSERT(!msg->msg_rx_committed); - /* overwritten while sending ACK */ - LASSERT(msg->msg_type == LNET_MSG_ACK); - msg->msg_type = LNET_MSG_PUT; /* fix type */ - break; - - case LNET_EVENT_SEND: - LASSERT(!msg->msg_rx_committed); - if (msg->msg_type == LNET_MSG_PUT) - counters->send_length += msg->msg_len; - break; - - case LNET_EVENT_GET: - LASSERT(msg->msg_rx_committed); - /* - * overwritten while sending reply, we should never be - * here for optimized GET - */ - LASSERT(msg->msg_type == LNET_MSG_REPLY); - msg->msg_type = LNET_MSG_GET; /* fix type */ - break; - } - - counters->send_count++; - out: - lnet_return_tx_credits_locked(msg); - msg->msg_tx_committed = 0; -} - -static void -lnet_msg_decommit_rx(struct lnet_msg *msg, int status) -{ - struct lnet_counters *counters; - struct lnet_event *ev = &msg->msg_ev; - - LASSERT(!msg->msg_tx_committed); /* decommitted or never committed */ - LASSERT(msg->msg_rx_committed); - - if (status) - goto out; - - counters = the_lnet.ln_counters[msg->msg_rx_cpt]; - switch (ev->type) { - default: - LASSERT(!ev->type); - LASSERT(msg->msg_routing); - goto out; - - case LNET_EVENT_ACK: - LASSERT(msg->msg_type == LNET_MSG_ACK); - break; - - case LNET_EVENT_GET: - /* - * type is "REPLY" if it's an optimized GET on passive side, - * because optimized GET will never be committed for sending, - * so message type wouldn't be 
changed back to "GET" by - * lnet_msg_decommit_tx(), see details in lnet_parse_get() - */ - LASSERT(msg->msg_type == LNET_MSG_REPLY || - msg->msg_type == LNET_MSG_GET); - counters->send_length += msg->msg_wanted; - break; - - case LNET_EVENT_PUT: - LASSERT(msg->msg_type == LNET_MSG_PUT); - break; - - case LNET_EVENT_REPLY: - /* - * type is "GET" if it's an optimized GET on active side, - * see details in lnet_create_reply_msg() - */ - LASSERT(msg->msg_type == LNET_MSG_GET || - msg->msg_type == LNET_MSG_REPLY); - break; - } - - counters->recv_count++; - if (ev->type == LNET_EVENT_PUT || ev->type == LNET_EVENT_REPLY) - counters->recv_length += msg->msg_wanted; - - out: - lnet_return_rx_credits_locked(msg); - msg->msg_rx_committed = 0; -} - -void -lnet_msg_decommit(struct lnet_msg *msg, int cpt, int status) -{ - int cpt2 = cpt; - - LASSERT(msg->msg_tx_committed || msg->msg_rx_committed); - LASSERT(msg->msg_onactivelist); - - if (msg->msg_tx_committed) { /* always decommit for sending first */ - LASSERT(cpt == msg->msg_tx_cpt); - lnet_msg_decommit_tx(msg, status); - } - - if (msg->msg_rx_committed) { - /* forwarding msg committed for both receiving and sending */ - if (cpt != msg->msg_rx_cpt) { - lnet_net_unlock(cpt); - cpt2 = msg->msg_rx_cpt; - lnet_net_lock(cpt2); - } - lnet_msg_decommit_rx(msg, status); - } - - list_del(&msg->msg_activelist); - msg->msg_onactivelist = 0; - - the_lnet.ln_counters[cpt2]->msgs_alloc--; - - if (cpt2 != cpt) { - lnet_net_unlock(cpt2); - lnet_net_lock(cpt); - } -} - -void -lnet_msg_attach_md(struct lnet_msg *msg, struct lnet_libmd *md, - unsigned int offset, unsigned int mlen) -{ - /* NB: @offset and @len are only useful for receiving */ - /* - * Here, we attach the MD on lnet_msg and mark it busy and - * decrementing its threshold. Come what may, the lnet_msg "owns" - * the MD until a call to lnet_msg_detach_md or lnet_finalize() - * signals completion. 
- */ - LASSERT(!msg->msg_routing); - - msg->msg_md = md; - if (msg->msg_receiving) { /* committed for receiving */ - msg->msg_offset = offset; - msg->msg_wanted = mlen; - } - - md->md_refcount++; - if (md->md_threshold != LNET_MD_THRESH_INF) { - LASSERT(md->md_threshold > 0); - md->md_threshold--; - } - - /* build umd in event */ - lnet_md2handle(&msg->msg_ev.md_handle, md); - lnet_md_deconstruct(md, &msg->msg_ev.md); -} - -void -lnet_msg_detach_md(struct lnet_msg *msg, int status) -{ - struct lnet_libmd *md = msg->msg_md; - int unlink; - - /* Now it's safe to drop my caller's ref */ - md->md_refcount--; - LASSERT(md->md_refcount >= 0); - - unlink = lnet_md_unlinkable(md); - if (md->md_eq) { - msg->msg_ev.status = status; - msg->msg_ev.unlinked = unlink; - lnet_eq_enqueue_event(md->md_eq, &msg->msg_ev); - } - - if (unlink) - lnet_md_unlink(md); - - msg->msg_md = NULL; -} - -static int -lnet_complete_msg_locked(struct lnet_msg *msg, int cpt) -{ - struct lnet_handle_wire ack_wmd; - int rc; - int status = msg->msg_ev.status; - - LASSERT(msg->msg_onactivelist); - - if (!status && msg->msg_ack) { - /* Only send an ACK if the PUT completed successfully */ - - lnet_msg_decommit(msg, cpt, 0); - - msg->msg_ack = 0; - lnet_net_unlock(cpt); - - LASSERT(msg->msg_ev.type == LNET_EVENT_PUT); - LASSERT(!msg->msg_routing); - - ack_wmd = msg->msg_hdr.msg.put.ack_wmd; - - lnet_prep_send(msg, LNET_MSG_ACK, msg->msg_ev.initiator, 0, 0); - - msg->msg_hdr.msg.ack.dst_wmd = ack_wmd; - msg->msg_hdr.msg.ack.match_bits = msg->msg_ev.match_bits; - msg->msg_hdr.msg.ack.mlength = cpu_to_le32(msg->msg_ev.mlength); - - /* - * NB: we probably want to use NID of msg::msg_from as 3rd - * parameter (router NID) if it's routed message - */ - rc = lnet_send(msg->msg_ev.target.nid, msg, LNET_NID_ANY); - - lnet_net_lock(cpt); - /* - * NB: message is committed for sending, we should return - * on success because LND will finalize this message later. 
- * - * Also, there is possibility that message is committed for - * sending and also failed before delivering to LND, - * i.e: ENOMEM, in that case we can't fall through either - * because CPT for sending can be different with CPT for - * receiving, so we should return back to lnet_finalize() - * to make sure we are locking the correct partition. - */ - return rc; - - } else if (!status && /* OK so far */ - (msg->msg_routing && !msg->msg_sending)) { - /* not forwarded */ - LASSERT(!msg->msg_receiving); /* called back recv already */ - lnet_net_unlock(cpt); - - rc = lnet_send(LNET_NID_ANY, msg, LNET_NID_ANY); - - lnet_net_lock(cpt); - /* - * NB: message is committed for sending, we should return - * on success because LND will finalize this message later. - * - * Also, there is possibility that message is committed for - * sending and also failed before delivering to LND, - * i.e: ENOMEM, in that case we can't fall through either: - * - The rule is message must decommit for sending first if - * the it's committed for both sending and receiving - * - CPT for sending can be different with CPT for receiving, - * so we should return back to lnet_finalize() to make - * sure we are locking the correct partition. 
- */ - return rc; - } - - lnet_msg_decommit(msg, cpt, status); - kfree(msg); - return 0; -} - -void -lnet_finalize(struct lnet_ni *ni, struct lnet_msg *msg, int status) -{ - struct lnet_msg_container *container; - int my_slot; - int cpt; - int rc; - int i; - - LASSERT(!in_interrupt()); - - if (!msg) - return; - - msg->msg_ev.status = status; - - if (msg->msg_md) { - cpt = lnet_cpt_of_cookie(msg->msg_md->md_lh.lh_cookie); - - lnet_res_lock(cpt); - lnet_msg_detach_md(msg, status); - lnet_res_unlock(cpt); - } - - again: - rc = 0; - if (!msg->msg_tx_committed && !msg->msg_rx_committed) { - /* not committed to network yet */ - LASSERT(!msg->msg_onactivelist); - kfree(msg); - return; - } - - /* - * NB: routed message can be committed for both receiving and sending, - * we should finalize in LIFO order and keep counters correct. - * (finalize sending first then finalize receiving) - */ - cpt = msg->msg_tx_committed ? msg->msg_tx_cpt : msg->msg_rx_cpt; - lnet_net_lock(cpt); - - container = the_lnet.ln_msg_containers[cpt]; - list_add_tail(&msg->msg_list, &container->msc_finalizing); - - /* - * Recursion breaker. 
Don't complete the message here if I am (or - * enough other threads are) already completing messages - */ - my_slot = -1; - for (i = 0; i < container->msc_nfinalizers; i++) { - if (container->msc_finalizers[i] == current) - break; - - if (my_slot < 0 && !container->msc_finalizers[i]) - my_slot = i; - } - - if (i < container->msc_nfinalizers || my_slot < 0) { - lnet_net_unlock(cpt); - return; - } - - container->msc_finalizers[my_slot] = current; - - while (!list_empty(&container->msc_finalizing)) { - msg = list_entry(container->msc_finalizing.next, - struct lnet_msg, msg_list); - - list_del(&msg->msg_list); - - /* - * NB drops and regains the lnet lock if it actually does - * anything, so my finalizing friends can chomp along too - */ - rc = lnet_complete_msg_locked(msg, cpt); - if (rc) - break; - } - - if (unlikely(!list_empty(&the_lnet.ln_delay_rules))) { - lnet_net_unlock(cpt); - lnet_delay_rule_check(); - lnet_net_lock(cpt); - } - - container->msc_finalizers[my_slot] = NULL; - lnet_net_unlock(cpt); - - if (rc) - goto again; -} -EXPORT_SYMBOL(lnet_finalize); - -void -lnet_msg_container_cleanup(struct lnet_msg_container *container) -{ - int count = 0; - - if (!container->msc_init) - return; - - while (!list_empty(&container->msc_active)) { - struct lnet_msg *msg; - - msg = list_entry(container->msc_active.next, - struct lnet_msg, msg_activelist); - LASSERT(msg->msg_onactivelist); - msg->msg_onactivelist = 0; - list_del(&msg->msg_activelist); - kfree(msg); - count++; - } - - if (count > 0) - CERROR("%d active msg on exit\n", count); - - kvfree(container->msc_finalizers); - container->msc_finalizers = NULL; - container->msc_init = 0; -} - -int -lnet_msg_container_setup(struct lnet_msg_container *container, int cpt) -{ - container->msc_init = 1; - - INIT_LIST_HEAD(&container->msc_active); - INIT_LIST_HEAD(&container->msc_finalizing); - - /* number of CPUs */ - container->msc_nfinalizers = cfs_cpt_weight(lnet_cpt_table(), cpt); - - container->msc_finalizers = 
kvzalloc_cpt(container->msc_nfinalizers * - sizeof(*container->msc_finalizers), - GFP_KERNEL, cpt); - - if (!container->msc_finalizers) { - CERROR("Failed to allocate message finalizers\n"); - lnet_msg_container_cleanup(container); - return -ENOMEM; - } - - return 0; -} - -void -lnet_msg_containers_destroy(void) -{ - struct lnet_msg_container *container; - int i; - - if (!the_lnet.ln_msg_containers) - return; - - cfs_percpt_for_each(container, i, the_lnet.ln_msg_containers) - lnet_msg_container_cleanup(container); - - cfs_percpt_free(the_lnet.ln_msg_containers); - the_lnet.ln_msg_containers = NULL; -} - -int -lnet_msg_containers_create(void) -{ - struct lnet_msg_container *container; - int rc; - int i; - - the_lnet.ln_msg_containers = cfs_percpt_alloc(lnet_cpt_table(), - sizeof(*container)); - - if (!the_lnet.ln_msg_containers) { - CERROR("Failed to allocate cpu-partition data for network\n"); - return -ENOMEM; - } - - cfs_percpt_for_each(container, i, the_lnet.ln_msg_containers) { - rc = lnet_msg_container_setup(container, i); - if (rc) { - lnet_msg_containers_destroy(); - return rc; - } - } - - return 0; -} diff --git a/drivers/staging/lustre/lnet/lnet/lib-ptl.c b/drivers/staging/lustre/lnet/lnet/lib-ptl.c deleted file mode 100644 index fc47379c5938..000000000000 --- a/drivers/staging/lustre/lnet/lnet/lib-ptl.c +++ /dev/null @@ -1,987 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -/* - * GPL HEADER START - * - * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 only, - * as published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, but - * WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the GNU - * General Public License version 2 for more details (a copy is included - * in the LICENSE file that accompanied this code). - * - * GPL HEADER END - */ -/* - * Copyright (c) 2012, 2015, Intel Corporation. - */ -/* - * This file is part of Lustre, http://www.lustre.org/ - * Lustre is a trademark of Sun Microsystems, Inc. - * - * lnet/lnet/lib-ptl.c - * - * portal & match routines - * - * Author: liang@whamcloud.com - */ - -#define DEBUG_SUBSYSTEM S_LNET - -#include <linux/lnet/lib-lnet.h> - -/* NB: add /proc interfaces in upcoming patches */ -int portal_rotor = LNET_PTL_ROTOR_HASH_RT; -module_param(portal_rotor, int, 0644); -MODULE_PARM_DESC(portal_rotor, "redirect PUTs to different cpu-partitions"); - -static int -lnet_ptl_match_type(unsigned int index, struct lnet_process_id match_id, - __u64 mbits, __u64 ignore_bits) -{ - struct lnet_portal *ptl = the_lnet.ln_portals[index]; - int unique; - - unique = !ignore_bits && - match_id.nid != LNET_NID_ANY && - match_id.pid != LNET_PID_ANY; - - LASSERT(!lnet_ptl_is_unique(ptl) || !lnet_ptl_is_wildcard(ptl)); - - /* prefer to check w/o any lock */ - if (likely(lnet_ptl_is_unique(ptl) || lnet_ptl_is_wildcard(ptl))) - goto match; - - /* unset, new portal */ - lnet_ptl_lock(ptl); - /* check again with lock */ - if (unlikely(lnet_ptl_is_unique(ptl) || lnet_ptl_is_wildcard(ptl))) { - lnet_ptl_unlock(ptl); - goto match; - } - - /* still not set */ - if (unique) - lnet_ptl_setopt(ptl, LNET_PTL_MATCH_UNIQUE); - else - lnet_ptl_setopt(ptl, LNET_PTL_MATCH_WILDCARD); - - lnet_ptl_unlock(ptl); - - return 1; - - match: - if ((lnet_ptl_is_unique(ptl) && !unique) || - (lnet_ptl_is_wildcard(ptl) && unique)) - return 0; - return 1; -} - -static void -lnet_ptl_enable_mt(struct lnet_portal *ptl, int cpt) -{ - struct lnet_match_table *mtable = ptl->ptl_mtables[cpt]; - int i; - - /* with hold of both lnet_res_lock(cpt) and lnet_ptl_lock */ - LASSERT(lnet_ptl_is_wildcard(ptl)); - - mtable->mt_enabled = 1; - - 
ptl->ptl_mt_maps[ptl->ptl_mt_nmaps] = cpt; - for (i = ptl->ptl_mt_nmaps - 1; i >= 0; i--) { - LASSERT(ptl->ptl_mt_maps[i] != cpt); - if (ptl->ptl_mt_maps[i] < cpt) - break; - - /* swap to order */ - ptl->ptl_mt_maps[i + 1] = ptl->ptl_mt_maps[i]; - ptl->ptl_mt_maps[i] = cpt; - } - - ptl->ptl_mt_nmaps++; -} - -static void -lnet_ptl_disable_mt(struct lnet_portal *ptl, int cpt) -{ - struct lnet_match_table *mtable = ptl->ptl_mtables[cpt]; - int i; - - /* with hold of both lnet_res_lock(cpt) and lnet_ptl_lock */ - LASSERT(lnet_ptl_is_wildcard(ptl)); - - if (LNET_CPT_NUMBER == 1) - return; /* never disable the only match-table */ - - mtable->mt_enabled = 0; - - LASSERT(ptl->ptl_mt_nmaps > 0 && - ptl->ptl_mt_nmaps <= LNET_CPT_NUMBER); - - /* remove it from mt_maps */ - ptl->ptl_mt_nmaps--; - for (i = 0; i < ptl->ptl_mt_nmaps; i++) { - if (ptl->ptl_mt_maps[i] >= cpt) /* overwrite it */ - ptl->ptl_mt_maps[i] = ptl->ptl_mt_maps[i + 1]; - } -} - -static int -lnet_try_match_md(struct lnet_libmd *md, - struct lnet_match_info *info, struct lnet_msg *msg) -{ - /* - * ALWAYS called holding the lnet_res_lock, and can't lnet_res_unlock; - * lnet_match_blocked_msg() relies on this to avoid races - */ - unsigned int offset; - unsigned int mlength; - struct lnet_me *me = md->md_me; - - /* MD exhausted */ - if (lnet_md_exhausted(md)) - return LNET_MATCHMD_NONE | LNET_MATCHMD_EXHAUSTED; - - /* mismatched MD op */ - if (!(md->md_options & info->mi_opc)) - return LNET_MATCHMD_NONE; - - /* mismatched ME nid/pid? */ - if (me->me_match_id.nid != LNET_NID_ANY && - me->me_match_id.nid != info->mi_id.nid) - return LNET_MATCHMD_NONE; - - if (me->me_match_id.pid != LNET_PID_ANY && - me->me_match_id.pid != info->mi_id.pid) - return LNET_MATCHMD_NONE; - - /* mismatched ME matchbits? */ - if ((me->me_match_bits ^ info->mi_mbits) & ~me->me_ignore_bits) - return LNET_MATCHMD_NONE; - - /* Hurrah! This _is_ a match; check it out... 
*/ - - if (!(md->md_options & LNET_MD_MANAGE_REMOTE)) - offset = md->md_offset; - else - offset = info->mi_roffset; - - if (md->md_options & LNET_MD_MAX_SIZE) { - mlength = md->md_max_size; - LASSERT(md->md_offset + mlength <= md->md_length); - } else { - mlength = md->md_length - offset; - } - - if (info->mi_rlength <= mlength) { /* fits in allowed space */ - mlength = info->mi_rlength; - } else if (!(md->md_options & LNET_MD_TRUNCATE)) { - /* this packet _really_ is too big */ - CERROR("Matching packet from %s, match %llu length %d too big: %d left, %d allowed\n", - libcfs_id2str(info->mi_id), info->mi_mbits, - info->mi_rlength, md->md_length - offset, mlength); - - return LNET_MATCHMD_DROP; - } - - /* Commit to this ME/MD */ - CDEBUG(D_NET, "Incoming %s index %x from %s of length %d/%d into md %#llx [%d] + %d\n", - (info->mi_opc == LNET_MD_OP_PUT) ? "put" : "get", - info->mi_portal, libcfs_id2str(info->mi_id), mlength, - info->mi_rlength, md->md_lh.lh_cookie, md->md_niov, offset); - - lnet_msg_attach_md(msg, md, offset, mlength); - md->md_offset = offset + mlength; - - if (!lnet_md_exhausted(md)) - return LNET_MATCHMD_OK; - - /* - * Auto-unlink NOW, so the ME gets unlinked if required. - * We bumped md->md_refcount above so the MD just gets flagged - * for unlink when it is finalized. - */ - if (md->md_flags & LNET_MD_FLAG_AUTO_UNLINK) - lnet_md_unlink(md); - - return LNET_MATCHMD_OK | LNET_MATCHMD_EXHAUSTED; -} - -static struct lnet_match_table * -lnet_match2mt(struct lnet_portal *ptl, struct lnet_process_id id, __u64 mbits) -{ - if (LNET_CPT_NUMBER == 1) - return ptl->ptl_mtables[0]; /* the only one */ - - /* if it's a unique portal, return match-table hashed by NID */ - return lnet_ptl_is_unique(ptl) ? 
- ptl->ptl_mtables[lnet_cpt_of_nid(id.nid)] : NULL; -} - -struct lnet_match_table * -lnet_mt_of_attach(unsigned int index, struct lnet_process_id id, - __u64 mbits, __u64 ignore_bits, enum lnet_ins_pos pos) -{ - struct lnet_portal *ptl; - struct lnet_match_table *mtable; - - /* NB: called w/o lock */ - LASSERT(index < the_lnet.ln_nportals); - - if (!lnet_ptl_match_type(index, id, mbits, ignore_bits)) - return NULL; - - ptl = the_lnet.ln_portals[index]; - - mtable = lnet_match2mt(ptl, id, mbits); - if (mtable) /* unique portal or only one match-table */ - return mtable; - - /* it's a wildcard portal */ - switch (pos) { - default: - return NULL; - case LNET_INS_BEFORE: - case LNET_INS_AFTER: - /* - * posted by no affinity thread, always hash to specific - * match-table to avoid buffer stealing which is heavy - */ - return ptl->ptl_mtables[ptl->ptl_index % LNET_CPT_NUMBER]; - case LNET_INS_LOCAL: - /* posted by cpu-affinity thread */ - return ptl->ptl_mtables[lnet_cpt_current()]; - } -} - -static struct lnet_match_table * -lnet_mt_of_match(struct lnet_match_info *info, struct lnet_msg *msg) -{ - struct lnet_match_table *mtable; - struct lnet_portal *ptl; - unsigned int nmaps; - unsigned int rotor; - unsigned int cpt; - bool routed; - - /* NB: called w/o lock */ - LASSERT(info->mi_portal < the_lnet.ln_nportals); - ptl = the_lnet.ln_portals[info->mi_portal]; - - LASSERT(lnet_ptl_is_wildcard(ptl) || lnet_ptl_is_unique(ptl)); - - mtable = lnet_match2mt(ptl, info->mi_id, info->mi_mbits); - if (mtable) - return mtable; - - /* it's a wildcard portal */ - routed = LNET_NIDNET(msg->msg_hdr.src_nid) != - LNET_NIDNET(msg->msg_hdr.dest_nid); - - if (portal_rotor == LNET_PTL_ROTOR_OFF || - (portal_rotor != LNET_PTL_ROTOR_ON && !routed)) { - cpt = lnet_cpt_current(); - if (ptl->ptl_mtables[cpt]->mt_enabled) - return ptl->ptl_mtables[cpt]; - } - - rotor = ptl->ptl_rotor++; /* get round-robin factor */ - if (portal_rotor == LNET_PTL_ROTOR_HASH_RT && routed) - cpt = 
lnet_cpt_of_nid(msg->msg_hdr.src_nid); - else - cpt = rotor % LNET_CPT_NUMBER; - - if (!ptl->ptl_mtables[cpt]->mt_enabled) { - /* is there any active entry for this portal? */ - nmaps = ptl->ptl_mt_nmaps; - /* map to an active mtable to avoid heavy "stealing" */ - if (nmaps) { - /* - * NB: there is possibility that ptl_mt_maps is being - * changed because we are not under protection of - * lnet_ptl_lock, but it shouldn't hurt anything - */ - cpt = ptl->ptl_mt_maps[rotor % nmaps]; - } - } - - return ptl->ptl_mtables[cpt]; -} - -static int -lnet_mt_test_exhausted(struct lnet_match_table *mtable, int pos) -{ - __u64 *bmap; - int i; - - if (!lnet_ptl_is_wildcard(the_lnet.ln_portals[mtable->mt_portal])) - return 0; - - if (pos < 0) { /* check all bits */ - for (i = 0; i < LNET_MT_EXHAUSTED_BMAP; i++) { - if (mtable->mt_exhausted[i] != (__u64)(-1)) - return 0; - } - return 1; - } - - LASSERT(pos <= LNET_MT_HASH_IGNORE); - /* mtable::mt_mhash[pos] is marked as exhausted or not */ - bmap = &mtable->mt_exhausted[pos >> LNET_MT_BITS_U64]; - pos &= (1 << LNET_MT_BITS_U64) - 1; - - return (*bmap & BIT(pos)); -} - -static void -lnet_mt_set_exhausted(struct lnet_match_table *mtable, int pos, int exhausted) -{ - __u64 *bmap; - - LASSERT(lnet_ptl_is_wildcard(the_lnet.ln_portals[mtable->mt_portal])); - LASSERT(pos <= LNET_MT_HASH_IGNORE); - - /* set mtable::mt_mhash[pos] as exhausted/non-exhausted */ - bmap = &mtable->mt_exhausted[pos >> LNET_MT_BITS_U64]; - pos &= (1 << LNET_MT_BITS_U64) - 1; - - if (!exhausted) - *bmap &= ~(1ULL << pos); - else - *bmap |= 1ULL << pos; -} - -struct list_head * -lnet_mt_match_head(struct lnet_match_table *mtable, - struct lnet_process_id id, __u64 mbits) -{ - struct lnet_portal *ptl = the_lnet.ln_portals[mtable->mt_portal]; - unsigned long hash = mbits; - - if (!lnet_ptl_is_wildcard(ptl)) { - hash += id.nid + id.pid; - - LASSERT(lnet_ptl_is_unique(ptl)); - hash = hash_long(hash, LNET_MT_HASH_BITS); - } - return &mtable->mt_mhash[hash & 
LNET_MT_HASH_MASK]; -} - -int -lnet_mt_match_md(struct lnet_match_table *mtable, - struct lnet_match_info *info, struct lnet_msg *msg) -{ - struct list_head *head; - struct lnet_me *me; - struct lnet_me *tmp; - int exhausted = 0; - int rc; - - /* any ME with ignore bits? */ - if (!list_empty(&mtable->mt_mhash[LNET_MT_HASH_IGNORE])) - head = &mtable->mt_mhash[LNET_MT_HASH_IGNORE]; - else - head = lnet_mt_match_head(mtable, info->mi_id, info->mi_mbits); - again: - /* NB: only wildcard portal needs to return LNET_MATCHMD_EXHAUSTED */ - if (lnet_ptl_is_wildcard(the_lnet.ln_portals[mtable->mt_portal])) - exhausted = LNET_MATCHMD_EXHAUSTED; - - list_for_each_entry_safe(me, tmp, head, me_list) { - /* ME attached but MD not attached yet */ - if (!me->me_md) - continue; - - LASSERT(me == me->me_md->md_me); - - rc = lnet_try_match_md(me->me_md, info, msg); - if (!(rc & LNET_MATCHMD_EXHAUSTED)) - exhausted = 0; /* mlist is not empty */ - - if (rc & LNET_MATCHMD_FINISH) { - /* - * don't return EXHAUSTED bit because we don't know - * whether the mlist is empty or not - */ - return rc & ~LNET_MATCHMD_EXHAUSTED; - } - } - - if (exhausted == LNET_MATCHMD_EXHAUSTED) { /* @head is exhausted */ - lnet_mt_set_exhausted(mtable, head - mtable->mt_mhash, 1); - if (!lnet_mt_test_exhausted(mtable, -1)) - exhausted = 0; - } - - if (!exhausted && head == &mtable->mt_mhash[LNET_MT_HASH_IGNORE]) { - head = lnet_mt_match_head(mtable, info->mi_id, info->mi_mbits); - goto again; /* re-check MEs w/o ignore-bits */ - } - - if (info->mi_opc == LNET_MD_OP_GET || - !lnet_ptl_is_lazy(the_lnet.ln_portals[info->mi_portal])) - return exhausted | LNET_MATCHMD_DROP; - - return exhausted | LNET_MATCHMD_NONE; -} - -static int -lnet_ptl_match_early(struct lnet_portal *ptl, struct lnet_msg *msg) -{ - int rc; - - /* - * message arrived before any buffer posting on this portal, - * simply delay or drop this message - */ - if (likely(lnet_ptl_is_wildcard(ptl) || lnet_ptl_is_unique(ptl))) - return 0; - - 
lnet_ptl_lock(ptl); - /* check it again with hold of lock */ - if (lnet_ptl_is_wildcard(ptl) || lnet_ptl_is_unique(ptl)) { - lnet_ptl_unlock(ptl); - return 0; - } - - if (lnet_ptl_is_lazy(ptl)) { - if (msg->msg_rx_ready_delay) { - msg->msg_rx_delayed = 1; - list_add_tail(&msg->msg_list, - &ptl->ptl_msg_delayed); - } - rc = LNET_MATCHMD_NONE; - } else { - rc = LNET_MATCHMD_DROP; - } - - lnet_ptl_unlock(ptl); - return rc; -} - -static int -lnet_ptl_match_delay(struct lnet_portal *ptl, - struct lnet_match_info *info, struct lnet_msg *msg) -{ - int first = ptl->ptl_mt_maps[0]; /* read w/o lock */ - int rc = 0; - int i; - - /** - * Steal buffer from other CPTs, and delay msg if nothing to - * steal. This function is more expensive than a regular - * match, but we don't expect it can happen a lot. The return - * code contains one of LNET_MATCHMD_OK, LNET_MATCHMD_DROP, or - * LNET_MATCHMD_NONE. - */ - LASSERT(lnet_ptl_is_wildcard(ptl)); - - for (i = 0; i < LNET_CPT_NUMBER; i++) { - struct lnet_match_table *mtable; - int cpt; - - cpt = (first + i) % LNET_CPT_NUMBER; - mtable = ptl->ptl_mtables[cpt]; - if (i && i != LNET_CPT_NUMBER - 1 && !mtable->mt_enabled) - continue; - - lnet_res_lock(cpt); - lnet_ptl_lock(ptl); - - if (!i) { - /* The first try, add to stealing list. */ - list_add_tail(&msg->msg_list, - &ptl->ptl_msg_stealing); - } - - if (!list_empty(&msg->msg_list)) { - /* On stealing list. */ - rc = lnet_mt_match_md(mtable, info, msg); - - if ((rc & LNET_MATCHMD_EXHAUSTED) && - mtable->mt_enabled) - lnet_ptl_disable_mt(ptl, cpt); - - if (rc & LNET_MATCHMD_FINISH) { - /* Match found, remove from stealing list. */ - list_del_init(&msg->msg_list); - } else if (i == LNET_CPT_NUMBER - 1 || /* (1) */ - !ptl->ptl_mt_nmaps || /* (2) */ - (ptl->ptl_mt_nmaps == 1 && /* (3) */ - ptl->ptl_mt_maps[0] == cpt)) { - /** - * No match found, and this is either - * (1) the last cpt to check, or - * (2) there is no active cpt, or - * (3) this is the only active cpt. 
- * There is nothing to steal: delay or - * drop the message. - */ - list_del_init(&msg->msg_list); - - if (lnet_ptl_is_lazy(ptl)) { - msg->msg_rx_delayed = 1; - list_add_tail(&msg->msg_list, - &ptl->ptl_msg_delayed); - rc = LNET_MATCHMD_NONE; - } else { - rc = LNET_MATCHMD_DROP; - } - } else { - /* Do another iteration. */ - rc = 0; - } - } else { - /** - * No longer on stealing list: another thread - * matched the message in lnet_ptl_attach_md(). - * We are now expected to handle the message. - */ - rc = !msg->msg_md ? - LNET_MATCHMD_DROP : LNET_MATCHMD_OK; - } - - lnet_ptl_unlock(ptl); - lnet_res_unlock(cpt); - - /** - * Note that test (1) above ensures that we always - * exit the loop through this break statement. - * - * LNET_MATCHMD_NONE means msg was added to the - * delayed queue, and we may no longer reference it - * after lnet_ptl_unlock() and lnet_res_unlock(). - */ - if (rc & (LNET_MATCHMD_FINISH | LNET_MATCHMD_NONE)) - break; - } - - return rc; -} - -int -lnet_ptl_match_md(struct lnet_match_info *info, struct lnet_msg *msg) -{ - struct lnet_match_table *mtable; - struct lnet_portal *ptl; - int rc; - - CDEBUG(D_NET, "Request from %s of length %d into portal %d MB=%#llx\n", - libcfs_id2str(info->mi_id), info->mi_rlength, info->mi_portal, - info->mi_mbits); - - if (info->mi_portal >= the_lnet.ln_nportals) { - CERROR("Invalid portal %d not in [0-%d]\n", - info->mi_portal, the_lnet.ln_nportals); - return LNET_MATCHMD_DROP; - } - - ptl = the_lnet.ln_portals[info->mi_portal]; - rc = lnet_ptl_match_early(ptl, msg); - if (rc) /* matched or delayed early message */ - return rc; - - mtable = lnet_mt_of_match(info, msg); - lnet_res_lock(mtable->mt_cpt); - - if (the_lnet.ln_shutdown) { - rc = LNET_MATCHMD_DROP; - goto out1; - } - - rc = lnet_mt_match_md(mtable, info, msg); - if ((rc & LNET_MATCHMD_EXHAUSTED) && mtable->mt_enabled) { - lnet_ptl_lock(ptl); - lnet_ptl_disable_mt(ptl, mtable->mt_cpt); - lnet_ptl_unlock(ptl); - } - - if (rc & LNET_MATCHMD_FINISH) /* 
matched or dropping */ - goto out1; - - if (!msg->msg_rx_ready_delay) - goto out1; - - LASSERT(lnet_ptl_is_lazy(ptl)); - LASSERT(!msg->msg_rx_delayed); - - /* NB: we don't expect "delay" can happen a lot */ - if (lnet_ptl_is_unique(ptl) || LNET_CPT_NUMBER == 1) { - lnet_ptl_lock(ptl); - - msg->msg_rx_delayed = 1; - list_add_tail(&msg->msg_list, &ptl->ptl_msg_delayed); - - lnet_ptl_unlock(ptl); - lnet_res_unlock(mtable->mt_cpt); - rc = LNET_MATCHMD_NONE; - } else { - lnet_res_unlock(mtable->mt_cpt); - rc = lnet_ptl_match_delay(ptl, info, msg); - } - - /* LNET_MATCHMD_NONE means msg was added to the delay queue */ - if (rc & LNET_MATCHMD_NONE) { - CDEBUG(D_NET, - "Delaying %s from %s ptl %d MB %#llx off %d len %d\n", - info->mi_opc == LNET_MD_OP_PUT ? "PUT" : "GET", - libcfs_id2str(info->mi_id), info->mi_portal, - info->mi_mbits, info->mi_roffset, info->mi_rlength); - } - goto out0; - out1: - lnet_res_unlock(mtable->mt_cpt); - out0: - /* EXHAUSTED bit is only meaningful for internal functions */ - return rc & ~LNET_MATCHMD_EXHAUSTED; -} - -void -lnet_ptl_detach_md(struct lnet_me *me, struct lnet_libmd *md) -{ - LASSERT(me->me_md == md && md->md_me == me); - - me->me_md = NULL; - md->md_me = NULL; -} - -/* called with lnet_res_lock held */ -void -lnet_ptl_attach_md(struct lnet_me *me, struct lnet_libmd *md, - struct list_head *matches, struct list_head *drops) -{ - struct lnet_portal *ptl = the_lnet.ln_portals[me->me_portal]; - struct lnet_match_table *mtable; - struct list_head *head; - struct lnet_msg *tmp; - struct lnet_msg *msg; - int exhausted = 0; - int cpt; - - LASSERT(!md->md_refcount); /* a brand new MD */ - - me->me_md = md; - md->md_me = me; - - cpt = lnet_cpt_of_cookie(md->md_lh.lh_cookie); - mtable = ptl->ptl_mtables[cpt]; - - if (list_empty(&ptl->ptl_msg_stealing) && - list_empty(&ptl->ptl_msg_delayed) && - !lnet_mt_test_exhausted(mtable, me->me_pos)) - return; - - lnet_ptl_lock(ptl); - head = &ptl->ptl_msg_stealing; - again: - 
list_for_each_entry_safe(msg, tmp, head, msg_list) { - struct lnet_match_info info; - struct lnet_hdr *hdr; - int rc; - - LASSERT(msg->msg_rx_delayed || head == &ptl->ptl_msg_stealing); - - hdr = &msg->msg_hdr; - info.mi_id.nid = hdr->src_nid; - info.mi_id.pid = hdr->src_pid; - info.mi_opc = LNET_MD_OP_PUT; - info.mi_portal = hdr->msg.put.ptl_index; - info.mi_rlength = hdr->payload_length; - info.mi_roffset = hdr->msg.put.offset; - info.mi_mbits = hdr->msg.put.match_bits; - - rc = lnet_try_match_md(md, &info, msg); - - exhausted = (rc & LNET_MATCHMD_EXHAUSTED); - if (rc & LNET_MATCHMD_NONE) { - if (exhausted) - break; - continue; - } - - /* Hurrah! This _is_ a match */ - LASSERT(rc & LNET_MATCHMD_FINISH); - list_del_init(&msg->msg_list); - - if (head == &ptl->ptl_msg_stealing) { - if (exhausted) - break; - /* stealing thread will handle the message */ - continue; - } - - if (rc & LNET_MATCHMD_OK) { - list_add_tail(&msg->msg_list, matches); - - CDEBUG(D_NET, "Resuming delayed PUT from %s portal %d match %llu offset %d length %d.\n", - libcfs_id2str(info.mi_id), - info.mi_portal, info.mi_mbits, - info.mi_roffset, info.mi_rlength); - } else { - list_add_tail(&msg->msg_list, drops); - } - - if (exhausted) - break; - } - - if (!exhausted && head == &ptl->ptl_msg_stealing) { - head = &ptl->ptl_msg_delayed; - goto again; - } - - if (lnet_ptl_is_wildcard(ptl) && !exhausted) { - lnet_mt_set_exhausted(mtable, me->me_pos, 0); - if (!mtable->mt_enabled) - lnet_ptl_enable_mt(ptl, cpt); - } - - lnet_ptl_unlock(ptl); -} - -static void -lnet_ptl_cleanup(struct lnet_portal *ptl) -{ - struct lnet_match_table *mtable; - int i; - - if (!ptl->ptl_mtables) /* uninitialized portal */ - return; - - LASSERT(list_empty(&ptl->ptl_msg_delayed)); - LASSERT(list_empty(&ptl->ptl_msg_stealing)); - cfs_percpt_for_each(mtable, i, ptl->ptl_mtables) { - struct list_head *mhash; - struct lnet_me *me; - int j; - - if (!mtable->mt_mhash) /* uninitialized match-table */ - continue; - - mhash = 
mtable->mt_mhash; - /* cleanup ME */ - for (j = 0; j < LNET_MT_HASH_SIZE + 1; j++) { - while (!list_empty(&mhash[j])) { - me = list_entry(mhash[j].next, - struct lnet_me, me_list); - CERROR("Active ME %p on exit\n", me); - list_del(&me->me_list); - kfree(me); - } - } - /* the extra entry is for MEs with ignore bits */ - kvfree(mhash); - } - - cfs_percpt_free(ptl->ptl_mtables); - ptl->ptl_mtables = NULL; -} - -static int -lnet_ptl_setup(struct lnet_portal *ptl, int index) -{ - struct lnet_match_table *mtable; - struct list_head *mhash; - int i; - int j; - - ptl->ptl_mtables = cfs_percpt_alloc(lnet_cpt_table(), - sizeof(struct lnet_match_table)); - if (!ptl->ptl_mtables) { - CERROR("Failed to create match table for portal %d\n", index); - return -ENOMEM; - } - - ptl->ptl_index = index; - INIT_LIST_HEAD(&ptl->ptl_msg_delayed); - INIT_LIST_HEAD(&ptl->ptl_msg_stealing); - spin_lock_init(&ptl->ptl_lock); - cfs_percpt_for_each(mtable, i, ptl->ptl_mtables) { - /* the extra entry is for MEs with ignore bits */ - mhash = kvzalloc_cpt(sizeof(*mhash) * (LNET_MT_HASH_SIZE + 1), - GFP_KERNEL, i); - if (!mhash) { - CERROR("Failed to create match hash for portal %d\n", - index); - goto failed; - } - - memset(&mtable->mt_exhausted[0], -1, - sizeof(mtable->mt_exhausted[0]) * - LNET_MT_EXHAUSTED_BMAP); - mtable->mt_mhash = mhash; - for (j = 0; j < LNET_MT_HASH_SIZE + 1; j++) - INIT_LIST_HEAD(&mhash[j]); - - mtable->mt_portal = index; - mtable->mt_cpt = i; - } - - return 0; - failed: - lnet_ptl_cleanup(ptl); - return -ENOMEM; -} - -void -lnet_portals_destroy(void) -{ - int i; - - if (!the_lnet.ln_portals) - return; - - for (i = 0; i < the_lnet.ln_nportals; i++) - lnet_ptl_cleanup(the_lnet.ln_portals[i]); - - cfs_array_free(the_lnet.ln_portals); - the_lnet.ln_portals = NULL; - the_lnet.ln_nportals = 0; -} - -int -lnet_portals_create(void) -{ - int size; - int i; - - size = offsetof(struct lnet_portal, ptl_mt_maps[LNET_CPT_NUMBER]); - - the_lnet.ln_portals = cfs_array_alloc(MAX_PORTALS, 
size); - if (!the_lnet.ln_portals) { - CERROR("Failed to allocate portals table\n"); - return -ENOMEM; - } - the_lnet.ln_nportals = MAX_PORTALS; - - for (i = 0; i < the_lnet.ln_nportals; i++) { - if (lnet_ptl_setup(the_lnet.ln_portals[i], i)) { - lnet_portals_destroy(); - return -ENOMEM; - } - } - - return 0; -} - -/** - * Turn on the lazy portal attribute. Use with caution! - * - * This portal attribute only affects incoming PUT requests to the portal, - * and is off by default. By default, if there's no matching MD for an - * incoming PUT request, it is simply dropped. With the lazy attribute on, - * such requests are queued indefinitely until either a matching MD is - * posted to the portal or the lazy attribute is turned off. - * - * It would prevent dropped requests, however it should be regarded as the - * last line of defense - i.e. users must keep a close watch on active - * buffers on a lazy portal and once it becomes too low post more buffers as - * soon as possible. This is because delayed requests usually have detrimental - * effects on underlying network connections. A few delayed requests often - * suffice to bring an underlying connection to a complete halt, due to flow - * control mechanisms. - * - * There's also a DOS attack risk. If users don't post match-all MDs on a - * lazy portal, a malicious peer can easily stop a service by sending some - * PUT requests with match bits that won't match any MD. A routed server is - * especially vulnerable since the connections to its neighbor routers are - * shared among all clients. - * - * \param portal Index of the portal to enable the lazy attribute on. - * - * \retval 0 On success. - * \retval -EINVAL If \a portal is not a valid index. 
- */ -int -LNetSetLazyPortal(int portal) -{ - struct lnet_portal *ptl; - - if (portal < 0 || portal >= the_lnet.ln_nportals) - return -EINVAL; - - CDEBUG(D_NET, "Setting portal %d lazy\n", portal); - ptl = the_lnet.ln_portals[portal]; - - lnet_res_lock(LNET_LOCK_EX); - lnet_ptl_lock(ptl); - - lnet_ptl_setopt(ptl, LNET_PTL_LAZY); - - lnet_ptl_unlock(ptl); - lnet_res_unlock(LNET_LOCK_EX); - - return 0; -} -EXPORT_SYMBOL(LNetSetLazyPortal); - -int -lnet_clear_lazy_portal(struct lnet_ni *ni, int portal, char *reason) -{ - struct lnet_portal *ptl; - LIST_HEAD(zombies); - - if (portal < 0 || portal >= the_lnet.ln_nportals) - return -EINVAL; - - ptl = the_lnet.ln_portals[portal]; - - lnet_res_lock(LNET_LOCK_EX); - lnet_ptl_lock(ptl); - - if (!lnet_ptl_is_lazy(ptl)) { - lnet_ptl_unlock(ptl); - lnet_res_unlock(LNET_LOCK_EX); - return 0; - } - - if (ni) { - struct lnet_msg *msg, *tmp; - - /* grab all messages which are on the NI passed in */ - list_for_each_entry_safe(msg, tmp, &ptl->ptl_msg_delayed, - msg_list) { - if (msg->msg_rxpeer->lp_ni == ni) - list_move(&msg->msg_list, &zombies); - } - } else { - if (the_lnet.ln_shutdown) - CWARN("Active lazy portal %d on exit\n", portal); - else - CDEBUG(D_NET, "clearing portal %d lazy\n", portal); - - /* grab all the blocked messages atomically */ - list_splice_init(&ptl->ptl_msg_delayed, &zombies); - - lnet_ptl_unsetopt(ptl, LNET_PTL_LAZY); - } - - lnet_ptl_unlock(ptl); - lnet_res_unlock(LNET_LOCK_EX); - - lnet_drop_delayed_msg_list(&zombies, reason); - - return 0; -} - -/** - * Turn off the lazy portal attribute. Delayed requests on the portal, - * if any, will be all dropped when this function returns. - * - * \param portal Index of the portal to disable the lazy attribute on. - * - * \retval 0 On success. - * \retval -EINVAL If \a portal is not a valid index. 
- */ -int -LNetClearLazyPortal(int portal) -{ - return lnet_clear_lazy_portal(NULL, portal, - "Clearing lazy portal attr"); -} -EXPORT_SYMBOL(LNetClearLazyPortal); diff --git a/drivers/staging/lustre/lnet/lnet/lib-socket.c b/drivers/staging/lustre/lnet/lnet/lib-socket.c deleted file mode 100644 index 1bee667802b0..000000000000 --- a/drivers/staging/lustre/lnet/lnet/lib-socket.c +++ /dev/null @@ -1,586 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -/* - * GPL HEADER START - * - * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 only, - * as published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, but - * WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * General Public License version 2 for more details (a copy is included - * in the LICENSE file that accompanied this code). - * - * You should have received a copy of the GNU General Public License - * version 2 along with this program; If not, see - * http://www.gnu.org/licenses/gpl-2.0.html - * - * GPL HEADER END - */ -/* - * Copyright (c) 2008, 2010, Oracle and/or its affiliates. All rights reserved. - * Use is subject to license terms. - * - * Copyright (c) 2012, 2015, Intel Corporation. - */ -/* - * This file is part of Lustre, http://www.lustre.org/ - * Lustre is a trademark of Seagate, Inc. 
- */ -#define DEBUG_SUBSYSTEM S_LNET - -#include <linux/if.h> -#include <linux/in.h> -#include <linux/net.h> -#include <linux/file.h> -#include <linux/pagemap.h> -/* For sys_open & sys_close */ -#include <linux/syscalls.h> -#include <net/sock.h> - -#include <linux/libcfs/libcfs.h> -#include <linux/lnet/lib-lnet.h> - -static int -kernel_sock_unlocked_ioctl(struct file *filp, int cmd, unsigned long arg) -{ - mm_segment_t oldfs = get_fs(); - int err; - - set_fs(KERNEL_DS); - err = filp->f_op->unlocked_ioctl(filp, cmd, arg); - set_fs(oldfs); - - return err; -} - -static int -lnet_sock_ioctl(int cmd, unsigned long arg) -{ - struct file *sock_filp; - struct socket *sock; - int rc; - - rc = sock_create(PF_INET, SOCK_STREAM, 0, &sock); - if (rc) { - CERROR("Can't create socket: %d\n", rc); - return rc; - } - - sock_filp = sock_alloc_file(sock, 0, NULL); - if (IS_ERR(sock_filp)) - return PTR_ERR(sock_filp); - - rc = kernel_sock_unlocked_ioctl(sock_filp, cmd, arg); - - fput(sock_filp); - return rc; -} - -int -lnet_ipif_query(char *name, int *up, __u32 *ip, __u32 *mask) -{ - struct ifreq ifr; - int nob; - int rc; - __be32 val; - - nob = strnlen(name, IFNAMSIZ); - if (nob == IFNAMSIZ) { - CERROR("Interface name %s too long\n", name); - return -EINVAL; - } - - BUILD_BUG_ON(sizeof(ifr.ifr_name) < IFNAMSIZ); - - if (strlen(name) > sizeof(ifr.ifr_name) - 1) - return -E2BIG; - strncpy(ifr.ifr_name, name, sizeof(ifr.ifr_name)); - - rc = lnet_sock_ioctl(SIOCGIFFLAGS, (unsigned long)&ifr); - if (rc) { - CERROR("Can't get flags for interface %s\n", name); - return rc; - } - - if (!(ifr.ifr_flags & IFF_UP)) { - CDEBUG(D_NET, "Interface %s down\n", name); - *up = 0; - *ip = *mask = 0; - return 0; - } - *up = 1; - - if (strlen(name) > sizeof(ifr.ifr_name) - 1) - return -E2BIG; - strncpy(ifr.ifr_name, name, sizeof(ifr.ifr_name)); - - ifr.ifr_addr.sa_family = AF_INET; - rc = lnet_sock_ioctl(SIOCGIFADDR, (unsigned long)&ifr); - if (rc) { - CERROR("Can't get IP address for interface %s\n", 
name); - return rc; - } - - val = ((struct sockaddr_in *)&ifr.ifr_addr)->sin_addr.s_addr; - *ip = ntohl(val); - - if (strlen(name) > sizeof(ifr.ifr_name) - 1) - return -E2BIG; - strncpy(ifr.ifr_name, name, sizeof(ifr.ifr_name)); - - ifr.ifr_addr.sa_family = AF_INET; - rc = lnet_sock_ioctl(SIOCGIFNETMASK, (unsigned long)&ifr); - if (rc) { - CERROR("Can't get netmask for interface %s\n", name); - return rc; - } - - val = ((struct sockaddr_in *)&ifr.ifr_netmask)->sin_addr.s_addr; - *mask = ntohl(val); - - return 0; -} -EXPORT_SYMBOL(lnet_ipif_query); - -int -lnet_ipif_enumerate(char ***namesp) -{ - /* Allocate and fill in 'names', returning # interfaces/error */ - char **names; - int toobig; - int nalloc; - int nfound; - struct ifreq *ifr; - struct ifconf ifc; - int rc; - int nob; - int i; - - nalloc = 16; /* first guess at max interfaces */ - toobig = 0; - for (;;) { - if (nalloc * sizeof(*ifr) > PAGE_SIZE) { - toobig = 1; - nalloc = PAGE_SIZE / sizeof(*ifr); - CWARN("Too many interfaces: only enumerating first %d\n", - nalloc); - } - - ifr = kzalloc(nalloc * sizeof(*ifr), GFP_KERNEL); - if (!ifr) { - CERROR("ENOMEM enumerating up to %d interfaces\n", - nalloc); - rc = -ENOMEM; - goto out0; - } - - ifc.ifc_buf = (char *)ifr; - ifc.ifc_len = nalloc * sizeof(*ifr); - - rc = lnet_sock_ioctl(SIOCGIFCONF, (unsigned long)&ifc); - if (rc < 0) { - CERROR("Error %d enumerating interfaces\n", rc); - goto out1; - } - - LASSERT(!rc); - - nfound = ifc.ifc_len / sizeof(*ifr); - LASSERT(nfound <= nalloc); - - if (nfound < nalloc || toobig) - break; - - kfree(ifr); - nalloc *= 2; - } - - if (!nfound) - goto out1; - - names = kzalloc(nfound * sizeof(*names), GFP_KERNEL); - if (!names) { - rc = -ENOMEM; - goto out1; - } - - for (i = 0; i < nfound; i++) { - nob = strnlen(ifr[i].ifr_name, IFNAMSIZ); - if (nob == IFNAMSIZ) { - /* no space for terminating NULL */ - CERROR("interface name %.*s too long (%d max)\n", - nob, ifr[i].ifr_name, IFNAMSIZ); - rc = -ENAMETOOLONG; - goto out2; - } - 
- names[i] = kmalloc(IFNAMSIZ, GFP_KERNEL); - if (!names[i]) { - rc = -ENOMEM; - goto out2; - } - - memcpy(names[i], ifr[i].ifr_name, nob); - names[i][nob] = 0; - } - - *namesp = names; - rc = nfound; - -out2: - if (rc < 0) - lnet_ipif_free_enumeration(names, nfound); -out1: - kfree(ifr); -out0: - return rc; -} -EXPORT_SYMBOL(lnet_ipif_enumerate); - -void -lnet_ipif_free_enumeration(char **names, int n) -{ - int i; - - LASSERT(n > 0); - - for (i = 0; i < n && names[i]; i++) - kfree(names[i]); - - kfree(names); -} -EXPORT_SYMBOL(lnet_ipif_free_enumeration); - -int -lnet_sock_write(struct socket *sock, void *buffer, int nob, int timeout) -{ - int rc; - long jiffies_left = timeout * msecs_to_jiffies(MSEC_PER_SEC); - unsigned long then; - struct timeval tv; - struct kvec iov = { .iov_base = buffer, .iov_len = nob }; - struct msghdr msg = {NULL,}; - - LASSERT(nob > 0); - /* - * Caller may pass a zero timeout if she thinks the socket buffer is - * empty enough to take the whole message immediately - */ - iov_iter_kvec(&msg.msg_iter, WRITE | ITER_KVEC, &iov, 1, nob); - for (;;) { - msg.msg_flags = !timeout ? 
MSG_DONTWAIT : 0; - if (timeout) { - /* Set send timeout to remaining time */ - jiffies_to_timeval(jiffies_left, &tv); - rc = kernel_setsockopt(sock, SOL_SOCKET, SO_SNDTIMEO, - (char *)&tv, sizeof(tv)); - if (rc) { - CERROR("Can't set socket send timeout %ld.%06d: %d\n", - (long)tv.tv_sec, (int)tv.tv_usec, rc); - return rc; - } - } - - then = jiffies; - rc = kernel_sendmsg(sock, &msg, &iov, 1, nob); - jiffies_left -= jiffies - then; - - if (rc < 0) - return rc; - - if (!rc) { - CERROR("Unexpected zero rc\n"); - return -ECONNABORTED; - } - - if (!msg_data_left(&msg)) - break; - - if (jiffies_left <= 0) - return -EAGAIN; - } - return 0; -} -EXPORT_SYMBOL(lnet_sock_write); - -int -lnet_sock_read(struct socket *sock, void *buffer, int nob, int timeout) -{ - int rc; - long jiffies_left = timeout * msecs_to_jiffies(MSEC_PER_SEC); - unsigned long then; - struct timeval tv; - struct kvec iov = { - .iov_base = buffer, - .iov_len = nob - }; - struct msghdr msg = { - .msg_flags = 0 - }; - - LASSERT(nob > 0); - LASSERT(jiffies_left > 0); - - iov_iter_kvec(&msg.msg_iter, READ | ITER_KVEC, &iov, 1, nob); - - for (;;) { - /* Set receive timeout to remaining time */ - jiffies_to_timeval(jiffies_left, &tv); - rc = kernel_setsockopt(sock, SOL_SOCKET, SO_RCVTIMEO, - (char *)&tv, sizeof(tv)); - if (rc) { - CERROR("Can't set socket recv timeout %ld.%06d: %d\n", - (long)tv.tv_sec, (int)tv.tv_usec, rc); - return rc; - } - - then = jiffies; - rc = sock_recvmsg(sock, &msg, 0); - jiffies_left -= jiffies - then; - - if (rc < 0) - return rc; - - if (!rc) - return -ECONNRESET; - - if (!msg_data_left(&msg)) - return 0; - - if (jiffies_left <= 0) - return -ETIMEDOUT; - } -} -EXPORT_SYMBOL(lnet_sock_read); - -static int -lnet_sock_create(struct socket **sockp, int *fatal, __u32 local_ip, - int local_port) -{ - struct sockaddr_in locaddr; - struct socket *sock; - int rc; - int option; - - /* All errors are fatal except bind failure if the port is in use */ - *fatal = 1; - - rc = 
sock_create(PF_INET, SOCK_STREAM, 0, &sock); - *sockp = sock; - if (rc) { - CERROR("Can't create socket: %d\n", rc); - return rc; - } - - option = 1; - rc = kernel_setsockopt(sock, SOL_SOCKET, SO_REUSEADDR, - (char *)&option, sizeof(option)); - if (rc) { - CERROR("Can't set SO_REUSEADDR for socket: %d\n", rc); - goto failed; - } - - if (local_ip || local_port) { - memset(&locaddr, 0, sizeof(locaddr)); - locaddr.sin_family = AF_INET; - locaddr.sin_port = htons(local_port); - if (!local_ip) - locaddr.sin_addr.s_addr = htonl(INADDR_ANY); - else - locaddr.sin_addr.s_addr = htonl(local_ip); - - rc = kernel_bind(sock, (struct sockaddr *)&locaddr, - sizeof(locaddr)); - if (rc == -EADDRINUSE) { - CDEBUG(D_NET, "Port %d already in use\n", local_port); - *fatal = 0; - goto failed; - } - if (rc) { - CERROR("Error trying to bind to port %d: %d\n", - local_port, rc); - goto failed; - } - } - return 0; - -failed: - sock_release(sock); - return rc; -} - -int -lnet_sock_setbuf(struct socket *sock, int txbufsize, int rxbufsize) -{ - int option; - int rc; - - if (txbufsize) { - option = txbufsize; - rc = kernel_setsockopt(sock, SOL_SOCKET, SO_SNDBUF, - (char *)&option, sizeof(option)); - if (rc) { - CERROR("Can't set send buffer %d: %d\n", - option, rc); - return rc; - } - } - - if (rxbufsize) { - option = rxbufsize; - rc = kernel_setsockopt(sock, SOL_SOCKET, SO_RCVBUF, - (char *)&option, sizeof(option)); - if (rc) { - CERROR("Can't set receive buffer %d: %d\n", - option, rc); - return rc; - } - } - return 0; -} -EXPORT_SYMBOL(lnet_sock_setbuf); - -int -lnet_sock_getaddr(struct socket *sock, bool remote, __u32 *ip, int *port) -{ - struct sockaddr_in sin; - int rc; - - if (remote) - rc = kernel_getpeername(sock, (struct sockaddr *)&sin); - else - rc = kernel_getsockname(sock, (struct sockaddr *)&sin); - if (rc < 0) { - CERROR("Error %d getting sock %s IP/port\n", - rc, remote ? 
"peer" : "local"); - return rc; - } - - if (ip) - *ip = ntohl(sin.sin_addr.s_addr); - - if (port) - *port = ntohs(sin.sin_port); - - return 0; -} -EXPORT_SYMBOL(lnet_sock_getaddr); - -int -lnet_sock_getbuf(struct socket *sock, int *txbufsize, int *rxbufsize) -{ - if (txbufsize) - *txbufsize = sock->sk->sk_sndbuf; - - if (rxbufsize) - *rxbufsize = sock->sk->sk_rcvbuf; - - return 0; -} -EXPORT_SYMBOL(lnet_sock_getbuf); - -int -lnet_sock_listen(struct socket **sockp, __u32 local_ip, int local_port, - int backlog) -{ - int fatal; - int rc; - - rc = lnet_sock_create(sockp, &fatal, local_ip, local_port); - if (rc) { - if (!fatal) - CERROR("Can't create socket: port %d already in use\n", - local_port); - return rc; - } - - rc = kernel_listen(*sockp, backlog); - if (!rc) - return 0; - - CERROR("Can't set listen backlog %d: %d\n", backlog, rc); - sock_release(*sockp); - return rc; -} - -int -lnet_sock_accept(struct socket **newsockp, struct socket *sock) -{ - wait_queue_entry_t wait; - struct socket *newsock; - int rc; - - /* - * XXX this should add a ref to sock->ops->owner, if - * TCP could be a module - */ - rc = sock_create_lite(PF_PACKET, sock->type, IPPROTO_TCP, &newsock); - if (rc) { - CERROR("Can't allocate socket\n"); - return rc; - } - - newsock->ops = sock->ops; - - rc = sock->ops->accept(sock, newsock, O_NONBLOCK, false); - if (rc == -EAGAIN) { - /* Nothing ready, so wait for activity */ - init_waitqueue_entry(&wait, current); - add_wait_queue(sk_sleep(sock->sk), &wait); - set_current_state(TASK_INTERRUPTIBLE); - schedule(); - remove_wait_queue(sk_sleep(sock->sk), &wait); - rc = sock->ops->accept(sock, newsock, O_NONBLOCK, false); - } - - if (rc) - goto failed; - - *newsockp = newsock; - return 0; - -failed: - sock_release(newsock); - return rc; -} - -int -lnet_sock_connect(struct socket **sockp, int *fatal, __u32 local_ip, - int local_port, __u32 peer_ip, int peer_port) -{ - struct sockaddr_in srvaddr; - int rc; - - rc = lnet_sock_create(sockp, fatal, local_ip, 
local_port); - if (rc) - return rc; - - memset(&srvaddr, 0, sizeof(srvaddr)); - srvaddr.sin_family = AF_INET; - srvaddr.sin_port = htons(peer_port); - srvaddr.sin_addr.s_addr = htonl(peer_ip); - - rc = kernel_connect(*sockp, (struct sockaddr *)&srvaddr, - sizeof(srvaddr), 0); - if (!rc) - return 0; - - /* - * EADDRNOTAVAIL probably means we're already connected to the same - * peer/port on the same local port on a differently typed - * connection. Let our caller retry with a different local - * port... - */ - *fatal = !(rc == -EADDRNOTAVAIL); - - CDEBUG_LIMIT(*fatal ? D_NETERROR : D_NET, - "Error %d connecting %pI4h/%d -> %pI4h/%d\n", rc, - &local_ip, local_port, &peer_ip, peer_port); - - sock_release(*sockp); - return rc; -} diff --git a/drivers/staging/lustre/lnet/lnet/lo.c b/drivers/staging/lustre/lnet/lnet/lo.c deleted file mode 100644 index 7456b989e451..000000000000 --- a/drivers/staging/lustre/lnet/lnet/lo.c +++ /dev/null @@ -1,105 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -/* - * GPL HEADER START - * - * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 only, - * as published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, but - * WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * General Public License version 2 for more details (a copy is included - * in the LICENSE file that accompanied this code). - * - * You should have received a copy of the GNU General Public License - * version 2 along with this program; If not, see - * http://www.gnu.org/licenses/gpl-2.0.html - * - * GPL HEADER END - */ -/* - * Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved. - * Use is subject to license terms. 
- */ -/* - * This file is part of Lustre, http://www.lustre.org/ - * Lustre is a trademark of Sun Microsystems, Inc. - */ - -#define DEBUG_SUBSYSTEM S_LNET - -#include <linux/lnet/lib-lnet.h> - -static int -lolnd_send(struct lnet_ni *ni, void *private, struct lnet_msg *lntmsg) -{ - LASSERT(!lntmsg->msg_routing); - LASSERT(!lntmsg->msg_target_is_router); - - return lnet_parse(ni, &lntmsg->msg_hdr, ni->ni_nid, lntmsg, 0); -} - -static int -lolnd_recv(struct lnet_ni *ni, void *private, struct lnet_msg *lntmsg, - int delayed, struct iov_iter *to, unsigned int rlen) -{ - struct lnet_msg *sendmsg = private; - - if (lntmsg) { /* not discarding */ - if (sendmsg->msg_iov) - lnet_copy_iov2iter(to, - sendmsg->msg_niov, - sendmsg->msg_iov, - sendmsg->msg_offset, - iov_iter_count(to)); - else - lnet_copy_kiov2iter(to, - sendmsg->msg_niov, - sendmsg->msg_kiov, - sendmsg->msg_offset, - iov_iter_count(to)); - - lnet_finalize(ni, lntmsg, 0); - } - - lnet_finalize(ni, sendmsg, 0); - return 0; -} - -static int lolnd_instanced; - -static void -lolnd_shutdown(struct lnet_ni *ni) -{ - CDEBUG(D_NET, "shutdown\n"); - LASSERT(lolnd_instanced); - - lolnd_instanced = 0; -} - -static int -lolnd_startup(struct lnet_ni *ni) -{ - LASSERT(ni->ni_lnd == &the_lolnd); - LASSERT(!lolnd_instanced); - lolnd_instanced = 1; - - return 0; -} - -struct lnet_lnd the_lolnd = { - /* .lnd_list = */ {&the_lolnd.lnd_list, &the_lolnd.lnd_list}, - /* .lnd_refcount = */ 0, - /* .lnd_type = */ LOLND, - /* .lnd_startup = */ lolnd_startup, - /* .lnd_shutdown = */ lolnd_shutdown, - /* .lnt_ctl = */ NULL, - /* .lnd_send = */ lolnd_send, - /* .lnd_recv = */ lolnd_recv, - /* .lnd_eager_recv = */ NULL, - /* .lnd_notify = */ NULL, - /* .lnd_accept = */ NULL -}; diff --git a/drivers/staging/lustre/lnet/lnet/module.c b/drivers/staging/lustre/lnet/lnet/module.c deleted file mode 100644 index c0c4723f72fd..000000000000 --- a/drivers/staging/lustre/lnet/lnet/module.c +++ /dev/null @@ -1,223 +0,0 @@ -// SPDX-License-Identifier: 
GPL-2.0 -/* - * GPL HEADER START - * - * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 only, - * as published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, but - * WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * General Public License version 2 for more details (a copy is included - * in the LICENSE file that accompanied this code). - * - * You should have received a copy of the GNU General Public License - * version 2 along with this program; If not, see - * http://www.gnu.org/licenses/gpl-2.0.html - * - * GPL HEADER END - */ -/* - * Copyright (c) 2004, 2010, Oracle and/or its affiliates. All rights reserved. - * Use is subject to license terms. - * - * Copyright (c) 2012, 2015, Intel Corporation. - */ -/* - * This file is part of Lustre, http://www.lustre.org/ - * Lustre is a trademark of Sun Microsystems, Inc. 
- */ - -#define DEBUG_SUBSYSTEM S_LNET - -#include <linux/lnet/lib-lnet.h> -#include <uapi/linux/lnet/lnet-dlc.h> - -static int config_on_load; -module_param(config_on_load, int, 0444); -MODULE_PARM_DESC(config_on_load, "configure network at module load"); - -static struct mutex lnet_config_mutex; - -static int -lnet_configure(void *arg) -{ - /* 'arg' only there so I can be passed to cfs_create_thread() */ - int rc = 0; - - mutex_lock(&lnet_config_mutex); - - if (!the_lnet.ln_niinit_self) { - rc = try_module_get(THIS_MODULE); - - if (rc != 1) - goto out; - - rc = LNetNIInit(LNET_PID_LUSTRE); - if (rc >= 0) { - the_lnet.ln_niinit_self = 1; - rc = 0; - } else { - module_put(THIS_MODULE); - } - } - -out: - mutex_unlock(&lnet_config_mutex); - return rc; -} - -static int -lnet_unconfigure(void) -{ - int refcount; - - mutex_lock(&lnet_config_mutex); - - if (the_lnet.ln_niinit_self) { - the_lnet.ln_niinit_self = 0; - LNetNIFini(); - module_put(THIS_MODULE); - } - - mutex_lock(&the_lnet.ln_api_mutex); - refcount = the_lnet.ln_refcount; - mutex_unlock(&the_lnet.ln_api_mutex); - - mutex_unlock(&lnet_config_mutex); - return !refcount ? 
0 : -EBUSY; -} - -static int -lnet_dyn_configure(struct libcfs_ioctl_hdr *hdr) -{ - struct lnet_ioctl_config_data *conf = - (struct lnet_ioctl_config_data *)hdr; - int rc; - - if (conf->cfg_hdr.ioc_len < sizeof(*conf)) - return -EINVAL; - - mutex_lock(&lnet_config_mutex); - if (!the_lnet.ln_niinit_self) { - rc = -EINVAL; - goto out_unlock; - } - rc = lnet_dyn_add_ni(LNET_PID_LUSTRE, conf); -out_unlock: - mutex_unlock(&lnet_config_mutex); - - return rc; -} - -static int -lnet_dyn_unconfigure(struct libcfs_ioctl_hdr *hdr) -{ - struct lnet_ioctl_config_data *conf = - (struct lnet_ioctl_config_data *)hdr; - int rc; - - if (conf->cfg_hdr.ioc_len < sizeof(*conf)) - return -EINVAL; - - mutex_lock(&lnet_config_mutex); - if (!the_lnet.ln_niinit_self) { - rc = -EINVAL; - goto out_unlock; - } - rc = lnet_dyn_del_ni(conf->cfg_net); -out_unlock: - mutex_unlock(&lnet_config_mutex); - - return rc; -} - -static int -lnet_ioctl(unsigned int cmd, struct libcfs_ioctl_hdr *hdr) -{ - int rc; - - switch (cmd) { - case IOC_LIBCFS_CONFIGURE: { - struct libcfs_ioctl_data *data = - (struct libcfs_ioctl_data *)hdr; - - if (data->ioc_hdr.ioc_len < sizeof(*data)) - return -EINVAL; - - the_lnet.ln_nis_from_mod_params = data->ioc_flags; - return lnet_configure(NULL); - } - - case IOC_LIBCFS_UNCONFIGURE: - return lnet_unconfigure(); - - case IOC_LIBCFS_ADD_NET: - return lnet_dyn_configure(hdr); - - case IOC_LIBCFS_DEL_NET: - return lnet_dyn_unconfigure(hdr); - - default: - /* - * Passing LNET_PID_ANY only gives me a ref if the net is up - * already; I'll need it to ensure the net can't go down while - * I'm called into it - */ - rc = LNetNIInit(LNET_PID_ANY); - if (rc >= 0) { - rc = LNetCtl(cmd, hdr); - LNetNIFini(); - } - return rc; - } -} - -static DECLARE_IOCTL_HANDLER(lnet_ioctl_handler, lnet_ioctl); - -static int __init lnet_init(void) -{ - int rc; - - mutex_init(&lnet_config_mutex); - - rc = lnet_lib_init(); - if (rc) { - CERROR("lnet_lib_init: error %d\n", rc); - return rc; - } - - rc = 
libcfs_register_ioctl(&lnet_ioctl_handler); - LASSERT(!rc); - - if (config_on_load) { - /* - * Have to schedule a separate thread to avoid deadlocking - * in modload - */ - (void)kthread_run(lnet_configure, NULL, "lnet_initd"); - } - - return 0; -} - -static void __exit lnet_exit(void) -{ - int rc; - - rc = libcfs_deregister_ioctl(&lnet_ioctl_handler); - LASSERT(!rc); - - lnet_lib_exit(); -} - -MODULE_AUTHOR("OpenSFS, Inc. <http://www.lustre.org/>"); -MODULE_DESCRIPTION("Lustre Networking layer"); -MODULE_VERSION(LNET_VERSION); -MODULE_LICENSE("GPL"); - -module_init(lnet_init); -module_exit(lnet_exit); diff --git a/drivers/staging/lustre/lnet/lnet/net_fault.c b/drivers/staging/lustre/lnet/lnet/net_fault.c deleted file mode 100644 index a63b7941d435..000000000000 --- a/drivers/staging/lustre/lnet/lnet/net_fault.c +++ /dev/null @@ -1,1023 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -/* - * GPL HEADER START - * - * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 only, - * as published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, but - * WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * General Public License version 2 for more details (a copy is included - * in the LICENSE file that accompanied this code). - * - * You should have received a copy of the GNU General Public License - * version 2 along with this program; If not, see - * http://www.gnu.org/licenses/gpl-2.0.html - * - * GPL HEADER END - */ -/* - * Copyright (c) 2014, Intel Corporation. - */ -/* - * This file is part of Lustre, http://www.lustre.org/ - * Lustre is a trademark of Seagate, Inc. 
- * - * lnet/lnet/net_fault.c - * - * Lustre network fault simulation - * - * Author: liang.zhen@intel.com - */ - -#define DEBUG_SUBSYSTEM S_LNET - -#include <linux/lnet/lib-lnet.h> -#include <uapi/linux/lnet/lnetctl.h> - -#define LNET_MSG_MASK (LNET_PUT_BIT | LNET_ACK_BIT | \ - LNET_GET_BIT | LNET_REPLY_BIT) - -struct lnet_drop_rule { - /** link chain on the_lnet.ln_drop_rules */ - struct list_head dr_link; - /** attributes of this rule */ - struct lnet_fault_attr dr_attr; - /** lock to protect \a dr_drop_at and \a dr_stat */ - spinlock_t dr_lock; - /** - * the message sequence to drop, which means message is dropped when - * dr_stat.drs_count == dr_drop_at - */ - unsigned long dr_drop_at; - /** - * seconds to drop the next message, it's exclusive with dr_drop_at - */ - unsigned long dr_drop_time; - /** baseline to caculate dr_drop_time */ - unsigned long dr_time_base; - /** statistic of dropped messages */ - struct lnet_fault_stat dr_stat; -}; - -static bool -lnet_fault_nid_match(lnet_nid_t nid, lnet_nid_t msg_nid) -{ - if (nid == msg_nid || nid == LNET_NID_ANY) - return true; - - if (LNET_NIDNET(nid) != LNET_NIDNET(msg_nid)) - return false; - - /* 255.255.255.255@net is wildcard for all addresses in a network */ - return LNET_NIDADDR(nid) == LNET_NIDADDR(LNET_NID_ANY); -} - -static bool -lnet_fault_attr_match(struct lnet_fault_attr *attr, lnet_nid_t src, - lnet_nid_t dst, unsigned int type, unsigned int portal) -{ - if (!lnet_fault_nid_match(attr->fa_src, src) || - !lnet_fault_nid_match(attr->fa_dst, dst)) - return false; - - if (!(attr->fa_msg_mask & (1 << type))) - return false; - - /** - * NB: ACK and REPLY have no portal, but they should have been - * rejected by message mask - */ - if (attr->fa_ptl_mask && /* has portal filter */ - !(attr->fa_ptl_mask & (1ULL << portal))) - return false; - - return true; -} - -static int -lnet_fault_attr_validate(struct lnet_fault_attr *attr) -{ - if (!attr->fa_msg_mask) - attr->fa_msg_mask = LNET_MSG_MASK; /* all message 
types */ - - if (!attr->fa_ptl_mask) /* no portal filter */ - return 0; - - /* NB: only PUT and GET can be filtered if portal filter has been set */ - attr->fa_msg_mask &= LNET_GET_BIT | LNET_PUT_BIT; - if (!attr->fa_msg_mask) { - CDEBUG(D_NET, "can't find valid message type bits %x\n", - attr->fa_msg_mask); - return -EINVAL; - } - return 0; -} - -static void -lnet_fault_stat_inc(struct lnet_fault_stat *stat, unsigned int type) -{ - /* NB: fs_counter is NOT updated by this function */ - switch (type) { - case LNET_MSG_PUT: - stat->fs_put++; - return; - case LNET_MSG_ACK: - stat->fs_ack++; - return; - case LNET_MSG_GET: - stat->fs_get++; - return; - case LNET_MSG_REPLY: - stat->fs_reply++; - return; - } -} - -/** - * LNet message drop simulation - */ - -/** - * Add a new drop rule to LNet - * There is no check for duplicated drop rule, all rules will be checked for - * incoming message. - */ -static int -lnet_drop_rule_add(struct lnet_fault_attr *attr) -{ - struct lnet_drop_rule *rule; - - if (attr->u.drop.da_rate & attr->u.drop.da_interval) { - CDEBUG(D_NET, "please provide either drop rate or drop interval, but not both at the same time %d/%d\n", - attr->u.drop.da_rate, attr->u.drop.da_interval); - return -EINVAL; - } - - if (lnet_fault_attr_validate(attr)) - return -EINVAL; - - rule = kzalloc(sizeof(*rule), GFP_NOFS); - if (!rule) - return -ENOMEM; - - spin_lock_init(&rule->dr_lock); - - rule->dr_attr = *attr; - if (attr->u.drop.da_interval) { - rule->dr_time_base = cfs_time_shift(attr->u.drop.da_interval); - rule->dr_drop_time = cfs_time_shift( - prandom_u32_max(attr->u.drop.da_interval)); - } else { - rule->dr_drop_at = prandom_u32_max(attr->u.drop.da_rate); - } - - lnet_net_lock(LNET_LOCK_EX); - list_add(&rule->dr_link, &the_lnet.ln_drop_rules); - lnet_net_unlock(LNET_LOCK_EX); - - CDEBUG(D_NET, "Added drop rule: src %s, dst %s, rate %d, interval %d\n", - libcfs_nid2str(attr->fa_src), libcfs_nid2str(attr->fa_src), - attr->u.drop.da_rate, 
attr->u.drop.da_interval); - return 0; -} - -/** - * Remove matched drop rules from lnet, all rules that can match \a src and - * \a dst will be removed. - * If \a src is zero, then all rules have \a dst as destination will be remove - * If \a dst is zero, then all rules have \a src as source will be removed - * If both of them are zero, all rules will be removed - */ -static int -lnet_drop_rule_del(lnet_nid_t src, lnet_nid_t dst) -{ - struct lnet_drop_rule *rule; - struct lnet_drop_rule *tmp; - struct list_head zombies; - int n = 0; - - INIT_LIST_HEAD(&zombies); - - lnet_net_lock(LNET_LOCK_EX); - list_for_each_entry_safe(rule, tmp, &the_lnet.ln_drop_rules, dr_link) { - if (rule->dr_attr.fa_src != src && src) - continue; - - if (rule->dr_attr.fa_dst != dst && dst) - continue; - - list_move(&rule->dr_link, &zombies); - } - lnet_net_unlock(LNET_LOCK_EX); - - list_for_each_entry_safe(rule, tmp, &zombies, dr_link) { - CDEBUG(D_NET, "Remove drop rule: src %s->dst: %s (1/%d, %d)\n", - libcfs_nid2str(rule->dr_attr.fa_src), - libcfs_nid2str(rule->dr_attr.fa_dst), - rule->dr_attr.u.drop.da_rate, - rule->dr_attr.u.drop.da_interval); - - list_del(&rule->dr_link); - kfree(rule); - n++; - } - - return n; -} - -/** - * List drop rule at position of \a pos - */ -static int -lnet_drop_rule_list(int pos, struct lnet_fault_attr *attr, - struct lnet_fault_stat *stat) -{ - struct lnet_drop_rule *rule; - int cpt; - int i = 0; - int rc = -ENOENT; - - cpt = lnet_net_lock_current(); - list_for_each_entry(rule, &the_lnet.ln_drop_rules, dr_link) { - if (i++ < pos) - continue; - - spin_lock(&rule->dr_lock); - *attr = rule->dr_attr; - *stat = rule->dr_stat; - spin_unlock(&rule->dr_lock); - rc = 0; - break; - } - - lnet_net_unlock(cpt); - return rc; -} - -/** - * reset counters for all drop rules - */ -static void -lnet_drop_rule_reset(void) -{ - struct lnet_drop_rule *rule; - int cpt; - - cpt = lnet_net_lock_current(); - - list_for_each_entry(rule, &the_lnet.ln_drop_rules, dr_link) { - struct 
lnet_fault_attr *attr = &rule->dr_attr; - - spin_lock(&rule->dr_lock); - - memset(&rule->dr_stat, 0, sizeof(rule->dr_stat)); - if (attr->u.drop.da_rate) { - rule->dr_drop_at = prandom_u32_max(attr->u.drop.da_rate); - } else { - rule->dr_drop_time = cfs_time_shift( - prandom_u32_max(attr->u.drop.da_interval)); - rule->dr_time_base = cfs_time_shift(attr->u.drop.da_interval); - } - spin_unlock(&rule->dr_lock); - } - - lnet_net_unlock(cpt); -} - -/** - * check source/destination NID, portal, message type and drop rate, - * decide whether should drop this message or not - */ -static bool -drop_rule_match(struct lnet_drop_rule *rule, lnet_nid_t src, - lnet_nid_t dst, unsigned int type, unsigned int portal) -{ - struct lnet_fault_attr *attr = &rule->dr_attr; - bool drop; - - if (!lnet_fault_attr_match(attr, src, dst, type, portal)) - return false; - - /* match this rule, check drop rate now */ - spin_lock(&rule->dr_lock); - if (rule->dr_drop_time) { /* time based drop */ - unsigned long now = cfs_time_current(); - - rule->dr_stat.fs_count++; - drop = cfs_time_aftereq(now, rule->dr_drop_time); - if (drop) { - if (cfs_time_after(now, rule->dr_time_base)) - rule->dr_time_base = now; - - rule->dr_drop_time = rule->dr_time_base + - prandom_u32_max(attr->u.drop.da_interval) * HZ; - rule->dr_time_base += attr->u.drop.da_interval * HZ; - - CDEBUG(D_NET, "Drop Rule %s->%s: next drop : %lu\n", - libcfs_nid2str(attr->fa_src), - libcfs_nid2str(attr->fa_dst), - rule->dr_drop_time); - } - - } else { /* rate based drop */ - drop = rule->dr_stat.fs_count++ == rule->dr_drop_at; - - if (!do_div(rule->dr_stat.fs_count, attr->u.drop.da_rate)) { - rule->dr_drop_at = rule->dr_stat.fs_count + - prandom_u32_max(attr->u.drop.da_rate); - CDEBUG(D_NET, "Drop Rule %s->%s: next drop: %lu\n", - libcfs_nid2str(attr->fa_src), - libcfs_nid2str(attr->fa_dst), rule->dr_drop_at); - } - } - - if (drop) { /* drop this message, update counters */ - lnet_fault_stat_inc(&rule->dr_stat, type); - 
rule->dr_stat.u.drop.ds_dropped++; - } - - spin_unlock(&rule->dr_lock); - return drop; -} - -/** - * Check if message from \a src to \a dst can match any existed drop rule - */ -bool -lnet_drop_rule_match(struct lnet_hdr *hdr) -{ - struct lnet_drop_rule *rule; - lnet_nid_t src = le64_to_cpu(hdr->src_nid); - lnet_nid_t dst = le64_to_cpu(hdr->dest_nid); - unsigned int typ = le32_to_cpu(hdr->type); - unsigned int ptl = -1; - bool drop = false; - int cpt; - - /** - * NB: if Portal is specified, then only PUT and GET will be - * filtered by drop rule - */ - if (typ == LNET_MSG_PUT) - ptl = le32_to_cpu(hdr->msg.put.ptl_index); - else if (typ == LNET_MSG_GET) - ptl = le32_to_cpu(hdr->msg.get.ptl_index); - - cpt = lnet_net_lock_current(); - list_for_each_entry(rule, &the_lnet.ln_drop_rules, dr_link) { - drop = drop_rule_match(rule, src, dst, typ, ptl); - if (drop) - break; - } - - lnet_net_unlock(cpt); - return drop; -} - -/** - * LNet Delay Simulation - */ -/** timestamp (second) to send delayed message */ -#define msg_delay_send msg_ev.hdr_data - -struct lnet_delay_rule { - /** link chain on the_lnet.ln_delay_rules */ - struct list_head dl_link; - /** link chain on delay_dd.dd_sched_rules */ - struct list_head dl_sched_link; - /** attributes of this rule */ - struct lnet_fault_attr dl_attr; - /** lock to protect \a below members */ - spinlock_t dl_lock; - /** refcount of delay rule */ - atomic_t dl_refcount; - /** - * the message sequence to delay, which means message is delayed when - * dl_stat.fs_count == dl_delay_at - */ - unsigned long dl_delay_at; - /** - * seconds to delay the next message, it's exclusive with dl_delay_at - */ - unsigned long dl_delay_time; - /** baseline to caculate dl_delay_time */ - unsigned long dl_time_base; - /** jiffies to send the next delayed message */ - unsigned long dl_msg_send; - /** delayed message list */ - struct list_head dl_msg_list; - /** statistic of delayed messages */ - struct lnet_fault_stat dl_stat; - /** timer to wakeup 
delay_daemon */ - struct timer_list dl_timer; -}; - -struct delay_daemon_data { - /** serialise rule add/remove */ - struct mutex dd_mutex; - /** protect rules on \a dd_sched_rules */ - spinlock_t dd_lock; - /** scheduled delay rules (by timer) */ - struct list_head dd_sched_rules; - /** daemon thread sleeps at here */ - wait_queue_head_t dd_waitq; - /** controller (lctl command) wait at here */ - wait_queue_head_t dd_ctl_waitq; - /** daemon is running */ - unsigned int dd_running; - /** daemon stopped */ - unsigned int dd_stopped; -}; - -static struct delay_daemon_data delay_dd; - -static unsigned long -round_timeout(unsigned long timeout) -{ - return (unsigned int)rounddown(timeout, HZ) + HZ; -} - -static void -delay_rule_decref(struct lnet_delay_rule *rule) -{ - if (atomic_dec_and_test(&rule->dl_refcount)) { - LASSERT(list_empty(&rule->dl_sched_link)); - LASSERT(list_empty(&rule->dl_msg_list)); - LASSERT(list_empty(&rule->dl_link)); - - kfree(rule); - } -} - -/** - * check source/destination NID, portal, message type and delay rate, - * decide whether should delay this message or not - */ -static bool -delay_rule_match(struct lnet_delay_rule *rule, lnet_nid_t src, - lnet_nid_t dst, unsigned int type, unsigned int portal, - struct lnet_msg *msg) -{ - struct lnet_fault_attr *attr = &rule->dl_attr; - bool delay; - - if (!lnet_fault_attr_match(attr, src, dst, type, portal)) - return false; - - /* match this rule, check delay rate now */ - spin_lock(&rule->dl_lock); - if (rule->dl_delay_time) { /* time based delay */ - unsigned long now = cfs_time_current(); - - rule->dl_stat.fs_count++; - delay = cfs_time_aftereq(now, rule->dl_delay_time); - if (delay) { - if (cfs_time_after(now, rule->dl_time_base)) - rule->dl_time_base = now; - - rule->dl_delay_time = rule->dl_time_base + - prandom_u32_max(attr->u.delay.la_interval) * HZ; - rule->dl_time_base += attr->u.delay.la_interval * HZ; - - CDEBUG(D_NET, "Delay Rule %s->%s: next delay : %lu\n", - 
libcfs_nid2str(attr->fa_src), - libcfs_nid2str(attr->fa_dst), - rule->dl_delay_time); - } - - } else { /* rate based delay */ - delay = rule->dl_stat.fs_count++ == rule->dl_delay_at; - /* generate the next random rate sequence */ - if (!do_div(rule->dl_stat.fs_count, attr->u.delay.la_rate)) { - rule->dl_delay_at = rule->dl_stat.fs_count + - prandom_u32_max(attr->u.delay.la_rate); - CDEBUG(D_NET, "Delay Rule %s->%s: next delay: %lu\n", - libcfs_nid2str(attr->fa_src), - libcfs_nid2str(attr->fa_dst), rule->dl_delay_at); - } - } - - if (!delay) { - spin_unlock(&rule->dl_lock); - return false; - } - - /* delay this message, update counters */ - lnet_fault_stat_inc(&rule->dl_stat, type); - rule->dl_stat.u.delay.ls_delayed++; - - list_add_tail(&msg->msg_list, &rule->dl_msg_list); - msg->msg_delay_send = round_timeout( - cfs_time_shift(attr->u.delay.la_latency)); - if (rule->dl_msg_send == -1) { - rule->dl_msg_send = msg->msg_delay_send; - mod_timer(&rule->dl_timer, rule->dl_msg_send); - } - - spin_unlock(&rule->dl_lock); - return true; -} - -/** - * check if \a msg can match any Delay Rule, receiving of this message - * will be delayed if there is a match. 
- */ -bool -lnet_delay_rule_match_locked(struct lnet_hdr *hdr, struct lnet_msg *msg) -{ - struct lnet_delay_rule *rule; - lnet_nid_t src = le64_to_cpu(hdr->src_nid); - lnet_nid_t dst = le64_to_cpu(hdr->dest_nid); - unsigned int typ = le32_to_cpu(hdr->type); - unsigned int ptl = -1; - - /* NB: called with hold of lnet_net_lock */ - - /** - * NB: if Portal is specified, then only PUT and GET will be - * filtered by delay rule - */ - if (typ == LNET_MSG_PUT) - ptl = le32_to_cpu(hdr->msg.put.ptl_index); - else if (typ == LNET_MSG_GET) - ptl = le32_to_cpu(hdr->msg.get.ptl_index); - - list_for_each_entry(rule, &the_lnet.ln_delay_rules, dl_link) { - if (delay_rule_match(rule, src, dst, typ, ptl, msg)) - return true; - } - - return false; -} - -/** check out delayed messages for send */ -static void -delayed_msg_check(struct lnet_delay_rule *rule, bool all, - struct list_head *msg_list) -{ - struct lnet_msg *msg; - struct lnet_msg *tmp; - unsigned long now = cfs_time_current(); - - if (!all && rule->dl_msg_send > now) - return; - - spin_lock(&rule->dl_lock); - list_for_each_entry_safe(msg, tmp, &rule->dl_msg_list, msg_list) { - if (!all && msg->msg_delay_send > now) - break; - - msg->msg_delay_send = 0; - list_move_tail(&msg->msg_list, msg_list); - } - - if (list_empty(&rule->dl_msg_list)) { - del_timer(&rule->dl_timer); - rule->dl_msg_send = -1; - - } else if (!list_empty(msg_list)) { - /* - * dequeued some timedout messages, update timer for the - * next delayed message on rule - */ - msg = list_entry(rule->dl_msg_list.next, - struct lnet_msg, msg_list); - rule->dl_msg_send = msg->msg_delay_send; - mod_timer(&rule->dl_timer, rule->dl_msg_send); - } - spin_unlock(&rule->dl_lock); -} - -static void -delayed_msg_process(struct list_head *msg_list, bool drop) -{ - struct lnet_msg *msg; - - while (!list_empty(msg_list)) { - struct lnet_ni *ni; - int cpt; - int rc; - - msg = list_entry(msg_list->next, struct lnet_msg, msg_list); - LASSERT(msg->msg_rxpeer); - - ni = 
msg->msg_rxpeer->lp_ni; - cpt = msg->msg_rx_cpt; - - list_del_init(&msg->msg_list); - if (drop) { - rc = -ECANCELED; - - } else if (!msg->msg_routing) { - rc = lnet_parse_local(ni, msg); - if (!rc) - continue; - - } else { - lnet_net_lock(cpt); - rc = lnet_parse_forward_locked(ni, msg); - lnet_net_unlock(cpt); - - switch (rc) { - case LNET_CREDIT_OK: - lnet_ni_recv(ni, msg->msg_private, msg, 0, - 0, msg->msg_len, msg->msg_len); - /* fall through */ - case LNET_CREDIT_WAIT: - continue; - default: /* failures */ - break; - } - } - - lnet_drop_message(ni, cpt, msg->msg_private, msg->msg_len); - lnet_finalize(ni, msg, rc); - } -} - -/** - * Process delayed messages for scheduled rules - * This function can either be called by delay_rule_daemon, or by lnet_finalise - */ -void -lnet_delay_rule_check(void) -{ - struct lnet_delay_rule *rule; - struct list_head msgs; - - INIT_LIST_HEAD(&msgs); - while (1) { - if (list_empty(&delay_dd.dd_sched_rules)) - break; - - spin_lock_bh(&delay_dd.dd_lock); - if (list_empty(&delay_dd.dd_sched_rules)) { - spin_unlock_bh(&delay_dd.dd_lock); - break; - } - - rule = list_entry(delay_dd.dd_sched_rules.next, - struct lnet_delay_rule, dl_sched_link); - list_del_init(&rule->dl_sched_link); - spin_unlock_bh(&delay_dd.dd_lock); - - delayed_msg_check(rule, false, &msgs); - delay_rule_decref(rule); /* -1 for delay_dd.dd_sched_rules */ - } - - if (!list_empty(&msgs)) - delayed_msg_process(&msgs, false); -} - -/** daemon thread to handle delayed messages */ -static int -lnet_delay_rule_daemon(void *arg) -{ - delay_dd.dd_running = 1; - wake_up(&delay_dd.dd_ctl_waitq); - - while (delay_dd.dd_running) { - wait_event_interruptible(delay_dd.dd_waitq, - !delay_dd.dd_running || - !list_empty(&delay_dd.dd_sched_rules)); - lnet_delay_rule_check(); - } - - /* in case more rules have been enqueued after my last check */ - lnet_delay_rule_check(); - delay_dd.dd_stopped = 1; - wake_up(&delay_dd.dd_ctl_waitq); - - return 0; -} - -static void 
-delay_timer_cb(struct timer_list *t) -{ - struct lnet_delay_rule *rule = from_timer(rule, t, dl_timer); - - spin_lock_bh(&delay_dd.dd_lock); - if (list_empty(&rule->dl_sched_link) && delay_dd.dd_running) { - atomic_inc(&rule->dl_refcount); - list_add_tail(&rule->dl_sched_link, &delay_dd.dd_sched_rules); - wake_up(&delay_dd.dd_waitq); - } - spin_unlock_bh(&delay_dd.dd_lock); -} - -/** - * Add a new delay rule to LNet - * There is no check for duplicated delay rule, all rules will be checked for - * incoming message. - */ -int -lnet_delay_rule_add(struct lnet_fault_attr *attr) -{ - struct lnet_delay_rule *rule; - int rc = 0; - - if (attr->u.delay.la_rate & attr->u.delay.la_interval) { - CDEBUG(D_NET, "please provide either delay rate or delay interval, but not both at the same time %d/%d\n", - attr->u.delay.la_rate, attr->u.delay.la_interval); - return -EINVAL; - } - - if (!attr->u.delay.la_latency) { - CDEBUG(D_NET, "delay latency cannot be zero\n"); - return -EINVAL; - } - - if (lnet_fault_attr_validate(attr)) - return -EINVAL; - - rule = kzalloc(sizeof(*rule), GFP_NOFS); - if (!rule) - return -ENOMEM; - - mutex_lock(&delay_dd.dd_mutex); - if (!delay_dd.dd_running) { - struct task_struct *task; - - /** - * NB: although LND threads will process delayed message - * in lnet_finalize, but there is no guarantee that LND - * threads will be waken up if no other message needs to - * be handled. - * Only one daemon thread, performance is not the concern - * of this simualation module. 
- */ - task = kthread_run(lnet_delay_rule_daemon, NULL, "lnet_dd"); - if (IS_ERR(task)) { - rc = PTR_ERR(task); - goto failed; - } - wait_event(delay_dd.dd_ctl_waitq, delay_dd.dd_running); - } - - timer_setup(&rule->dl_timer, delay_timer_cb, 0); - - spin_lock_init(&rule->dl_lock); - INIT_LIST_HEAD(&rule->dl_msg_list); - INIT_LIST_HEAD(&rule->dl_sched_link); - - rule->dl_attr = *attr; - if (attr->u.delay.la_interval) { - rule->dl_time_base = cfs_time_shift(attr->u.delay.la_interval); - rule->dl_delay_time = cfs_time_shift( - prandom_u32_max(attr->u.delay.la_interval)); - } else { - rule->dl_delay_at = prandom_u32_max(attr->u.delay.la_rate); - } - - rule->dl_msg_send = -1; - - lnet_net_lock(LNET_LOCK_EX); - atomic_set(&rule->dl_refcount, 1); - list_add(&rule->dl_link, &the_lnet.ln_delay_rules); - lnet_net_unlock(LNET_LOCK_EX); - - CDEBUG(D_NET, "Added delay rule: src %s, dst %s, rate %d\n", - libcfs_nid2str(attr->fa_src), libcfs_nid2str(attr->fa_src), - attr->u.delay.la_rate); - - mutex_unlock(&delay_dd.dd_mutex); - return 0; -failed: - mutex_unlock(&delay_dd.dd_mutex); - kfree(rule); - return rc; -} - -/** - * Remove matched Delay Rules from lnet, if \a shutdown is true or both \a src - * and \a dst are zero, all rules will be removed, otherwise only matched rules - * will be removed. - * If \a src is zero, then all rules have \a dst as destination will be remove - * If \a dst is zero, then all rules have \a src as source will be removed - * - * When a delay rule is removed, all delayed messages of this rule will be - * processed immediately. 
- */ -int -lnet_delay_rule_del(lnet_nid_t src, lnet_nid_t dst, bool shutdown) -{ - struct lnet_delay_rule *rule; - struct lnet_delay_rule *tmp; - struct list_head rule_list; - struct list_head msg_list; - int n = 0; - bool cleanup; - - INIT_LIST_HEAD(&rule_list); - INIT_LIST_HEAD(&msg_list); - - if (shutdown) { - src = 0; - dst = 0; - } - - mutex_lock(&delay_dd.dd_mutex); - lnet_net_lock(LNET_LOCK_EX); - - list_for_each_entry_safe(rule, tmp, &the_lnet.ln_delay_rules, dl_link) { - if (rule->dl_attr.fa_src != src && src) - continue; - - if (rule->dl_attr.fa_dst != dst && dst) - continue; - - CDEBUG(D_NET, "Remove delay rule: src %s->dst: %s (1/%d, %d)\n", - libcfs_nid2str(rule->dl_attr.fa_src), - libcfs_nid2str(rule->dl_attr.fa_dst), - rule->dl_attr.u.delay.la_rate, - rule->dl_attr.u.delay.la_interval); - /* refcount is taken over by rule_list */ - list_move(&rule->dl_link, &rule_list); - } - - /* check if we need to shutdown delay_daemon */ - cleanup = list_empty(&the_lnet.ln_delay_rules) && - !list_empty(&rule_list); - lnet_net_unlock(LNET_LOCK_EX); - - list_for_each_entry_safe(rule, tmp, &rule_list, dl_link) { - list_del_init(&rule->dl_link); - - del_timer_sync(&rule->dl_timer); - delayed_msg_check(rule, true, &msg_list); - delay_rule_decref(rule); /* -1 for the_lnet.ln_delay_rules */ - n++; - } - - if (cleanup) { /* no more delay rule, shutdown delay_daemon */ - LASSERT(delay_dd.dd_running); - delay_dd.dd_running = 0; - wake_up(&delay_dd.dd_waitq); - - while (!delay_dd.dd_stopped) - wait_event(delay_dd.dd_ctl_waitq, delay_dd.dd_stopped); - } - mutex_unlock(&delay_dd.dd_mutex); - - if (!list_empty(&msg_list)) - delayed_msg_process(&msg_list, shutdown); - - return n; -} - -/** - * List Delay Rule at position of \a pos - */ -int -lnet_delay_rule_list(int pos, struct lnet_fault_attr *attr, - struct lnet_fault_stat *stat) -{ - struct lnet_delay_rule *rule; - int cpt; - int i = 0; - int rc = -ENOENT; - - cpt = lnet_net_lock_current(); - list_for_each_entry(rule, 
&the_lnet.ln_delay_rules, dl_link) { - if (i++ < pos) - continue; - - spin_lock(&rule->dl_lock); - *attr = rule->dl_attr; - *stat = rule->dl_stat; - spin_unlock(&rule->dl_lock); - rc = 0; - break; - } - - lnet_net_unlock(cpt); - return rc; -} - -/** - * reset counters for all Delay Rules - */ -void -lnet_delay_rule_reset(void) -{ - struct lnet_delay_rule *rule; - int cpt; - - cpt = lnet_net_lock_current(); - - list_for_each_entry(rule, &the_lnet.ln_delay_rules, dl_link) { - struct lnet_fault_attr *attr = &rule->dl_attr; - - spin_lock(&rule->dl_lock); - - memset(&rule->dl_stat, 0, sizeof(rule->dl_stat)); - if (attr->u.delay.la_rate) { - rule->dl_delay_at = prandom_u32_max(attr->u.delay.la_rate); - } else { - rule->dl_delay_time = - cfs_time_shift(prandom_u32_max( - attr->u.delay.la_interval)); - rule->dl_time_base = cfs_time_shift(attr->u.delay.la_interval); - } - spin_unlock(&rule->dl_lock); - } - - lnet_net_unlock(cpt); -} - -int -lnet_fault_ctl(int opc, struct libcfs_ioctl_data *data) -{ - struct lnet_fault_attr *attr; - struct lnet_fault_stat *stat; - - attr = (struct lnet_fault_attr *)data->ioc_inlbuf1; - - switch (opc) { - default: - return -EINVAL; - - case LNET_CTL_DROP_ADD: - if (!attr) - return -EINVAL; - - return lnet_drop_rule_add(attr); - - case LNET_CTL_DROP_DEL: - if (!attr) - return -EINVAL; - - data->ioc_count = lnet_drop_rule_del(attr->fa_src, - attr->fa_dst); - return 0; - - case LNET_CTL_DROP_RESET: - lnet_drop_rule_reset(); - return 0; - - case LNET_CTL_DROP_LIST: - stat = (struct lnet_fault_stat *)data->ioc_inlbuf2; - if (!attr || !stat) - return -EINVAL; - - return lnet_drop_rule_list(data->ioc_count, attr, stat); - - case LNET_CTL_DELAY_ADD: - if (!attr) - return -EINVAL; - - return lnet_delay_rule_add(attr); - - case LNET_CTL_DELAY_DEL: - if (!attr) - return -EINVAL; - - data->ioc_count = lnet_delay_rule_del(attr->fa_src, - attr->fa_dst, false); - return 0; - - case LNET_CTL_DELAY_RESET: - lnet_delay_rule_reset(); - return 0; - - case 
LNET_CTL_DELAY_LIST: - stat = (struct lnet_fault_stat *)data->ioc_inlbuf2; - if (!attr || !stat) - return -EINVAL; - - return lnet_delay_rule_list(data->ioc_count, attr, stat); - } -} - -int -lnet_fault_init(void) -{ - BUILD_BUG_ON(LNET_PUT_BIT != 1 << LNET_MSG_PUT); - BUILD_BUG_ON(LNET_ACK_BIT != 1 << LNET_MSG_ACK); - BUILD_BUG_ON(LNET_GET_BIT != 1 << LNET_MSG_GET); - BUILD_BUG_ON(LNET_REPLY_BIT != 1 << LNET_MSG_REPLY); - - mutex_init(&delay_dd.dd_mutex); - spin_lock_init(&delay_dd.dd_lock); - init_waitqueue_head(&delay_dd.dd_waitq); - init_waitqueue_head(&delay_dd.dd_ctl_waitq); - INIT_LIST_HEAD(&delay_dd.dd_sched_rules); - - return 0; -} - -void -lnet_fault_fini(void) -{ - lnet_drop_rule_del(0, 0); - lnet_delay_rule_del(0, 0, true); - - LASSERT(list_empty(&the_lnet.ln_drop_rules)); - LASSERT(list_empty(&the_lnet.ln_delay_rules)); - LASSERT(list_empty(&delay_dd.dd_sched_rules)); -} diff --git a/drivers/staging/lustre/lnet/lnet/nidstrings.c b/drivers/staging/lustre/lnet/lnet/nidstrings.c deleted file mode 100644 index 3aba1421c741..000000000000 --- a/drivers/staging/lustre/lnet/lnet/nidstrings.c +++ /dev/null @@ -1,1258 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -/* - * GPL HEADER START - * - * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 only, - * as published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, but - * WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * General Public License version 2 for more details (a copy is included - * in the LICENSE file that accompanied this code). 
- * - * You should have received a copy of the GNU General Public License - * version 2 along with this program; If not, see - * http://www.gnu.org/licenses/gpl-2.0.html - * - * GPL HEADER END - */ -/* - * Copyright (c) 2008, 2010, Oracle and/or its affiliates. All rights reserved. - * Use is subject to license terms. - * - * Copyright (c) 2011, 2012, Intel Corporation. - */ -/* - * This file is part of Lustre, http://www.lustre.org/ - * Lustre is a trademark of Sun Microsystems, Inc. - * - * lnet/lnet/nidstrings.c - * - * Author: Phil Schwan <phil@clusterfs.com> - */ - -#define DEBUG_SUBSYSTEM S_LNET - -#include <linux/libcfs/libcfs.h> -#include <uapi/linux/lnet/nidstr.h> - -/* max value for numeric network address */ -#define MAX_NUMERIC_VALUE 0xffffffff - -#define IPSTRING_LENGTH 16 - -/* CAVEAT VENDITOR! Keep the canonical string representation of nets/nids - * consistent in all conversion functions. Some code fragments are copied - * around for the sake of clarity... - */ - -/* CAVEAT EMPTOR! Racey temporary buffer allocation! - * Choose the number of nidstrings to support the MAXIMUM expected number of - * concurrent users. If there are more, the returned string will be volatile. - * NB this number must allow for a process to be descheduled for a timeslice - * between getting its string and using it. - */ - -static char libcfs_nidstrings[LNET_NIDSTR_COUNT][LNET_NIDSTR_SIZE]; -static int libcfs_nidstring_idx; - -static DEFINE_SPINLOCK(libcfs_nidstring_lock); - -static struct netstrfns *libcfs_namenum2netstrfns(const char *name); - -char * -libcfs_next_nidstring(void) -{ - char *str; - unsigned long flags; - - spin_lock_irqsave(&libcfs_nidstring_lock, flags); - - str = libcfs_nidstrings[libcfs_nidstring_idx++]; - if (libcfs_nidstring_idx == ARRAY_SIZE(libcfs_nidstrings)) - libcfs_nidstring_idx = 0; - - spin_unlock_irqrestore(&libcfs_nidstring_lock, flags); - return str; -} -EXPORT_SYMBOL(libcfs_next_nidstring); - -/** - * Nid range list syntax. 
- * \verbatim - * - * <nidlist> :== <nidrange> [ ' ' <nidrange> ] - * <nidrange> :== <addrrange> '@' <net> - * <addrrange> :== '*' | - * <ipaddr_range> | - * <cfs_expr_list> - * <ipaddr_range> :== <cfs_expr_list>.<cfs_expr_list>.<cfs_expr_list>. - * <cfs_expr_list> - * <cfs_expr_list> :== <number> | - * <expr_list> - * <expr_list> :== '[' <range_expr> [ ',' <range_expr>] ']' - * <range_expr> :== <number> | - * <number> '-' <number> | - * <number> '-' <number> '/' <number> - * <net> :== <netname> | <netname><number> - * <netname> :== "lo" | "tcp" | "o2ib" | "cib" | "openib" | "iib" | - * "vib" | "ra" | "elan" | "mx" | "ptl" - * \endverbatim - */ - -/** - * Structure to represent \<nidrange\> token of the syntax. - * - * One of this is created for each \<net\> parsed. - */ -struct nidrange { - /** - * Link to list of this structures which is built on nid range - * list parsing. - */ - struct list_head nr_link; - /** - * List head for addrrange::ar_link. - */ - struct list_head nr_addrranges; - /** - * Flag indicating that *@<net> is found. - */ - int nr_all; - /** - * Pointer to corresponding element of libcfs_netstrfns. - */ - struct netstrfns *nr_netstrfns; - /** - * Number of network. E.g. 5 if \<net\> is "elan5". - */ - int nr_netnum; -}; - -/** - * Structure to represent \<addrrange\> token of the syntax. - */ -struct addrrange { - /** - * Link to nidrange::nr_addrranges. - */ - struct list_head ar_link; - /** - * List head for cfs_expr_list::el_list. - */ - struct list_head ar_numaddr_ranges; -}; - -/** - * Parses \<addrrange\> token on the syntax. 
- * - * Allocates struct addrrange and links to \a nidrange via - * (nidrange::nr_addrranges) - * - * \retval 0 if \a src parses to '*' | \<ipaddr_range\> | \<cfs_expr_list\> - * \retval -errno otherwise - */ -static int -parse_addrange(const struct cfs_lstr *src, struct nidrange *nidrange) -{ - struct addrrange *addrrange; - - if (src->ls_len == 1 && src->ls_str[0] == '*') { - nidrange->nr_all = 1; - return 0; - } - - addrrange = kzalloc(sizeof(struct addrrange), GFP_NOFS); - if (!addrrange) - return -ENOMEM; - list_add_tail(&addrrange->ar_link, &nidrange->nr_addrranges); - INIT_LIST_HEAD(&addrrange->ar_numaddr_ranges); - - return nidrange->nr_netstrfns->nf_parse_addrlist(src->ls_str, - src->ls_len, - &addrrange->ar_numaddr_ranges); -} - -/** - * Finds or creates struct nidrange. - * - * Checks if \a src is a valid network name, looks for corresponding - * nidrange on the ist of nidranges (\a nidlist), creates new struct - * nidrange if it is not found. - * - * \retval pointer to struct nidrange matching network specified via \a src - * \retval NULL if \a src does not match any network - */ -static struct nidrange * -add_nidrange(const struct cfs_lstr *src, - struct list_head *nidlist) -{ - struct netstrfns *nf; - struct nidrange *nr; - int endlen; - unsigned int netnum; - - if (src->ls_len >= LNET_NIDSTR_SIZE) - return NULL; - - nf = libcfs_namenum2netstrfns(src->ls_str); - if (!nf) - return NULL; - endlen = src->ls_len - strlen(nf->nf_name); - if (!endlen) - /* network name only, e.g. "elan" or "tcp" */ - netnum = 0; - else { - /* - * e.g. 
"elan25" or "tcp23", refuse to parse if - * network name is not appended with decimal or - * hexadecimal number - */ - if (!cfs_str2num_check(src->ls_str + strlen(nf->nf_name), - endlen, &netnum, 0, MAX_NUMERIC_VALUE)) - return NULL; - } - - list_for_each_entry(nr, nidlist, nr_link) { - if (nr->nr_netstrfns != nf) - continue; - if (nr->nr_netnum != netnum) - continue; - return nr; - } - - nr = kzalloc(sizeof(struct nidrange), GFP_NOFS); - if (!nr) - return NULL; - list_add_tail(&nr->nr_link, nidlist); - INIT_LIST_HEAD(&nr->nr_addrranges); - nr->nr_netstrfns = nf; - nr->nr_all = 0; - nr->nr_netnum = netnum; - - return nr; -} - -/** - * Parses \<nidrange\> token of the syntax. - * - * \retval 1 if \a src parses to \<addrrange\> '@' \<net\> - * \retval 0 otherwise - */ -static int -parse_nidrange(struct cfs_lstr *src, struct list_head *nidlist) -{ - struct cfs_lstr addrrange; - struct cfs_lstr net; - struct nidrange *nr; - - if (!cfs_gettok(src, '@', &addrrange)) - goto failed; - - if (!cfs_gettok(src, '@', &net) || src->ls_str) - goto failed; - - nr = add_nidrange(&net, nidlist); - if (!nr) - goto failed; - - if (parse_addrange(&addrrange, nr)) - goto failed; - - return 1; -failed: - return 0; -} - -/** - * Frees addrrange structures of \a list. - * - * For each struct addrrange structure found on \a list it frees - * cfs_expr_list list attached to it and frees the addrrange itself. - * - * \retval none - */ -static void -free_addrranges(struct list_head *list) -{ - while (!list_empty(list)) { - struct addrrange *ar; - - ar = list_entry(list->next, struct addrrange, ar_link); - - cfs_expr_list_free_list(&ar->ar_numaddr_ranges); - list_del(&ar->ar_link); - kfree(ar); - } -} - -/** - * Frees nidrange strutures of \a list. - * - * For each struct nidrange structure found on \a list it frees - * addrrange list attached to it and frees the nidrange itself. 
- * - * \retval none - */ -void -cfs_free_nidlist(struct list_head *list) -{ - struct list_head *pos, *next; - struct nidrange *nr; - - list_for_each_safe(pos, next, list) { - nr = list_entry(pos, struct nidrange, nr_link); - free_addrranges(&nr->nr_addrranges); - list_del(pos); - kfree(nr); - } -} -EXPORT_SYMBOL(cfs_free_nidlist); - -/** - * Parses nid range list. - * - * Parses with rigorous syntax and overflow checking \a str into - * \<nidrange\> [ ' ' \<nidrange\> ], compiles \a str into set of - * structures and links that structure to \a nidlist. The resulting - * list can be used to match a NID againts set of NIDS defined by \a - * str. - * \see cfs_match_nid - * - * \retval 1 on success - * \retval 0 otherwise - */ -int -cfs_parse_nidlist(char *str, int len, struct list_head *nidlist) -{ - struct cfs_lstr src; - struct cfs_lstr res; - int rc; - - src.ls_str = str; - src.ls_len = len; - INIT_LIST_HEAD(nidlist); - while (src.ls_str) { - rc = cfs_gettok(&src, ' ', &res); - if (!rc) { - cfs_free_nidlist(nidlist); - return 0; - } - rc = parse_nidrange(&res, nidlist); - if (!rc) { - cfs_free_nidlist(nidlist); - return 0; - } - } - return 1; -} -EXPORT_SYMBOL(cfs_parse_nidlist); - -/** - * Matches a nid (\a nid) against the compiled list of nidranges (\a nidlist). - * - * \see cfs_parse_nidlist() - * - * \retval 1 on match - * \retval 0 otherwises - */ -int cfs_match_nid(lnet_nid_t nid, struct list_head *nidlist) -{ - struct nidrange *nr; - struct addrrange *ar; - - list_for_each_entry(nr, nidlist, nr_link) { - if (nr->nr_netstrfns->nf_type != LNET_NETTYP(LNET_NIDNET(nid))) - continue; - if (nr->nr_netnum != LNET_NETNUM(LNET_NIDNET(nid))) - continue; - if (nr->nr_all) - return 1; - list_for_each_entry(ar, &nr->nr_addrranges, ar_link) - if (nr->nr_netstrfns->nf_match_addr(LNET_NIDADDR(nid), - &ar->ar_numaddr_ranges)) - return 1; - } - return 0; -} -EXPORT_SYMBOL(cfs_match_nid); - -/** - * Print the network part of the nidrange \a nr into the specified \a buffer. 
- * - * \retval number of characters written - */ -static int -cfs_print_network(char *buffer, int count, struct nidrange *nr) -{ - struct netstrfns *nf = nr->nr_netstrfns; - - if (!nr->nr_netnum) - return scnprintf(buffer, count, "@%s", nf->nf_name); - else - return scnprintf(buffer, count, "@%s%u", - nf->nf_name, nr->nr_netnum); -} - -/** - * Print a list of addrrange (\a addrranges) into the specified \a buffer. - * At max \a count characters can be printed into \a buffer. - * - * \retval number of characters written - */ -static int -cfs_print_addrranges(char *buffer, int count, struct list_head *addrranges, - struct nidrange *nr) -{ - int i = 0; - struct addrrange *ar; - struct netstrfns *nf = nr->nr_netstrfns; - - list_for_each_entry(ar, addrranges, ar_link) { - if (i) - i += scnprintf(buffer + i, count - i, " "); - i += nf->nf_print_addrlist(buffer + i, count - i, - &ar->ar_numaddr_ranges); - i += cfs_print_network(buffer + i, count - i, nr); - } - return i; -} - -/** - * Print a list of nidranges (\a nidlist) into the specified \a buffer. - * At max \a count characters can be printed into \a buffer. - * Nidranges are separated by a space character. 
- * - * \retval number of characters written - */ -int cfs_print_nidlist(char *buffer, int count, struct list_head *nidlist) -{ - int i = 0; - struct nidrange *nr; - - if (count <= 0) - return 0; - - list_for_each_entry(nr, nidlist, nr_link) { - if (i) - i += scnprintf(buffer + i, count - i, " "); - - if (nr->nr_all) { - LASSERT(list_empty(&nr->nr_addrranges)); - i += scnprintf(buffer + i, count - i, "*"); - i += cfs_print_network(buffer + i, count - i, nr); - } else { - i += cfs_print_addrranges(buffer + i, count - i, - &nr->nr_addrranges, nr); - } - } - return i; -} -EXPORT_SYMBOL(cfs_print_nidlist); - -/** - * Determines minimum and maximum addresses for a single - * numeric address range - * - * \param ar - * \param min_nid - * \param max_nid - */ -static void cfs_ip_ar_min_max(struct addrrange *ar, __u32 *min_nid, - __u32 *max_nid) -{ - struct cfs_expr_list *el; - struct cfs_range_expr *re; - __u32 tmp_ip_addr = 0; - unsigned int min_ip[4] = {0}; - unsigned int max_ip[4] = {0}; - int re_count = 0; - - list_for_each_entry(el, &ar->ar_numaddr_ranges, el_link) { - list_for_each_entry(re, &el->el_exprs, re_link) { - min_ip[re_count] = re->re_lo; - max_ip[re_count] = re->re_hi; - re_count++; - } - } - - tmp_ip_addr = ((min_ip[0] << 24) | (min_ip[1] << 16) | - (min_ip[2] << 8) | min_ip[3]); - - if (min_nid) - *min_nid = tmp_ip_addr; - - tmp_ip_addr = ((max_ip[0] << 24) | (max_ip[1] << 16) | - (max_ip[2] << 8) | max_ip[3]); - - if (max_nid) - *max_nid = tmp_ip_addr; -} - -/** - * Determines minimum and maximum addresses for a single - * numeric address range - * - * \param ar - * \param min_nid - * \param max_nid - */ -static void cfs_num_ar_min_max(struct addrrange *ar, __u32 *min_nid, - __u32 *max_nid) -{ - struct cfs_expr_list *el; - struct cfs_range_expr *re; - unsigned int min_addr = 0; - unsigned int max_addr = 0; - - list_for_each_entry(el, &ar->ar_numaddr_ranges, el_link) { - list_for_each_entry(re, &el->el_exprs, re_link) { - if (re->re_lo < min_addr || 
!min_addr) - min_addr = re->re_lo; - if (re->re_hi > max_addr) - max_addr = re->re_hi; - } - } - - if (min_nid) - *min_nid = min_addr; - if (max_nid) - *max_nid = max_addr; -} - -/** - * Determines whether an expression list in an nidrange contains exactly - * one contiguous address range. Calls the correct netstrfns for the LND - * - * \param *nidlist - * - * \retval true if contiguous - * \retval false if not contiguous - */ -bool cfs_nidrange_is_contiguous(struct list_head *nidlist) -{ - struct nidrange *nr; - struct netstrfns *nf = NULL; - char *lndname = NULL; - int netnum = -1; - - list_for_each_entry(nr, nidlist, nr_link) { - nf = nr->nr_netstrfns; - if (!lndname) - lndname = nf->nf_name; - if (netnum == -1) - netnum = nr->nr_netnum; - - if (strcmp(lndname, nf->nf_name) || - netnum != nr->nr_netnum) - return false; - } - - if (!nf) - return false; - - if (!nf->nf_is_contiguous(nidlist)) - return false; - - return true; -} -EXPORT_SYMBOL(cfs_nidrange_is_contiguous); - -/** - * Determines whether an expression list in an num nidrange contains exactly - * one contiguous address range. 
- * - * \param *nidlist - * - * \retval true if contiguous - * \retval false if not contiguous - */ -static bool cfs_num_is_contiguous(struct list_head *nidlist) -{ - struct nidrange *nr; - struct addrrange *ar; - struct cfs_expr_list *el; - struct cfs_range_expr *re; - int last_hi = 0; - __u32 last_end_nid = 0; - __u32 current_start_nid = 0; - __u32 current_end_nid = 0; - - list_for_each_entry(nr, nidlist, nr_link) { - list_for_each_entry(ar, &nr->nr_addrranges, ar_link) { - cfs_num_ar_min_max(ar, ¤t_start_nid, - ¤t_end_nid); - if (last_end_nid && - (current_start_nid - last_end_nid != 1)) - return false; - last_end_nid = current_end_nid; - list_for_each_entry(el, &ar->ar_numaddr_ranges, - el_link) { - list_for_each_entry(re, &el->el_exprs, - re_link) { - if (re->re_stride > 1) - return false; - else if (last_hi && - re->re_hi - last_hi != 1) - return false; - last_hi = re->re_hi; - } - } - } - } - - return true; -} - -/** - * Determines whether an expression list in an ip nidrange contains exactly - * one contiguous address range. 
- * - * \param *nidlist - * - * \retval true if contiguous - * \retval false if not contiguous - */ -static bool cfs_ip_is_contiguous(struct list_head *nidlist) -{ - struct nidrange *nr; - struct addrrange *ar; - struct cfs_expr_list *el; - struct cfs_range_expr *re; - int expr_count; - int last_hi = 255; - int last_diff = 0; - __u32 last_end_nid = 0; - __u32 current_start_nid = 0; - __u32 current_end_nid = 0; - - list_for_each_entry(nr, nidlist, nr_link) { - list_for_each_entry(ar, &nr->nr_addrranges, ar_link) { - last_hi = 255; - last_diff = 0; - cfs_ip_ar_min_max(ar, ¤t_start_nid, - ¤t_end_nid); - if (last_end_nid && - (current_start_nid - last_end_nid != 1)) - return false; - last_end_nid = current_end_nid; - list_for_each_entry(el, &ar->ar_numaddr_ranges, - el_link) { - expr_count = 0; - list_for_each_entry(re, &el->el_exprs, - re_link) { - expr_count++; - if (re->re_stride > 1 || - (last_diff > 0 && last_hi != 255) || - (last_diff > 0 && last_hi == 255 && - re->re_lo > 0)) - return false; - last_hi = re->re_hi; - last_diff = re->re_hi - re->re_lo; - } - } - } - } - - return true; -} - -/** - * Takes a linked list of nidrange expressions, determines the minimum - * and maximum nid and creates appropriate nid structures - * - * \param *nidlist - * \param *min_nid - * \param *max_nid - */ -void cfs_nidrange_find_min_max(struct list_head *nidlist, char *min_nid, - char *max_nid, size_t nidstr_length) -{ - struct nidrange *nr; - struct netstrfns *nf = NULL; - int netnum = -1; - __u32 min_addr; - __u32 max_addr; - char *lndname = NULL; - char min_addr_str[IPSTRING_LENGTH]; - char max_addr_str[IPSTRING_LENGTH]; - - list_for_each_entry(nr, nidlist, nr_link) { - nf = nr->nr_netstrfns; - lndname = nf->nf_name; - if (netnum == -1) - netnum = nr->nr_netnum; - - nf->nf_min_max(nidlist, &min_addr, &max_addr); - } - nf->nf_addr2str(min_addr, min_addr_str, sizeof(min_addr_str)); - nf->nf_addr2str(max_addr, max_addr_str, sizeof(max_addr_str)); - - snprintf(min_nid, 
nidstr_length, "%s@%s%d", min_addr_str, lndname, - netnum); - snprintf(max_nid, nidstr_length, "%s@%s%d", max_addr_str, lndname, - netnum); -} -EXPORT_SYMBOL(cfs_nidrange_find_min_max); - -/** - * Determines the min and max NID values for num LNDs - * - * \param *nidlist - * \param *min_nid - * \param *max_nid - */ -static void cfs_num_min_max(struct list_head *nidlist, __u32 *min_nid, - __u32 *max_nid) -{ - struct nidrange *nr; - struct addrrange *ar; - unsigned int tmp_min_addr = 0; - unsigned int tmp_max_addr = 0; - unsigned int min_addr = 0; - unsigned int max_addr = 0; - - list_for_each_entry(nr, nidlist, nr_link) { - list_for_each_entry(ar, &nr->nr_addrranges, ar_link) { - cfs_num_ar_min_max(ar, &tmp_min_addr, - &tmp_max_addr); - if (tmp_min_addr < min_addr || !min_addr) - min_addr = tmp_min_addr; - if (tmp_max_addr > max_addr) - max_addr = tmp_min_addr; - } - } - *max_nid = max_addr; - *min_nid = min_addr; -} - -/** - * Takes an nidlist and determines the minimum and maximum - * ip addresses. 
- * - * \param *nidlist - * \param *min_nid - * \param *max_nid - */ -static void cfs_ip_min_max(struct list_head *nidlist, __u32 *min_nid, - __u32 *max_nid) -{ - struct nidrange *nr; - struct addrrange *ar; - __u32 tmp_min_ip_addr = 0; - __u32 tmp_max_ip_addr = 0; - __u32 min_ip_addr = 0; - __u32 max_ip_addr = 0; - - list_for_each_entry(nr, nidlist, nr_link) { - list_for_each_entry(ar, &nr->nr_addrranges, ar_link) { - cfs_ip_ar_min_max(ar, &tmp_min_ip_addr, - &tmp_max_ip_addr); - if (tmp_min_ip_addr < min_ip_addr || !min_ip_addr) - min_ip_addr = tmp_min_ip_addr; - if (tmp_max_ip_addr > max_ip_addr) - max_ip_addr = tmp_max_ip_addr; - } - } - - if (min_nid) - *min_nid = min_ip_addr; - if (max_nid) - *max_nid = max_ip_addr; -} - -static int -libcfs_lo_str2addr(const char *str, int nob, __u32 *addr) -{ - *addr = 0; - return 1; -} - -static void -libcfs_ip_addr2str(__u32 addr, char *str, size_t size) -{ - snprintf(str, size, "%u.%u.%u.%u", - (addr >> 24) & 0xff, (addr >> 16) & 0xff, - (addr >> 8) & 0xff, addr & 0xff); -} - -/* - * CAVEAT EMPTOR XscanfX - * I use "%n" at the end of a sscanf format to detect trailing junk. However - * sscanf may return immediately if it sees the terminating '0' in a string, so - * I initialise the %n variable to the expected length. If sscanf sets it; - * fine, if it doesn't, then the scan ended at the end of the string, which is - * fine too :) - */ -static int -libcfs_ip_str2addr(const char *str, int nob, __u32 *addr) -{ - unsigned int a; - unsigned int b; - unsigned int c; - unsigned int d; - int n = nob; /* XscanfX */ - - /* numeric IP? 
*/ - if (sscanf(str, "%u.%u.%u.%u%n", &a, &b, &c, &d, &n) >= 4 && - n == nob && - !(a & ~0xff) && !(b & ~0xff) && - !(c & ~0xff) && !(d & ~0xff)) { - *addr = ((a << 24) | (b << 16) | (c << 8) | d); - return 1; - } - - return 0; -} - -/* Used by lnet/config.c so it can't be static */ -int -cfs_ip_addr_parse(char *str, int len, struct list_head *list) -{ - struct cfs_expr_list *el; - struct cfs_lstr src; - int rc; - int i; - - src.ls_str = str; - src.ls_len = len; - i = 0; - - while (src.ls_str) { - struct cfs_lstr res; - - if (!cfs_gettok(&src, '.', &res)) { - rc = -EINVAL; - goto out; - } - - rc = cfs_expr_list_parse(res.ls_str, res.ls_len, 0, 255, &el); - if (rc) - goto out; - - list_add_tail(&el->el_link, list); - i++; - } - - if (i == 4) - return 0; - - rc = -EINVAL; -out: - cfs_expr_list_free_list(list); - - return rc; -} - -static int -libcfs_ip_addr_range_print(char *buffer, int count, struct list_head *list) -{ - int i = 0, j = 0; - struct cfs_expr_list *el; - - list_for_each_entry(el, list, el_link) { - LASSERT(j++ < 4); - if (i) - i += scnprintf(buffer + i, count - i, "."); - i += cfs_expr_list_print(buffer + i, count - i, el); - } - return i; -} - -/** - * Matches address (\a addr) against address set encoded in \a list. 
- * - * \retval 1 if \a addr matches - * \retval 0 otherwise - */ -int -cfs_ip_addr_match(__u32 addr, struct list_head *list) -{ - struct cfs_expr_list *el; - int i = 0; - - list_for_each_entry_reverse(el, list, el_link) { - if (!cfs_expr_list_match(addr & 0xff, el)) - return 0; - addr >>= 8; - i++; - } - - return i == 4; -} - -static void -libcfs_decnum_addr2str(__u32 addr, char *str, size_t size) -{ - snprintf(str, size, "%u", addr); -} - -static int -libcfs_num_str2addr(const char *str, int nob, __u32 *addr) -{ - int n; - - n = nob; - if (sscanf(str, "0x%x%n", addr, &n) >= 1 && n == nob) - return 1; - - n = nob; - if (sscanf(str, "0X%x%n", addr, &n) >= 1 && n == nob) - return 1; - - n = nob; - if (sscanf(str, "%u%n", addr, &n) >= 1 && n == nob) - return 1; - - return 0; -} - -/** - * Nf_parse_addrlist method for networks using numeric addresses. - * - * Examples of such networks are gm and elan. - * - * \retval 0 if \a str parsed to numeric address - * \retval errno otherwise - */ -static int -libcfs_num_parse(char *str, int len, struct list_head *list) -{ - struct cfs_expr_list *el; - int rc; - - rc = cfs_expr_list_parse(str, len, 0, MAX_NUMERIC_VALUE, &el); - if (!rc) - list_add_tail(&el->el_link, list); - - return rc; -} - -static int -libcfs_num_addr_range_print(char *buffer, int count, struct list_head *list) -{ - int i = 0, j = 0; - struct cfs_expr_list *el; - - list_for_each_entry(el, list, el_link) { - LASSERT(j++ < 1); - i += cfs_expr_list_print(buffer + i, count - i, el); - } - return i; -} - -/* - * Nf_match_addr method for networks using numeric addresses - * - * \retval 1 on match - * \retval 0 otherwise - */ -static int -libcfs_num_match(__u32 addr, struct list_head *numaddr) -{ - struct cfs_expr_list *el; - - LASSERT(!list_empty(numaddr)); - el = list_entry(numaddr->next, struct cfs_expr_list, el_link); - - return cfs_expr_list_match(addr, el); -} - -static struct netstrfns libcfs_netstrfns[] = { - { .nf_type = LOLND, - .nf_name = "lo", - 
.nf_modname = "klolnd", - .nf_addr2str = libcfs_decnum_addr2str, - .nf_str2addr = libcfs_lo_str2addr, - .nf_parse_addrlist = libcfs_num_parse, - .nf_print_addrlist = libcfs_num_addr_range_print, - .nf_match_addr = libcfs_num_match, - .nf_is_contiguous = cfs_num_is_contiguous, - .nf_min_max = cfs_num_min_max }, - { .nf_type = SOCKLND, - .nf_name = "tcp", - .nf_modname = "ksocklnd", - .nf_addr2str = libcfs_ip_addr2str, - .nf_str2addr = libcfs_ip_str2addr, - .nf_parse_addrlist = cfs_ip_addr_parse, - .nf_print_addrlist = libcfs_ip_addr_range_print, - .nf_match_addr = cfs_ip_addr_match, - .nf_is_contiguous = cfs_ip_is_contiguous, - .nf_min_max = cfs_ip_min_max }, - { .nf_type = O2IBLND, - .nf_name = "o2ib", - .nf_modname = "ko2iblnd", - .nf_addr2str = libcfs_ip_addr2str, - .nf_str2addr = libcfs_ip_str2addr, - .nf_parse_addrlist = cfs_ip_addr_parse, - .nf_print_addrlist = libcfs_ip_addr_range_print, - .nf_match_addr = cfs_ip_addr_match, - .nf_is_contiguous = cfs_ip_is_contiguous, - .nf_min_max = cfs_ip_min_max }, - { .nf_type = GNILND, - .nf_name = "gni", - .nf_modname = "kgnilnd", - .nf_addr2str = libcfs_decnum_addr2str, - .nf_str2addr = libcfs_num_str2addr, - .nf_parse_addrlist = libcfs_num_parse, - .nf_print_addrlist = libcfs_num_addr_range_print, - .nf_match_addr = libcfs_num_match, - .nf_is_contiguous = cfs_num_is_contiguous, - .nf_min_max = cfs_num_min_max }, - { .nf_type = GNIIPLND, - .nf_name = "gip", - .nf_modname = "kgnilnd", - .nf_addr2str = libcfs_ip_addr2str, - .nf_str2addr = libcfs_ip_str2addr, - .nf_parse_addrlist = cfs_ip_addr_parse, - .nf_print_addrlist = libcfs_ip_addr_range_print, - .nf_match_addr = cfs_ip_addr_match, - .nf_is_contiguous = cfs_ip_is_contiguous, - .nf_min_max = cfs_ip_min_max }, -}; - -static const size_t libcfs_nnetstrfns = ARRAY_SIZE(libcfs_netstrfns); - -static struct netstrfns * -libcfs_lnd2netstrfns(__u32 lnd) -{ - int i; - - for (i = 0; i < libcfs_nnetstrfns; i++) - if (lnd == libcfs_netstrfns[i].nf_type) - return 
&libcfs_netstrfns[i]; - - return NULL; -} - -static struct netstrfns * -libcfs_namenum2netstrfns(const char *name) -{ - struct netstrfns *nf; - int i; - - for (i = 0; i < libcfs_nnetstrfns; i++) { - nf = &libcfs_netstrfns[i]; - if (!strncmp(name, nf->nf_name, strlen(nf->nf_name))) - return nf; - } - return NULL; -} - -static struct netstrfns * -libcfs_name2netstrfns(const char *name) -{ - int i; - - for (i = 0; i < libcfs_nnetstrfns; i++) - if (!strcmp(libcfs_netstrfns[i].nf_name, name)) - return &libcfs_netstrfns[i]; - - return NULL; -} - -int -libcfs_isknown_lnd(__u32 lnd) -{ - return !!libcfs_lnd2netstrfns(lnd); -} -EXPORT_SYMBOL(libcfs_isknown_lnd); - -char * -libcfs_lnd2modname(__u32 lnd) -{ - struct netstrfns *nf = libcfs_lnd2netstrfns(lnd); - - return nf ? nf->nf_modname : NULL; -} -EXPORT_SYMBOL(libcfs_lnd2modname); - -int -libcfs_str2lnd(const char *str) -{ - struct netstrfns *nf = libcfs_name2netstrfns(str); - - if (nf) - return nf->nf_type; - - return -ENXIO; -} -EXPORT_SYMBOL(libcfs_str2lnd); - -char * -libcfs_lnd2str_r(__u32 lnd, char *buf, size_t buf_size) -{ - struct netstrfns *nf; - - nf = libcfs_lnd2netstrfns(lnd); - if (!nf) - snprintf(buf, buf_size, "?%u?", lnd); - else - snprintf(buf, buf_size, "%s", nf->nf_name); - - return buf; -} -EXPORT_SYMBOL(libcfs_lnd2str_r); - -char * -libcfs_net2str_r(__u32 net, char *buf, size_t buf_size) -{ - __u32 nnum = LNET_NETNUM(net); - __u32 lnd = LNET_NETTYP(net); - struct netstrfns *nf; - - nf = libcfs_lnd2netstrfns(lnd); - if (!nf) - snprintf(buf, buf_size, "<%u:%u>", lnd, nnum); - else if (!nnum) - snprintf(buf, buf_size, "%s", nf->nf_name); - else - snprintf(buf, buf_size, "%s%u", nf->nf_name, nnum); - - return buf; -} -EXPORT_SYMBOL(libcfs_net2str_r); - -char * -libcfs_nid2str_r(lnet_nid_t nid, char *buf, size_t buf_size) -{ - __u32 addr = LNET_NIDADDR(nid); - __u32 net = LNET_NIDNET(nid); - __u32 nnum = LNET_NETNUM(net); - __u32 lnd = LNET_NETTYP(net); - struct netstrfns *nf; - - if (nid == LNET_NID_ANY) 
{ - strncpy(buf, "<?>", buf_size); - buf[buf_size - 1] = '\0'; - return buf; - } - - nf = libcfs_lnd2netstrfns(lnd); - if (!nf) { - snprintf(buf, buf_size, "%x@<%u:%u>", addr, lnd, nnum); - } else { - size_t addr_len; - - nf->nf_addr2str(addr, buf, buf_size); - addr_len = strlen(buf); - if (!nnum) - snprintf(buf + addr_len, buf_size - addr_len, "@%s", - nf->nf_name); - else - snprintf(buf + addr_len, buf_size - addr_len, "@%s%u", - nf->nf_name, nnum); - } - - return buf; -} -EXPORT_SYMBOL(libcfs_nid2str_r); - -static struct netstrfns * -libcfs_str2net_internal(const char *str, __u32 *net) -{ - struct netstrfns *nf = NULL; - int nob; - unsigned int netnum; - int i; - - for (i = 0; i < libcfs_nnetstrfns; i++) { - nf = &libcfs_netstrfns[i]; - if (!strncmp(str, nf->nf_name, strlen(nf->nf_name))) - break; - } - - if (i == libcfs_nnetstrfns) - return NULL; - - nob = strlen(nf->nf_name); - - if (strlen(str) == (unsigned int)nob) { - netnum = 0; - } else { - if (nf->nf_type == LOLND) /* net number not allowed */ - return NULL; - - str += nob; - i = strlen(str); - if (sscanf(str, "%u%n", &netnum, &i) < 1 || - i != (int)strlen(str)) - return NULL; - } - - *net = LNET_MKNET(nf->nf_type, netnum); - return nf; -} - -__u32 -libcfs_str2net(const char *str) -{ - __u32 net; - - if (libcfs_str2net_internal(str, &net)) - return net; - - return LNET_NIDNET(LNET_NID_ANY); -} -EXPORT_SYMBOL(libcfs_str2net); - -lnet_nid_t -libcfs_str2nid(const char *str) -{ - const char *sep = strchr(str, '@'); - struct netstrfns *nf; - __u32 net; - __u32 addr; - - if (sep) { - nf = libcfs_str2net_internal(sep + 1, &net); - if (!nf) - return LNET_NID_ANY; - } else { - sep = str + strlen(str); - net = LNET_MKNET(SOCKLND, 0); - nf = libcfs_lnd2netstrfns(SOCKLND); - LASSERT(nf); - } - - if (!nf->nf_str2addr(str, (int)(sep - str), &addr)) - return LNET_NID_ANY; - - return LNET_MKNID(net, addr); -} -EXPORT_SYMBOL(libcfs_str2nid); - -char * -libcfs_id2str(struct lnet_process_id id) -{ - char *str = 
libcfs_next_nidstring(); - - if (id.pid == LNET_PID_ANY) { - snprintf(str, LNET_NIDSTR_SIZE, - "LNET_PID_ANY-%s", libcfs_nid2str(id.nid)); - return str; - } - - snprintf(str, LNET_NIDSTR_SIZE, "%s%u-%s", - id.pid & LNET_PID_USERFLAG ? "U" : "", - id.pid & ~LNET_PID_USERFLAG, libcfs_nid2str(id.nid)); - return str; -} -EXPORT_SYMBOL(libcfs_id2str); - -int -libcfs_str2anynid(lnet_nid_t *nidp, const char *str) -{ - if (!strcmp(str, "*")) { - *nidp = LNET_NID_ANY; - return 1; - } - - *nidp = libcfs_str2nid(str); - return *nidp != LNET_NID_ANY; -} -EXPORT_SYMBOL(libcfs_str2anynid); diff --git a/drivers/staging/lustre/lnet/lnet/peer.c b/drivers/staging/lustre/lnet/lnet/peer.c deleted file mode 100644 index 3d4caa609c83..000000000000 --- a/drivers/staging/lustre/lnet/lnet/peer.c +++ /dev/null @@ -1,456 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -/* - * GPL HEADER START - * - * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 only, - * as published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, but - * WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * General Public License version 2 for more details (a copy is included - * in the LICENSE file that accompanied this code). - * - * You should have received a copy of the GNU General Public License - * version 2 along with this program; If not, see - * http://www.gnu.org/licenses/gpl-2.0.html - * - * GPL HEADER END - */ -/* - * Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved. - * Use is subject to license terms. - * - * Copyright (c) 2012, Intel Corporation. - */ -/* - * This file is part of Lustre, http://www.lustre.org/ - * Lustre is a trademark of Sun Microsystems, Inc. 
- * - * lnet/lnet/peer.c - */ - -#define DEBUG_SUBSYSTEM S_LNET - -#include <linux/lnet/lib-lnet.h> -#include <uapi/linux/lnet/lnet-dlc.h> - -int -lnet_peer_tables_create(void) -{ - struct lnet_peer_table *ptable; - struct list_head *hash; - int i; - int j; - - the_lnet.ln_peer_tables = cfs_percpt_alloc(lnet_cpt_table(), - sizeof(*ptable)); - if (!the_lnet.ln_peer_tables) { - CERROR("Failed to allocate cpu-partition peer tables\n"); - return -ENOMEM; - } - - cfs_percpt_for_each(ptable, i, the_lnet.ln_peer_tables) { - INIT_LIST_HEAD(&ptable->pt_deathrow); - - hash = kvmalloc_cpt(LNET_PEER_HASH_SIZE * sizeof(*hash), - GFP_KERNEL, i); - if (!hash) { - CERROR("Failed to create peer hash table\n"); - lnet_peer_tables_destroy(); - return -ENOMEM; - } - - for (j = 0; j < LNET_PEER_HASH_SIZE; j++) - INIT_LIST_HEAD(&hash[j]); - ptable->pt_hash = hash; /* sign of initialization */ - } - - return 0; -} - -void -lnet_peer_tables_destroy(void) -{ - struct lnet_peer_table *ptable; - struct list_head *hash; - int i; - int j; - - if (!the_lnet.ln_peer_tables) - return; - - cfs_percpt_for_each(ptable, i, the_lnet.ln_peer_tables) { - hash = ptable->pt_hash; - if (!hash) /* not initialized */ - break; - - LASSERT(list_empty(&ptable->pt_deathrow)); - - ptable->pt_hash = NULL; - for (j = 0; j < LNET_PEER_HASH_SIZE; j++) - LASSERT(list_empty(&hash[j])); - - kvfree(hash); - } - - cfs_percpt_free(the_lnet.ln_peer_tables); - the_lnet.ln_peer_tables = NULL; -} - -static void -lnet_peer_table_cleanup_locked(struct lnet_ni *ni, - struct lnet_peer_table *ptable) -{ - int i; - struct lnet_peer *lp; - struct lnet_peer *tmp; - - for (i = 0; i < LNET_PEER_HASH_SIZE; i++) { - list_for_each_entry_safe(lp, tmp, &ptable->pt_hash[i], - lp_hashlist) { - if (ni && ni != lp->lp_ni) - continue; - list_del_init(&lp->lp_hashlist); - /* Lose hash table's ref */ - ptable->pt_zombies++; - lnet_peer_decref_locked(lp); - } - } -} - -static void -lnet_peer_table_deathrow_wait_locked(struct lnet_peer_table *ptable, 
- int cpt_locked) -{ - int i; - - for (i = 3; ptable->pt_zombies; i++) { - lnet_net_unlock(cpt_locked); - - if (is_power_of_2(i)) { - CDEBUG(D_WARNING, - "Waiting for %d zombies on peer table\n", - ptable->pt_zombies); - } - set_current_state(TASK_UNINTERRUPTIBLE); - schedule_timeout(HZ >> 1); - lnet_net_lock(cpt_locked); - } -} - -static void -lnet_peer_table_del_rtrs_locked(struct lnet_ni *ni, - struct lnet_peer_table *ptable, - int cpt_locked) -{ - struct lnet_peer *lp; - struct lnet_peer *tmp; - lnet_nid_t lp_nid; - int i; - - for (i = 0; i < LNET_PEER_HASH_SIZE; i++) { - list_for_each_entry_safe(lp, tmp, &ptable->pt_hash[i], - lp_hashlist) { - if (ni != lp->lp_ni) - continue; - - if (!lp->lp_rtr_refcount) - continue; - - lp_nid = lp->lp_nid; - - lnet_net_unlock(cpt_locked); - lnet_del_route(LNET_NIDNET(LNET_NID_ANY), lp_nid); - lnet_net_lock(cpt_locked); - } - } -} - -void -lnet_peer_tables_cleanup(struct lnet_ni *ni) -{ - struct lnet_peer_table *ptable; - struct list_head deathrow; - struct lnet_peer *lp; - struct lnet_peer *temp; - int i; - - INIT_LIST_HEAD(&deathrow); - - LASSERT(the_lnet.ln_shutdown || ni); - /* - * If just deleting the peers for a NI, get rid of any routes these - * peers are gateways for. - */ - cfs_percpt_for_each(ptable, i, the_lnet.ln_peer_tables) { - lnet_net_lock(i); - lnet_peer_table_del_rtrs_locked(ni, ptable, i); - lnet_net_unlock(i); - } - - /* - * Start the process of moving the applicable peers to - * deathrow. - */ - cfs_percpt_for_each(ptable, i, the_lnet.ln_peer_tables) { - lnet_net_lock(i); - lnet_peer_table_cleanup_locked(ni, ptable); - lnet_net_unlock(i); - } - - /* Cleanup all entries on deathrow. 
*/ - cfs_percpt_for_each(ptable, i, the_lnet.ln_peer_tables) { - lnet_net_lock(i); - lnet_peer_table_deathrow_wait_locked(ptable, i); - list_splice_init(&ptable->pt_deathrow, &deathrow); - lnet_net_unlock(i); - } - - list_for_each_entry_safe(lp, temp, &deathrow, lp_hashlist) { - list_del(&lp->lp_hashlist); - kfree(lp); - } -} - -void -lnet_destroy_peer_locked(struct lnet_peer *lp) -{ - struct lnet_peer_table *ptable; - - LASSERT(!lp->lp_refcount); - LASSERT(!lp->lp_rtr_refcount); - LASSERT(list_empty(&lp->lp_txq)); - LASSERT(list_empty(&lp->lp_hashlist)); - LASSERT(!lp->lp_txqnob); - - ptable = the_lnet.ln_peer_tables[lp->lp_cpt]; - LASSERT(ptable->pt_number > 0); - ptable->pt_number--; - - lnet_ni_decref_locked(lp->lp_ni, lp->lp_cpt); - lp->lp_ni = NULL; - - list_add(&lp->lp_hashlist, &ptable->pt_deathrow); - LASSERT(ptable->pt_zombies > 0); - ptable->pt_zombies--; -} - -struct lnet_peer * -lnet_find_peer_locked(struct lnet_peer_table *ptable, lnet_nid_t nid) -{ - struct list_head *peers; - struct lnet_peer *lp; - - LASSERT(!the_lnet.ln_shutdown); - - peers = &ptable->pt_hash[lnet_nid2peerhash(nid)]; - list_for_each_entry(lp, peers, lp_hashlist) { - if (lp->lp_nid == nid) { - lnet_peer_addref_locked(lp); - return lp; - } - } - - return NULL; -} - -int -lnet_nid2peer_locked(struct lnet_peer **lpp, lnet_nid_t nid, int cpt) -{ - struct lnet_peer_table *ptable; - struct lnet_peer *lp = NULL; - struct lnet_peer *lp2; - int cpt2; - int rc = 0; - - *lpp = NULL; - if (the_lnet.ln_shutdown) /* it's shutting down */ - return -ESHUTDOWN; - - /* cpt can be LNET_LOCK_EX if it's called from router functions */ - cpt2 = cpt != LNET_LOCK_EX ? 
cpt : lnet_cpt_of_nid_locked(nid); - - ptable = the_lnet.ln_peer_tables[cpt2]; - lp = lnet_find_peer_locked(ptable, nid); - if (lp) { - *lpp = lp; - return 0; - } - - if (!list_empty(&ptable->pt_deathrow)) { - lp = list_entry(ptable->pt_deathrow.next, - struct lnet_peer, lp_hashlist); - list_del(&lp->lp_hashlist); - } - - /* - * take extra refcount in case another thread has shutdown LNet - * and destroyed locks and peer-table before I finish the allocation - */ - ptable->pt_number++; - lnet_net_unlock(cpt); - - if (lp) - memset(lp, 0, sizeof(*lp)); - else - lp = kzalloc_cpt(sizeof(*lp), GFP_NOFS, cpt2); - - if (!lp) { - rc = -ENOMEM; - lnet_net_lock(cpt); - goto out; - } - - INIT_LIST_HEAD(&lp->lp_txq); - INIT_LIST_HEAD(&lp->lp_rtrq); - INIT_LIST_HEAD(&lp->lp_routes); - - lp->lp_notify = 0; - lp->lp_notifylnd = 0; - lp->lp_notifying = 0; - lp->lp_alive_count = 0; - lp->lp_timestamp = 0; - lp->lp_alive = !lnet_peers_start_down(); /* 1 bit!! */ - lp->lp_last_alive = cfs_time_current(); /* assumes alive */ - lp->lp_last_query = 0; /* haven't asked NI yet */ - lp->lp_ping_timestamp = 0; - lp->lp_ping_feats = LNET_PING_FEAT_INVAL; - lp->lp_nid = nid; - lp->lp_cpt = cpt2; - lp->lp_refcount = 2; /* 1 for caller; 1 for hash */ - lp->lp_rtr_refcount = 0; - - lnet_net_lock(cpt); - - if (the_lnet.ln_shutdown) { - rc = -ESHUTDOWN; - goto out; - } - - lp2 = lnet_find_peer_locked(ptable, nid); - if (lp2) { - *lpp = lp2; - goto out; - } - - lp->lp_ni = lnet_net2ni_locked(LNET_NIDNET(nid), cpt2); - if (!lp->lp_ni) { - rc = -EHOSTUNREACH; - goto out; - } - - lp->lp_txcredits = lp->lp_ni->ni_peertxcredits; - lp->lp_mintxcredits = lp->lp_ni->ni_peertxcredits; - lp->lp_rtrcredits = lnet_peer_buffer_credits(lp->lp_ni); - lp->lp_minrtrcredits = lnet_peer_buffer_credits(lp->lp_ni); - - list_add_tail(&lp->lp_hashlist, - &ptable->pt_hash[lnet_nid2peerhash(nid)]); - ptable->pt_version++; - *lpp = lp; - - return 0; -out: - if (lp) - list_add(&lp->lp_hashlist, &ptable->pt_deathrow); - 
ptable->pt_number--; - return rc; -} - -void -lnet_debug_peer(lnet_nid_t nid) -{ - char *aliveness = "NA"; - struct lnet_peer *lp; - int rc; - int cpt; - - cpt = lnet_cpt_of_nid(nid); - lnet_net_lock(cpt); - - rc = lnet_nid2peer_locked(&lp, nid, cpt); - if (rc) { - lnet_net_unlock(cpt); - CDEBUG(D_WARNING, "No peer %s\n", libcfs_nid2str(nid)); - return; - } - - if (lnet_isrouter(lp) || lnet_peer_aliveness_enabled(lp)) - aliveness = lp->lp_alive ? "up" : "down"; - - CDEBUG(D_WARNING, "%-24s %4d %5s %5d %5d %5d %5d %5d %ld\n", - libcfs_nid2str(lp->lp_nid), lp->lp_refcount, - aliveness, lp->lp_ni->ni_peertxcredits, - lp->lp_rtrcredits, lp->lp_minrtrcredits, - lp->lp_txcredits, lp->lp_mintxcredits, lp->lp_txqnob); - - lnet_peer_decref_locked(lp); - - lnet_net_unlock(cpt); -} - -int -lnet_get_peer_info(__u32 peer_index, __u64 *nid, - char aliveness[LNET_MAX_STR_LEN], - __u32 *cpt_iter, __u32 *refcount, - __u32 *ni_peer_tx_credits, __u32 *peer_tx_credits, - __u32 *peer_rtr_credits, __u32 *peer_min_rtr_credits, - __u32 *peer_tx_qnob) -{ - struct lnet_peer_table *peer_table; - struct lnet_peer *lp; - bool found = false; - int lncpt, j; - - /* get the number of CPTs */ - lncpt = cfs_percpt_number(the_lnet.ln_peer_tables); - - /* - * if the cpt number to be examined is >= the number of cpts in - * the system then indicate that there are no more cpts to examin - */ - if (*cpt_iter >= lncpt) - return -ENOENT; - - /* get the current table */ - peer_table = the_lnet.ln_peer_tables[*cpt_iter]; - /* if the ptable is NULL then there are no more cpts to examine */ - if (!peer_table) - return -ENOENT; - - lnet_net_lock(*cpt_iter); - - for (j = 0; j < LNET_PEER_HASH_SIZE && !found; j++) { - struct list_head *peers = &peer_table->pt_hash[j]; - - list_for_each_entry(lp, peers, lp_hashlist) { - if (peer_index-- > 0) - continue; - - snprintf(aliveness, LNET_MAX_STR_LEN, "NA"); - if (lnet_isrouter(lp) || - lnet_peer_aliveness_enabled(lp)) - snprintf(aliveness, LNET_MAX_STR_LEN, - 
lp->lp_alive ? "up" : "down"); - - *nid = lp->lp_nid; - *refcount = lp->lp_refcount; - *ni_peer_tx_credits = lp->lp_ni->ni_peertxcredits; - *peer_tx_credits = lp->lp_txcredits; - *peer_rtr_credits = lp->lp_rtrcredits; - *peer_min_rtr_credits = lp->lp_mintxcredits; - *peer_tx_qnob = lp->lp_txqnob; - - found = true; - } - } - lnet_net_unlock(*cpt_iter); - - *cpt_iter = lncpt; - - return found ? 0 : -ENOENT; -} diff --git a/drivers/staging/lustre/lnet/lnet/router.c b/drivers/staging/lustre/lnet/lnet/router.c deleted file mode 100644 index a3c3f4959f46..000000000000 --- a/drivers/staging/lustre/lnet/lnet/router.c +++ /dev/null @@ -1,1800 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -/* - * Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved. - * - * Copyright (c) 2011, 2015, Intel Corporation. - * - * This file is part of Portals - * http://sourceforge.net/projects/sandiaportals/ - * - * Portals is free software; you can redistribute it and/or - * modify it under the terms of version 2 of the GNU General Public - * License as published by the Free Software Foundation. - * - * Portals is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. 
- * - */ - -#define DEBUG_SUBSYSTEM S_LNET - -#include <linux/completion.h> -#include <linux/lnet/lib-lnet.h> - -#define LNET_NRB_TINY_MIN 512 /* min value for each CPT */ -#define LNET_NRB_TINY (LNET_NRB_TINY_MIN * 4) -#define LNET_NRB_SMALL_MIN 4096 /* min value for each CPT */ -#define LNET_NRB_SMALL (LNET_NRB_SMALL_MIN * 4) -#define LNET_NRB_SMALL_PAGES 1 -#define LNET_NRB_LARGE_MIN 256 /* min value for each CPT */ -#define LNET_NRB_LARGE (LNET_NRB_LARGE_MIN * 4) -#define LNET_NRB_LARGE_PAGES ((LNET_MTU + PAGE_SIZE - 1) >> \ - PAGE_SHIFT) - -static char *forwarding = ""; -module_param(forwarding, charp, 0444); -MODULE_PARM_DESC(forwarding, "Explicitly enable/disable forwarding between networks"); - -static int tiny_router_buffers; -module_param(tiny_router_buffers, int, 0444); -MODULE_PARM_DESC(tiny_router_buffers, "# of 0 payload messages to buffer in the router"); -static int small_router_buffers; -module_param(small_router_buffers, int, 0444); -MODULE_PARM_DESC(small_router_buffers, "# of small (1 page) messages to buffer in the router"); -static int large_router_buffers; -module_param(large_router_buffers, int, 0444); -MODULE_PARM_DESC(large_router_buffers, "# of large messages to buffer in the router"); -static int peer_buffer_credits; -module_param(peer_buffer_credits, int, 0444); -MODULE_PARM_DESC(peer_buffer_credits, "# router buffer credits per peer"); - -static int auto_down = 1; -module_param(auto_down, int, 0444); -MODULE_PARM_DESC(auto_down, "Automatically mark peers down on comms error"); - -int -lnet_peer_buffer_credits(struct lnet_ni *ni) -{ - /* NI option overrides LNet default */ - if (ni->ni_peerrtrcredits > 0) - return ni->ni_peerrtrcredits; - if (peer_buffer_credits > 0) - return peer_buffer_credits; - - /* - * As an approximation, allow this peer the same number of router - * buffers as it is allowed outstanding sends - */ - return ni->ni_peertxcredits; -} - -/* forward ref's */ -static int lnet_router_checker(void *); - -static int 
check_routers_before_use; -module_param(check_routers_before_use, int, 0444); -MODULE_PARM_DESC(check_routers_before_use, "Assume routers are down and ping them before use"); - -int avoid_asym_router_failure = 1; -module_param(avoid_asym_router_failure, int, 0644); -MODULE_PARM_DESC(avoid_asym_router_failure, "Avoid asymmetrical router failures (0 to disable)"); - -static int dead_router_check_interval = 60; -module_param(dead_router_check_interval, int, 0644); -MODULE_PARM_DESC(dead_router_check_interval, "Seconds between dead router health checks (<= 0 to disable)"); - -static int live_router_check_interval = 60; -module_param(live_router_check_interval, int, 0644); -MODULE_PARM_DESC(live_router_check_interval, "Seconds between live router health checks (<= 0 to disable)"); - -static int router_ping_timeout = 50; -module_param(router_ping_timeout, int, 0644); -MODULE_PARM_DESC(router_ping_timeout, "Seconds to wait for the reply to a router health query"); - -int -lnet_peers_start_down(void) -{ - return check_routers_before_use; -} - -void -lnet_notify_locked(struct lnet_peer *lp, int notifylnd, int alive, - unsigned long when) -{ - if (time_before(when, lp->lp_timestamp)) { /* out of date information */ - CDEBUG(D_NET, "Out of date\n"); - return; - } - - lp->lp_timestamp = when; /* update timestamp */ - lp->lp_ping_deadline = 0; /* disable ping timeout */ - - if (lp->lp_alive_count && /* got old news */ - (!lp->lp_alive) == (!alive)) { /* new date for old news */ - CDEBUG(D_NET, "Old news\n"); - return; - } - - /* Flag that notification is outstanding */ - - lp->lp_alive_count++; - lp->lp_alive = !(!alive); /* 1 bit! 
*/ - lp->lp_notify = 1; - lp->lp_notifylnd |= notifylnd; - if (lp->lp_alive) - lp->lp_ping_feats = LNET_PING_FEAT_INVAL; /* reset */ - - CDEBUG(D_NET, "set %s %d\n", libcfs_nid2str(lp->lp_nid), alive); -} - -static void -lnet_ni_notify_locked(struct lnet_ni *ni, struct lnet_peer *lp) -{ - int alive; - int notifylnd; - - /* - * Notify only in 1 thread at any time to ensure ordered notification. - * NB individual events can be missed; the only guarantee is that you - * always get the most recent news - */ - if (lp->lp_notifying || !ni) - return; - - lp->lp_notifying = 1; - - while (lp->lp_notify) { - alive = lp->lp_alive; - notifylnd = lp->lp_notifylnd; - - lp->lp_notifylnd = 0; - lp->lp_notify = 0; - - if (notifylnd && ni->ni_lnd->lnd_notify) { - lnet_net_unlock(lp->lp_cpt); - - /* - * A new notification could happen now; I'll handle it - * when control returns to me - */ - ni->ni_lnd->lnd_notify(ni, lp->lp_nid, alive); - - lnet_net_lock(lp->lp_cpt); - } - } - - lp->lp_notifying = 0; -} - -static void -lnet_rtr_addref_locked(struct lnet_peer *lp) -{ - LASSERT(lp->lp_refcount > 0); - LASSERT(lp->lp_rtr_refcount >= 0); - - /* lnet_net_lock must be exclusively locked */ - lp->lp_rtr_refcount++; - if (lp->lp_rtr_refcount == 1) { - struct list_head *pos; - - /* a simple insertion sort */ - list_for_each_prev(pos, &the_lnet.ln_routers) { - struct lnet_peer *rtr; - - rtr = list_entry(pos, struct lnet_peer, lp_rtr_list); - if (rtr->lp_nid < lp->lp_nid) - break; - } - - list_add(&lp->lp_rtr_list, pos); - /* addref for the_lnet.ln_routers */ - lnet_peer_addref_locked(lp); - the_lnet.ln_routers_version++; - } -} - -static void -lnet_rtr_decref_locked(struct lnet_peer *lp) -{ - LASSERT(lp->lp_refcount > 0); - LASSERT(lp->lp_rtr_refcount > 0); - - /* lnet_net_lock must be exclusively locked */ - lp->lp_rtr_refcount--; - if (!lp->lp_rtr_refcount) { - LASSERT(list_empty(&lp->lp_routes)); - - if (lp->lp_rcd) { - list_add(&lp->lp_rcd->rcd_list, - &the_lnet.ln_rcd_deathrow); - 
lp->lp_rcd = NULL; - } - - list_del(&lp->lp_rtr_list); - /* decref for the_lnet.ln_routers */ - lnet_peer_decref_locked(lp); - the_lnet.ln_routers_version++; - } -} - -struct lnet_remotenet * -lnet_find_net_locked(__u32 net) -{ - struct lnet_remotenet *rnet; - struct list_head *rn_list; - - LASSERT(!the_lnet.ln_shutdown); - - rn_list = lnet_net2rnethash(net); - list_for_each_entry(rnet, rn_list, lrn_list) { - if (rnet->lrn_net == net) - return rnet; - } - return NULL; -} - -static void lnet_shuffle_seed(void) -{ - static int seeded; - struct lnet_ni *ni; - - if (seeded) - return; - - /* - * Nodes with small feet have little entropy - * the NID for this node gives the most entropy in the low bits - */ - list_for_each_entry(ni, &the_lnet.ln_nis, ni_list) { - __u32 lnd_type, seed; - - lnd_type = LNET_NETTYP(LNET_NIDNET(ni->ni_nid)); - if (lnd_type != LOLND) { - seed = (LNET_NIDADDR(ni->ni_nid) | lnd_type); - add_device_randomness(&seed, sizeof(seed)); - } - } - - seeded = 1; -} - -/* NB expects LNET_LOCK held */ -static void -lnet_add_route_to_rnet(struct lnet_remotenet *rnet, struct lnet_route *route) -{ - unsigned int len = 0; - unsigned int offset = 0; - struct list_head *e; - - lnet_shuffle_seed(); - - list_for_each(e, &rnet->lrn_routes) { - len++; - } - - /* len+1 positions to add a new entry */ - offset = prandom_u32_max(len + 1); - list_for_each(e, &rnet->lrn_routes) { - if (!offset) - break; - offset--; - } - list_add(&route->lr_list, e); - list_add(&route->lr_gwlist, &route->lr_gateway->lp_routes); - - the_lnet.ln_remote_nets_version++; - lnet_rtr_addref_locked(route->lr_gateway); -} - -int -lnet_add_route(__u32 net, __u32 hops, lnet_nid_t gateway, - unsigned int priority) -{ - struct list_head *e; - struct lnet_remotenet *rnet; - struct lnet_remotenet *rnet2; - struct lnet_route *route; - struct lnet_ni *ni; - int add_route; - int rc; - - CDEBUG(D_NET, "Add route: net %s hops %d priority %u gw %s\n", - libcfs_net2str(net), hops, priority, 
libcfs_nid2str(gateway)); - - if (gateway == LNET_NID_ANY || - LNET_NETTYP(LNET_NIDNET(gateway)) == LOLND || - net == LNET_NIDNET(LNET_NID_ANY) || - LNET_NETTYP(net) == LOLND || - LNET_NIDNET(gateway) == net || - (hops != LNET_UNDEFINED_HOPS && (hops < 1 || hops > 255))) - return -EINVAL; - - if (lnet_islocalnet(net)) /* it's a local network */ - return -EEXIST; - - /* Assume net, route, all new */ - route = kzalloc(sizeof(*route), GFP_NOFS); - rnet = kzalloc(sizeof(*rnet), GFP_NOFS); - if (!route || !rnet) { - CERROR("Out of memory creating route %s %d %s\n", - libcfs_net2str(net), hops, libcfs_nid2str(gateway)); - kfree(route); - kfree(rnet); - return -ENOMEM; - } - - INIT_LIST_HEAD(&rnet->lrn_routes); - rnet->lrn_net = net; - route->lr_hops = hops; - route->lr_net = net; - route->lr_priority = priority; - - lnet_net_lock(LNET_LOCK_EX); - - rc = lnet_nid2peer_locked(&route->lr_gateway, gateway, LNET_LOCK_EX); - if (rc) { - lnet_net_unlock(LNET_LOCK_EX); - - kfree(route); - kfree(rnet); - - if (rc == -EHOSTUNREACH) /* gateway is not on a local net */ - return rc; /* ignore the route entry */ - CERROR("Error %d creating route %s %d %s\n", rc, - libcfs_net2str(net), hops, - libcfs_nid2str(gateway)); - return rc; - } - - LASSERT(!the_lnet.ln_shutdown); - - rnet2 = lnet_find_net_locked(net); - if (!rnet2) { - /* new network */ - list_add_tail(&rnet->lrn_list, lnet_net2rnethash(net)); - rnet2 = rnet; - } - - /* Search for a duplicate route (it's a NOOP if it is) */ - add_route = 1; - list_for_each(e, &rnet2->lrn_routes) { - struct lnet_route *route2; - - route2 = list_entry(e, struct lnet_route, lr_list); - if (route2->lr_gateway == route->lr_gateway) { - add_route = 0; - break; - } - - /* our lookups must be true */ - LASSERT(route2->lr_gateway->lp_nid != gateway); - } - - if (add_route) { - lnet_peer_addref_locked(route->lr_gateway); /* +1 for notify */ - lnet_add_route_to_rnet(rnet2, route); - - ni = route->lr_gateway->lp_ni; - lnet_net_unlock(LNET_LOCK_EX); - - /* 
XXX Assume alive */ - if (ni->ni_lnd->lnd_notify) - ni->ni_lnd->lnd_notify(ni, gateway, 1); - - lnet_net_lock(LNET_LOCK_EX); - } - - /* -1 for notify or !add_route */ - lnet_peer_decref_locked(route->lr_gateway); - lnet_net_unlock(LNET_LOCK_EX); - rc = 0; - - if (!add_route) { - rc = -EEXIST; - kfree(route); - } - - if (rnet != rnet2) - kfree(rnet); - - /* indicate to startup the router checker if configured */ - wake_up(&the_lnet.ln_rc_waitq); - - return rc; -} - -int -lnet_check_routes(void) -{ - struct lnet_remotenet *rnet; - struct lnet_route *route; - struct lnet_route *route2; - struct list_head *e1; - struct list_head *e2; - int cpt; - struct list_head *rn_list; - int i; - - cpt = lnet_net_lock_current(); - - for (i = 0; i < LNET_REMOTE_NETS_HASH_SIZE; i++) { - rn_list = &the_lnet.ln_remote_nets_hash[i]; - list_for_each(e1, rn_list) { - rnet = list_entry(e1, struct lnet_remotenet, lrn_list); - - route2 = NULL; - list_for_each(e2, &rnet->lrn_routes) { - lnet_nid_t nid1; - lnet_nid_t nid2; - int net; - - route = list_entry(e2, struct lnet_route, lr_list); - - if (!route2) { - route2 = route; - continue; - } - - if (route->lr_gateway->lp_ni == - route2->lr_gateway->lp_ni) - continue; - - nid1 = route->lr_gateway->lp_nid; - nid2 = route2->lr_gateway->lp_nid; - net = rnet->lrn_net; - - lnet_net_unlock(cpt); - - CERROR("Routes to %s via %s and %s not supported\n", - libcfs_net2str(net), - libcfs_nid2str(nid1), - libcfs_nid2str(nid2)); - return -EINVAL; - } - } - } - - lnet_net_unlock(cpt); - return 0; -} - -int -lnet_del_route(__u32 net, lnet_nid_t gw_nid) -{ - struct lnet_peer *gateway; - struct lnet_remotenet *rnet; - struct lnet_route *route; - struct list_head *e1; - struct list_head *e2; - int rc = -ENOENT; - struct list_head *rn_list; - int idx = 0; - - CDEBUG(D_NET, "Del route: net %s : gw %s\n", - libcfs_net2str(net), libcfs_nid2str(gw_nid)); - - /* - * NB Caller may specify either all routes via the given gateway - * or a specific route entry actual NIDs) 
- */ - lnet_net_lock(LNET_LOCK_EX); - if (net == LNET_NIDNET(LNET_NID_ANY)) - rn_list = &the_lnet.ln_remote_nets_hash[0]; - else - rn_list = lnet_net2rnethash(net); - - again: - list_for_each(e1, rn_list) { - rnet = list_entry(e1, struct lnet_remotenet, lrn_list); - - if (!(net == LNET_NIDNET(LNET_NID_ANY) || - net == rnet->lrn_net)) - continue; - - list_for_each(e2, &rnet->lrn_routes) { - route = list_entry(e2, struct lnet_route, lr_list); - - gateway = route->lr_gateway; - if (!(gw_nid == LNET_NID_ANY || - gw_nid == gateway->lp_nid)) - continue; - - list_del(&route->lr_list); - list_del(&route->lr_gwlist); - the_lnet.ln_remote_nets_version++; - - if (list_empty(&rnet->lrn_routes)) - list_del(&rnet->lrn_list); - else - rnet = NULL; - - lnet_rtr_decref_locked(gateway); - lnet_peer_decref_locked(gateway); - - lnet_net_unlock(LNET_LOCK_EX); - - kfree(route); - kfree(rnet); - - rc = 0; - lnet_net_lock(LNET_LOCK_EX); - goto again; - } - } - - if (net == LNET_NIDNET(LNET_NID_ANY) && - ++idx < LNET_REMOTE_NETS_HASH_SIZE) { - rn_list = &the_lnet.ln_remote_nets_hash[idx]; - goto again; - } - lnet_net_unlock(LNET_LOCK_EX); - - return rc; -} - -void -lnet_destroy_routes(void) -{ - lnet_del_route(LNET_NIDNET(LNET_NID_ANY), LNET_NID_ANY); -} - -int lnet_get_rtr_pool_cfg(int idx, struct lnet_ioctl_pool_cfg *pool_cfg) -{ - int i, rc = -ENOENT, j; - - if (!the_lnet.ln_rtrpools) - return rc; - - for (i = 0; i < LNET_NRBPOOLS; i++) { - struct lnet_rtrbufpool *rbp; - - lnet_net_lock(LNET_LOCK_EX); - cfs_percpt_for_each(rbp, j, the_lnet.ln_rtrpools) { - if (i++ != idx) - continue; - - pool_cfg->pl_pools[i].pl_npages = rbp[i].rbp_npages; - pool_cfg->pl_pools[i].pl_nbuffers = rbp[i].rbp_nbuffers; - pool_cfg->pl_pools[i].pl_credits = rbp[i].rbp_credits; - pool_cfg->pl_pools[i].pl_mincredits = rbp[i].rbp_mincredits; - rc = 0; - break; - } - lnet_net_unlock(LNET_LOCK_EX); - } - - lnet_net_lock(LNET_LOCK_EX); - pool_cfg->pl_routing = the_lnet.ln_routing; - lnet_net_unlock(LNET_LOCK_EX); - 
- return rc; -} - -int -lnet_get_route(int idx, __u32 *net, __u32 *hops, - lnet_nid_t *gateway, __u32 *alive, __u32 *priority) -{ - struct list_head *e1; - struct list_head *e2; - struct lnet_remotenet *rnet; - struct lnet_route *route; - int cpt; - int i; - struct list_head *rn_list; - - cpt = lnet_net_lock_current(); - - for (i = 0; i < LNET_REMOTE_NETS_HASH_SIZE; i++) { - rn_list = &the_lnet.ln_remote_nets_hash[i]; - list_for_each(e1, rn_list) { - rnet = list_entry(e1, struct lnet_remotenet, lrn_list); - - list_for_each(e2, &rnet->lrn_routes) { - route = list_entry(e2, struct lnet_route, - lr_list); - - if (!idx--) { - *net = rnet->lrn_net; - *hops = route->lr_hops; - *priority = route->lr_priority; - *gateway = route->lr_gateway->lp_nid; - *alive = lnet_is_route_alive(route); - lnet_net_unlock(cpt); - return 0; - } - } - } - } - - lnet_net_unlock(cpt); - return -ENOENT; -} - -void -lnet_swap_pinginfo(struct lnet_ping_info *info) -{ - int i; - struct lnet_ni_status *stat; - - __swab32s(&info->pi_magic); - __swab32s(&info->pi_features); - __swab32s(&info->pi_pid); - __swab32s(&info->pi_nnis); - for (i = 0; i < info->pi_nnis && i < LNET_MAX_RTR_NIS; i++) { - stat = &info->pi_ni[i]; - __swab64s(&stat->ns_nid); - __swab32s(&stat->ns_status); - } -} - -/** - * parse router-checker pinginfo, record number of down NIs for remote - * networks on that router. - */ -static void -lnet_parse_rc_info(struct lnet_rc_data *rcd) -{ - struct lnet_ping_info *info = rcd->rcd_pinginfo; - struct lnet_peer *gw = rcd->rcd_gateway; - struct lnet_route *rte; - - if (!gw->lp_alive) - return; - - if (info->pi_magic == __swab32(LNET_PROTO_PING_MAGIC)) - lnet_swap_pinginfo(info); - - /* NB always racing with network! 
*/ - if (info->pi_magic != LNET_PROTO_PING_MAGIC) { - CDEBUG(D_NET, "%s: Unexpected magic %08x\n", - libcfs_nid2str(gw->lp_nid), info->pi_magic); - gw->lp_ping_feats = LNET_PING_FEAT_INVAL; - return; - } - - gw->lp_ping_feats = info->pi_features; - if (!(gw->lp_ping_feats & LNET_PING_FEAT_MASK)) { - CDEBUG(D_NET, "%s: Unexpected features 0x%x\n", - libcfs_nid2str(gw->lp_nid), gw->lp_ping_feats); - return; /* nothing I can understand */ - } - - if (!(gw->lp_ping_feats & LNET_PING_FEAT_NI_STATUS)) - return; /* can't carry NI status info */ - - list_for_each_entry(rte, &gw->lp_routes, lr_gwlist) { - int down = 0; - int up = 0; - int i; - - if (gw->lp_ping_feats & LNET_PING_FEAT_RTE_DISABLED) { - rte->lr_downis = 1; - continue; - } - - for (i = 0; i < info->pi_nnis && i < LNET_MAX_RTR_NIS; i++) { - struct lnet_ni_status *stat = &info->pi_ni[i]; - lnet_nid_t nid = stat->ns_nid; - - if (nid == LNET_NID_ANY) { - CDEBUG(D_NET, "%s: unexpected LNET_NID_ANY\n", - libcfs_nid2str(gw->lp_nid)); - gw->lp_ping_feats = LNET_PING_FEAT_INVAL; - return; - } - - if (LNET_NETTYP(LNET_NIDNET(nid)) == LOLND) - continue; - - if (stat->ns_status == LNET_NI_STATUS_DOWN) { - down++; - continue; - } - - if (stat->ns_status == LNET_NI_STATUS_UP) { - if (LNET_NIDNET(nid) == rte->lr_net) { - up = 1; - break; - } - continue; - } - - CDEBUG(D_NET, "%s: Unexpected status 0x%x\n", - libcfs_nid2str(gw->lp_nid), stat->ns_status); - gw->lp_ping_feats = LNET_PING_FEAT_INVAL; - return; - } - - if (up) { /* ignore downed NIs if NI for dest network is up */ - rte->lr_downis = 0; - continue; - } - /** - * if @down is zero and this route is single-hop, it means - * we can't find NI for target network - */ - if (!down && rte->lr_hops == 1) - down = 1; - - rte->lr_downis = down; - } -} - -static void -lnet_router_checker_event(struct lnet_event *event) -{ - struct lnet_rc_data *rcd = event->md.user_ptr; - struct lnet_peer *lp; - - LASSERT(rcd); - - if (event->unlinked) { - 
LNetInvalidateMDHandle(&rcd->rcd_mdh); - return; - } - - LASSERT(event->type == LNET_EVENT_SEND || - event->type == LNET_EVENT_REPLY); - - lp = rcd->rcd_gateway; - LASSERT(lp); - - /* - * NB: it's called with holding lnet_res_lock, we have a few - * places need to hold both locks at the same time, please take - * care of lock ordering - */ - lnet_net_lock(lp->lp_cpt); - if (!lnet_isrouter(lp) || lp->lp_rcd != rcd) { - /* ignore if no longer a router or rcd is replaced */ - goto out; - } - - if (event->type == LNET_EVENT_SEND) { - lp->lp_ping_notsent = 0; - if (!event->status) - goto out; - } - - /* LNET_EVENT_REPLY */ - /* - * A successful REPLY means the router is up. If _any_ comms - * to the router fail I assume it's down (this will happen if - * we ping alive routers to try to detect router death before - * apps get burned). - */ - lnet_notify_locked(lp, 1, !event->status, cfs_time_current()); - - /* - * The router checker will wake up very shortly and do the - * actual notification. - * XXX If 'lp' stops being a router before then, it will still - * have the notification pending!!! 
- */ - if (avoid_asym_router_failure && !event->status) - lnet_parse_rc_info(rcd); - - out: - lnet_net_unlock(lp->lp_cpt); -} - -static void -lnet_wait_known_routerstate(void) -{ - struct lnet_peer *rtr; - struct list_head *entry; - int all_known; - - LASSERT(the_lnet.ln_rc_state == LNET_RC_STATE_RUNNING); - - for (;;) { - int cpt = lnet_net_lock_current(); - - all_known = 1; - list_for_each(entry, &the_lnet.ln_routers) { - rtr = list_entry(entry, struct lnet_peer, lp_rtr_list); - - if (!rtr->lp_alive_count) { - all_known = 0; - break; - } - } - - lnet_net_unlock(cpt); - - if (all_known) - return; - - set_current_state(TASK_UNINTERRUPTIBLE); - schedule_timeout(HZ); - } -} - -void -lnet_router_ni_update_locked(struct lnet_peer *gw, __u32 net) -{ - struct lnet_route *rte; - - if ((gw->lp_ping_feats & LNET_PING_FEAT_NI_STATUS)) { - list_for_each_entry(rte, &gw->lp_routes, lr_gwlist) { - if (rte->lr_net == net) { - rte->lr_downis = 0; - break; - } - } - } -} - -static void -lnet_update_ni_status_locked(void) -{ - struct lnet_ni *ni; - time64_t now; - int timeout; - - LASSERT(the_lnet.ln_routing); - - timeout = router_ping_timeout + - max(live_router_check_interval, dead_router_check_interval); - - now = ktime_get_real_seconds(); - list_for_each_entry(ni, &the_lnet.ln_nis, ni_list) { - if (ni->ni_lnd->lnd_type == LOLND) - continue; - - if (now < ni->ni_last_alive + timeout) - continue; - - lnet_ni_lock(ni); - /* re-check with lock */ - if (now < ni->ni_last_alive + timeout) { - lnet_ni_unlock(ni); - continue; - } - - LASSERT(ni->ni_status); - - if (ni->ni_status->ns_status != LNET_NI_STATUS_DOWN) { - CDEBUG(D_NET, "NI(%s:%d) status changed to down\n", - libcfs_nid2str(ni->ni_nid), timeout); - /* - * NB: so far, this is the only place to set - * NI status to "down" - */ - ni->ni_status->ns_status = LNET_NI_STATUS_DOWN; - } - lnet_ni_unlock(ni); - } -} - -static void -lnet_destroy_rc_data(struct lnet_rc_data *rcd) -{ - LASSERT(list_empty(&rcd->rcd_list)); - /* detached 
from network */ - LASSERT(LNetMDHandleIsInvalid(rcd->rcd_mdh)); - - if (rcd->rcd_gateway) { - int cpt = rcd->rcd_gateway->lp_cpt; - - lnet_net_lock(cpt); - lnet_peer_decref_locked(rcd->rcd_gateway); - lnet_net_unlock(cpt); - } - - kfree(rcd->rcd_pinginfo); - - kfree(rcd); -} - -static struct lnet_rc_data * -lnet_create_rc_data_locked(struct lnet_peer *gateway) -{ - struct lnet_rc_data *rcd = NULL; - struct lnet_ping_info *pi; - struct lnet_md md; - int rc; - int i; - - lnet_net_unlock(gateway->lp_cpt); - - rcd = kzalloc(sizeof(*rcd), GFP_NOFS); - if (!rcd) - goto out; - - LNetInvalidateMDHandle(&rcd->rcd_mdh); - INIT_LIST_HEAD(&rcd->rcd_list); - - pi = kzalloc(LNET_PINGINFO_SIZE, GFP_NOFS); - if (!pi) - goto out; - - for (i = 0; i < LNET_MAX_RTR_NIS; i++) { - pi->pi_ni[i].ns_nid = LNET_NID_ANY; - pi->pi_ni[i].ns_status = LNET_NI_STATUS_INVALID; - } - rcd->rcd_pinginfo = pi; - - md.start = pi; - md.user_ptr = rcd; - md.length = LNET_PINGINFO_SIZE; - md.threshold = LNET_MD_THRESH_INF; - md.options = LNET_MD_TRUNCATE; - md.eq_handle = the_lnet.ln_rc_eqh; - - LASSERT(!LNetEQHandleIsInvalid(the_lnet.ln_rc_eqh)); - rc = LNetMDBind(md, LNET_UNLINK, &rcd->rcd_mdh); - if (rc < 0) { - CERROR("Can't bind MD: %d\n", rc); - goto out; - } - LASSERT(!rc); - - lnet_net_lock(gateway->lp_cpt); - /* router table changed or someone has created rcd for this gateway */ - if (!lnet_isrouter(gateway) || gateway->lp_rcd) { - lnet_net_unlock(gateway->lp_cpt); - goto out; - } - - lnet_peer_addref_locked(gateway); - rcd->rcd_gateway = gateway; - gateway->lp_rcd = rcd; - gateway->lp_ping_notsent = 0; - - return rcd; - - out: - if (rcd) { - if (!LNetMDHandleIsInvalid(rcd->rcd_mdh)) { - rc = LNetMDUnlink(rcd->rcd_mdh); - LASSERT(!rc); - } - lnet_destroy_rc_data(rcd); - } - - lnet_net_lock(gateway->lp_cpt); - return gateway->lp_rcd; -} - -static int -lnet_router_check_interval(struct lnet_peer *rtr) -{ - int secs; - - secs = rtr->lp_alive ? 
live_router_check_interval : - dead_router_check_interval; - if (secs < 0) - secs = 0; - - return secs; -} - -static void -lnet_ping_router_locked(struct lnet_peer *rtr) -{ - struct lnet_rc_data *rcd = NULL; - unsigned long now = cfs_time_current(); - int secs; - - lnet_peer_addref_locked(rtr); - - if (rtr->lp_ping_deadline && /* ping timed out? */ - cfs_time_after(now, rtr->lp_ping_deadline)) - lnet_notify_locked(rtr, 1, 0, now); - - /* Run any outstanding notifications */ - lnet_ni_notify_locked(rtr->lp_ni, rtr); - - if (!lnet_isrouter(rtr) || - the_lnet.ln_rc_state != LNET_RC_STATE_RUNNING) { - /* router table changed or router checker is shutting down */ - lnet_peer_decref_locked(rtr); - return; - } - - rcd = rtr->lp_rcd ? - rtr->lp_rcd : lnet_create_rc_data_locked(rtr); - - if (!rcd) - return; - - secs = lnet_router_check_interval(rtr); - - CDEBUG(D_NET, - "rtr %s %d: deadline %lu ping_notsent %d alive %d alive_count %d lp_ping_timestamp %lu\n", - libcfs_nid2str(rtr->lp_nid), secs, - rtr->lp_ping_deadline, rtr->lp_ping_notsent, - rtr->lp_alive, rtr->lp_alive_count, rtr->lp_ping_timestamp); - - if (secs && !rtr->lp_ping_notsent && - cfs_time_after(now, cfs_time_add(rtr->lp_ping_timestamp, - secs * HZ))) { - int rc; - struct lnet_process_id id; - struct lnet_handle_md mdh; - - id.nid = rtr->lp_nid; - id.pid = LNET_PID_LUSTRE; - CDEBUG(D_NET, "Check: %s\n", libcfs_id2str(id)); - - rtr->lp_ping_notsent = 1; - rtr->lp_ping_timestamp = now; - - mdh = rcd->rcd_mdh; - - if (!rtr->lp_ping_deadline) { - rtr->lp_ping_deadline = - cfs_time_shift(router_ping_timeout); - } - - lnet_net_unlock(rtr->lp_cpt); - - rc = LNetGet(LNET_NID_ANY, mdh, id, LNET_RESERVED_PORTAL, - LNET_PROTO_PING_MATCHBITS, 0); - - lnet_net_lock(rtr->lp_cpt); - if (rc) - rtr->lp_ping_notsent = 0; /* no event pending */ - } - - lnet_peer_decref_locked(rtr); -} - -int -lnet_router_checker_start(void) -{ - struct task_struct *task; - int rc; - int eqsz = 0; - - LASSERT(the_lnet.ln_rc_state == 
LNET_RC_STATE_SHUTDOWN); - - if (check_routers_before_use && - dead_router_check_interval <= 0) { - LCONSOLE_ERROR_MSG(0x10a, "'dead_router_check_interval' must be set if 'check_routers_before_use' is set\n"); - return -EINVAL; - } - - init_completion(&the_lnet.ln_rc_signal); - - rc = LNetEQAlloc(0, lnet_router_checker_event, &the_lnet.ln_rc_eqh); - if (rc) { - CERROR("Can't allocate EQ(%d): %d\n", eqsz, rc); - return -ENOMEM; - } - - the_lnet.ln_rc_state = LNET_RC_STATE_RUNNING; - task = kthread_run(lnet_router_checker, NULL, "router_checker"); - if (IS_ERR(task)) { - rc = PTR_ERR(task); - CERROR("Can't start router checker thread: %d\n", rc); - /* block until event callback signals exit */ - wait_for_completion(&the_lnet.ln_rc_signal); - rc = LNetEQFree(the_lnet.ln_rc_eqh); - LASSERT(!rc); - the_lnet.ln_rc_state = LNET_RC_STATE_SHUTDOWN; - return -ENOMEM; - } - - if (check_routers_before_use) { - /* - * Note that a helpful side-effect of pinging all known routers - * at startup is that it makes them drop stale connections they - * may have to a previous instance of me. 
- */ - lnet_wait_known_routerstate(); - } - - return 0; -} - -void -lnet_router_checker_stop(void) -{ - int rc; - - if (the_lnet.ln_rc_state == LNET_RC_STATE_SHUTDOWN) - return; - - LASSERT(the_lnet.ln_rc_state == LNET_RC_STATE_RUNNING); - the_lnet.ln_rc_state = LNET_RC_STATE_STOPPING; - /* wakeup the RC thread if it's sleeping */ - wake_up(&the_lnet.ln_rc_waitq); - - /* block until event callback signals exit */ - wait_for_completion(&the_lnet.ln_rc_signal); - LASSERT(the_lnet.ln_rc_state == LNET_RC_STATE_SHUTDOWN); - - rc = LNetEQFree(the_lnet.ln_rc_eqh); - LASSERT(!rc); -} - -static void -lnet_prune_rc_data(int wait_unlink) -{ - struct lnet_rc_data *rcd; - struct lnet_rc_data *tmp; - struct lnet_peer *lp; - struct list_head head; - int i = 2; - - if (likely(the_lnet.ln_rc_state == LNET_RC_STATE_RUNNING && - list_empty(&the_lnet.ln_rcd_deathrow) && - list_empty(&the_lnet.ln_rcd_zombie))) - return; - - INIT_LIST_HEAD(&head); - - lnet_net_lock(LNET_LOCK_EX); - - if (the_lnet.ln_rc_state != LNET_RC_STATE_RUNNING) { - /* router checker is stopping, prune all */ - list_for_each_entry(lp, &the_lnet.ln_routers, - lp_rtr_list) { - if (!lp->lp_rcd) - continue; - - LASSERT(list_empty(&lp->lp_rcd->rcd_list)); - list_add(&lp->lp_rcd->rcd_list, - &the_lnet.ln_rcd_deathrow); - lp->lp_rcd = NULL; - } - } - - /* unlink all RCDs on deathrow list */ - list_splice_init(&the_lnet.ln_rcd_deathrow, &head); - - if (!list_empty(&head)) { - lnet_net_unlock(LNET_LOCK_EX); - - list_for_each_entry(rcd, &head, rcd_list) - LNetMDUnlink(rcd->rcd_mdh); - - lnet_net_lock(LNET_LOCK_EX); - } - - list_splice_init(&head, &the_lnet.ln_rcd_zombie); - - /* release all zombie RCDs */ - while (!list_empty(&the_lnet.ln_rcd_zombie)) { - list_for_each_entry_safe(rcd, tmp, &the_lnet.ln_rcd_zombie, - rcd_list) { - if (LNetMDHandleIsInvalid(rcd->rcd_mdh)) - list_move(&rcd->rcd_list, &head); - } - - wait_unlink = wait_unlink && - !list_empty(&the_lnet.ln_rcd_zombie); - - lnet_net_unlock(LNET_LOCK_EX); - - while 
(!list_empty(&head)) { - rcd = list_entry(head.next, - struct lnet_rc_data, rcd_list); - list_del_init(&rcd->rcd_list); - lnet_destroy_rc_data(rcd); - } - - if (!wait_unlink) - return; - - i++; - CDEBUG(((i & (-i)) == i) ? D_WARNING : D_NET, - "Waiting for rc buffers to unlink\n"); - set_current_state(TASK_UNINTERRUPTIBLE); - schedule_timeout(HZ / 4); - - lnet_net_lock(LNET_LOCK_EX); - } - - lnet_net_unlock(LNET_LOCK_EX); -} - -/* - * This function is called to check if the RC should block indefinitely. - * It's called from lnet_router_checker() as well as being passed to - * wait_event_interruptible() to avoid the lost wake_up problem. - * - * When it's called from wait_event_interruptible() it is necessary to - * also not sleep if the rc state is not running to avoid a deadlock - * when the system is shutting down - */ -static inline bool -lnet_router_checker_active(void) -{ - if (the_lnet.ln_rc_state != LNET_RC_STATE_RUNNING) - return true; - - /* - * Router Checker thread needs to run when routing is enabled in - * order to call lnet_update_ni_status_locked() - */ - if (the_lnet.ln_routing) - return true; - - return !list_empty(&the_lnet.ln_routers) && - (live_router_check_interval > 0 || - dead_router_check_interval > 0); -} - -static int -lnet_router_checker(void *arg) -{ - struct lnet_peer *rtr; - struct list_head *entry; - - while (the_lnet.ln_rc_state == LNET_RC_STATE_RUNNING) { - __u64 version; - int cpt; - int cpt2; - - cpt = lnet_net_lock_current(); -rescan: - version = the_lnet.ln_routers_version; - - list_for_each(entry, &the_lnet.ln_routers) { - rtr = list_entry(entry, struct lnet_peer, lp_rtr_list); - - cpt2 = lnet_cpt_of_nid_locked(rtr->lp_nid); - if (cpt != cpt2) { - lnet_net_unlock(cpt); - cpt = cpt2; - lnet_net_lock(cpt); - /* the routers list has changed */ - if (version != the_lnet.ln_routers_version) - goto rescan; - } - - lnet_ping_router_locked(rtr); - - /* NB dropped lock */ - if (version != the_lnet.ln_routers_version) { - /* the routers 
list has changed */ - goto rescan; - } - } - - if (the_lnet.ln_routing) - lnet_update_ni_status_locked(); - - lnet_net_unlock(cpt); - - lnet_prune_rc_data(0); /* don't wait for UNLINK */ - - /* - * Call schedule_timeout() here always adds 1 to load average - * because kernel counts # active tasks as nr_running - * + nr_uninterruptible. - */ - /* - * if there are any routes then wakeup every second. If - * there are no routes then sleep indefinitely until woken - * up by a user adding a route - */ - if (!lnet_router_checker_active()) - wait_event_interruptible(the_lnet.ln_rc_waitq, - lnet_router_checker_active()); - else - wait_event_interruptible_timeout(the_lnet.ln_rc_waitq, - false, - HZ); - } - - lnet_prune_rc_data(1); /* wait for UNLINK */ - - the_lnet.ln_rc_state = LNET_RC_STATE_SHUTDOWN; - complete(&the_lnet.ln_rc_signal); - /* The unlink event callback will signal final completion */ - return 0; -} - -void -lnet_destroy_rtrbuf(struct lnet_rtrbuf *rb, int npages) -{ - while (--npages >= 0) - __free_page(rb->rb_kiov[npages].bv_page); - - kfree(rb); -} - -static struct lnet_rtrbuf * -lnet_new_rtrbuf(struct lnet_rtrbufpool *rbp, int cpt) -{ - int npages = rbp->rbp_npages; - int sz = offsetof(struct lnet_rtrbuf, rb_kiov[npages]); - struct page *page; - struct lnet_rtrbuf *rb; - int i; - - rb = kzalloc_cpt(sz, GFP_NOFS, cpt); - if (!rb) - return NULL; - - rb->rb_pool = rbp; - - for (i = 0; i < npages; i++) { - page = alloc_pages_node( - cfs_cpt_spread_node(lnet_cpt_table(), cpt), - GFP_KERNEL | __GFP_ZERO, 0); - if (!page) { - while (--i >= 0) - __free_page(rb->rb_kiov[i].bv_page); - - kfree(rb); - return NULL; - } - - rb->rb_kiov[i].bv_len = PAGE_SIZE; - rb->rb_kiov[i].bv_offset = 0; - rb->rb_kiov[i].bv_page = page; - } - - return rb; -} - -static void -lnet_rtrpool_free_bufs(struct lnet_rtrbufpool *rbp, int cpt) -{ - int npages = rbp->rbp_npages; - struct list_head tmp; - struct lnet_rtrbuf *rb; - struct lnet_rtrbuf *temp; - - if (!rbp->rbp_nbuffers) /* not 
initialized or already freed */ - return; - - INIT_LIST_HEAD(&tmp); - - lnet_net_lock(cpt); - lnet_drop_routed_msgs_locked(&rbp->rbp_msgs, cpt); - list_splice_init(&rbp->rbp_bufs, &tmp); - rbp->rbp_req_nbuffers = 0; - rbp->rbp_nbuffers = 0; - rbp->rbp_credits = 0; - rbp->rbp_mincredits = 0; - lnet_net_unlock(cpt); - - /* Free buffers on the free list. */ - list_for_each_entry_safe(rb, temp, &tmp, rb_list) { - list_del(&rb->rb_list); - lnet_destroy_rtrbuf(rb, npages); - } -} - -static int -lnet_rtrpool_adjust_bufs(struct lnet_rtrbufpool *rbp, int nbufs, int cpt) -{ - struct list_head rb_list; - struct lnet_rtrbuf *rb; - int num_rb; - int num_buffers = 0; - int old_req_nbufs; - int npages = rbp->rbp_npages; - - lnet_net_lock(cpt); - /* - * If we are called for less buffers than already in the pool, we - * just lower the req_nbuffers number and excess buffers will be - * thrown away as they are returned to the free list. Credits - * then get adjusted as well. - * If we already have enough buffers allocated to serve the - * increase requested, then we can treat that the same way as we - * do the decrease. - */ - num_rb = nbufs - rbp->rbp_nbuffers; - if (nbufs <= rbp->rbp_req_nbuffers || num_rb <= 0) { - rbp->rbp_req_nbuffers = nbufs; - lnet_net_unlock(cpt); - return 0; - } - /* - * store the older value of rbp_req_nbuffers and then set it to - * the new request to prevent lnet_return_rx_credits_locked() from - * freeing buffers that we need to keep around - */ - old_req_nbufs = rbp->rbp_req_nbuffers; - rbp->rbp_req_nbuffers = nbufs; - lnet_net_unlock(cpt); - - INIT_LIST_HEAD(&rb_list); - - /* - * allocate the buffers on a local list first. If all buffers are - * allocated successfully then join this list to the rbp buffer - * list. If not then free all allocated buffers. 
- */ - while (num_rb-- > 0) { - rb = lnet_new_rtrbuf(rbp, cpt); - if (!rb) { - CERROR("Failed to allocate %d route bufs of %d pages\n", - nbufs, npages); - - lnet_net_lock(cpt); - rbp->rbp_req_nbuffers = old_req_nbufs; - lnet_net_unlock(cpt); - - goto failed; - } - - list_add(&rb->rb_list, &rb_list); - num_buffers++; - } - - lnet_net_lock(cpt); - - list_splice_tail(&rb_list, &rbp->rbp_bufs); - rbp->rbp_nbuffers += num_buffers; - rbp->rbp_credits += num_buffers; - rbp->rbp_mincredits = rbp->rbp_credits; - /* - * We need to schedule blocked msg using the newly - * added buffers. - */ - while (!list_empty(&rbp->rbp_bufs) && - !list_empty(&rbp->rbp_msgs)) - lnet_schedule_blocked_locked(rbp); - - lnet_net_unlock(cpt); - - return 0; - -failed: - while (!list_empty(&rb_list)) { - rb = list_entry(rb_list.next, struct lnet_rtrbuf, rb_list); - list_del(&rb->rb_list); - lnet_destroy_rtrbuf(rb, npages); - } - - return -ENOMEM; -} - -static void -lnet_rtrpool_init(struct lnet_rtrbufpool *rbp, int npages) -{ - INIT_LIST_HEAD(&rbp->rbp_msgs); - INIT_LIST_HEAD(&rbp->rbp_bufs); - - rbp->rbp_npages = npages; - rbp->rbp_credits = 0; - rbp->rbp_mincredits = 0; -} - -void -lnet_rtrpools_free(int keep_pools) -{ - struct lnet_rtrbufpool *rtrp; - int i; - - if (!the_lnet.ln_rtrpools) /* uninitialized or freed */ - return; - - cfs_percpt_for_each(rtrp, i, the_lnet.ln_rtrpools) { - lnet_rtrpool_free_bufs(&rtrp[LNET_TINY_BUF_IDX], i); - lnet_rtrpool_free_bufs(&rtrp[LNET_SMALL_BUF_IDX], i); - lnet_rtrpool_free_bufs(&rtrp[LNET_LARGE_BUF_IDX], i); - } - - if (!keep_pools) { - cfs_percpt_free(the_lnet.ln_rtrpools); - the_lnet.ln_rtrpools = NULL; - } -} - -static int -lnet_nrb_tiny_calculate(void) -{ - int nrbs = LNET_NRB_TINY; - - if (tiny_router_buffers < 0) { - LCONSOLE_ERROR_MSG(0x10c, - "tiny_router_buffers=%d invalid when routing enabled\n", - tiny_router_buffers); - return -EINVAL; - } - - if (tiny_router_buffers > 0) - nrbs = tiny_router_buffers; - - nrbs /= LNET_CPT_NUMBER; - return 
max(nrbs, LNET_NRB_TINY_MIN); -} - -static int -lnet_nrb_small_calculate(void) -{ - int nrbs = LNET_NRB_SMALL; - - if (small_router_buffers < 0) { - LCONSOLE_ERROR_MSG(0x10c, - "small_router_buffers=%d invalid when routing enabled\n", - small_router_buffers); - return -EINVAL; - } - - if (small_router_buffers > 0) - nrbs = small_router_buffers; - - nrbs /= LNET_CPT_NUMBER; - return max(nrbs, LNET_NRB_SMALL_MIN); -} - -static int -lnet_nrb_large_calculate(void) -{ - int nrbs = LNET_NRB_LARGE; - - if (large_router_buffers < 0) { - LCONSOLE_ERROR_MSG(0x10c, - "large_router_buffers=%d invalid when routing enabled\n", - large_router_buffers); - return -EINVAL; - } - - if (large_router_buffers > 0) - nrbs = large_router_buffers; - - nrbs /= LNET_CPT_NUMBER; - return max(nrbs, LNET_NRB_LARGE_MIN); -} - -int -lnet_rtrpools_alloc(int im_a_router) -{ - struct lnet_rtrbufpool *rtrp; - int nrb_tiny; - int nrb_small; - int nrb_large; - int rc; - int i; - - if (!strcmp(forwarding, "")) { - /* not set either way */ - if (!im_a_router) - return 0; - } else if (!strcmp(forwarding, "disabled")) { - /* explicitly disabled */ - return 0; - } else if (!strcmp(forwarding, "enabled")) { - /* explicitly enabled */ - } else { - LCONSOLE_ERROR_MSG(0x10b, "'forwarding' not set to either 'enabled' or 'disabled'\n"); - return -EINVAL; - } - - nrb_tiny = lnet_nrb_tiny_calculate(); - if (nrb_tiny < 0) - return -EINVAL; - - nrb_small = lnet_nrb_small_calculate(); - if (nrb_small < 0) - return -EINVAL; - - nrb_large = lnet_nrb_large_calculate(); - if (nrb_large < 0) - return -EINVAL; - - the_lnet.ln_rtrpools = cfs_percpt_alloc(lnet_cpt_table(), - LNET_NRBPOOLS * - sizeof(struct lnet_rtrbufpool)); - if (!the_lnet.ln_rtrpools) { - LCONSOLE_ERROR_MSG(0x10c, - "Failed to initialize router buffe pool\n"); - return -ENOMEM; - } - - cfs_percpt_for_each(rtrp, i, the_lnet.ln_rtrpools) { - lnet_rtrpool_init(&rtrp[LNET_TINY_BUF_IDX], 0); - rc = lnet_rtrpool_adjust_bufs(&rtrp[LNET_TINY_BUF_IDX], - nrb_tiny, 
i); - if (rc) - goto failed; - - lnet_rtrpool_init(&rtrp[LNET_SMALL_BUF_IDX], - LNET_NRB_SMALL_PAGES); - rc = lnet_rtrpool_adjust_bufs(&rtrp[LNET_SMALL_BUF_IDX], - nrb_small, i); - if (rc) - goto failed; - - lnet_rtrpool_init(&rtrp[LNET_LARGE_BUF_IDX], - LNET_NRB_LARGE_PAGES); - rc = lnet_rtrpool_adjust_bufs(&rtrp[LNET_LARGE_BUF_IDX], - nrb_large, i); - if (rc) - goto failed; - } - - lnet_net_lock(LNET_LOCK_EX); - the_lnet.ln_routing = 1; - lnet_net_unlock(LNET_LOCK_EX); - - return 0; - - failed: - lnet_rtrpools_free(0); - return rc; -} - -static int -lnet_rtrpools_adjust_helper(int tiny, int small, int large) -{ - int nrb = 0; - int rc = 0; - int i; - struct lnet_rtrbufpool *rtrp; - - /* - * If the provided values for each buffer pool are different than the - * configured values, we need to take action. - */ - if (tiny >= 0) { - tiny_router_buffers = tiny; - nrb = lnet_nrb_tiny_calculate(); - cfs_percpt_for_each(rtrp, i, the_lnet.ln_rtrpools) { - rc = lnet_rtrpool_adjust_bufs(&rtrp[LNET_TINY_BUF_IDX], - nrb, i); - if (rc) - return rc; - } - } - if (small >= 0) { - small_router_buffers = small; - nrb = lnet_nrb_small_calculate(); - cfs_percpt_for_each(rtrp, i, the_lnet.ln_rtrpools) { - rc = lnet_rtrpool_adjust_bufs(&rtrp[LNET_SMALL_BUF_IDX], - nrb, i); - if (rc) - return rc; - } - } - if (large >= 0) { - large_router_buffers = large; - nrb = lnet_nrb_large_calculate(); - cfs_percpt_for_each(rtrp, i, the_lnet.ln_rtrpools) { - rc = lnet_rtrpool_adjust_bufs(&rtrp[LNET_LARGE_BUF_IDX], - nrb, i); - if (rc) - return rc; - } - } - - return 0; -} - -int -lnet_rtrpools_adjust(int tiny, int small, int large) -{ - /* - * this function doesn't revert the changes if adding new buffers - * failed. It's up to the user space caller to revert the - * changes. 
- */ - if (!the_lnet.ln_routing) - return 0; - - return lnet_rtrpools_adjust_helper(tiny, small, large); -} - -int -lnet_rtrpools_enable(void) -{ - int rc = 0; - - if (the_lnet.ln_routing) - return 0; - - if (!the_lnet.ln_rtrpools) - /* - * If routing is turned off, and we have never - * initialized the pools before, just call the - * standard buffer pool allocation routine as - * if we are just configuring this for the first - * time. - */ - rc = lnet_rtrpools_alloc(1); - else - rc = lnet_rtrpools_adjust_helper(0, 0, 0); - if (rc) - return rc; - - lnet_net_lock(LNET_LOCK_EX); - the_lnet.ln_routing = 1; - - the_lnet.ln_ping_info->pi_features &= ~LNET_PING_FEAT_RTE_DISABLED; - lnet_net_unlock(LNET_LOCK_EX); - - return rc; -} - -void -lnet_rtrpools_disable(void) -{ - if (!the_lnet.ln_routing) - return; - - lnet_net_lock(LNET_LOCK_EX); - the_lnet.ln_routing = 0; - the_lnet.ln_ping_info->pi_features |= LNET_PING_FEAT_RTE_DISABLED; - - tiny_router_buffers = 0; - small_router_buffers = 0; - large_router_buffers = 0; - lnet_net_unlock(LNET_LOCK_EX); - lnet_rtrpools_free(1); -} - -int -lnet_notify(struct lnet_ni *ni, lnet_nid_t nid, int alive, unsigned long when) -{ - struct lnet_peer *lp = NULL; - unsigned long now = cfs_time_current(); - int cpt = lnet_cpt_of_nid(nid); - - LASSERT(!in_interrupt()); - - CDEBUG(D_NET, "%s notifying %s: %s\n", - !ni ? "userspace" : libcfs_nid2str(ni->ni_nid), - libcfs_nid2str(nid), - alive ? "up" : "down"); - - if (ni && - LNET_NIDNET(ni->ni_nid) != LNET_NIDNET(nid)) { - CWARN("Ignoring notification of %s %s by %s (different net)\n", - libcfs_nid2str(nid), alive ? "birth" : "death", - libcfs_nid2str(ni->ni_nid)); - return -EINVAL; - } - - /* can't do predictions... */ - if (cfs_time_after(when, now)) { - CWARN("Ignoring prediction from %s of %s %s %ld seconds in the future\n", - !ni ? "userspace" : libcfs_nid2str(ni->ni_nid), - libcfs_nid2str(nid), alive ? 
"up" : "down", - cfs_duration_sec(cfs_time_sub(when, now))); - return -EINVAL; - } - - if (ni && !alive && /* LND telling me she's down */ - !auto_down) { /* auto-down disabled */ - CDEBUG(D_NET, "Auto-down disabled\n"); - return 0; - } - - lnet_net_lock(cpt); - - if (the_lnet.ln_shutdown) { - lnet_net_unlock(cpt); - return -ESHUTDOWN; - } - - lp = lnet_find_peer_locked(the_lnet.ln_peer_tables[cpt], nid); - if (!lp) { - /* nid not found */ - lnet_net_unlock(cpt); - CDEBUG(D_NET, "%s not found\n", libcfs_nid2str(nid)); - return 0; - } - - /* - * We can't fully trust LND on reporting exact peer last_alive - * if he notifies us about dead peer. For example ksocklnd can - * call us with when == _time_when_the_node_was_booted_ if - * no connections were successfully established - */ - if (ni && !alive && when < lp->lp_last_alive) - when = lp->lp_last_alive; - - lnet_notify_locked(lp, !ni, alive, when); - - if (ni) - lnet_ni_notify_locked(ni, lp); - - lnet_peer_decref_locked(lp); - - lnet_net_unlock(cpt); - return 0; -} -EXPORT_SYMBOL(lnet_notify); diff --git a/drivers/staging/lustre/lnet/lnet/router_proc.c b/drivers/staging/lustre/lnet/lnet/router_proc.c deleted file mode 100644 index 1a71ffebc889..000000000000 --- a/drivers/staging/lustre/lnet/lnet/router_proc.c +++ /dev/null @@ -1,909 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -/* - * Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved. - * - * Copyright (c) 2011, 2012, Intel Corporation. - * - * This file is part of Portals - * http://sourceforge.net/projects/sandiaportals/ - * - * Portals is free software; you can redistribute it and/or - * modify it under the terms of version 2 of the GNU General Public - * License as published by the Free Software Foundation. - * - * Portals is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the - * GNU General Public License for more details. - * - */ - -#define DEBUG_SUBSYSTEM S_LNET - -#include <linux/libcfs/libcfs.h> -#include <linux/lnet/lib-lnet.h> - -/* - * This is really lnet_proc.c. You might need to update sanity test 215 - * if any file format is changed. - */ - -#define LNET_LOFFT_BITS (sizeof(loff_t) * 8) -/* - * NB: max allowed LNET_CPT_BITS is 8 on 64-bit system and 2 on 32-bit system - */ -#define LNET_PROC_CPT_BITS (LNET_CPT_BITS + 1) -/* change version, 16 bits or 8 bits */ -#define LNET_PROC_VER_BITS max_t(size_t, min_t(size_t, LNET_LOFFT_BITS, 64) / 4, 8) - -#define LNET_PROC_HASH_BITS LNET_PEER_HASH_BITS -/* - * bits for peer hash offset - * NB: we don't use the highest bit of *ppos because it's signed - */ -#define LNET_PROC_HOFF_BITS (LNET_LOFFT_BITS - \ - LNET_PROC_CPT_BITS - \ - LNET_PROC_VER_BITS - \ - LNET_PROC_HASH_BITS - 1) -/* bits for hash index + position */ -#define LNET_PROC_HPOS_BITS (LNET_PROC_HASH_BITS + LNET_PROC_HOFF_BITS) -/* bits for peer hash table + hash version */ -#define LNET_PROC_VPOS_BITS (LNET_PROC_HPOS_BITS + LNET_PROC_VER_BITS) - -#define LNET_PROC_CPT_MASK ((1ULL << LNET_PROC_CPT_BITS) - 1) -#define LNET_PROC_VER_MASK ((1ULL << LNET_PROC_VER_BITS) - 1) -#define LNET_PROC_HASH_MASK ((1ULL << LNET_PROC_HASH_BITS) - 1) -#define LNET_PROC_HOFF_MASK ((1ULL << LNET_PROC_HOFF_BITS) - 1) - -#define LNET_PROC_CPT_GET(pos) \ - (int)(((pos) >> LNET_PROC_VPOS_BITS) & LNET_PROC_CPT_MASK) - -#define LNET_PROC_VER_GET(pos) \ - (int)(((pos) >> LNET_PROC_HPOS_BITS) & LNET_PROC_VER_MASK) - -#define LNET_PROC_HASH_GET(pos) \ - (int)(((pos) >> LNET_PROC_HOFF_BITS) & LNET_PROC_HASH_MASK) - -#define LNET_PROC_HOFF_GET(pos) \ - (int)((pos) & LNET_PROC_HOFF_MASK) - -#define LNET_PROC_POS_MAKE(cpt, ver, hash, off) \ - (((((loff_t)(cpt)) & LNET_PROC_CPT_MASK) << LNET_PROC_VPOS_BITS) | \ - ((((loff_t)(ver)) & LNET_PROC_VER_MASK) << LNET_PROC_HPOS_BITS) | \ - ((((loff_t)(hash)) & LNET_PROC_HASH_MASK) << LNET_PROC_HOFF_BITS) 
| \ - ((off) & LNET_PROC_HOFF_MASK)) - -#define LNET_PROC_VERSION(v) ((unsigned int)((v) & LNET_PROC_VER_MASK)) - -static int __proc_lnet_stats(void *data, int write, - loff_t pos, void __user *buffer, int nob) -{ - int rc; - struct lnet_counters *ctrs; - int len; - char *tmpstr; - const int tmpsiz = 256; /* 7 %u and 4 %llu */ - - if (write) { - lnet_counters_reset(); - return 0; - } - - /* read */ - - ctrs = kzalloc(sizeof(*ctrs), GFP_NOFS); - if (!ctrs) - return -ENOMEM; - - tmpstr = kmalloc(tmpsiz, GFP_KERNEL); - if (!tmpstr) { - kfree(ctrs); - return -ENOMEM; - } - - lnet_counters_get(ctrs); - - len = snprintf(tmpstr, tmpsiz, - "%u %u %u %u %u %u %u %llu %llu %llu %llu", - ctrs->msgs_alloc, ctrs->msgs_max, - ctrs->errors, - ctrs->send_count, ctrs->recv_count, - ctrs->route_count, ctrs->drop_count, - ctrs->send_length, ctrs->recv_length, - ctrs->route_length, ctrs->drop_length); - - if (pos >= min_t(int, len, strlen(tmpstr))) - rc = 0; - else - rc = cfs_trace_copyout_string(buffer, nob, - tmpstr + pos, "\n"); - - kfree(tmpstr); - kfree(ctrs); - return rc; -} - -static int proc_lnet_stats(struct ctl_table *table, int write, - void __user *buffer, size_t *lenp, loff_t *ppos) -{ - return lprocfs_call_handler(table->data, write, ppos, buffer, lenp, - __proc_lnet_stats); -} - -static int proc_lnet_routes(struct ctl_table *table, int write, - void __user *buffer, size_t *lenp, loff_t *ppos) -{ - const int tmpsiz = 256; - char *tmpstr; - char *s; - int rc = 0; - int len; - int ver; - int off; - - BUILD_BUG_ON(sizeof(loff_t) < 4); - - off = LNET_PROC_HOFF_GET(*ppos); - ver = LNET_PROC_VER_GET(*ppos); - - LASSERT(!write); - - if (!*lenp) - return 0; - - tmpstr = kmalloc(tmpsiz, GFP_KERNEL); - if (!tmpstr) - return -ENOMEM; - - s = tmpstr; /* points to current position in tmpstr[] */ - - if (!*ppos) { - s += snprintf(s, tmpstr + tmpsiz - s, "Routing %s\n", - the_lnet.ln_routing ? 
"enabled" : "disabled"); - LASSERT(tmpstr + tmpsiz - s > 0); - - s += snprintf(s, tmpstr + tmpsiz - s, "%-8s %4s %8s %7s %s\n", - "net", "hops", "priority", "state", "router"); - LASSERT(tmpstr + tmpsiz - s > 0); - - lnet_net_lock(0); - ver = (unsigned int)the_lnet.ln_remote_nets_version; - lnet_net_unlock(0); - *ppos = LNET_PROC_POS_MAKE(0, ver, 0, off); - } else { - struct list_head *n; - struct list_head *r; - struct lnet_route *route = NULL; - struct lnet_remotenet *rnet = NULL; - int skip = off - 1; - struct list_head *rn_list; - int i; - - lnet_net_lock(0); - - if (ver != LNET_PROC_VERSION(the_lnet.ln_remote_nets_version)) { - lnet_net_unlock(0); - kfree(tmpstr); - return -ESTALE; - } - - for (i = 0; i < LNET_REMOTE_NETS_HASH_SIZE && !route; i++) { - rn_list = &the_lnet.ln_remote_nets_hash[i]; - - n = rn_list->next; - - while (n != rn_list && !route) { - rnet = list_entry(n, struct lnet_remotenet, - lrn_list); - - r = rnet->lrn_routes.next; - - while (r != &rnet->lrn_routes) { - struct lnet_route *re; - - re = list_entry(r, struct lnet_route, - lr_list); - if (!skip) { - route = re; - break; - } - - skip--; - r = r->next; - } - - n = n->next; - } - } - - if (route) { - __u32 net = rnet->lrn_net; - __u32 hops = route->lr_hops; - unsigned int priority = route->lr_priority; - lnet_nid_t nid = route->lr_gateway->lp_nid; - int alive = lnet_is_route_alive(route); - - s += snprintf(s, tmpstr + tmpsiz - s, - "%-8s %4u %8u %7s %s\n", - libcfs_net2str(net), hops, - priority, - alive ? 
"up" : "down", - libcfs_nid2str(nid)); - LASSERT(tmpstr + tmpsiz - s > 0); - } - - lnet_net_unlock(0); - } - - len = s - tmpstr; /* how many bytes was written */ - - if (len > *lenp) { /* linux-supplied buffer is too small */ - rc = -EINVAL; - } else if (len > 0) { /* wrote something */ - if (copy_to_user(buffer, tmpstr, len)) { - rc = -EFAULT; - } else { - off += 1; - *ppos = LNET_PROC_POS_MAKE(0, ver, 0, off); - } - } - - kfree(tmpstr); - - if (!rc) - *lenp = len; - - return rc; -} - -static int proc_lnet_routers(struct ctl_table *table, int write, - void __user *buffer, size_t *lenp, loff_t *ppos) -{ - int rc = 0; - char *tmpstr; - char *s; - const int tmpsiz = 256; - int len; - int ver; - int off; - - off = LNET_PROC_HOFF_GET(*ppos); - ver = LNET_PROC_VER_GET(*ppos); - - LASSERT(!write); - - if (!*lenp) - return 0; - - tmpstr = kmalloc(tmpsiz, GFP_KERNEL); - if (!tmpstr) - return -ENOMEM; - - s = tmpstr; /* points to current position in tmpstr[] */ - - if (!*ppos) { - s += snprintf(s, tmpstr + tmpsiz - s, - "%-4s %7s %9s %6s %12s %9s %8s %7s %s\n", - "ref", "rtr_ref", "alive_cnt", "state", - "last_ping", "ping_sent", "deadline", - "down_ni", "router"); - LASSERT(tmpstr + tmpsiz - s > 0); - - lnet_net_lock(0); - ver = (unsigned int)the_lnet.ln_routers_version; - lnet_net_unlock(0); - *ppos = LNET_PROC_POS_MAKE(0, ver, 0, off); - } else { - struct list_head *r; - struct lnet_peer *peer = NULL; - int skip = off - 1; - - lnet_net_lock(0); - - if (ver != LNET_PROC_VERSION(the_lnet.ln_routers_version)) { - lnet_net_unlock(0); - - kfree(tmpstr); - return -ESTALE; - } - - r = the_lnet.ln_routers.next; - - while (r != &the_lnet.ln_routers) { - struct lnet_peer *lp; - - lp = list_entry(r, struct lnet_peer, lp_rtr_list); - if (!skip) { - peer = lp; - break; - } - - skip--; - r = r->next; - } - - if (peer) { - lnet_nid_t nid = peer->lp_nid; - unsigned long now = cfs_time_current(); - unsigned long deadline = peer->lp_ping_deadline; - int nrefs = peer->lp_refcount; - int 
nrtrrefs = peer->lp_rtr_refcount; - int alive_cnt = peer->lp_alive_count; - int alive = peer->lp_alive; - int pingsent = !peer->lp_ping_notsent; - int last_ping = cfs_duration_sec(cfs_time_sub(now, - peer->lp_ping_timestamp)); - int down_ni = 0; - struct lnet_route *rtr; - - if ((peer->lp_ping_feats & - LNET_PING_FEAT_NI_STATUS)) { - list_for_each_entry(rtr, &peer->lp_routes, - lr_gwlist) { - /* - * downis on any route should be the - * number of downis on the gateway - */ - if (rtr->lr_downis) { - down_ni = rtr->lr_downis; - break; - } - } - } - - if (!deadline) - s += snprintf(s, tmpstr + tmpsiz - s, - "%-4d %7d %9d %6s %12d %9d %8s %7d %s\n", - nrefs, nrtrrefs, alive_cnt, - alive ? "up" : "down", last_ping, - pingsent, "NA", down_ni, - libcfs_nid2str(nid)); - else - s += snprintf(s, tmpstr + tmpsiz - s, - "%-4d %7d %9d %6s %12d %9d %8lu %7d %s\n", - nrefs, nrtrrefs, alive_cnt, - alive ? "up" : "down", last_ping, - pingsent, - cfs_duration_sec(cfs_time_sub(deadline, now)), - down_ni, libcfs_nid2str(nid)); - LASSERT(tmpstr + tmpsiz - s > 0); - } - - lnet_net_unlock(0); - } - - len = s - tmpstr; /* how many bytes was written */ - - if (len > *lenp) { /* linux-supplied buffer is too small */ - rc = -EINVAL; - } else if (len > 0) { /* wrote something */ - if (copy_to_user(buffer, tmpstr, len)) { - rc = -EFAULT; - } else { - off += 1; - *ppos = LNET_PROC_POS_MAKE(0, ver, 0, off); - } - } - - kfree(tmpstr); - - if (!rc) - *lenp = len; - - return rc; -} - -static int proc_lnet_peers(struct ctl_table *table, int write, - void __user *buffer, size_t *lenp, loff_t *ppos) -{ - const int tmpsiz = 256; - struct lnet_peer_table *ptable; - char *tmpstr; - char *s; - int cpt = LNET_PROC_CPT_GET(*ppos); - int ver = LNET_PROC_VER_GET(*ppos); - int hash = LNET_PROC_HASH_GET(*ppos); - int hoff = LNET_PROC_HOFF_GET(*ppos); - int rc = 0; - int len; - - BUILD_BUG_ON(LNET_PROC_HASH_BITS < LNET_PEER_HASH_BITS); - LASSERT(!write); - - if (!*lenp) - return 0; - - if (cpt >= 
LNET_CPT_NUMBER) { - *lenp = 0; - return 0; - } - - tmpstr = kmalloc(tmpsiz, GFP_KERNEL); - if (!tmpstr) - return -ENOMEM; - - s = tmpstr; /* points to current position in tmpstr[] */ - - if (!*ppos) { - s += snprintf(s, tmpstr + tmpsiz - s, - "%-24s %4s %5s %5s %5s %5s %5s %5s %5s %s\n", - "nid", "refs", "state", "last", "max", - "rtr", "min", "tx", "min", "queue"); - LASSERT(tmpstr + tmpsiz - s > 0); - - hoff++; - } else { - struct lnet_peer *peer; - struct list_head *p; - int skip; - again: - p = NULL; - peer = NULL; - skip = hoff - 1; - - lnet_net_lock(cpt); - ptable = the_lnet.ln_peer_tables[cpt]; - if (hoff == 1) - ver = LNET_PROC_VERSION(ptable->pt_version); - - if (ver != LNET_PROC_VERSION(ptable->pt_version)) { - lnet_net_unlock(cpt); - kfree(tmpstr); - return -ESTALE; - } - - while (hash < LNET_PEER_HASH_SIZE) { - if (!p) - p = ptable->pt_hash[hash].next; - - while (p != &ptable->pt_hash[hash]) { - struct lnet_peer *lp; - - lp = list_entry(p, struct lnet_peer, - lp_hashlist); - if (!skip) { - peer = lp; - - /* - * minor optimization: start from idx+1 - * on next iteration if we've just - * drained lp_hashlist - */ - if (lp->lp_hashlist.next == - &ptable->pt_hash[hash]) { - hoff = 1; - hash++; - } else { - hoff++; - } - - break; - } - - skip--; - p = lp->lp_hashlist.next; - } - - if (peer) - break; - - p = NULL; - hoff = 1; - hash++; - } - - if (peer) { - lnet_nid_t nid = peer->lp_nid; - int nrefs = peer->lp_refcount; - int lastalive = -1; - char *aliveness = "NA"; - int maxcr = peer->lp_ni->ni_peertxcredits; - int txcr = peer->lp_txcredits; - int mintxcr = peer->lp_mintxcredits; - int rtrcr = peer->lp_rtrcredits; - int minrtrcr = peer->lp_minrtrcredits; - int txqnob = peer->lp_txqnob; - - if (lnet_isrouter(peer) || - lnet_peer_aliveness_enabled(peer)) - aliveness = peer->lp_alive ? 
"up" : "down"; - - if (lnet_peer_aliveness_enabled(peer)) { - unsigned long now = cfs_time_current(); - long delta; - - delta = cfs_time_sub(now, peer->lp_last_alive); - lastalive = cfs_duration_sec(delta); - - /* No need to mess up peers contents with - * arbitrarily long integers - it suffices to - * know that lastalive is more than 10000s old - */ - if (lastalive >= 10000) - lastalive = 9999; - } - - lnet_net_unlock(cpt); - - s += snprintf(s, tmpstr + tmpsiz - s, - "%-24s %4d %5s %5d %5d %5d %5d %5d %5d %d\n", - libcfs_nid2str(nid), nrefs, aliveness, - lastalive, maxcr, rtrcr, minrtrcr, txcr, - mintxcr, txqnob); - LASSERT(tmpstr + tmpsiz - s > 0); - - } else { /* peer is NULL */ - lnet_net_unlock(cpt); - } - - if (hash == LNET_PEER_HASH_SIZE) { - cpt++; - hash = 0; - hoff = 1; - if (!peer && cpt < LNET_CPT_NUMBER) - goto again; - } - } - - len = s - tmpstr; /* how many bytes was written */ - - if (len > *lenp) { /* linux-supplied buffer is too small */ - rc = -EINVAL; - } else if (len > 0) { /* wrote something */ - if (copy_to_user(buffer, tmpstr, len)) - rc = -EFAULT; - else - *ppos = LNET_PROC_POS_MAKE(cpt, ver, hash, hoff); - } - - kfree(tmpstr); - - if (!rc) - *lenp = len; - - return rc; -} - -static int __proc_lnet_buffers(void *data, int write, - loff_t pos, void __user *buffer, int nob) -{ - char *s; - char *tmpstr; - int tmpsiz; - int idx; - int len; - int rc; - int i; - - LASSERT(!write); - - /* (4 %d) * 4 * LNET_CPT_NUMBER */ - tmpsiz = 64 * (LNET_NRBPOOLS + 1) * LNET_CPT_NUMBER; - tmpstr = kvmalloc(tmpsiz, GFP_KERNEL); - if (!tmpstr) - return -ENOMEM; - - s = tmpstr; /* points to current position in tmpstr[] */ - - s += snprintf(s, tmpstr + tmpsiz - s, - "%5s %5s %7s %7s\n", - "pages", "count", "credits", "min"); - LASSERT(tmpstr + tmpsiz - s > 0); - - if (!the_lnet.ln_rtrpools) - goto out; /* I'm not a router */ - - for (idx = 0; idx < LNET_NRBPOOLS; idx++) { - struct lnet_rtrbufpool *rbp; - - lnet_net_lock(LNET_LOCK_EX); - cfs_percpt_for_each(rbp, 
i, the_lnet.ln_rtrpools) { - s += snprintf(s, tmpstr + tmpsiz - s, - "%5d %5d %7d %7d\n", - rbp[idx].rbp_npages, - rbp[idx].rbp_nbuffers, - rbp[idx].rbp_credits, - rbp[idx].rbp_mincredits); - LASSERT(tmpstr + tmpsiz - s > 0); - } - lnet_net_unlock(LNET_LOCK_EX); - } - - out: - len = s - tmpstr; - - if (pos >= min_t(int, len, strlen(tmpstr))) - rc = 0; - else - rc = cfs_trace_copyout_string(buffer, nob, - tmpstr + pos, NULL); - - kvfree(tmpstr); - return rc; -} - -static int proc_lnet_buffers(struct ctl_table *table, int write, - void __user *buffer, size_t *lenp, loff_t *ppos) -{ - return lprocfs_call_handler(table->data, write, ppos, buffer, lenp, - __proc_lnet_buffers); -} - -static int proc_lnet_nis(struct ctl_table *table, int write, - void __user *buffer, size_t *lenp, loff_t *ppos) -{ - int tmpsiz = 128 * LNET_CPT_NUMBER; - int rc = 0; - char *tmpstr; - char *s; - int len; - - LASSERT(!write); - - if (!*lenp) - return 0; - - tmpstr = kvmalloc(tmpsiz, GFP_KERNEL); - if (!tmpstr) - return -ENOMEM; - - s = tmpstr; /* points to current position in tmpstr[] */ - - if (!*ppos) { - s += snprintf(s, tmpstr + tmpsiz - s, - "%-24s %6s %5s %4s %4s %4s %5s %5s %5s\n", - "nid", "status", "alive", "refs", "peer", - "rtr", "max", "tx", "min"); - LASSERT(tmpstr + tmpsiz - s > 0); - } else { - struct list_head *n; - struct lnet_ni *ni = NULL; - int skip = *ppos - 1; - - lnet_net_lock(0); - - n = the_lnet.ln_nis.next; - - while (n != &the_lnet.ln_nis) { - struct lnet_ni *a_ni; - - a_ni = list_entry(n, struct lnet_ni, ni_list); - if (!skip) { - ni = a_ni; - break; - } - - skip--; - n = n->next; - } - - if (ni) { - struct lnet_tx_queue *tq; - char *stat; - time64_t now = ktime_get_real_seconds(); - int last_alive = -1; - int i; - int j; - - if (the_lnet.ln_routing) - last_alive = now - ni->ni_last_alive; - - /* @lo forever alive */ - if (ni->ni_lnd->lnd_type == LOLND) - last_alive = 0; - - lnet_ni_lock(ni); - LASSERT(ni->ni_status); - stat = (ni->ni_status->ns_status == - 
LNET_NI_STATUS_UP) ? "up" : "down"; - lnet_ni_unlock(ni); - - /* - * we actually output credits information for - * TX queue of each partition - */ - cfs_percpt_for_each(tq, i, ni->ni_tx_queues) { - for (j = 0; ni->ni_cpts && - j < ni->ni_ncpts; j++) { - if (i == ni->ni_cpts[j]) - break; - } - - if (j == ni->ni_ncpts) - continue; - - if (i) - lnet_net_lock(i); - - s += snprintf(s, tmpstr + tmpsiz - s, - "%-24s %6s %5d %4d %4d %4d %5d %5d %5d\n", - libcfs_nid2str(ni->ni_nid), stat, - last_alive, *ni->ni_refs[i], - ni->ni_peertxcredits, - ni->ni_peerrtrcredits, - tq->tq_credits_max, - tq->tq_credits, - tq->tq_credits_min); - if (i) - lnet_net_unlock(i); - } - LASSERT(tmpstr + tmpsiz - s > 0); - } - - lnet_net_unlock(0); - } - - len = s - tmpstr; /* how many bytes was written */ - - if (len > *lenp) { /* linux-supplied buffer is too small */ - rc = -EINVAL; - } else if (len > 0) { /* wrote something */ - if (copy_to_user(buffer, tmpstr, len)) - rc = -EFAULT; - else - *ppos += 1; - } - - kvfree(tmpstr); - - if (!rc) - *lenp = len; - - return rc; -} - -struct lnet_portal_rotors { - int pr_value; - const char *pr_name; - const char *pr_desc; -}; - -static struct lnet_portal_rotors portal_rotors[] = { - { - .pr_value = LNET_PTL_ROTOR_OFF, - .pr_name = "OFF", - .pr_desc = "Turn off message rotor for wildcard portals" - }, - { - .pr_value = LNET_PTL_ROTOR_ON, - .pr_name = "ON", - .pr_desc = "round-robin dispatch all PUT messages for wildcard portals" - }, - { - .pr_value = LNET_PTL_ROTOR_RR_RT, - .pr_name = "RR_RT", - .pr_desc = "round-robin dispatch routed PUT message for wildcard portals" - }, - { - .pr_value = LNET_PTL_ROTOR_HASH_RT, - .pr_name = "HASH_RT", - .pr_desc = "dispatch routed PUT message by hashing source NID for wildcard portals" - }, - { - .pr_value = -1, - .pr_name = NULL, - .pr_desc = NULL - }, -}; - -static int __proc_lnet_portal_rotor(void *data, int write, - loff_t pos, void __user *buffer, int nob) -{ - const int buf_len = 128; - char *buf; - char 
*tmp; - int rc; - int i; - - buf = kmalloc(buf_len, GFP_KERNEL); - if (!buf) - return -ENOMEM; - - if (!write) { - lnet_res_lock(0); - - for (i = 0; portal_rotors[i].pr_value >= 0; i++) { - if (portal_rotors[i].pr_value == portal_rotor) - break; - } - - LASSERT(portal_rotors[i].pr_value == portal_rotor); - lnet_res_unlock(0); - - rc = snprintf(buf, buf_len, - "{\n\tportals: all\n" - "\trotor: %s\n\tdescription: %s\n}", - portal_rotors[i].pr_name, - portal_rotors[i].pr_desc); - - if (pos >= min_t(int, rc, buf_len)) { - rc = 0; - } else { - rc = cfs_trace_copyout_string(buffer, nob, - buf + pos, "\n"); - } - goto out; - } - - rc = cfs_trace_copyin_string(buf, buf_len, buffer, nob); - if (rc < 0) - goto out; - - tmp = strim(buf); - - rc = -EINVAL; - lnet_res_lock(0); - for (i = 0; portal_rotors[i].pr_name; i++) { - if (!strncasecmp(portal_rotors[i].pr_name, tmp, - strlen(portal_rotors[i].pr_name))) { - portal_rotor = portal_rotors[i].pr_value; - rc = 0; - break; - } - } - lnet_res_unlock(0); -out: - kfree(buf); - return rc; -} - -static int proc_lnet_portal_rotor(struct ctl_table *table, int write, - void __user *buffer, size_t *lenp, - loff_t *ppos) -{ - return lprocfs_call_handler(table->data, write, ppos, buffer, lenp, - __proc_lnet_portal_rotor); -} - -static struct ctl_table lnet_table[] = { - /* - * NB No .strategy entries have been provided since sysctl(8) prefers - * to go via /proc for portability. 
- */ - { - .procname = "stats", - .mode = 0644, - .proc_handler = &proc_lnet_stats, - }, - { - .procname = "routes", - .mode = 0444, - .proc_handler = &proc_lnet_routes, - }, - { - .procname = "routers", - .mode = 0444, - .proc_handler = &proc_lnet_routers, - }, - { - .procname = "peers", - .mode = 0444, - .proc_handler = &proc_lnet_peers, - }, - { - .procname = "buffers", - .mode = 0444, - .proc_handler = &proc_lnet_buffers, - }, - { - .procname = "nis", - .mode = 0444, - .proc_handler = &proc_lnet_nis, - }, - { - .procname = "portal_rotor", - .mode = 0644, - .proc_handler = &proc_lnet_portal_rotor, - }, - { - } -}; - -void lnet_router_debugfs_init(void) -{ - lustre_insert_debugfs(lnet_table, NULL); -} - -void lnet_router_debugfs_fini(void) -{ -} diff --git a/drivers/staging/lustre/lnet/selftest/Makefile b/drivers/staging/lustre/lnet/selftest/Makefile deleted file mode 100644 index 3ccc8966b566..000000000000 --- a/drivers/staging/lustre/lnet/selftest/Makefile +++ /dev/null @@ -1,7 +0,0 @@ -subdir-ccflags-y += -I$(srctree)/drivers/staging/lustre/include -subdir-ccflags-y += -I$(srctree)/drivers/staging/lustre/lustre/include - -obj-$(CONFIG_LNET_SELFTEST) := lnet_selftest.o - -lnet_selftest-y := console.o conrpc.o conctl.o framework.o timer.o rpc.o \ - module.o ping_test.o brw_test.o diff --git a/drivers/staging/lustre/lnet/selftest/brw_test.c b/drivers/staging/lustre/lnet/selftest/brw_test.c deleted file mode 100644 index f1ee219bc8f3..000000000000 --- a/drivers/staging/lustre/lnet/selftest/brw_test.c +++ /dev/null @@ -1,526 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -/* - * GPL HEADER START - * - * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 only, - * as published by the Free Software Foundation. 
- * - * This program is distributed in the hope that it will be useful, but - * WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * General Public License version 2 for more details (a copy is included - * in the LICENSE file that accompanied this code). - * - * You should have received a copy of the GNU General Public License - * version 2 along with this program; If not, see - * http://www.gnu.org/licenses/gpl-2.0.html - * - * GPL HEADER END - */ -/* - * Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved. - * Use is subject to license terms. - * - * Copyright (c) 2011, 2015, Intel Corporation. - */ -/* - * This file is part of Lustre, http://www.lustre.org/ - * Lustre is a trademark of Sun Microsystems, Inc. - * - * lnet/selftest/brw_test.c - * - * Author: Isaac Huang <isaac@clusterfs.com> - */ - -#include "selftest.h" - -static int brw_srv_workitems = SFW_TEST_WI_MAX; -module_param(brw_srv_workitems, int, 0644); -MODULE_PARM_DESC(brw_srv_workitems, "# BRW server workitems"); - -static int brw_inject_errors; -module_param(brw_inject_errors, int, 0644); -MODULE_PARM_DESC(brw_inject_errors, "# data errors to inject randomly, zero by default"); - -#define BRW_POISON 0xbeefbeefbeefbeefULL -#define BRW_MAGIC 0xeeb0eeb1eeb2eeb3ULL -#define BRW_MSIZE sizeof(u64) - -static void -brw_client_fini(struct sfw_test_instance *tsi) -{ - struct srpc_bulk *bulk; - struct sfw_test_unit *tsu; - - LASSERT(tsi->tsi_is_client); - - list_for_each_entry(tsu, &tsi->tsi_units, tsu_list) { - bulk = tsu->tsu_private; - if (!bulk) - continue; - - srpc_free_bulk(bulk); - tsu->tsu_private = NULL; - } -} - -static int -brw_client_init(struct sfw_test_instance *tsi) -{ - struct sfw_session *sn = tsi->tsi_batch->bat_session; - int flags; - int off; - int npg; - int len; - int opc; - struct srpc_bulk *bulk; - struct sfw_test_unit *tsu; - - LASSERT(sn); - LASSERT(tsi->tsi_is_client); - - if 
(!(sn->sn_features & LST_FEAT_BULK_LEN)) { - struct test_bulk_req *breq = &tsi->tsi_u.bulk_v0; - - opc = breq->blk_opc; - flags = breq->blk_flags; - npg = breq->blk_npg; - /* - * NB: this is not going to work for variable page size, - * but we have to keep it for compatibility - */ - len = npg * PAGE_SIZE; - off = 0; - } else { - struct test_bulk_req_v1 *breq = &tsi->tsi_u.bulk_v1; - - /* - * I should never get this step if it's unknown feature - * because make_session will reject unknown feature - */ - LASSERT(!(sn->sn_features & ~LST_FEATS_MASK)); - - opc = breq->blk_opc; - flags = breq->blk_flags; - len = breq->blk_len; - off = breq->blk_offset & ~PAGE_MASK; - npg = (off + len + PAGE_SIZE - 1) >> PAGE_SHIFT; - } - - if (off % BRW_MSIZE) - return -EINVAL; - - if (npg > LNET_MAX_IOV || npg <= 0) - return -EINVAL; - - if (opc != LST_BRW_READ && opc != LST_BRW_WRITE) - return -EINVAL; - - if (flags != LST_BRW_CHECK_NONE && - flags != LST_BRW_CHECK_FULL && flags != LST_BRW_CHECK_SIMPLE) - return -EINVAL; - - list_for_each_entry(tsu, &tsi->tsi_units, tsu_list) { - bulk = srpc_alloc_bulk(lnet_cpt_of_nid(tsu->tsu_dest.nid), - off, npg, len, opc == LST_BRW_READ); - if (!bulk) { - brw_client_fini(tsi); - return -ENOMEM; - } - - tsu->tsu_private = bulk; - } - - return 0; -} - -static int brw_inject_one_error(void) -{ - struct timespec64 ts; - - if (brw_inject_errors <= 0) - return 0; - - ktime_get_ts64(&ts); - - if (!((ts.tv_nsec / NSEC_PER_USEC) & 1)) - return 0; - - return brw_inject_errors--; -} - -static void -brw_fill_page(struct page *pg, int off, int len, int pattern, __u64 magic) -{ - char *addr = page_address(pg) + off; - int i; - - LASSERT(addr); - LASSERT(!(off % BRW_MSIZE) && !(len % BRW_MSIZE)); - - if (pattern == LST_BRW_CHECK_NONE) - return; - - if (magic == BRW_MAGIC) - magic += brw_inject_one_error(); - - if (pattern == LST_BRW_CHECK_SIMPLE) { - memcpy(addr, &magic, BRW_MSIZE); - if (len > BRW_MSIZE) { - addr += PAGE_SIZE - BRW_MSIZE; - memcpy(addr, 
&magic, BRW_MSIZE); - } - return; - } - - if (pattern == LST_BRW_CHECK_FULL) { - for (i = 0; i < len; i += BRW_MSIZE) - memcpy(addr + i, &magic, BRW_MSIZE); - return; - } - - LBUG(); -} - -static int -brw_check_page(struct page *pg, int off, int len, int pattern, __u64 magic) -{ - char *addr = page_address(pg) + off; - __u64 data = 0; /* make compiler happy */ - int i; - - LASSERT(addr); - LASSERT(!(off % BRW_MSIZE) && !(len % BRW_MSIZE)); - - if (pattern == LST_BRW_CHECK_NONE) - return 0; - - if (pattern == LST_BRW_CHECK_SIMPLE) { - data = *((__u64 *)addr); - if (data != magic) - goto bad_data; - - if (len > BRW_MSIZE) { - addr += PAGE_SIZE - BRW_MSIZE; - data = *((__u64 *)addr); - if (data != magic) - goto bad_data; - } - return 0; - } - - if (pattern == LST_BRW_CHECK_FULL) { - for (i = 0; i < len; i += BRW_MSIZE) { - data = *(u64 *)(addr + i); - if (data != magic) - goto bad_data; - } - return 0; - } - - LBUG(); - -bad_data: - CERROR("Bad data in page %p: %#llx, %#llx expected\n", - pg, data, magic); - return 1; -} - -static void -brw_fill_bulk(struct srpc_bulk *bk, int pattern, __u64 magic) -{ - int i; - struct page *pg; - - for (i = 0; i < bk->bk_niov; i++) { - int off, len; - - pg = bk->bk_iovs[i].bv_page; - off = bk->bk_iovs[i].bv_offset; - len = bk->bk_iovs[i].bv_len; - brw_fill_page(pg, off, len, pattern, magic); - } -} - -static int -brw_check_bulk(struct srpc_bulk *bk, int pattern, __u64 magic) -{ - int i; - struct page *pg; - - for (i = 0; i < bk->bk_niov; i++) { - int off, len; - - pg = bk->bk_iovs[i].bv_page; - off = bk->bk_iovs[i].bv_offset; - len = bk->bk_iovs[i].bv_len; - if (brw_check_page(pg, off, len, pattern, magic)) { - CERROR("Bulk page %p (%d/%d) is corrupted!\n", - pg, i, bk->bk_niov); - return 1; - } - } - - return 0; -} - -static int -brw_client_prep_rpc(struct sfw_test_unit *tsu, struct lnet_process_id dest, - struct srpc_client_rpc **rpcpp) -{ - struct srpc_bulk *bulk = tsu->tsu_private; - struct sfw_test_instance *tsi = 
tsu->tsu_instance; - struct sfw_session *sn = tsi->tsi_batch->bat_session; - struct srpc_client_rpc *rpc; - struct srpc_brw_reqst *req; - int flags; - int npg; - int len; - int opc; - int rc; - - LASSERT(sn); - LASSERT(bulk); - - if (!(sn->sn_features & LST_FEAT_BULK_LEN)) { - struct test_bulk_req *breq = &tsi->tsi_u.bulk_v0; - - opc = breq->blk_opc; - flags = breq->blk_flags; - npg = breq->blk_npg; - len = npg * PAGE_SIZE; - } else { - struct test_bulk_req_v1 *breq = &tsi->tsi_u.bulk_v1; - int off; - - /* - * I should never get this step if it's unknown feature - * because make_session will reject unknown feature - */ - LASSERT(!(sn->sn_features & ~LST_FEATS_MASK)); - - opc = breq->blk_opc; - flags = breq->blk_flags; - len = breq->blk_len; - off = breq->blk_offset; - npg = (off + len + PAGE_SIZE - 1) >> PAGE_SHIFT; - } - - rc = sfw_create_test_rpc(tsu, dest, sn->sn_features, npg, len, &rpc); - if (rc) - return rc; - - memcpy(&rpc->crpc_bulk, bulk, offsetof(struct srpc_bulk, bk_iovs[npg])); - if (opc == LST_BRW_WRITE) - brw_fill_bulk(&rpc->crpc_bulk, flags, BRW_MAGIC); - else - brw_fill_bulk(&rpc->crpc_bulk, flags, BRW_POISON); - - req = &rpc->crpc_reqstmsg.msg_body.brw_reqst; - req->brw_flags = flags; - req->brw_rw = opc; - req->brw_len = len; - - *rpcpp = rpc; - return 0; -} - -static void -brw_client_done_rpc(struct sfw_test_unit *tsu, struct srpc_client_rpc *rpc) -{ - __u64 magic = BRW_MAGIC; - struct sfw_test_instance *tsi = tsu->tsu_instance; - struct sfw_session *sn = tsi->tsi_batch->bat_session; - struct srpc_msg *msg = &rpc->crpc_replymsg; - struct srpc_brw_reply *reply = &msg->msg_body.brw_reply; - struct srpc_brw_reqst *reqst = &rpc->crpc_reqstmsg.msg_body.brw_reqst; - - LASSERT(sn); - - if (rpc->crpc_status) { - CERROR("BRW RPC to %s failed with %d\n", - libcfs_id2str(rpc->crpc_dest), rpc->crpc_status); - if (!tsi->tsi_stopping) /* rpc could have been aborted */ - atomic_inc(&sn->sn_brw_errors); - return; - } - - if (msg->msg_magic != SRPC_MSG_MAGIC) { 
- __swab64s(&magic); - __swab32s(&reply->brw_status); - } - - CDEBUG(reply->brw_status ? D_WARNING : D_NET, - "BRW RPC to %s finished with brw_status: %d\n", - libcfs_id2str(rpc->crpc_dest), reply->brw_status); - - if (reply->brw_status) { - atomic_inc(&sn->sn_brw_errors); - rpc->crpc_status = -(int)reply->brw_status; - return; - } - - if (reqst->brw_rw == LST_BRW_WRITE) - return; - - if (brw_check_bulk(&rpc->crpc_bulk, reqst->brw_flags, magic)) { - CERROR("Bulk data from %s is corrupted!\n", - libcfs_id2str(rpc->crpc_dest)); - atomic_inc(&sn->sn_brw_errors); - rpc->crpc_status = -EBADMSG; - } -} - -static void -brw_server_rpc_done(struct srpc_server_rpc *rpc) -{ - struct srpc_bulk *blk = rpc->srpc_bulk; - - if (!blk) - return; - - if (rpc->srpc_status) - CERROR("Bulk transfer %s %s has failed: %d\n", - blk->bk_sink ? "from" : "to", - libcfs_id2str(rpc->srpc_peer), rpc->srpc_status); - else - CDEBUG(D_NET, "Transferred %d pages bulk data %s %s\n", - blk->bk_niov, blk->bk_sink ? "from" : "to", - libcfs_id2str(rpc->srpc_peer)); - - sfw_free_pages(rpc); -} - -static int -brw_bulk_ready(struct srpc_server_rpc *rpc, int status) -{ - __u64 magic = BRW_MAGIC; - struct srpc_brw_reply *reply = &rpc->srpc_replymsg.msg_body.brw_reply; - struct srpc_brw_reqst *reqst; - struct srpc_msg *reqstmsg; - - LASSERT(rpc->srpc_bulk); - LASSERT(rpc->srpc_reqstbuf); - - reqstmsg = &rpc->srpc_reqstbuf->buf_msg; - reqst = &reqstmsg->msg_body.brw_reqst; - - if (status) { - CERROR("BRW bulk %s failed for RPC from %s: %d\n", - reqst->brw_rw == LST_BRW_READ ? 
"READ" : "WRITE", - libcfs_id2str(rpc->srpc_peer), status); - return -EIO; - } - - if (reqst->brw_rw == LST_BRW_READ) - return 0; - - if (reqstmsg->msg_magic != SRPC_MSG_MAGIC) - __swab64s(&magic); - - if (brw_check_bulk(rpc->srpc_bulk, reqst->brw_flags, magic)) { - CERROR("Bulk data from %s is corrupted!\n", - libcfs_id2str(rpc->srpc_peer)); - reply->brw_status = EBADMSG; - } - - return 0; -} - -static int -brw_server_handle(struct srpc_server_rpc *rpc) -{ - struct srpc_service *sv = rpc->srpc_scd->scd_svc; - struct srpc_msg *replymsg = &rpc->srpc_replymsg; - struct srpc_msg *reqstmsg = &rpc->srpc_reqstbuf->buf_msg; - struct srpc_brw_reply *reply = &replymsg->msg_body.brw_reply; - struct srpc_brw_reqst *reqst = &reqstmsg->msg_body.brw_reqst; - int npg; - int rc; - - LASSERT(sv->sv_id == SRPC_SERVICE_BRW); - - if (reqstmsg->msg_magic != SRPC_MSG_MAGIC) { - LASSERT(reqstmsg->msg_magic == __swab32(SRPC_MSG_MAGIC)); - - __swab32s(&reqst->brw_rw); - __swab32s(&reqst->brw_len); - __swab32s(&reqst->brw_flags); - __swab64s(&reqst->brw_rpyid); - __swab64s(&reqst->brw_bulkid); - } - LASSERT(reqstmsg->msg_type == (__u32)srpc_service2request(sv->sv_id)); - - reply->brw_status = 0; - rpc->srpc_done = brw_server_rpc_done; - - if ((reqst->brw_rw != LST_BRW_READ && reqst->brw_rw != LST_BRW_WRITE) || - (reqst->brw_flags != LST_BRW_CHECK_NONE && - reqst->brw_flags != LST_BRW_CHECK_FULL && - reqst->brw_flags != LST_BRW_CHECK_SIMPLE)) { - reply->brw_status = EINVAL; - return 0; - } - - if (reqstmsg->msg_ses_feats & ~LST_FEATS_MASK) { - replymsg->msg_ses_feats = LST_FEATS_MASK; - reply->brw_status = EPROTO; - return 0; - } - - if (!(reqstmsg->msg_ses_feats & LST_FEAT_BULK_LEN)) { - /* compat with old version */ - if (reqst->brw_len & ~PAGE_MASK) { - reply->brw_status = EINVAL; - return 0; - } - npg = reqst->brw_len >> PAGE_SHIFT; - - } else { - npg = (reqst->brw_len + PAGE_SIZE - 1) >> PAGE_SHIFT; - } - - replymsg->msg_ses_feats = reqstmsg->msg_ses_feats; - - if (!reqst->brw_len || 
npg > LNET_MAX_IOV) { - reply->brw_status = EINVAL; - return 0; - } - - rc = sfw_alloc_pages(rpc, rpc->srpc_scd->scd_cpt, npg, - reqst->brw_len, - reqst->brw_rw == LST_BRW_WRITE); - if (rc) - return rc; - - if (reqst->brw_rw == LST_BRW_READ) - brw_fill_bulk(rpc->srpc_bulk, reqst->brw_flags, BRW_MAGIC); - else - brw_fill_bulk(rpc->srpc_bulk, reqst->brw_flags, BRW_POISON); - - return 0; -} - -struct sfw_test_client_ops brw_test_client; - -void brw_init_test_client(void) -{ - brw_test_client.tso_init = brw_client_init; - brw_test_client.tso_fini = brw_client_fini; - brw_test_client.tso_prep_rpc = brw_client_prep_rpc; - brw_test_client.tso_done_rpc = brw_client_done_rpc; -}; - -struct srpc_service brw_test_service; - -void brw_init_test_service(void) -{ - brw_test_service.sv_id = SRPC_SERVICE_BRW; - brw_test_service.sv_name = "brw_test"; - brw_test_service.sv_handler = brw_server_handle; - brw_test_service.sv_bulk_ready = brw_bulk_ready; - brw_test_service.sv_wi_total = brw_srv_workitems; -} diff --git a/drivers/staging/lustre/lnet/selftest/conctl.c b/drivers/staging/lustre/lnet/selftest/conctl.c deleted file mode 100644 index a2d8092bdeb7..000000000000 --- a/drivers/staging/lustre/lnet/selftest/conctl.c +++ /dev/null @@ -1,799 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -/* - * GPL HEADER START - * - * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 only, - * as published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, but - * WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * General Public License version 2 for more details (a copy is included - * in the LICENSE file that accompanied this code). 
- * - * You should have received a copy of the GNU General Public License - * version 2 along with this program; If not, see - * http://www.gnu.org/licenses/gpl-2.0.html - * - * GPL HEADER END - */ -/* - * Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved. - * Use is subject to license terms. - * - * Copyright (c) 2012, Intel Corporation. - */ -/* - * This file is part of Lustre, http://www.lustre.org/ - * Lustre is a trademark of Sun Microsystems, Inc. - * - * lnet/selftest/conctl.c - * - * IOC handle in kernel - * - * Author: Liang Zhen <liangzhen@clusterfs.com> - */ - -#include <linux/libcfs/libcfs.h> -#include <linux/lnet/lib-lnet.h> -#include <uapi/linux/lnet/lnetst.h> -#include "console.h" - -static int -lst_session_new_ioctl(struct lstio_session_new_args *args) -{ - char name[LST_NAME_SIZE + 1]; - int rc; - - if (!args->lstio_ses_idp || /* address for output sid */ - !args->lstio_ses_key || /* no key is specified */ - !args->lstio_ses_namep || /* session name */ - args->lstio_ses_nmlen <= 0 || - args->lstio_ses_nmlen > LST_NAME_SIZE) - return -EINVAL; - - if (copy_from_user(name, args->lstio_ses_namep, - args->lstio_ses_nmlen)) { - return -EFAULT; - } - - name[args->lstio_ses_nmlen] = 0; - - rc = lstcon_session_new(name, - args->lstio_ses_key, - args->lstio_ses_feats, - args->lstio_ses_timeout, - args->lstio_ses_force, - args->lstio_ses_idp); - - return rc; -} - -static int -lst_session_end_ioctl(struct lstio_session_end_args *args) -{ - if (args->lstio_ses_key != console_session.ses_key) - return -EACCES; - - return lstcon_session_end(); -} - -static int -lst_session_info_ioctl(struct lstio_session_info_args *args) -{ - /* no checking of key */ - - if (!args->lstio_ses_idp || /* address for output sid */ - !args->lstio_ses_keyp || /* address for output key */ - !args->lstio_ses_featp || /* address for output features */ - !args->lstio_ses_ndinfo || /* address for output ndinfo */ - !args->lstio_ses_namep || /* address for output 
name */ - args->lstio_ses_nmlen <= 0 || - args->lstio_ses_nmlen > LST_NAME_SIZE) - return -EINVAL; - - return lstcon_session_info(args->lstio_ses_idp, - args->lstio_ses_keyp, - args->lstio_ses_featp, - args->lstio_ses_ndinfo, - args->lstio_ses_namep, - args->lstio_ses_nmlen); -} - -static int -lst_debug_ioctl(struct lstio_debug_args *args) -{ - char name[LST_NAME_SIZE + 1]; - int client = 1; - int rc; - - if (args->lstio_dbg_key != console_session.ses_key) - return -EACCES; - - if (!args->lstio_dbg_resultp) - return -EINVAL; - - if (args->lstio_dbg_namep && /* name of batch/group */ - (args->lstio_dbg_nmlen <= 0 || - args->lstio_dbg_nmlen > LST_NAME_SIZE)) - return -EINVAL; - - if (args->lstio_dbg_namep) { - - if (copy_from_user(name, args->lstio_dbg_namep, - args->lstio_dbg_nmlen)) - return -EFAULT; - - name[args->lstio_dbg_nmlen] = 0; - } - - rc = -EINVAL; - - switch (args->lstio_dbg_type) { - case LST_OPC_SESSION: - rc = lstcon_session_debug(args->lstio_dbg_timeout, - args->lstio_dbg_resultp); - break; - - case LST_OPC_BATCHSRV: - client = 0; - /* fall through */ - case LST_OPC_BATCHCLI: - if (!args->lstio_dbg_namep) - goto out; - - rc = lstcon_batch_debug(args->lstio_dbg_timeout, - name, client, args->lstio_dbg_resultp); - break; - - case LST_OPC_GROUP: - if (!args->lstio_dbg_namep) - goto out; - - rc = lstcon_group_debug(args->lstio_dbg_timeout, - name, args->lstio_dbg_resultp); - break; - - case LST_OPC_NODES: - if (args->lstio_dbg_count <= 0 || - !args->lstio_dbg_idsp) - goto out; - - rc = lstcon_nodes_debug(args->lstio_dbg_timeout, - args->lstio_dbg_count, - args->lstio_dbg_idsp, - args->lstio_dbg_resultp); - break; - - default: - break; - } - -out: - return rc; -} - -static int -lst_group_add_ioctl(struct lstio_group_add_args *args) -{ - char name[LST_NAME_SIZE + 1]; - int rc; - - if (args->lstio_grp_key != console_session.ses_key) - return -EACCES; - - if (!args->lstio_grp_namep || - args->lstio_grp_nmlen <= 0 || - args->lstio_grp_nmlen > LST_NAME_SIZE) - 
return -EINVAL; - - if (copy_from_user(name, args->lstio_grp_namep, - args->lstio_grp_nmlen)) - return -EFAULT; - - name[args->lstio_grp_nmlen] = 0; - - rc = lstcon_group_add(name); - - return rc; -} - -static int -lst_group_del_ioctl(struct lstio_group_del_args *args) -{ - int rc; - char name[LST_NAME_SIZE + 1]; - - if (args->lstio_grp_key != console_session.ses_key) - return -EACCES; - - if (!args->lstio_grp_namep || - args->lstio_grp_nmlen <= 0 || - args->lstio_grp_nmlen > LST_NAME_SIZE) - return -EINVAL; - - if (copy_from_user(name, args->lstio_grp_namep, - args->lstio_grp_nmlen)) - return -EFAULT; - - name[args->lstio_grp_nmlen] = 0; - - rc = lstcon_group_del(name); - - return rc; -} - -static int -lst_group_update_ioctl(struct lstio_group_update_args *args) -{ - int rc; - char name[LST_NAME_SIZE + 1]; - - if (args->lstio_grp_key != console_session.ses_key) - return -EACCES; - - if (!args->lstio_grp_resultp || - !args->lstio_grp_namep || - args->lstio_grp_nmlen <= 0 || - args->lstio_grp_nmlen > LST_NAME_SIZE) - return -EINVAL; - - if (copy_from_user(name, args->lstio_grp_namep, - args->lstio_grp_nmlen)) - return -EFAULT; - - name[args->lstio_grp_nmlen] = 0; - - switch (args->lstio_grp_opc) { - case LST_GROUP_CLEAN: - rc = lstcon_group_clean(name, args->lstio_grp_args); - break; - - case LST_GROUP_REFRESH: - rc = lstcon_group_refresh(name, args->lstio_grp_resultp); - break; - - case LST_GROUP_RMND: - if (args->lstio_grp_count <= 0 || - !args->lstio_grp_idsp) { - rc = -EINVAL; - break; - } - rc = lstcon_nodes_remove(name, args->lstio_grp_count, - args->lstio_grp_idsp, - args->lstio_grp_resultp); - break; - - default: - rc = -EINVAL; - break; - } - - return rc; -} - -static int -lst_nodes_add_ioctl(struct lstio_group_nodes_args *args) -{ - unsigned int feats; - int rc; - char name[LST_NAME_SIZE + 1]; - - if (args->lstio_grp_key != console_session.ses_key) - return -EACCES; - - if (!args->lstio_grp_idsp || /* array of ids */ - args->lstio_grp_count <= 0 || - 
!args->lstio_grp_resultp || - !args->lstio_grp_featp || - !args->lstio_grp_namep || - args->lstio_grp_nmlen <= 0 || - args->lstio_grp_nmlen > LST_NAME_SIZE) - return -EINVAL; - - if (copy_from_user(name, args->lstio_grp_namep, - args->lstio_grp_nmlen)) - return -EFAULT; - - name[args->lstio_grp_nmlen] = 0; - - rc = lstcon_nodes_add(name, args->lstio_grp_count, - args->lstio_grp_idsp, &feats, - args->lstio_grp_resultp); - - if (!rc && - copy_to_user(args->lstio_grp_featp, &feats, sizeof(feats))) { - return -EINVAL; - } - - return rc; -} - -static int -lst_group_list_ioctl(struct lstio_group_list_args *args) -{ - if (args->lstio_grp_key != console_session.ses_key) - return -EACCES; - - if (args->lstio_grp_idx < 0 || - !args->lstio_grp_namep || - args->lstio_grp_nmlen <= 0 || - args->lstio_grp_nmlen > LST_NAME_SIZE) - return -EINVAL; - - return lstcon_group_list(args->lstio_grp_idx, - args->lstio_grp_nmlen, - args->lstio_grp_namep); -} - -static int -lst_group_info_ioctl(struct lstio_group_info_args *args) -{ - char name[LST_NAME_SIZE + 1]; - int ndent; - int index; - int rc; - - if (args->lstio_grp_key != console_session.ses_key) - return -EACCES; - - if (!args->lstio_grp_namep || - args->lstio_grp_nmlen <= 0 || - args->lstio_grp_nmlen > LST_NAME_SIZE) - return -EINVAL; - - if (!args->lstio_grp_entp && /* output: group entry */ - !args->lstio_grp_dentsp) /* output: node entry */ - return -EINVAL; - - if (args->lstio_grp_dentsp) { /* have node entry */ - if (!args->lstio_grp_idxp || /* node index */ - !args->lstio_grp_ndentp) /* # of node entry */ - return -EINVAL; - - if (copy_from_user(&ndent, args->lstio_grp_ndentp, - sizeof(ndent)) || - copy_from_user(&index, args->lstio_grp_idxp, - sizeof(index))) - return -EFAULT; - - if (ndent <= 0 || index < 0) - return -EINVAL; - } - - if (copy_from_user(name, args->lstio_grp_namep, - args->lstio_grp_nmlen)) - return -EFAULT; - - name[args->lstio_grp_nmlen] = 0; - - rc = lstcon_group_info(name, args->lstio_grp_entp, - &index, 
&ndent, args->lstio_grp_dentsp); - - if (rc) - return rc; - - if (args->lstio_grp_dentsp && - (copy_to_user(args->lstio_grp_idxp, &index, sizeof(index)) || - copy_to_user(args->lstio_grp_ndentp, &ndent, sizeof(ndent)))) - return -EFAULT; - - return 0; -} - -static int -lst_batch_add_ioctl(struct lstio_batch_add_args *args) -{ - int rc; - char name[LST_NAME_SIZE + 1]; - - if (args->lstio_bat_key != console_session.ses_key) - return -EACCES; - - if (!args->lstio_bat_namep || - args->lstio_bat_nmlen <= 0 || - args->lstio_bat_nmlen > LST_NAME_SIZE) - return -EINVAL; - - if (copy_from_user(name, args->lstio_bat_namep, - args->lstio_bat_nmlen)) - return -EFAULT; - - name[args->lstio_bat_nmlen] = 0; - - rc = lstcon_batch_add(name); - - return rc; -} - -static int -lst_batch_run_ioctl(struct lstio_batch_run_args *args) -{ - int rc; - char name[LST_NAME_SIZE + 1]; - - if (args->lstio_bat_key != console_session.ses_key) - return -EACCES; - - if (!args->lstio_bat_namep || - args->lstio_bat_nmlen <= 0 || - args->lstio_bat_nmlen > LST_NAME_SIZE) - return -EINVAL; - - if (copy_from_user(name, args->lstio_bat_namep, - args->lstio_bat_nmlen)) - return -EFAULT; - - name[args->lstio_bat_nmlen] = 0; - - rc = lstcon_batch_run(name, args->lstio_bat_timeout, - args->lstio_bat_resultp); - - return rc; -} - -static int -lst_batch_stop_ioctl(struct lstio_batch_stop_args *args) -{ - int rc; - char name[LST_NAME_SIZE + 1]; - - if (args->lstio_bat_key != console_session.ses_key) - return -EACCES; - - if (!args->lstio_bat_resultp || - !args->lstio_bat_namep || - args->lstio_bat_nmlen <= 0 || - args->lstio_bat_nmlen > LST_NAME_SIZE) - return -EINVAL; - - if (copy_from_user(name, args->lstio_bat_namep, - args->lstio_bat_nmlen)) - return -EFAULT; - - name[args->lstio_bat_nmlen] = 0; - - rc = lstcon_batch_stop(name, args->lstio_bat_force, - args->lstio_bat_resultp); - - return rc; -} - -static int -lst_batch_query_ioctl(struct lstio_batch_query_args *args) -{ - char name[LST_NAME_SIZE + 1]; - int 
rc; - - if (args->lstio_bat_key != console_session.ses_key) - return -EACCES; - - if (!args->lstio_bat_resultp || - !args->lstio_bat_namep || - args->lstio_bat_nmlen <= 0 || - args->lstio_bat_nmlen > LST_NAME_SIZE) - return -EINVAL; - - if (args->lstio_bat_testidx < 0) - return -EINVAL; - - if (copy_from_user(name, args->lstio_bat_namep, - args->lstio_bat_nmlen)) - return -EFAULT; - - name[args->lstio_bat_nmlen] = 0; - - rc = lstcon_test_batch_query(name, - args->lstio_bat_testidx, - args->lstio_bat_client, - args->lstio_bat_timeout, - args->lstio_bat_resultp); - - return rc; -} - -static int -lst_batch_list_ioctl(struct lstio_batch_list_args *args) -{ - if (args->lstio_bat_key != console_session.ses_key) - return -EACCES; - - if (args->lstio_bat_idx < 0 || - !args->lstio_bat_namep || - args->lstio_bat_nmlen <= 0 || - args->lstio_bat_nmlen > LST_NAME_SIZE) - return -EINVAL; - - return lstcon_batch_list(args->lstio_bat_idx, - args->lstio_bat_nmlen, - args->lstio_bat_namep); -} - -static int -lst_batch_info_ioctl(struct lstio_batch_info_args *args) -{ - char name[LST_NAME_SIZE + 1]; - int rc; - int index; - int ndent; - - if (args->lstio_bat_key != console_session.ses_key) - return -EACCES; - - if (!args->lstio_bat_namep || /* batch name */ - args->lstio_bat_nmlen <= 0 || - args->lstio_bat_nmlen > LST_NAME_SIZE) - return -EINVAL; - - if (!args->lstio_bat_entp && /* output: batch entry */ - !args->lstio_bat_dentsp) /* output: node entry */ - return -EINVAL; - - if (args->lstio_bat_dentsp) { /* have node entry */ - if (!args->lstio_bat_idxp || /* node index */ - !args->lstio_bat_ndentp) /* # of node entry */ - return -EINVAL; - - if (copy_from_user(&index, args->lstio_bat_idxp, - sizeof(index)) || - copy_from_user(&ndent, args->lstio_bat_ndentp, - sizeof(ndent))) - return -EFAULT; - - if (ndent <= 0 || index < 0) - return -EINVAL; - } - - if (copy_from_user(name, args->lstio_bat_namep, - args->lstio_bat_nmlen)) - return -EFAULT; - - name[args->lstio_bat_nmlen] = 0; - - 
rc = lstcon_batch_info(name, args->lstio_bat_entp, - args->lstio_bat_server, args->lstio_bat_testidx, - &index, &ndent, args->lstio_bat_dentsp); - - if (rc) - return rc; - - if (args->lstio_bat_dentsp && - (copy_to_user(args->lstio_bat_idxp, &index, sizeof(index)) || - copy_to_user(args->lstio_bat_ndentp, &ndent, sizeof(ndent)))) - rc = -EFAULT; - - return rc; -} - -static int -lst_stat_query_ioctl(struct lstio_stat_args *args) -{ - int rc; - char name[LST_NAME_SIZE + 1]; - - /* TODO: not finished */ - if (args->lstio_sta_key != console_session.ses_key) - return -EACCES; - - if (!args->lstio_sta_resultp) - return -EINVAL; - - if (args->lstio_sta_idsp) { - if (args->lstio_sta_count <= 0) - return -EINVAL; - - rc = lstcon_nodes_stat(args->lstio_sta_count, - args->lstio_sta_idsp, - args->lstio_sta_timeout, - args->lstio_sta_resultp); - } else if (args->lstio_sta_namep) { - if (args->lstio_sta_nmlen <= 0 || - args->lstio_sta_nmlen > LST_NAME_SIZE) - return -EINVAL; - - rc = copy_from_user(name, args->lstio_sta_namep, - args->lstio_sta_nmlen); - if (!rc) - rc = lstcon_group_stat(name, args->lstio_sta_timeout, - args->lstio_sta_resultp); - else - rc = -EFAULT; - } else { - rc = -EINVAL; - } - - return rc; -} - -static int lst_test_add_ioctl(struct lstio_test_args *args) -{ - char batch_name[LST_NAME_SIZE + 1]; - char src_name[LST_NAME_SIZE + 1]; - char dst_name[LST_NAME_SIZE + 1]; - void *param = NULL; - int ret = 0; - int rc = -ENOMEM; - - if (!args->lstio_tes_resultp || - !args->lstio_tes_retp || - !args->lstio_tes_bat_name || /* no specified batch */ - args->lstio_tes_bat_nmlen <= 0 || - args->lstio_tes_bat_nmlen > LST_NAME_SIZE || - !args->lstio_tes_sgrp_name || /* no source group */ - args->lstio_tes_sgrp_nmlen <= 0 || - args->lstio_tes_sgrp_nmlen > LST_NAME_SIZE || - !args->lstio_tes_dgrp_name || /* no target group */ - args->lstio_tes_dgrp_nmlen <= 0 || - args->lstio_tes_dgrp_nmlen > LST_NAME_SIZE) - return -EINVAL; - - if (!args->lstio_tes_loop || /* negative is 
infinite */ - args->lstio_tes_concur <= 0 || - args->lstio_tes_dist <= 0 || - args->lstio_tes_span <= 0) - return -EINVAL; - - /* have parameter, check if parameter length is valid */ - if (args->lstio_tes_param && - (args->lstio_tes_param_len <= 0 || - args->lstio_tes_param_len > - PAGE_SIZE - sizeof(struct lstcon_test))) - return -EINVAL; - - /* Enforce zero parameter length if there's no parameter */ - if (!args->lstio_tes_param && args->lstio_tes_param_len) - return -EINVAL; - - if (args->lstio_tes_param) { - param = memdup_user(args->lstio_tes_param, - args->lstio_tes_param_len); - if (IS_ERR(param)) - return PTR_ERR(param); - } - - rc = -EFAULT; - if (copy_from_user(batch_name, args->lstio_tes_bat_name, - args->lstio_tes_bat_nmlen) || - copy_from_user(src_name, args->lstio_tes_sgrp_name, - args->lstio_tes_sgrp_nmlen) || - copy_from_user(dst_name, args->lstio_tes_dgrp_name, - args->lstio_tes_dgrp_nmlen)) - goto out; - - rc = lstcon_test_add(batch_name, args->lstio_tes_type, - args->lstio_tes_loop, args->lstio_tes_concur, - args->lstio_tes_dist, args->lstio_tes_span, - src_name, dst_name, param, - args->lstio_tes_param_len, - &ret, args->lstio_tes_resultp); - - if (!rc && ret) - rc = (copy_to_user(args->lstio_tes_retp, &ret, - sizeof(ret))) ? 
-EFAULT : 0; -out: - kfree(param); - - return rc; -} - -int -lstcon_ioctl_entry(unsigned int cmd, struct libcfs_ioctl_hdr *hdr) -{ - char *buf; - struct libcfs_ioctl_data *data; - int opc; - int rc; - - if (cmd != IOC_LIBCFS_LNETST) - return -EINVAL; - - data = container_of(hdr, struct libcfs_ioctl_data, ioc_hdr); - - opc = data->ioc_u32[0]; - - if (data->ioc_plen1 > PAGE_SIZE) - return -EINVAL; - - buf = kmalloc(data->ioc_plen1, GFP_KERNEL); - if (!buf) - return -ENOMEM; - - /* copy in parameter */ - if (copy_from_user(buf, data->ioc_pbuf1, data->ioc_plen1)) { - kfree(buf); - return -EFAULT; - } - - mutex_lock(&console_session.ses_mutex); - - console_session.ses_laststamp = ktime_get_real_seconds(); - - if (console_session.ses_shutdown) { - rc = -ESHUTDOWN; - goto out; - } - - if (console_session.ses_expired) - lstcon_session_end(); - - if (opc != LSTIO_SESSION_NEW && - console_session.ses_state == LST_SESSION_NONE) { - CDEBUG(D_NET, "LST no active session\n"); - rc = -ESRCH; - goto out; - } - - memset(&console_session.ses_trans_stat, 0, sizeof(struct lstcon_trans_stat)); - - switch (opc) { - case LSTIO_SESSION_NEW: - rc = lst_session_new_ioctl((struct lstio_session_new_args *)buf); - break; - case LSTIO_SESSION_END: - rc = lst_session_end_ioctl((struct lstio_session_end_args *)buf); - break; - case LSTIO_SESSION_INFO: - rc = lst_session_info_ioctl((struct lstio_session_info_args *)buf); - break; - case LSTIO_DEBUG: - rc = lst_debug_ioctl((struct lstio_debug_args *)buf); - break; - case LSTIO_GROUP_ADD: - rc = lst_group_add_ioctl((struct lstio_group_add_args *)buf); - break; - case LSTIO_GROUP_DEL: - rc = lst_group_del_ioctl((struct lstio_group_del_args *)buf); - break; - case LSTIO_GROUP_UPDATE: - rc = lst_group_update_ioctl((struct lstio_group_update_args *)buf); - break; - case LSTIO_NODES_ADD: - rc = lst_nodes_add_ioctl((struct lstio_group_nodes_args *)buf); - break; - case LSTIO_GROUP_LIST: - rc = lst_group_list_ioctl((struct lstio_group_list_args *)buf); - 
break; - case LSTIO_GROUP_INFO: - rc = lst_group_info_ioctl((struct lstio_group_info_args *)buf); - break; - case LSTIO_BATCH_ADD: - rc = lst_batch_add_ioctl((struct lstio_batch_add_args *)buf); - break; - case LSTIO_BATCH_START: - rc = lst_batch_run_ioctl((struct lstio_batch_run_args *)buf); - break; - case LSTIO_BATCH_STOP: - rc = lst_batch_stop_ioctl((struct lstio_batch_stop_args *)buf); - break; - case LSTIO_BATCH_QUERY: - rc = lst_batch_query_ioctl((struct lstio_batch_query_args *)buf); - break; - case LSTIO_BATCH_LIST: - rc = lst_batch_list_ioctl((struct lstio_batch_list_args *)buf); - break; - case LSTIO_BATCH_INFO: - rc = lst_batch_info_ioctl((struct lstio_batch_info_args *)buf); - break; - case LSTIO_TEST_ADD: - rc = lst_test_add_ioctl((struct lstio_test_args *)buf); - break; - case LSTIO_STAT_QUERY: - rc = lst_stat_query_ioctl((struct lstio_stat_args *)buf); - break; - default: - rc = -EINVAL; - } - - if (copy_to_user(data->ioc_pbuf2, &console_session.ses_trans_stat, - sizeof(struct lstcon_trans_stat))) - rc = -EFAULT; -out: - mutex_unlock(&console_session.ses_mutex); - - kfree(buf); - - return rc; -} diff --git a/drivers/staging/lustre/lnet/selftest/conrpc.c b/drivers/staging/lustre/lnet/selftest/conrpc.c deleted file mode 100644 index 6dcc966b293b..000000000000 --- a/drivers/staging/lustre/lnet/selftest/conrpc.c +++ /dev/null @@ -1,1397 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -/* - * GPL HEADER START - * - * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 only, - * as published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, but - * WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the GNU - * General Public License version 2 for more details (a copy is included - * in the LICENSE file that accompanied this code). - * - * You should have received a copy of the GNU General Public License - * version 2 along with this program; If not, see - * http://www.gnu.org/licenses/gpl-2.0.html - * - * GPL HEADER END - */ -/* - * Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved. - * Use is subject to license terms. - * - * Copyright (c) 2011, 2012, Intel Corporation. - */ -/* - * This file is part of Lustre, http://www.lustre.org/ - * Lustre is a trademark of Sun Microsystems, Inc. - * - * lnet/selftest/conctl.c - * - * Console framework rpcs - * - * Author: Liang Zhen <liang@whamcloud.com> - */ - -#include <linux/libcfs/libcfs.h> -#include <linux/lnet/lib-lnet.h> -#include "timer.h" -#include "conrpc.h" -#include "console.h" - -void lstcon_rpc_stat_reply(struct lstcon_rpc_trans *, struct srpc_msg *, - struct lstcon_node *, struct lstcon_trans_stat *); - -static void -lstcon_rpc_done(struct srpc_client_rpc *rpc) -{ - struct lstcon_rpc *crpc = (struct lstcon_rpc *)rpc->crpc_priv; - - LASSERT(crpc && rpc == crpc->crp_rpc); - LASSERT(crpc->crp_posted && !crpc->crp_finished); - - spin_lock(&rpc->crpc_lock); - - if (!crpc->crp_trans) { - /* - * Orphan RPC is not in any transaction, - * I'm just a poor body and nobody loves me - */ - spin_unlock(&rpc->crpc_lock); - - /* release it */ - lstcon_rpc_put(crpc); - return; - } - - /* not an orphan RPC */ - crpc->crp_finished = 1; - - if (!crpc->crp_stamp) { - /* not aborted */ - LASSERT(!crpc->crp_status); - - crpc->crp_stamp = cfs_time_current(); - crpc->crp_status = rpc->crpc_status; - } - - /* wakeup (transaction)thread if I'm the last RPC in the transaction */ - if (atomic_dec_and_test(&crpc->crp_trans->tas_remaining)) - wake_up(&crpc->crp_trans->tas_waitq); - - spin_unlock(&rpc->crpc_lock); -} - -static int -lstcon_rpc_init(struct lstcon_node *nd, int service, unsigned int feats, - 
int bulk_npg, int bulk_len, int embedded, - struct lstcon_rpc *crpc) -{ - crpc->crp_rpc = sfw_create_rpc(nd->nd_id, service, - feats, bulk_npg, bulk_len, - lstcon_rpc_done, (void *)crpc); - if (!crpc->crp_rpc) - return -ENOMEM; - - crpc->crp_trans = NULL; - crpc->crp_node = nd; - crpc->crp_posted = 0; - crpc->crp_finished = 0; - crpc->crp_unpacked = 0; - crpc->crp_status = 0; - crpc->crp_stamp = 0; - crpc->crp_embedded = embedded; - INIT_LIST_HEAD(&crpc->crp_link); - - atomic_inc(&console_session.ses_rpc_counter); - - return 0; -} - -static int -lstcon_rpc_prep(struct lstcon_node *nd, int service, unsigned int feats, - int bulk_npg, int bulk_len, struct lstcon_rpc **crpcpp) -{ - struct lstcon_rpc *crpc = NULL; - int rc; - - spin_lock(&console_session.ses_rpc_lock); - - crpc = list_first_entry_or_null(&console_session.ses_rpc_freelist, - struct lstcon_rpc, crp_link); - if (crpc) - list_del_init(&crpc->crp_link); - - spin_unlock(&console_session.ses_rpc_lock); - - if (!crpc) { - crpc = kzalloc(sizeof(*crpc), GFP_NOFS); - if (!crpc) - return -ENOMEM; - } - - rc = lstcon_rpc_init(nd, service, feats, bulk_npg, bulk_len, 0, crpc); - if (!rc) { - *crpcpp = crpc; - return 0; - } - - kfree(crpc); - - return rc; -} - -void -lstcon_rpc_put(struct lstcon_rpc *crpc) -{ - struct srpc_bulk *bulk = &crpc->crp_rpc->crpc_bulk; - int i; - - LASSERT(list_empty(&crpc->crp_link)); - - for (i = 0; i < bulk->bk_niov; i++) { - if (!bulk->bk_iovs[i].bv_page) - continue; - - __free_page(bulk->bk_iovs[i].bv_page); - } - - srpc_client_rpc_decref(crpc->crp_rpc); - - if (crpc->crp_embedded) { - /* embedded RPC, don't recycle it */ - memset(crpc, 0, sizeof(*crpc)); - crpc->crp_embedded = 1; - - } else { - spin_lock(&console_session.ses_rpc_lock); - - list_add(&crpc->crp_link, - &console_session.ses_rpc_freelist); - - spin_unlock(&console_session.ses_rpc_lock); - } - - /* RPC is not alive now */ - atomic_dec(&console_session.ses_rpc_counter); -} - -static void -lstcon_rpc_post(struct lstcon_rpc 
*crpc) -{ - struct lstcon_rpc_trans *trans = crpc->crp_trans; - - LASSERT(trans); - - atomic_inc(&trans->tas_remaining); - crpc->crp_posted = 1; - - sfw_post_rpc(crpc->crp_rpc); -} - -static char * -lstcon_rpc_trans_name(int transop) -{ - if (transop == LST_TRANS_SESNEW) - return "SESNEW"; - - if (transop == LST_TRANS_SESEND) - return "SESEND"; - - if (transop == LST_TRANS_SESQRY) - return "SESQRY"; - - if (transop == LST_TRANS_SESPING) - return "SESPING"; - - if (transop == LST_TRANS_TSBCLIADD) - return "TSBCLIADD"; - - if (transop == LST_TRANS_TSBSRVADD) - return "TSBSRVADD"; - - if (transop == LST_TRANS_TSBRUN) - return "TSBRUN"; - - if (transop == LST_TRANS_TSBSTOP) - return "TSBSTOP"; - - if (transop == LST_TRANS_TSBCLIQRY) - return "TSBCLIQRY"; - - if (transop == LST_TRANS_TSBSRVQRY) - return "TSBSRVQRY"; - - if (transop == LST_TRANS_STATQRY) - return "STATQRY"; - - return "Unknown"; -} - -int -lstcon_rpc_trans_prep(struct list_head *translist, int transop, - struct lstcon_rpc_trans **transpp) -{ - struct lstcon_rpc_trans *trans; - - if (translist) { - list_for_each_entry(trans, translist, tas_link) { - /* - * Can't enqueue two private transaction on - * the same object - */ - if ((trans->tas_opc & transop) == LST_TRANS_PRIVATE) - return -EPERM; - } - } - - /* create a trans group */ - trans = kzalloc(sizeof(*trans), GFP_NOFS); - if (!trans) - return -ENOMEM; - - trans->tas_opc = transop; - - if (!translist) - INIT_LIST_HEAD(&trans->tas_olink); - else - list_add_tail(&trans->tas_olink, translist); - - list_add_tail(&trans->tas_link, &console_session.ses_trans_list); - - INIT_LIST_HEAD(&trans->tas_rpcs_list); - atomic_set(&trans->tas_remaining, 0); - init_waitqueue_head(&trans->tas_waitq); - - spin_lock(&console_session.ses_rpc_lock); - trans->tas_features = console_session.ses_features; - spin_unlock(&console_session.ses_rpc_lock); - - *transpp = trans; - return 0; -} - -void -lstcon_rpc_trans_addreq(struct lstcon_rpc_trans *trans, struct lstcon_rpc *crpc) -{ 
- list_add_tail(&crpc->crp_link, &trans->tas_rpcs_list); - crpc->crp_trans = trans; -} - -void -lstcon_rpc_trans_abort(struct lstcon_rpc_trans *trans, int error) -{ - struct srpc_client_rpc *rpc; - struct lstcon_rpc *crpc; - struct lstcon_node *nd; - - list_for_each_entry(crpc, &trans->tas_rpcs_list, crp_link) { - rpc = crpc->crp_rpc; - - spin_lock(&rpc->crpc_lock); - - if (!crpc->crp_posted || /* not posted */ - crpc->crp_stamp) { /* rpc done or aborted already */ - if (!crpc->crp_stamp) { - crpc->crp_stamp = cfs_time_current(); - crpc->crp_status = -EINTR; - } - spin_unlock(&rpc->crpc_lock); - continue; - } - - crpc->crp_stamp = cfs_time_current(); - crpc->crp_status = error; - - spin_unlock(&rpc->crpc_lock); - - sfw_abort_rpc(rpc); - - if (error != -ETIMEDOUT) - continue; - - nd = crpc->crp_node; - if (cfs_time_after(nd->nd_stamp, crpc->crp_stamp)) - continue; - - nd->nd_stamp = crpc->crp_stamp; - nd->nd_state = LST_NODE_DOWN; - } -} - -static int -lstcon_rpc_trans_check(struct lstcon_rpc_trans *trans) -{ - if (console_session.ses_shutdown && - !list_empty(&trans->tas_olink)) /* Not an end session RPC */ - return 1; - - return !atomic_read(&trans->tas_remaining) ? 1 : 0; -} - -int -lstcon_rpc_trans_postwait(struct lstcon_rpc_trans *trans, int timeout) -{ - struct lstcon_rpc *crpc; - int rc; - - if (list_empty(&trans->tas_rpcs_list)) - return 0; - - if (timeout < LST_TRANS_MIN_TIMEOUT) - timeout = LST_TRANS_MIN_TIMEOUT; - - CDEBUG(D_NET, "Transaction %s started\n", - lstcon_rpc_trans_name(trans->tas_opc)); - - /* post all requests */ - list_for_each_entry(crpc, &trans->tas_rpcs_list, crp_link) { - LASSERT(!crpc->crp_posted); - - lstcon_rpc_post(crpc); - } - - mutex_unlock(&console_session.ses_mutex); - - rc = wait_event_interruptible_timeout(trans->tas_waitq, - lstcon_rpc_trans_check(trans), - timeout * HZ); - rc = (rc > 0) ? 0 : ((rc < 0) ? 
-EINTR : -ETIMEDOUT); - - mutex_lock(&console_session.ses_mutex); - - if (console_session.ses_shutdown) - rc = -ESHUTDOWN; - - if (rc || atomic_read(&trans->tas_remaining)) { - /* treat short timeout as canceled */ - if (rc == -ETIMEDOUT && timeout < LST_TRANS_MIN_TIMEOUT * 2) - rc = -EINTR; - - lstcon_rpc_trans_abort(trans, rc); - } - - CDEBUG(D_NET, "Transaction %s stopped: %d\n", - lstcon_rpc_trans_name(trans->tas_opc), rc); - - lstcon_rpc_trans_stat(trans, lstcon_trans_stat()); - - return rc; -} - -static int -lstcon_rpc_get_reply(struct lstcon_rpc *crpc, struct srpc_msg **msgpp) -{ - struct lstcon_node *nd = crpc->crp_node; - struct srpc_client_rpc *rpc = crpc->crp_rpc; - struct srpc_generic_reply *rep; - - LASSERT(nd && rpc); - LASSERT(crpc->crp_stamp); - - if (crpc->crp_status) { - *msgpp = NULL; - return crpc->crp_status; - } - - *msgpp = &rpc->crpc_replymsg; - if (!crpc->crp_unpacked) { - sfw_unpack_message(*msgpp); - crpc->crp_unpacked = 1; - } - - if (cfs_time_after(nd->nd_stamp, crpc->crp_stamp)) - return 0; - - nd->nd_stamp = crpc->crp_stamp; - rep = &(*msgpp)->msg_body.reply; - - if (rep->sid.ses_nid == LNET_NID_ANY) - nd->nd_state = LST_NODE_UNKNOWN; - else if (lstcon_session_match(rep->sid)) - nd->nd_state = LST_NODE_ACTIVE; - else - nd->nd_state = LST_NODE_BUSY; - - return 0; -} - -void -lstcon_rpc_trans_stat(struct lstcon_rpc_trans *trans, struct lstcon_trans_stat *stat) -{ - struct lstcon_rpc *crpc; - struct srpc_msg *rep; - int error; - - LASSERT(stat); - - memset(stat, 0, sizeof(*stat)); - - list_for_each_entry(crpc, &trans->tas_rpcs_list, crp_link) { - lstcon_rpc_stat_total(stat, 1); - - LASSERT(crpc->crp_stamp); - - error = lstcon_rpc_get_reply(crpc, &rep); - if (error) { - lstcon_rpc_stat_failure(stat, 1); - if (!stat->trs_rpc_errno) - stat->trs_rpc_errno = -error; - - continue; - } - - lstcon_rpc_stat_success(stat, 1); - - lstcon_rpc_stat_reply(trans, rep, crpc->crp_node, stat); - } - - if (trans->tas_opc == LST_TRANS_SESNEW && 
!stat->trs_fwk_errno) { - stat->trs_fwk_errno = - lstcon_session_feats_check(trans->tas_features); - } - - CDEBUG(D_NET, "transaction %s : success %d, failure %d, total %d, RPC error(%d), Framework error(%d)\n", - lstcon_rpc_trans_name(trans->tas_opc), - lstcon_rpc_stat_success(stat, 0), - lstcon_rpc_stat_failure(stat, 0), - lstcon_rpc_stat_total(stat, 0), - stat->trs_rpc_errno, stat->trs_fwk_errno); -} - -int -lstcon_rpc_trans_interpreter(struct lstcon_rpc_trans *trans, - struct list_head __user *head_up, - lstcon_rpc_readent_func_t readent) -{ - struct list_head tmp; - struct list_head __user *next; - struct lstcon_rpc_ent *ent; - struct srpc_generic_reply *rep; - struct lstcon_rpc *crpc; - struct srpc_msg *msg; - struct lstcon_node *nd; - long dur; - struct timeval tv; - int error; - - LASSERT(head_up); - - next = head_up; - - list_for_each_entry(crpc, &trans->tas_rpcs_list, crp_link) { - if (copy_from_user(&tmp, next, - sizeof(struct list_head))) - return -EFAULT; - - next = tmp.next; - if (next == head_up) - return 0; - - ent = list_entry(next, struct lstcon_rpc_ent, rpe_link); - - LASSERT(crpc->crp_stamp); - - error = lstcon_rpc_get_reply(crpc, &msg); - - nd = crpc->crp_node; - - dur = (long)cfs_time_sub(crpc->crp_stamp, - (unsigned long)console_session.ses_id.ses_stamp); - jiffies_to_timeval(dur, &tv); - - if (copy_to_user(&ent->rpe_peer, &nd->nd_id, - sizeof(struct lnet_process_id)) || - copy_to_user(&ent->rpe_stamp, &tv, sizeof(tv)) || - copy_to_user(&ent->rpe_state, &nd->nd_state, - sizeof(nd->nd_state)) || - copy_to_user(&ent->rpe_rpc_errno, &error, - sizeof(error))) - return -EFAULT; - - if (error) - continue; - - /* RPC is done */ - rep = (struct srpc_generic_reply *)&msg->msg_body.reply; - - if (copy_to_user(&ent->rpe_sid, &rep->sid, sizeof(rep->sid)) || - copy_to_user(&ent->rpe_fwk_errno, &rep->status, - sizeof(rep->status))) - return -EFAULT; - - if (!readent) - continue; - - error = readent(trans->tas_opc, msg, ent); - if (error) - return error; - 
} - - return 0; -} - -void -lstcon_rpc_trans_destroy(struct lstcon_rpc_trans *trans) -{ - struct srpc_client_rpc *rpc; - struct lstcon_rpc *crpc; - struct lstcon_rpc *tmp; - int count = 0; - - list_for_each_entry_safe(crpc, tmp, &trans->tas_rpcs_list, crp_link) { - rpc = crpc->crp_rpc; - - spin_lock(&rpc->crpc_lock); - - /* free it if not posted or finished already */ - if (!crpc->crp_posted || crpc->crp_finished) { - spin_unlock(&rpc->crpc_lock); - - list_del_init(&crpc->crp_link); - lstcon_rpc_put(crpc); - - continue; - } - - /* - * rpcs can be still not callbacked (even LNetMDUnlink is - * called) because huge timeout for inaccessible network, - * don't make user wait for them, just abandon them, they - * will be recycled in callback - */ - LASSERT(crpc->crp_status); - - crpc->crp_node = NULL; - crpc->crp_trans = NULL; - list_del_init(&crpc->crp_link); - count++; - - spin_unlock(&rpc->crpc_lock); - - atomic_dec(&trans->tas_remaining); - } - - LASSERT(!atomic_read(&trans->tas_remaining)); - - list_del(&trans->tas_link); - if (!list_empty(&trans->tas_olink)) - list_del(&trans->tas_olink); - - CDEBUG(D_NET, "Transaction %s destroyed with %d pending RPCs\n", - lstcon_rpc_trans_name(trans->tas_opc), count); - - kfree(trans); -} - -int -lstcon_sesrpc_prep(struct lstcon_node *nd, int transop, - unsigned int feats, struct lstcon_rpc **crpc) -{ - struct srpc_mksn_reqst *msrq; - struct srpc_rmsn_reqst *rsrq; - int rc; - - switch (transop) { - case LST_TRANS_SESNEW: - rc = lstcon_rpc_prep(nd, SRPC_SERVICE_MAKE_SESSION, - feats, 0, 0, crpc); - if (rc) - return rc; - - msrq = &(*crpc)->crp_rpc->crpc_reqstmsg.msg_body.mksn_reqst; - msrq->mksn_sid = console_session.ses_id; - msrq->mksn_force = console_session.ses_force; - strlcpy(msrq->mksn_name, console_session.ses_name, - sizeof(msrq->mksn_name)); - break; - - case LST_TRANS_SESEND: - rc = lstcon_rpc_prep(nd, SRPC_SERVICE_REMOVE_SESSION, - feats, 0, 0, crpc); - if (rc) - return rc; - - rsrq = 
&(*crpc)->crp_rpc->crpc_reqstmsg.msg_body.rmsn_reqst; - rsrq->rmsn_sid = console_session.ses_id; - break; - - default: - LBUG(); - } - - return 0; -} - -int -lstcon_dbgrpc_prep(struct lstcon_node *nd, unsigned int feats, - struct lstcon_rpc **crpc) -{ - struct srpc_debug_reqst *drq; - int rc; - - rc = lstcon_rpc_prep(nd, SRPC_SERVICE_DEBUG, feats, 0, 0, crpc); - if (rc) - return rc; - - drq = &(*crpc)->crp_rpc->crpc_reqstmsg.msg_body.dbg_reqst; - - drq->dbg_sid = console_session.ses_id; - drq->dbg_flags = 0; - - return rc; -} - -int -lstcon_batrpc_prep(struct lstcon_node *nd, int transop, unsigned int feats, - struct lstcon_tsb_hdr *tsb, struct lstcon_rpc **crpc) -{ - struct lstcon_batch *batch; - struct srpc_batch_reqst *brq; - int rc; - - rc = lstcon_rpc_prep(nd, SRPC_SERVICE_BATCH, feats, 0, 0, crpc); - if (rc) - return rc; - - brq = &(*crpc)->crp_rpc->crpc_reqstmsg.msg_body.bat_reqst; - - brq->bar_sid = console_session.ses_id; - brq->bar_bid = tsb->tsb_id; - brq->bar_testidx = tsb->tsb_index; - brq->bar_opc = transop == LST_TRANS_TSBRUN ? SRPC_BATCH_OPC_RUN : - (transop == LST_TRANS_TSBSTOP ? 
SRPC_BATCH_OPC_STOP : - SRPC_BATCH_OPC_QUERY); - - if (transop != LST_TRANS_TSBRUN && - transop != LST_TRANS_TSBSTOP) - return 0; - - LASSERT(!tsb->tsb_index); - - batch = (struct lstcon_batch *)tsb; - brq->bar_arg = batch->bat_arg; - - return 0; -} - -int -lstcon_statrpc_prep(struct lstcon_node *nd, unsigned int feats, - struct lstcon_rpc **crpc) -{ - struct srpc_stat_reqst *srq; - int rc; - - rc = lstcon_rpc_prep(nd, SRPC_SERVICE_QUERY_STAT, feats, 0, 0, crpc); - if (rc) - return rc; - - srq = &(*crpc)->crp_rpc->crpc_reqstmsg.msg_body.stat_reqst; - - srq->str_sid = console_session.ses_id; - srq->str_type = 0; /* XXX remove it */ - - return 0; -} - -static struct lnet_process_id_packed * -lstcon_next_id(int idx, int nkiov, struct bio_vec *kiov) -{ - struct lnet_process_id_packed *pid; - int i; - - i = idx / SFW_ID_PER_PAGE; - - LASSERT(i < nkiov); - - pid = (struct lnet_process_id_packed *)page_address(kiov[i].bv_page); - - return &pid[idx % SFW_ID_PER_PAGE]; -} - -static int -lstcon_dstnodes_prep(struct lstcon_group *grp, int idx, - int dist, int span, int nkiov, struct bio_vec *kiov) -{ - struct lnet_process_id_packed *pid; - struct lstcon_ndlink *ndl; - struct lstcon_node *nd; - int start; - int end; - int i = 0; - - LASSERT(dist >= 1); - LASSERT(span >= 1); - LASSERT(grp->grp_nnode >= 1); - - if (span > grp->grp_nnode) - return -EINVAL; - - start = ((idx / dist) * span) % grp->grp_nnode; - end = ((idx / dist) * span + span - 1) % grp->grp_nnode; - - list_for_each_entry(ndl, &grp->grp_ndl_list, ndl_link) { - nd = ndl->ndl_node; - if (i < start) { - i++; - continue; - } - - if (i > (end >= start ? 
end : grp->grp_nnode)) - break; - - pid = lstcon_next_id((i - start), nkiov, kiov); - pid->nid = nd->nd_id.nid; - pid->pid = nd->nd_id.pid; - i++; - } - - if (start <= end) /* done */ - return 0; - - list_for_each_entry(ndl, &grp->grp_ndl_list, ndl_link) { - if (i > grp->grp_nnode + end) - break; - - nd = ndl->ndl_node; - pid = lstcon_next_id((i - start), nkiov, kiov); - pid->nid = nd->nd_id.nid; - pid->pid = nd->nd_id.pid; - i++; - } - - return 0; -} - -static int -lstcon_pingrpc_prep(struct lst_test_ping_param *param, struct srpc_test_reqst *req) -{ - struct test_ping_req *prq = &req->tsr_u.ping; - - prq->png_size = param->png_size; - prq->png_flags = param->png_flags; - /* TODO dest */ - return 0; -} - -static int -lstcon_bulkrpc_v0_prep(struct lst_test_bulk_param *param, - struct srpc_test_reqst *req) -{ - struct test_bulk_req *brq = &req->tsr_u.bulk_v0; - - brq->blk_opc = param->blk_opc; - brq->blk_npg = DIV_ROUND_UP(param->blk_size, PAGE_SIZE); - brq->blk_flags = param->blk_flags; - - return 0; -} - -static int -lstcon_bulkrpc_v1_prep(struct lst_test_bulk_param *param, bool is_client, - struct srpc_test_reqst *req) -{ - struct test_bulk_req_v1 *brq = &req->tsr_u.bulk_v1; - - brq->blk_opc = param->blk_opc; - brq->blk_flags = param->blk_flags; - brq->blk_len = param->blk_size; - brq->blk_offset = is_client ? param->blk_cli_off : param->blk_srv_off; - - return 0; -} - -int -lstcon_testrpc_prep(struct lstcon_node *nd, int transop, unsigned int feats, - struct lstcon_test *test, struct lstcon_rpc **crpc) -{ - struct lstcon_group *sgrp = test->tes_src_grp; - struct lstcon_group *dgrp = test->tes_dst_grp; - struct srpc_test_reqst *trq; - struct srpc_bulk *bulk; - int i; - int npg = 0; - int nob = 0; - int rc = 0; - - if (transop == LST_TRANS_TSBCLIADD) { - npg = sfw_id_pages(test->tes_span); - nob = !(feats & LST_FEAT_BULK_LEN) ? 
- npg * PAGE_SIZE : - sizeof(struct lnet_process_id_packed) * test->tes_span; - } - - rc = lstcon_rpc_prep(nd, SRPC_SERVICE_TEST, feats, npg, nob, crpc); - if (rc) - return rc; - - trq = &(*crpc)->crp_rpc->crpc_reqstmsg.msg_body.tes_reqst; - - if (transop == LST_TRANS_TSBSRVADD) { - int ndist = DIV_ROUND_UP(sgrp->grp_nnode, test->tes_dist); - int nspan = DIV_ROUND_UP(dgrp->grp_nnode, test->tes_span); - int nmax = DIV_ROUND_UP(ndist, nspan); - - trq->tsr_ndest = 0; - trq->tsr_loop = nmax * test->tes_dist * test->tes_concur; - } else { - bulk = &(*crpc)->crp_rpc->crpc_bulk; - - for (i = 0; i < npg; i++) { - int len; - - LASSERT(nob > 0); - - len = !(feats & LST_FEAT_BULK_LEN) ? - PAGE_SIZE : - min_t(int, nob, PAGE_SIZE); - nob -= len; - - bulk->bk_iovs[i].bv_offset = 0; - bulk->bk_iovs[i].bv_len = len; - bulk->bk_iovs[i].bv_page = alloc_page(GFP_KERNEL); - - if (!bulk->bk_iovs[i].bv_page) { - lstcon_rpc_put(*crpc); - return -ENOMEM; - } - } - - bulk->bk_sink = 0; - - LASSERT(transop == LST_TRANS_TSBCLIADD); - - rc = lstcon_dstnodes_prep(test->tes_dst_grp, - test->tes_cliidx++, - test->tes_dist, - test->tes_span, - npg, &bulk->bk_iovs[0]); - if (rc) { - lstcon_rpc_put(*crpc); - return rc; - } - - trq->tsr_ndest = test->tes_span; - trq->tsr_loop = test->tes_loop; - } - - trq->tsr_sid = console_session.ses_id; - trq->tsr_bid = test->tes_hdr.tsb_id; - trq->tsr_concur = test->tes_concur; - trq->tsr_is_client = (transop == LST_TRANS_TSBCLIADD) ? 
1 : 0; - trq->tsr_stop_onerr = !!test->tes_stop_onerr; - - switch (test->tes_type) { - case LST_TEST_PING: - trq->tsr_service = SRPC_SERVICE_PING; - rc = lstcon_pingrpc_prep((struct lst_test_ping_param *) - &test->tes_param[0], trq); - break; - - case LST_TEST_BULK: - trq->tsr_service = SRPC_SERVICE_BRW; - if (!(feats & LST_FEAT_BULK_LEN)) { - rc = lstcon_bulkrpc_v0_prep((struct lst_test_bulk_param *) - &test->tes_param[0], trq); - } else { - rc = lstcon_bulkrpc_v1_prep((struct lst_test_bulk_param *) - &test->tes_param[0], - trq->tsr_is_client, trq); - } - - break; - default: - LBUG(); - break; - } - - return rc; -} - -static int -lstcon_sesnew_stat_reply(struct lstcon_rpc_trans *trans, - struct lstcon_node *nd, struct srpc_msg *reply) -{ - struct srpc_mksn_reply *mksn_rep = &reply->msg_body.mksn_reply; - int status = mksn_rep->mksn_status; - - if (!status && - (reply->msg_ses_feats & ~LST_FEATS_MASK)) { - mksn_rep->mksn_status = EPROTO; - status = EPROTO; - } - - if (status == EPROTO) { - CNETERR("session protocol error from %s: %u\n", - libcfs_nid2str(nd->nd_id.nid), - reply->msg_ses_feats); - } - - if (status) - return status; - - if (!trans->tas_feats_updated) { - spin_lock(&console_session.ses_rpc_lock); - if (!trans->tas_feats_updated) { /* recheck with lock */ - trans->tas_feats_updated = 1; - trans->tas_features = reply->msg_ses_feats; - } - spin_unlock(&console_session.ses_rpc_lock); - } - - if (reply->msg_ses_feats != trans->tas_features) { - CNETERR("Framework features %x from %s is different with features on this transaction: %x\n", - reply->msg_ses_feats, libcfs_nid2str(nd->nd_id.nid), - trans->tas_features); - mksn_rep->mksn_status = EPROTO; - status = EPROTO; - } - - if (!status) { - /* session timeout on remote node */ - nd->nd_timeout = mksn_rep->mksn_timeout; - } - - return status; -} - -void -lstcon_rpc_stat_reply(struct lstcon_rpc_trans *trans, struct srpc_msg *msg, - struct lstcon_node *nd, struct lstcon_trans_stat *stat) -{ - struct 
srpc_rmsn_reply *rmsn_rep; - struct srpc_debug_reply *dbg_rep; - struct srpc_batch_reply *bat_rep; - struct srpc_test_reply *test_rep; - struct srpc_stat_reply *stat_rep; - int rc = 0; - - switch (trans->tas_opc) { - case LST_TRANS_SESNEW: - rc = lstcon_sesnew_stat_reply(trans, nd, msg); - if (!rc) { - lstcon_sesop_stat_success(stat, 1); - return; - } - - lstcon_sesop_stat_failure(stat, 1); - break; - - case LST_TRANS_SESEND: - rmsn_rep = &msg->msg_body.rmsn_reply; - /* ESRCH is not an error for end session */ - if (!rmsn_rep->rmsn_status || - rmsn_rep->rmsn_status == ESRCH) { - lstcon_sesop_stat_success(stat, 1); - return; - } - - lstcon_sesop_stat_failure(stat, 1); - rc = rmsn_rep->rmsn_status; - break; - - case LST_TRANS_SESQRY: - case LST_TRANS_SESPING: - dbg_rep = &msg->msg_body.dbg_reply; - - if (dbg_rep->dbg_status == ESRCH) { - lstcon_sesqry_stat_unknown(stat, 1); - return; - } - - if (lstcon_session_match(dbg_rep->dbg_sid)) - lstcon_sesqry_stat_active(stat, 1); - else - lstcon_sesqry_stat_busy(stat, 1); - return; - - case LST_TRANS_TSBRUN: - case LST_TRANS_TSBSTOP: - bat_rep = &msg->msg_body.bat_reply; - - if (!bat_rep->bar_status) { - lstcon_tsbop_stat_success(stat, 1); - return; - } - - if (bat_rep->bar_status == EPERM && - trans->tas_opc == LST_TRANS_TSBSTOP) { - lstcon_tsbop_stat_success(stat, 1); - return; - } - - lstcon_tsbop_stat_failure(stat, 1); - rc = bat_rep->bar_status; - break; - - case LST_TRANS_TSBCLIQRY: - case LST_TRANS_TSBSRVQRY: - bat_rep = &msg->msg_body.bat_reply; - - if (bat_rep->bar_active) - lstcon_tsbqry_stat_run(stat, 1); - else - lstcon_tsbqry_stat_idle(stat, 1); - - if (!bat_rep->bar_status) - return; - - lstcon_tsbqry_stat_failure(stat, 1); - rc = bat_rep->bar_status; - break; - - case LST_TRANS_TSBCLIADD: - case LST_TRANS_TSBSRVADD: - test_rep = &msg->msg_body.tes_reply; - - if (!test_rep->tsr_status) { - lstcon_tsbop_stat_success(stat, 1); - return; - } - - lstcon_tsbop_stat_failure(stat, 1); - rc = test_rep->tsr_status; - 
break; - - case LST_TRANS_STATQRY: - stat_rep = &msg->msg_body.stat_reply; - - if (!stat_rep->str_status) { - lstcon_statqry_stat_success(stat, 1); - return; - } - - lstcon_statqry_stat_failure(stat, 1); - rc = stat_rep->str_status; - break; - - default: - LBUG(); - } - - if (!stat->trs_fwk_errno) - stat->trs_fwk_errno = rc; -} - -int -lstcon_rpc_trans_ndlist(struct list_head *ndlist, - struct list_head *translist, int transop, - void *arg, lstcon_rpc_cond_func_t condition, - struct lstcon_rpc_trans **transpp) -{ - struct lstcon_rpc_trans *trans; - struct lstcon_ndlink *ndl; - struct lstcon_node *nd; - struct lstcon_rpc *rpc; - unsigned int feats; - int rc; - - /* Creating session RPG for list of nodes */ - - rc = lstcon_rpc_trans_prep(translist, transop, &trans); - if (rc) { - CERROR("Can't create transaction %d: %d\n", transop, rc); - return rc; - } - - feats = trans->tas_features; - list_for_each_entry(ndl, ndlist, ndl_link) { - rc = !condition ? 1 : - condition(transop, ndl->ndl_node, arg); - - if (!rc) - continue; - - if (rc < 0) { - CDEBUG(D_NET, "Condition error while creating RPC for transaction %d: %d\n", - transop, rc); - break; - } - - nd = ndl->ndl_node; - - switch (transop) { - case LST_TRANS_SESNEW: - case LST_TRANS_SESEND: - rc = lstcon_sesrpc_prep(nd, transop, feats, &rpc); - break; - case LST_TRANS_SESQRY: - case LST_TRANS_SESPING: - rc = lstcon_dbgrpc_prep(nd, feats, &rpc); - break; - case LST_TRANS_TSBCLIADD: - case LST_TRANS_TSBSRVADD: - rc = lstcon_testrpc_prep(nd, transop, feats, - (struct lstcon_test *)arg, - &rpc); - break; - case LST_TRANS_TSBRUN: - case LST_TRANS_TSBSTOP: - case LST_TRANS_TSBCLIQRY: - case LST_TRANS_TSBSRVQRY: - rc = lstcon_batrpc_prep(nd, transop, feats, - (struct lstcon_tsb_hdr *)arg, - &rpc); - break; - case LST_TRANS_STATQRY: - rc = lstcon_statrpc_prep(nd, feats, &rpc); - break; - default: - rc = -EINVAL; - break; - } - - if (rc) { - CERROR("Failed to create RPC for transaction %s: %d\n", - 
lstcon_rpc_trans_name(transop), rc); - break; - } - - lstcon_rpc_trans_addreq(trans, rpc); - } - - if (!rc) { - *transpp = trans; - return 0; - } - - lstcon_rpc_trans_destroy(trans); - - return rc; -} - -static void -lstcon_rpc_pinger(void *arg) -{ - struct stt_timer *ptimer = (struct stt_timer *)arg; - struct lstcon_rpc_trans *trans; - struct lstcon_rpc *crpc; - struct srpc_msg *rep; - struct srpc_debug_reqst *drq; - struct lstcon_ndlink *ndl; - struct lstcon_node *nd; - int intv; - int count = 0; - int rc; - - /* - * RPC pinger is a special case of transaction, - * it's called by timer at 8 seconds interval. - */ - mutex_lock(&console_session.ses_mutex); - - if (console_session.ses_shutdown || console_session.ses_expired) { - mutex_unlock(&console_session.ses_mutex); - return; - } - - if (!console_session.ses_expired && - ktime_get_real_seconds() - console_session.ses_laststamp > - (time64_t)console_session.ses_timeout) - console_session.ses_expired = 1; - - trans = console_session.ses_ping; - - LASSERT(trans); - - list_for_each_entry(ndl, &console_session.ses_ndl_list, ndl_link) { - nd = ndl->ndl_node; - - if (console_session.ses_expired) { - /* idle console, end session on all nodes */ - if (nd->nd_state != LST_NODE_ACTIVE) - continue; - - rc = lstcon_sesrpc_prep(nd, LST_TRANS_SESEND, - trans->tas_features, &crpc); - if (rc) { - CERROR("Out of memory\n"); - break; - } - - lstcon_rpc_trans_addreq(trans, crpc); - lstcon_rpc_post(crpc); - - continue; - } - - crpc = &nd->nd_ping; - - if (crpc->crp_rpc) { - LASSERT(crpc->crp_trans == trans); - LASSERT(!list_empty(&crpc->crp_link)); - - spin_lock(&crpc->crp_rpc->crpc_lock); - - LASSERT(crpc->crp_posted); - - if (!crpc->crp_finished) { - /* in flight */ - spin_unlock(&crpc->crp_rpc->crpc_lock); - continue; - } - - spin_unlock(&crpc->crp_rpc->crpc_lock); - - lstcon_rpc_get_reply(crpc, &rep); - - list_del_init(&crpc->crp_link); - - lstcon_rpc_put(crpc); - } - - if (nd->nd_state != LST_NODE_ACTIVE) - continue; - - intv = 
(jiffies - nd->nd_stamp) / msecs_to_jiffies(MSEC_PER_SEC); - if (intv < nd->nd_timeout / 2) - continue; - - rc = lstcon_rpc_init(nd, SRPC_SERVICE_DEBUG, - trans->tas_features, 0, 0, 1, crpc); - if (rc) { - CERROR("Out of memory\n"); - break; - } - - drq = &crpc->crp_rpc->crpc_reqstmsg.msg_body.dbg_reqst; - - drq->dbg_sid = console_session.ses_id; - drq->dbg_flags = 0; - - lstcon_rpc_trans_addreq(trans, crpc); - lstcon_rpc_post(crpc); - - count++; - } - - if (console_session.ses_expired) { - mutex_unlock(&console_session.ses_mutex); - return; - } - - CDEBUG(D_NET, "Ping %d nodes in session\n", count); - - ptimer->stt_expires = ktime_get_real_seconds() + LST_PING_INTERVAL; - stt_add_timer(ptimer); - - mutex_unlock(&console_session.ses_mutex); -} - -int -lstcon_rpc_pinger_start(void) -{ - struct stt_timer *ptimer; - int rc; - - LASSERT(list_empty(&console_session.ses_rpc_freelist)); - LASSERT(!atomic_read(&console_session.ses_rpc_counter)); - - rc = lstcon_rpc_trans_prep(NULL, LST_TRANS_SESPING, - &console_session.ses_ping); - if (rc) { - CERROR("Failed to create console pinger\n"); - return rc; - } - - ptimer = &console_session.ses_ping_timer; - ptimer->stt_expires = ktime_get_real_seconds() + LST_PING_INTERVAL; - - stt_add_timer(ptimer); - - return 0; -} - -void -lstcon_rpc_pinger_stop(void) -{ - LASSERT(console_session.ses_shutdown); - - stt_del_timer(&console_session.ses_ping_timer); - - lstcon_rpc_trans_abort(console_session.ses_ping, -ESHUTDOWN); - lstcon_rpc_trans_stat(console_session.ses_ping, lstcon_trans_stat()); - lstcon_rpc_trans_destroy(console_session.ses_ping); - - memset(lstcon_trans_stat(), 0, sizeof(struct lstcon_trans_stat)); - - console_session.ses_ping = NULL; -} - -void -lstcon_rpc_cleanup_wait(void) -{ - struct lstcon_rpc_trans *trans; - struct lstcon_rpc *crpc; - struct lstcon_rpc *temp; - struct list_head *pacer; - struct list_head zlist; - - /* Called with hold of global mutex */ - - LASSERT(console_session.ses_shutdown); - - while 
(!list_empty(&console_session.ses_trans_list)) { - list_for_each(pacer, &console_session.ses_trans_list) { - trans = list_entry(pacer, struct lstcon_rpc_trans, - tas_link); - - CDEBUG(D_NET, "Session closed, wakeup transaction %s\n", - lstcon_rpc_trans_name(trans->tas_opc)); - - wake_up(&trans->tas_waitq); - } - - mutex_unlock(&console_session.ses_mutex); - - CWARN("Session is shutting down, waiting for termination of transactions\n"); - set_current_state(TASK_UNINTERRUPTIBLE); - schedule_timeout(HZ); - - mutex_lock(&console_session.ses_mutex); - } - - spin_lock(&console_session.ses_rpc_lock); - - lst_wait_until(!atomic_read(&console_session.ses_rpc_counter), - console_session.ses_rpc_lock, - "Network is not accessible or target is down, waiting for %d console RPCs to being recycled\n", - atomic_read(&console_session.ses_rpc_counter)); - - list_add(&zlist, &console_session.ses_rpc_freelist); - list_del_init(&console_session.ses_rpc_freelist); - - spin_unlock(&console_session.ses_rpc_lock); - - list_for_each_entry_safe(crpc, temp, &zlist, crp_link) { - list_del(&crpc->crp_link); - kfree(crpc); - } -} - -int -lstcon_rpc_module_init(void) -{ - INIT_LIST_HEAD(&console_session.ses_ping_timer.stt_list); - console_session.ses_ping_timer.stt_func = lstcon_rpc_pinger; - console_session.ses_ping_timer.stt_data = &console_session.ses_ping_timer; - - console_session.ses_ping = NULL; - - spin_lock_init(&console_session.ses_rpc_lock); - atomic_set(&console_session.ses_rpc_counter, 0); - INIT_LIST_HEAD(&console_session.ses_rpc_freelist); - - return 0; -} - -void -lstcon_rpc_module_fini(void) -{ - LASSERT(list_empty(&console_session.ses_rpc_freelist)); - LASSERT(!atomic_read(&console_session.ses_rpc_counter)); -} diff --git a/drivers/staging/lustre/lnet/selftest/conrpc.h b/drivers/staging/lustre/lnet/selftest/conrpc.h deleted file mode 100644 index 374a5f31ef6f..000000000000 --- a/drivers/staging/lustre/lnet/selftest/conrpc.h +++ /dev/null @@ -1,143 +0,0 @@ -// 
SPDX-License-Identifier: GPL-2.0 -/* - * GPL HEADER START - * - * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 only, - * as published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, but - * WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * General Public License version 2 for more details (a copy is included - * in the LICENSE file that accompanied this code). - * - * You should have received a copy of the GNU General Public License - * version 2 along with this program; If not, see - * http://www.gnu.org/licenses/gpl-2.0.html - * - * GPL HEADER END - */ -/* - * Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved. - * Use is subject to license terms. - * - * Copyright (c) 2011, Intel Corporation. - */ -/* - * This file is part of Lustre, http://www.lustre.org/ - * Lustre is a trademark of Sun Microsystems, Inc. 
- * - * /lnet/selftest/conrpc.h - * - * Console rpc - * - * Author: Liang Zhen <liang@whamcloud.com> - */ - -#ifndef __LST_CONRPC_H__ -#define __LST_CONRPC_H__ - -#include <linux/libcfs/libcfs.h> -#include <linux/lnet/lib-types.h> -#include <uapi/linux/lnet/lnetst.h> -#include "rpc.h" -#include "selftest.h" - -/* Console rpc and rpc transaction */ -#define LST_TRANS_TIMEOUT 30 -#define LST_TRANS_MIN_TIMEOUT 3 - -#define LST_VALIDATE_TIMEOUT(t) min(max(t, LST_TRANS_MIN_TIMEOUT), LST_TRANS_TIMEOUT) - -#define LST_PING_INTERVAL 8 - -struct lstcon_rpc_trans; -struct lstcon_tsb_hdr; -struct lstcon_test; -struct lstcon_node; - -struct lstcon_rpc { - struct list_head crp_link; /* chain on rpc transaction */ - struct srpc_client_rpc *crp_rpc; /* client rpc */ - struct lstcon_node *crp_node; /* destination node */ - struct lstcon_rpc_trans *crp_trans; /* conrpc transaction */ - - unsigned int crp_posted:1; /* rpc is posted */ - unsigned int crp_finished:1; /* rpc is finished */ - unsigned int crp_unpacked:1; /* reply is unpacked */ - /** RPC is embedded in other structure and can't free it */ - unsigned int crp_embedded:1; - int crp_status; /* console rpc errors */ - unsigned long crp_stamp; /* replied time stamp */ -}; - -struct lstcon_rpc_trans { - struct list_head tas_olink; /* link chain on owner list */ - struct list_head tas_link; /* link chain on global list */ - int tas_opc; /* operation code of transaction */ - unsigned int tas_feats_updated; /* features mask is uptodate */ - unsigned int tas_features; /* test features mask */ - wait_queue_head_t tas_waitq; /* wait queue head */ - atomic_t tas_remaining; /* # of un-scheduled rpcs */ - struct list_head tas_rpcs_list; /* queued requests */ -}; - -#define LST_TRANS_PRIVATE 0x1000 - -#define LST_TRANS_SESNEW (LST_TRANS_PRIVATE | 0x01) -#define LST_TRANS_SESEND (LST_TRANS_PRIVATE | 0x02) -#define LST_TRANS_SESQRY 0x03 -#define LST_TRANS_SESPING 0x04 - -#define LST_TRANS_TSBCLIADD (LST_TRANS_PRIVATE | 0x11) -#define 
LST_TRANS_TSBSRVADD (LST_TRANS_PRIVATE | 0x12) -#define LST_TRANS_TSBRUN (LST_TRANS_PRIVATE | 0x13) -#define LST_TRANS_TSBSTOP (LST_TRANS_PRIVATE | 0x14) -#define LST_TRANS_TSBCLIQRY 0x15 -#define LST_TRANS_TSBSRVQRY 0x16 - -#define LST_TRANS_STATQRY 0x21 - -typedef int (*lstcon_rpc_cond_func_t)(int, struct lstcon_node *, void *); -typedef int (*lstcon_rpc_readent_func_t)(int, struct srpc_msg *, - struct lstcon_rpc_ent __user *); - -int lstcon_sesrpc_prep(struct lstcon_node *nd, int transop, - unsigned int version, struct lstcon_rpc **crpc); -int lstcon_dbgrpc_prep(struct lstcon_node *nd, - unsigned int version, struct lstcon_rpc **crpc); -int lstcon_batrpc_prep(struct lstcon_node *nd, int transop, - unsigned int version, struct lstcon_tsb_hdr *tsb, - struct lstcon_rpc **crpc); -int lstcon_testrpc_prep(struct lstcon_node *nd, int transop, - unsigned int version, struct lstcon_test *test, - struct lstcon_rpc **crpc); -int lstcon_statrpc_prep(struct lstcon_node *nd, unsigned int version, - struct lstcon_rpc **crpc); -void lstcon_rpc_put(struct lstcon_rpc *crpc); -int lstcon_rpc_trans_prep(struct list_head *translist, - int transop, struct lstcon_rpc_trans **transpp); -int lstcon_rpc_trans_ndlist(struct list_head *ndlist, - struct list_head *translist, int transop, - void *arg, lstcon_rpc_cond_func_t condition, - struct lstcon_rpc_trans **transpp); -void lstcon_rpc_trans_stat(struct lstcon_rpc_trans *trans, - struct lstcon_trans_stat *stat); -int lstcon_rpc_trans_interpreter(struct lstcon_rpc_trans *trans, - struct list_head __user *head_up, - lstcon_rpc_readent_func_t readent); -void lstcon_rpc_trans_abort(struct lstcon_rpc_trans *trans, int error); -void lstcon_rpc_trans_destroy(struct lstcon_rpc_trans *trans); -void lstcon_rpc_trans_addreq(struct lstcon_rpc_trans *trans, - struct lstcon_rpc *req); -int lstcon_rpc_trans_postwait(struct lstcon_rpc_trans *trans, int timeout); -int lstcon_rpc_pinger_start(void); -void lstcon_rpc_pinger_stop(void); -void 
lstcon_rpc_cleanup_wait(void); -int lstcon_rpc_module_init(void); -void lstcon_rpc_module_fini(void); - -#endif diff --git a/drivers/staging/lustre/lnet/selftest/console.c b/drivers/staging/lustre/lnet/selftest/console.c deleted file mode 100644 index 1acd5cb324b1..000000000000 --- a/drivers/staging/lustre/lnet/selftest/console.c +++ /dev/null @@ -1,2101 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -/* - * GPL HEADER START - * - * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 only, - * as published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, but - * WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * General Public License version 2 for more details (a copy is included - * in the LICENSE file that accompanied this code). - * - * You should have received a copy of the GNU General Public License - * version 2 along with this program; If not, see - * http://www.gnu.org/licenses/gpl-2.0.html - * - * GPL HEADER END - */ -/* - * Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved. - * Use is subject to license terms. - * - * Copyright (c) 2012, Intel Corporation. - */ -/* - * This file is part of Lustre, http://www.lustre.org/ - * Lustre is a trademark of Sun Microsystems, Inc. 
- * - * lnet/selftest/conctl.c - * - * Infrastructure of LST console - * - * Author: Liang Zhen <liangzhen@clusterfs.com> - */ - -#include <linux/libcfs/libcfs.h> -#include <linux/lnet/lib-lnet.h> -#include "console.h" -#include "conrpc.h" - -#define LST_NODE_STATE_COUNTER(nd, p) \ -do { \ - if ((nd)->nd_state == LST_NODE_ACTIVE) \ - (p)->nle_nactive++; \ - else if ((nd)->nd_state == LST_NODE_BUSY) \ - (p)->nle_nbusy++; \ - else if ((nd)->nd_state == LST_NODE_DOWN) \ - (p)->nle_ndown++; \ - else \ - (p)->nle_nunknown++; \ - (p)->nle_nnode++; \ -} while (0) - -struct lstcon_session console_session; - -static void -lstcon_node_get(struct lstcon_node *nd) -{ - LASSERT(nd->nd_ref >= 1); - - nd->nd_ref++; -} - -static int -lstcon_node_find(struct lnet_process_id id, struct lstcon_node **ndpp, - int create) -{ - struct lstcon_ndlink *ndl; - unsigned int idx = LNET_NIDADDR(id.nid) % LST_GLOBAL_HASHSIZE; - - LASSERT(id.nid != LNET_NID_ANY); - - list_for_each_entry(ndl, &console_session.ses_ndl_hash[idx], - ndl_hlink) { - if (ndl->ndl_node->nd_id.nid != id.nid || - ndl->ndl_node->nd_id.pid != id.pid) - continue; - - lstcon_node_get(ndl->ndl_node); - *ndpp = ndl->ndl_node; - return 0; - } - - if (!create) - return -ENOENT; - - *ndpp = kzalloc(sizeof(**ndpp) + sizeof(*ndl), GFP_KERNEL); - if (!*ndpp) - return -ENOMEM; - - ndl = (struct lstcon_ndlink *)(*ndpp + 1); - - ndl->ndl_node = *ndpp; - - ndl->ndl_node->nd_ref = 1; - ndl->ndl_node->nd_id = id; - ndl->ndl_node->nd_stamp = cfs_time_current(); - ndl->ndl_node->nd_state = LST_NODE_UNKNOWN; - ndl->ndl_node->nd_timeout = 0; - memset(&ndl->ndl_node->nd_ping, 0, sizeof(struct lstcon_rpc)); - - /* - * queued in global hash & list, no refcount is taken by - * global hash & list, if caller release his refcount, - * node will be released - */ - list_add_tail(&ndl->ndl_hlink, &console_session.ses_ndl_hash[idx]); - list_add_tail(&ndl->ndl_link, &console_session.ses_ndl_list); - - return 0; -} - -static void -lstcon_node_put(struct 
lstcon_node *nd) -{ - struct lstcon_ndlink *ndl; - - LASSERT(nd->nd_ref > 0); - - if (--nd->nd_ref > 0) - return; - - ndl = (struct lstcon_ndlink *)(nd + 1); - - LASSERT(!list_empty(&ndl->ndl_link)); - LASSERT(!list_empty(&ndl->ndl_hlink)); - - /* remove from session */ - list_del(&ndl->ndl_link); - list_del(&ndl->ndl_hlink); - - kfree(nd); -} - -static int -lstcon_ndlink_find(struct list_head *hash, struct lnet_process_id id, - struct lstcon_ndlink **ndlpp, int create) -{ - unsigned int idx = LNET_NIDADDR(id.nid) % LST_NODE_HASHSIZE; - struct lstcon_ndlink *ndl; - struct lstcon_node *nd; - int rc; - - if (id.nid == LNET_NID_ANY) - return -EINVAL; - - /* search in hash */ - list_for_each_entry(ndl, &hash[idx], ndl_hlink) { - if (ndl->ndl_node->nd_id.nid != id.nid || - ndl->ndl_node->nd_id.pid != id.pid) - continue; - - *ndlpp = ndl; - return 0; - } - - if (!create) - return -ENOENT; - - /* find or create in session hash */ - rc = lstcon_node_find(id, &nd, (create == 1) ? 1 : 0); - if (rc) - return rc; - - ndl = kzalloc(sizeof(struct lstcon_ndlink), GFP_NOFS); - if (!ndl) { - lstcon_node_put(nd); - return -ENOMEM; - } - - *ndlpp = ndl; - - ndl->ndl_node = nd; - INIT_LIST_HEAD(&ndl->ndl_link); - list_add_tail(&ndl->ndl_hlink, &hash[idx]); - - return 0; -} - -static void -lstcon_ndlink_release(struct lstcon_ndlink *ndl) -{ - LASSERT(list_empty(&ndl->ndl_link)); - LASSERT(!list_empty(&ndl->ndl_hlink)); - - list_del(&ndl->ndl_hlink); /* delete from hash */ - lstcon_node_put(ndl->ndl_node); - - kfree(ndl); -} - -static int -lstcon_group_alloc(char *name, struct lstcon_group **grpp) -{ - struct lstcon_group *grp; - int i; - - grp = kmalloc(offsetof(struct lstcon_group, - grp_ndl_hash[LST_NODE_HASHSIZE]), - GFP_KERNEL); - if (!grp) - return -ENOMEM; - - grp->grp_ref = 1; - if (name) { - if (strlen(name) > sizeof(grp->grp_name) - 1) { - kfree(grp); - return -E2BIG; - } - strncpy(grp->grp_name, name, sizeof(grp->grp_name)); - } - - INIT_LIST_HEAD(&grp->grp_link); - 
INIT_LIST_HEAD(&grp->grp_ndl_list); - INIT_LIST_HEAD(&grp->grp_trans_list); - - for (i = 0; i < LST_NODE_HASHSIZE; i++) - INIT_LIST_HEAD(&grp->grp_ndl_hash[i]); - - *grpp = grp; - - return 0; -} - -static void -lstcon_group_addref(struct lstcon_group *grp) -{ - grp->grp_ref++; -} - -static void lstcon_group_ndlink_release(struct lstcon_group *, - struct lstcon_ndlink *); - -static void -lstcon_group_drain(struct lstcon_group *grp, int keep) -{ - struct lstcon_ndlink *ndl; - struct lstcon_ndlink *tmp; - - list_for_each_entry_safe(ndl, tmp, &grp->grp_ndl_list, ndl_link) { - if (!(ndl->ndl_node->nd_state & keep)) - lstcon_group_ndlink_release(grp, ndl); - } -} - -static void -lstcon_group_decref(struct lstcon_group *grp) -{ - int i; - - if (--grp->grp_ref > 0) - return; - - if (!list_empty(&grp->grp_link)) - list_del(&grp->grp_link); - - lstcon_group_drain(grp, 0); - - for (i = 0; i < LST_NODE_HASHSIZE; i++) - LASSERT(list_empty(&grp->grp_ndl_hash[i])); - - kfree(grp); -} - -static int -lstcon_group_find(const char *name, struct lstcon_group **grpp) -{ - struct lstcon_group *grp; - - list_for_each_entry(grp, &console_session.ses_grp_list, grp_link) { - if (strncmp(grp->grp_name, name, LST_NAME_SIZE)) - continue; - - lstcon_group_addref(grp); /* +1 ref for caller */ - *grpp = grp; - return 0; - } - - return -ENOENT; -} - -static int -lstcon_group_ndlink_find(struct lstcon_group *grp, struct lnet_process_id id, - struct lstcon_ndlink **ndlpp, int create) -{ - int rc; - - rc = lstcon_ndlink_find(&grp->grp_ndl_hash[0], id, ndlpp, create); - if (rc) - return rc; - - if (!list_empty(&(*ndlpp)->ndl_link)) - return 0; - - list_add_tail(&(*ndlpp)->ndl_link, &grp->grp_ndl_list); - grp->grp_nnode++; - - return 0; -} - -static void -lstcon_group_ndlink_release(struct lstcon_group *grp, struct lstcon_ndlink *ndl) -{ - list_del_init(&ndl->ndl_link); - lstcon_ndlink_release(ndl); - grp->grp_nnode--; -} - -static void -lstcon_group_ndlink_move(struct lstcon_group *old, - struct 
lstcon_group *new, struct lstcon_ndlink *ndl) -{ - unsigned int idx = LNET_NIDADDR(ndl->ndl_node->nd_id.nid) % - LST_NODE_HASHSIZE; - - list_del(&ndl->ndl_hlink); - list_del(&ndl->ndl_link); - old->grp_nnode--; - - list_add_tail(&ndl->ndl_hlink, &new->grp_ndl_hash[idx]); - list_add_tail(&ndl->ndl_link, &new->grp_ndl_list); - new->grp_nnode++; -} - -static void -lstcon_group_move(struct lstcon_group *old, struct lstcon_group *new) -{ - struct lstcon_ndlink *ndl; - - while (!list_empty(&old->grp_ndl_list)) { - ndl = list_entry(old->grp_ndl_list.next, - struct lstcon_ndlink, ndl_link); - lstcon_group_ndlink_move(old, new, ndl); - } -} - -static int -lstcon_sesrpc_condition(int transop, struct lstcon_node *nd, void *arg) -{ - struct lstcon_group *grp = (struct lstcon_group *)arg; - - switch (transop) { - case LST_TRANS_SESNEW: - if (nd->nd_state == LST_NODE_ACTIVE) - return 0; - break; - - case LST_TRANS_SESEND: - if (nd->nd_state != LST_NODE_ACTIVE) - return 0; - - if (grp && nd->nd_ref > 1) - return 0; - break; - - case LST_TRANS_SESQRY: - break; - - default: - LBUG(); - } - - return 1; -} - -static int -lstcon_sesrpc_readent(int transop, struct srpc_msg *msg, - struct lstcon_rpc_ent __user *ent_up) -{ - struct srpc_debug_reply *rep; - - switch (transop) { - case LST_TRANS_SESNEW: - case LST_TRANS_SESEND: - return 0; - - case LST_TRANS_SESQRY: - rep = &msg->msg_body.dbg_reply; - - if (copy_to_user(&ent_up->rpe_priv[0], - &rep->dbg_timeout, sizeof(int)) || - copy_to_user(&ent_up->rpe_payload[0], - &rep->dbg_name, LST_NAME_SIZE)) - return -EFAULT; - - return 0; - - default: - LBUG(); - } - - return 0; -} - -static int -lstcon_group_nodes_add(struct lstcon_group *grp, - int count, struct lnet_process_id __user *ids_up, - unsigned int *featp, - struct list_head __user *result_up) -{ - struct lstcon_rpc_trans *trans; - struct lstcon_ndlink *ndl; - struct lstcon_group *tmp; - struct lnet_process_id id; - int i; - int rc; - - rc = lstcon_group_alloc(NULL, &tmp); - if (rc) { 
- CERROR("Out of memory\n"); - return -ENOMEM; - } - - for (i = 0 ; i < count; i++) { - if (copy_from_user(&id, &ids_up[i], sizeof(id))) { - rc = -EFAULT; - break; - } - - /* skip if it's in this group already */ - rc = lstcon_group_ndlink_find(grp, id, &ndl, 0); - if (!rc) - continue; - - /* add to tmp group */ - rc = lstcon_group_ndlink_find(tmp, id, &ndl, 1); - if (rc) { - CERROR("Can't create ndlink, out of memory\n"); - break; - } - } - - if (rc) { - lstcon_group_decref(tmp); - return rc; - } - - rc = lstcon_rpc_trans_ndlist(&tmp->grp_ndl_list, - &tmp->grp_trans_list, LST_TRANS_SESNEW, - tmp, lstcon_sesrpc_condition, &trans); - if (rc) { - CERROR("Can't create transaction: %d\n", rc); - lstcon_group_decref(tmp); - return rc; - } - - /* post all RPCs */ - lstcon_rpc_trans_postwait(trans, LST_TRANS_TIMEOUT); - - rc = lstcon_rpc_trans_interpreter(trans, result_up, - lstcon_sesrpc_readent); - *featp = trans->tas_features; - - /* destroy all RPGs */ - lstcon_rpc_trans_destroy(trans); - - lstcon_group_move(tmp, grp); - lstcon_group_decref(tmp); - - return rc; -} - -static int -lstcon_group_nodes_remove(struct lstcon_group *grp, - int count, struct lnet_process_id __user *ids_up, - struct list_head __user *result_up) -{ - struct lstcon_rpc_trans *trans; - struct lstcon_ndlink *ndl; - struct lstcon_group *tmp; - struct lnet_process_id id; - int rc; - int i; - - /* End session and remove node from the group */ - - rc = lstcon_group_alloc(NULL, &tmp); - if (rc) { - CERROR("Out of memory\n"); - return -ENOMEM; - } - - for (i = 0; i < count; i++) { - if (copy_from_user(&id, &ids_up[i], sizeof(id))) { - rc = -EFAULT; - goto error; - } - - /* move node to tmp group */ - if (!lstcon_group_ndlink_find(grp, id, &ndl, 0)) - lstcon_group_ndlink_move(grp, tmp, ndl); - } - - rc = lstcon_rpc_trans_ndlist(&tmp->grp_ndl_list, - &tmp->grp_trans_list, LST_TRANS_SESEND, - tmp, lstcon_sesrpc_condition, &trans); - if (rc) { - CERROR("Can't create transaction: %d\n", rc); - goto error; - } 
- - lstcon_rpc_trans_postwait(trans, LST_TRANS_TIMEOUT); - - rc = lstcon_rpc_trans_interpreter(trans, result_up, NULL); - - lstcon_rpc_trans_destroy(trans); - /* release nodes anyway, because we can't rollback status */ - lstcon_group_decref(tmp); - - return rc; -error: - lstcon_group_move(tmp, grp); - lstcon_group_decref(tmp); - - return rc; -} - -int -lstcon_group_add(char *name) -{ - struct lstcon_group *grp; - int rc; - - rc = lstcon_group_find(name, &grp) ? 0 : -EEXIST; - if (rc) { - /* find a group with same name */ - lstcon_group_decref(grp); - return rc; - } - - rc = lstcon_group_alloc(name, &grp); - if (rc) { - CERROR("Can't allocate descriptor for group %s\n", name); - return -ENOMEM; - } - - list_add_tail(&grp->grp_link, &console_session.ses_grp_list); - - return rc; -} - -int -lstcon_nodes_add(char *name, int count, struct lnet_process_id __user *ids_up, - unsigned int *featp, struct list_head __user *result_up) -{ - struct lstcon_group *grp; - int rc; - - LASSERT(count > 0); - LASSERT(ids_up); - - rc = lstcon_group_find(name, &grp); - if (rc) { - CDEBUG(D_NET, "Can't find group %s\n", name); - return rc; - } - - if (grp->grp_ref > 2) { - /* referred by other threads or test */ - CDEBUG(D_NET, "Group %s is busy\n", name); - lstcon_group_decref(grp); - - return -EBUSY; - } - - rc = lstcon_group_nodes_add(grp, count, ids_up, featp, result_up); - - lstcon_group_decref(grp); - - return rc; -} - -int -lstcon_group_del(char *name) -{ - struct lstcon_rpc_trans *trans; - struct lstcon_group *grp; - int rc; - - rc = lstcon_group_find(name, &grp); - if (rc) { - CDEBUG(D_NET, "Can't find group: %s\n", name); - return rc; - } - - if (grp->grp_ref > 2) { - /* referred by others threads or test */ - CDEBUG(D_NET, "Group %s is busy\n", name); - lstcon_group_decref(grp); - return -EBUSY; - } - - rc = lstcon_rpc_trans_ndlist(&grp->grp_ndl_list, - &grp->grp_trans_list, LST_TRANS_SESEND, - grp, lstcon_sesrpc_condition, &trans); - if (rc) { - CERROR("Can't create 
transaction: %d\n", rc); - lstcon_group_decref(grp); - return rc; - } - - lstcon_rpc_trans_postwait(trans, LST_TRANS_TIMEOUT); - - lstcon_rpc_trans_destroy(trans); - - lstcon_group_decref(grp); - /* - * -ref for session, it's destroyed, - * status can't be rolled back, destroy group anyway - */ - lstcon_group_decref(grp); - - return rc; -} - -int -lstcon_group_clean(char *name, int args) -{ - struct lstcon_group *grp = NULL; - int rc; - - rc = lstcon_group_find(name, &grp); - if (rc) { - CDEBUG(D_NET, "Can't find group %s\n", name); - return rc; - } - - if (grp->grp_ref > 2) { - /* referred by test */ - CDEBUG(D_NET, "Group %s is busy\n", name); - lstcon_group_decref(grp); - return -EBUSY; - } - - args = (LST_NODE_ACTIVE | LST_NODE_BUSY | - LST_NODE_DOWN | LST_NODE_UNKNOWN) & ~args; - - lstcon_group_drain(grp, args); - - lstcon_group_decref(grp); - /* release empty group */ - if (list_empty(&grp->grp_ndl_list)) - lstcon_group_decref(grp); - - return 0; -} - -int -lstcon_nodes_remove(char *name, int count, - struct lnet_process_id __user *ids_up, - struct list_head __user *result_up) -{ - struct lstcon_group *grp = NULL; - int rc; - - rc = lstcon_group_find(name, &grp); - if (rc) { - CDEBUG(D_NET, "Can't find group: %s\n", name); - return rc; - } - - if (grp->grp_ref > 2) { - /* referred by test */ - CDEBUG(D_NET, "Group %s is busy\n", name); - lstcon_group_decref(grp); - return -EBUSY; - } - - rc = lstcon_group_nodes_remove(grp, count, ids_up, result_up); - - lstcon_group_decref(grp); - /* release empty group */ - if (list_empty(&grp->grp_ndl_list)) - lstcon_group_decref(grp); - - return rc; -} - -int -lstcon_group_refresh(char *name, struct list_head __user *result_up) -{ - struct lstcon_rpc_trans *trans; - struct lstcon_group *grp; - int rc; - - rc = lstcon_group_find(name, &grp); - if (rc) { - CDEBUG(D_NET, "Can't find group: %s\n", name); - return rc; - } - - if (grp->grp_ref > 2) { - /* referred by test */ - CDEBUG(D_NET, "Group %s is busy\n", name); - 
lstcon_group_decref(grp); - return -EBUSY; - } - - /* re-invite all inactive nodes int the group */ - rc = lstcon_rpc_trans_ndlist(&grp->grp_ndl_list, - &grp->grp_trans_list, LST_TRANS_SESNEW, - grp, lstcon_sesrpc_condition, &trans); - if (rc) { - /* local error, return */ - CDEBUG(D_NET, "Can't create transaction: %d\n", rc); - lstcon_group_decref(grp); - return rc; - } - - lstcon_rpc_trans_postwait(trans, LST_TRANS_TIMEOUT); - - rc = lstcon_rpc_trans_interpreter(trans, result_up, NULL); - - lstcon_rpc_trans_destroy(trans); - /* -ref for me */ - lstcon_group_decref(grp); - - return rc; -} - -int -lstcon_group_list(int index, int len, char __user *name_up) -{ - struct lstcon_group *grp; - - LASSERT(index >= 0); - LASSERT(name_up); - - list_for_each_entry(grp, &console_session.ses_grp_list, grp_link) { - if (!index--) { - return copy_to_user(name_up, grp->grp_name, len) ? - -EFAULT : 0; - } - } - - return -ENOENT; -} - -static int -lstcon_nodes_getent(struct list_head *head, int *index_p, - int *count_p, struct lstcon_node_ent __user *dents_up) -{ - struct lstcon_ndlink *ndl; - struct lstcon_node *nd; - int count = 0; - int index = 0; - - LASSERT(index_p && count_p); - LASSERT(dents_up); - LASSERT(*index_p >= 0); - LASSERT(*count_p > 0); - - list_for_each_entry(ndl, head, ndl_link) { - if (index++ < *index_p) - continue; - - if (count >= *count_p) - break; - - nd = ndl->ndl_node; - if (copy_to_user(&dents_up[count].nde_id, - &nd->nd_id, sizeof(nd->nd_id)) || - copy_to_user(&dents_up[count].nde_state, - &nd->nd_state, sizeof(nd->nd_state))) - return -EFAULT; - - count++; - } - - if (index <= *index_p) - return -ENOENT; - - *count_p = count; - *index_p = index; - - return 0; -} - -int -lstcon_group_info(char *name, struct lstcon_ndlist_ent __user *gents_p, - int *index_p, int *count_p, - struct lstcon_node_ent __user *dents_up) -{ - struct lstcon_ndlist_ent *gentp; - struct lstcon_group *grp; - struct lstcon_ndlink *ndl; - int rc; - - rc = lstcon_group_find(name, 
&grp); - if (rc) { - CDEBUG(D_NET, "Can't find group %s\n", name); - return rc; - } - - if (dents_up) { - /* verbose query */ - rc = lstcon_nodes_getent(&grp->grp_ndl_list, - index_p, count_p, dents_up); - lstcon_group_decref(grp); - - return rc; - } - - /* non-verbose query */ - gentp = kzalloc(sizeof(struct lstcon_ndlist_ent), GFP_NOFS); - if (!gentp) { - CERROR("Can't allocate ndlist_ent\n"); - lstcon_group_decref(grp); - - return -ENOMEM; - } - - list_for_each_entry(ndl, &grp->grp_ndl_list, ndl_link) - LST_NODE_STATE_COUNTER(ndl->ndl_node, gentp); - - rc = copy_to_user(gents_p, gentp, - sizeof(struct lstcon_ndlist_ent)) ? -EFAULT : 0; - - kfree(gentp); - - lstcon_group_decref(grp); - - return rc; -} - -static int -lstcon_batch_find(const char *name, struct lstcon_batch **batpp) -{ - struct lstcon_batch *bat; - - list_for_each_entry(bat, &console_session.ses_bat_list, bat_link) { - if (!strncmp(bat->bat_name, name, LST_NAME_SIZE)) { - *batpp = bat; - return 0; - } - } - - return -ENOENT; -} - -int -lstcon_batch_add(char *name) -{ - struct lstcon_batch *bat; - int i; - int rc; - - rc = !lstcon_batch_find(name, &bat) ? 
-EEXIST : 0; - if (rc) { - CDEBUG(D_NET, "Batch %s already exists\n", name); - return rc; - } - - bat = kzalloc(sizeof(struct lstcon_batch), GFP_NOFS); - if (!bat) { - CERROR("Can't allocate descriptor for batch %s\n", name); - return -ENOMEM; - } - - bat->bat_cli_hash = kmalloc(sizeof(struct list_head) * LST_NODE_HASHSIZE, - GFP_KERNEL); - if (!bat->bat_cli_hash) { - CERROR("Can't allocate hash for batch %s\n", name); - kfree(bat); - - return -ENOMEM; - } - - bat->bat_srv_hash = kmalloc(sizeof(struct list_head) * LST_NODE_HASHSIZE, - GFP_KERNEL); - if (!bat->bat_srv_hash) { - CERROR("Can't allocate hash for batch %s\n", name); - kfree(bat->bat_cli_hash); - kfree(bat); - - return -ENOMEM; - } - - if (strlen(name) > sizeof(bat->bat_name) - 1) { - kfree(bat->bat_srv_hash); - kfree(bat->bat_cli_hash); - kfree(bat); - return -E2BIG; - } - strncpy(bat->bat_name, name, sizeof(bat->bat_name)); - bat->bat_hdr.tsb_index = 0; - bat->bat_hdr.tsb_id.bat_id = ++console_session.ses_id_cookie; - - bat->bat_ntest = 0; - bat->bat_state = LST_BATCH_IDLE; - - INIT_LIST_HEAD(&bat->bat_cli_list); - INIT_LIST_HEAD(&bat->bat_srv_list); - INIT_LIST_HEAD(&bat->bat_test_list); - INIT_LIST_HEAD(&bat->bat_trans_list); - - for (i = 0; i < LST_NODE_HASHSIZE; i++) { - INIT_LIST_HEAD(&bat->bat_cli_hash[i]); - INIT_LIST_HEAD(&bat->bat_srv_hash[i]); - } - - list_add_tail(&bat->bat_link, &console_session.ses_bat_list); - - return rc; -} - -int -lstcon_batch_list(int index, int len, char __user *name_up) -{ - struct lstcon_batch *bat; - - LASSERT(name_up); - LASSERT(index >= 0); - - list_for_each_entry(bat, &console_session.ses_bat_list, bat_link) { - if (!index--) { - return copy_to_user(name_up, bat->bat_name, len) ? 
- -EFAULT : 0; - } - } - - return -ENOENT; -} - -int -lstcon_batch_info(char *name, struct lstcon_test_batch_ent __user *ent_up, - int server, int testidx, int *index_p, int *ndent_p, - struct lstcon_node_ent __user *dents_up) -{ - struct lstcon_test_batch_ent *entp; - struct list_head *clilst; - struct list_head *srvlst; - struct lstcon_test *test = NULL; - struct lstcon_batch *bat; - struct lstcon_ndlink *ndl; - int rc; - - rc = lstcon_batch_find(name, &bat); - if (rc) { - CDEBUG(D_NET, "Can't find batch %s\n", name); - return -ENOENT; - } - - if (testidx > 0) { - /* query test, test index start from 1 */ - list_for_each_entry(test, &bat->bat_test_list, tes_link) { - if (testidx-- == 1) - break; - } - - if (testidx > 0) { - CDEBUG(D_NET, "Can't find specified test in batch\n"); - return -ENOENT; - } - } - - clilst = !test ? &bat->bat_cli_list : - &test->tes_src_grp->grp_ndl_list; - srvlst = !test ? &bat->bat_srv_list : - &test->tes_dst_grp->grp_ndl_list; - - if (dents_up) { - rc = lstcon_nodes_getent((server ? srvlst : clilst), - index_p, ndent_p, dents_up); - return rc; - } - - /* non-verbose query */ - entp = kzalloc(sizeof(struct lstcon_test_batch_ent), GFP_NOFS); - if (!entp) - return -ENOMEM; - - if (!test) { - entp->u.tbe_batch.bae_ntest = bat->bat_ntest; - entp->u.tbe_batch.bae_state = bat->bat_state; - } else { - entp->u.tbe_test.tse_type = test->tes_type; - entp->u.tbe_test.tse_loop = test->tes_loop; - entp->u.tbe_test.tse_concur = test->tes_concur; - } - - list_for_each_entry(ndl, clilst, ndl_link) - LST_NODE_STATE_COUNTER(ndl->ndl_node, &entp->tbe_cli_nle); - - list_for_each_entry(ndl, srvlst, ndl_link) - LST_NODE_STATE_COUNTER(ndl->ndl_node, &entp->tbe_srv_nle); - - rc = copy_to_user(ent_up, entp, - sizeof(struct lstcon_test_batch_ent)) ? 
-EFAULT : 0; - - kfree(entp); - - return rc; -} - -static int -lstcon_batrpc_condition(int transop, struct lstcon_node *nd, void *arg) -{ - switch (transop) { - case LST_TRANS_TSBRUN: - if (nd->nd_state != LST_NODE_ACTIVE) - return -ENETDOWN; - break; - - case LST_TRANS_TSBSTOP: - if (nd->nd_state != LST_NODE_ACTIVE) - return 0; - break; - - case LST_TRANS_TSBCLIQRY: - case LST_TRANS_TSBSRVQRY: - break; - } - - return 1; -} - -static int -lstcon_batch_op(struct lstcon_batch *bat, int transop, - struct list_head __user *result_up) -{ - struct lstcon_rpc_trans *trans; - int rc; - - rc = lstcon_rpc_trans_ndlist(&bat->bat_cli_list, - &bat->bat_trans_list, transop, - bat, lstcon_batrpc_condition, &trans); - if (rc) { - CERROR("Can't create transaction: %d\n", rc); - return rc; - } - - lstcon_rpc_trans_postwait(trans, LST_TRANS_TIMEOUT); - - rc = lstcon_rpc_trans_interpreter(trans, result_up, NULL); - - lstcon_rpc_trans_destroy(trans); - - return rc; -} - -int -lstcon_batch_run(char *name, int timeout, struct list_head __user *result_up) -{ - struct lstcon_batch *bat; - int rc; - - if (lstcon_batch_find(name, &bat)) { - CDEBUG(D_NET, "Can't find batch %s\n", name); - return -ENOENT; - } - - bat->bat_arg = timeout; - - rc = lstcon_batch_op(bat, LST_TRANS_TSBRUN, result_up); - - /* mark batch as running if it's started in any node */ - if (lstcon_tsbop_stat_success(lstcon_trans_stat(), 0)) - bat->bat_state = LST_BATCH_RUNNING; - - return rc; -} - -int -lstcon_batch_stop(char *name, int force, struct list_head __user *result_up) -{ - struct lstcon_batch *bat; - int rc; - - if (lstcon_batch_find(name, &bat)) { - CDEBUG(D_NET, "Can't find batch %s\n", name); - return -ENOENT; - } - - bat->bat_arg = force; - - rc = lstcon_batch_op(bat, LST_TRANS_TSBSTOP, result_up); - - /* mark batch as stopped if all RPCs finished */ - if (!lstcon_tsbop_stat_failure(lstcon_trans_stat(), 0)) - bat->bat_state = LST_BATCH_IDLE; - - return rc; -} - -static void -lstcon_batch_destroy(struct 
lstcon_batch *bat) -{ - struct lstcon_ndlink *ndl; - struct lstcon_test *test; - int i; - - list_del(&bat->bat_link); - - while (!list_empty(&bat->bat_test_list)) { - test = list_entry(bat->bat_test_list.next, - struct lstcon_test, tes_link); - LASSERT(list_empty(&test->tes_trans_list)); - - list_del(&test->tes_link); - - lstcon_group_decref(test->tes_src_grp); - lstcon_group_decref(test->tes_dst_grp); - - kfree(test); - } - - LASSERT(list_empty(&bat->bat_trans_list)); - - while (!list_empty(&bat->bat_cli_list)) { - ndl = list_entry(bat->bat_cli_list.next, - struct lstcon_ndlink, ndl_link); - list_del_init(&ndl->ndl_link); - - lstcon_ndlink_release(ndl); - } - - while (!list_empty(&bat->bat_srv_list)) { - ndl = list_entry(bat->bat_srv_list.next, - struct lstcon_ndlink, ndl_link); - list_del_init(&ndl->ndl_link); - - lstcon_ndlink_release(ndl); - } - - for (i = 0; i < LST_NODE_HASHSIZE; i++) { - LASSERT(list_empty(&bat->bat_cli_hash[i])); - LASSERT(list_empty(&bat->bat_srv_hash[i])); - } - - kfree(bat->bat_cli_hash); - kfree(bat->bat_srv_hash); - kfree(bat); -} - -static int -lstcon_testrpc_condition(int transop, struct lstcon_node *nd, void *arg) -{ - struct lstcon_test *test; - struct lstcon_batch *batch; - struct lstcon_ndlink *ndl; - struct list_head *hash; - struct list_head *head; - - test = (struct lstcon_test *)arg; - LASSERT(test); - - batch = test->tes_batch; - LASSERT(batch); - - if (test->tes_oneside && - transop == LST_TRANS_TSBSRVADD) - return 0; - - if (nd->nd_state != LST_NODE_ACTIVE) - return -ENETDOWN; - - if (transop == LST_TRANS_TSBCLIADD) { - hash = batch->bat_cli_hash; - head = &batch->bat_cli_list; - - } else { - LASSERT(transop == LST_TRANS_TSBSRVADD); - - hash = batch->bat_srv_hash; - head = &batch->bat_srv_list; - } - - LASSERT(nd->nd_id.nid != LNET_NID_ANY); - - if (lstcon_ndlink_find(hash, nd->nd_id, &ndl, 1)) - return -ENOMEM; - - if (list_empty(&ndl->ndl_link)) - list_add_tail(&ndl->ndl_link, head); - - return 1; -} - -static int 
-lstcon_test_nodes_add(struct lstcon_test *test, - struct list_head __user *result_up) -{ - struct lstcon_rpc_trans *trans; - struct lstcon_group *grp; - int transop; - int rc; - - LASSERT(test->tes_src_grp); - LASSERT(test->tes_dst_grp); - - transop = LST_TRANS_TSBSRVADD; - grp = test->tes_dst_grp; -again: - rc = lstcon_rpc_trans_ndlist(&grp->grp_ndl_list, - &test->tes_trans_list, transop, - test, lstcon_testrpc_condition, &trans); - if (rc) { - CERROR("Can't create transaction: %d\n", rc); - return rc; - } - - lstcon_rpc_trans_postwait(trans, LST_TRANS_TIMEOUT); - - if (lstcon_trans_stat()->trs_rpc_errno || - lstcon_trans_stat()->trs_fwk_errno) { - lstcon_rpc_trans_interpreter(trans, result_up, NULL); - - lstcon_rpc_trans_destroy(trans); - /* return if any error */ - CDEBUG(D_NET, "Failed to add test %s, RPC error %d, framework error %d\n", - transop == LST_TRANS_TSBCLIADD ? "client" : "server", - lstcon_trans_stat()->trs_rpc_errno, - lstcon_trans_stat()->trs_fwk_errno); - - return rc; - } - - lstcon_rpc_trans_destroy(trans); - - if (transop == LST_TRANS_TSBCLIADD) - return rc; - - transop = LST_TRANS_TSBCLIADD; - grp = test->tes_src_grp; - test->tes_cliidx = 0; - - /* requests to test clients */ - goto again; -} - -static int -lstcon_verify_batch(const char *name, struct lstcon_batch **batch) -{ - int rc; - - rc = lstcon_batch_find(name, batch); - if (rc) { - CDEBUG(D_NET, "Can't find batch %s\n", name); - return rc; - } - - if ((*batch)->bat_state != LST_BATCH_IDLE) { - CDEBUG(D_NET, "Can't change running batch %s\n", name); - return -EINVAL; - } - - return 0; -} - -static int -lstcon_verify_group(const char *name, struct lstcon_group **grp) -{ - int rc; - struct lstcon_ndlink *ndl; - - rc = lstcon_group_find(name, grp); - if (rc) { - CDEBUG(D_NET, "can't find group %s\n", name); - return rc; - } - - list_for_each_entry(ndl, &(*grp)->grp_ndl_list, ndl_link) { - if (ndl->ndl_node->nd_state == LST_NODE_ACTIVE) - return 0; - } - - CDEBUG(D_NET, "Group %s has no 
ACTIVE nodes\n", name); - - return -EINVAL; -} - -int -lstcon_test_add(char *batch_name, int type, int loop, - int concur, int dist, int span, - char *src_name, char *dst_name, - void *param, int paramlen, int *retp, - struct list_head __user *result_up) -{ - struct lstcon_test *test = NULL; - int rc; - struct lstcon_group *src_grp = NULL; - struct lstcon_group *dst_grp = NULL; - struct lstcon_batch *batch = NULL; - - /* - * verify that a batch of the given name exists, and the groups - * that will be part of the batch exist and have at least one - * active node - */ - rc = lstcon_verify_batch(batch_name, &batch); - if (rc) - goto out; - - rc = lstcon_verify_group(src_name, &src_grp); - if (rc) - goto out; - - rc = lstcon_verify_group(dst_name, &dst_grp); - if (rc) - goto out; - - if (dst_grp->grp_userland) - *retp = 1; - - test = kzalloc(offsetof(struct lstcon_test, tes_param[paramlen]), - GFP_KERNEL); - if (!test) { - CERROR("Can't allocate test descriptor\n"); - rc = -ENOMEM; - - goto out; - } - - test->tes_hdr.tsb_id = batch->bat_hdr.tsb_id; - test->tes_batch = batch; - test->tes_type = type; - test->tes_oneside = 0; /* TODO */ - test->tes_loop = loop; - test->tes_concur = concur; - test->tes_stop_onerr = 1; /* TODO */ - test->tes_span = span; - test->tes_dist = dist; - test->tes_cliidx = 0; /* just used for creating RPC */ - test->tes_src_grp = src_grp; - test->tes_dst_grp = dst_grp; - INIT_LIST_HEAD(&test->tes_trans_list); - - if (param) { - test->tes_paramlen = paramlen; - memcpy(&test->tes_param[0], param, paramlen); - } - - rc = lstcon_test_nodes_add(test, result_up); - - if (rc) - goto out; - - if (lstcon_trans_stat()->trs_rpc_errno || - lstcon_trans_stat()->trs_fwk_errno) - CDEBUG(D_NET, "Failed to add test %d to batch %s\n", type, - batch_name); - - /* add to test list anyway, so user can check what's going on */ - list_add_tail(&test->tes_link, &batch->bat_test_list); - - batch->bat_ntest++; - test->tes_hdr.tsb_index = batch->bat_ntest; - - /* hold 
groups so nobody can change them */ - return rc; -out: - kfree(test); - - if (dst_grp) - lstcon_group_decref(dst_grp); - - if (src_grp) - lstcon_group_decref(src_grp); - - return rc; -} - -static int -lstcon_test_find(struct lstcon_batch *batch, int idx, - struct lstcon_test **testpp) -{ - struct lstcon_test *test; - - list_for_each_entry(test, &batch->bat_test_list, tes_link) { - if (idx == test->tes_hdr.tsb_index) { - *testpp = test; - return 0; - } - } - - return -ENOENT; -} - -static int -lstcon_tsbrpc_readent(int transop, struct srpc_msg *msg, - struct lstcon_rpc_ent __user *ent_up) -{ - struct srpc_batch_reply *rep = &msg->msg_body.bat_reply; - - LASSERT(transop == LST_TRANS_TSBCLIQRY || - transop == LST_TRANS_TSBSRVQRY); - - /* positive errno, framework error code */ - if (copy_to_user(&ent_up->rpe_priv[0], &rep->bar_active, - sizeof(rep->bar_active))) - return -EFAULT; - - return 0; -} - -int -lstcon_test_batch_query(char *name, int testidx, int client, - int timeout, struct list_head __user *result_up) -{ - struct lstcon_rpc_trans *trans; - struct list_head *translist; - struct list_head *ndlist; - struct lstcon_tsb_hdr *hdr; - struct lstcon_batch *batch; - struct lstcon_test *test = NULL; - int transop; - int rc; - - rc = lstcon_batch_find(name, &batch); - if (rc) { - CDEBUG(D_NET, "Can't find batch: %s\n", name); - return rc; - } - - if (!testidx) { - translist = &batch->bat_trans_list; - ndlist = &batch->bat_cli_list; - hdr = &batch->bat_hdr; - } else { - /* query specified test only */ - rc = lstcon_test_find(batch, testidx, &test); - if (rc) { - CDEBUG(D_NET, "Can't find test: %d\n", testidx); - return rc; - } - - translist = &test->tes_trans_list; - ndlist = &test->tes_src_grp->grp_ndl_list; - hdr = &test->tes_hdr; - } - - transop = client ? 
LST_TRANS_TSBCLIQRY : LST_TRANS_TSBSRVQRY; - - rc = lstcon_rpc_trans_ndlist(ndlist, translist, transop, hdr, - lstcon_batrpc_condition, &trans); - if (rc) { - CERROR("Can't create transaction: %d\n", rc); - return rc; - } - - lstcon_rpc_trans_postwait(trans, timeout); - - /* query a batch, not a test */ - if (!testidx && - !lstcon_rpc_stat_failure(lstcon_trans_stat(), 0) && - !lstcon_tsbqry_stat_run(lstcon_trans_stat(), 0)) { - /* all RPCs finished, and no active test */ - batch->bat_state = LST_BATCH_IDLE; - } - - rc = lstcon_rpc_trans_interpreter(trans, result_up, - lstcon_tsbrpc_readent); - lstcon_rpc_trans_destroy(trans); - - return rc; -} - -static int -lstcon_statrpc_readent(int transop, struct srpc_msg *msg, - struct lstcon_rpc_ent __user *ent_up) -{ - struct srpc_stat_reply *rep = &msg->msg_body.stat_reply; - struct sfw_counters __user *sfwk_stat; - struct srpc_counters __user *srpc_stat; - struct lnet_counters __user *lnet_stat; - - if (rep->str_status) - return 0; - - sfwk_stat = (struct sfw_counters __user *)&ent_up->rpe_payload[0]; - srpc_stat = (struct srpc_counters __user *)(sfwk_stat + 1); - lnet_stat = (struct lnet_counters __user *)(srpc_stat + 1); - - if (copy_to_user(sfwk_stat, &rep->str_fw, sizeof(*sfwk_stat)) || - copy_to_user(srpc_stat, &rep->str_rpc, sizeof(*srpc_stat)) || - copy_to_user(lnet_stat, &rep->str_lnet, sizeof(*lnet_stat))) - return -EFAULT; - - return 0; -} - -static int -lstcon_ndlist_stat(struct list_head *ndlist, - int timeout, struct list_head __user *result_up) -{ - struct list_head head; - struct lstcon_rpc_trans *trans; - int rc; - - INIT_LIST_HEAD(&head); - - rc = lstcon_rpc_trans_ndlist(ndlist, &head, - LST_TRANS_STATQRY, NULL, NULL, &trans); - if (rc) { - CERROR("Can't create transaction: %d\n", rc); - return rc; - } - - lstcon_rpc_trans_postwait(trans, LST_VALIDATE_TIMEOUT(timeout)); - - rc = lstcon_rpc_trans_interpreter(trans, result_up, - lstcon_statrpc_readent); - lstcon_rpc_trans_destroy(trans); - - return rc; -} - 
-int -lstcon_group_stat(char *grp_name, int timeout, - struct list_head __user *result_up) -{ - struct lstcon_group *grp; - int rc; - - rc = lstcon_group_find(grp_name, &grp); - if (rc) { - CDEBUG(D_NET, "Can't find group %s\n", grp_name); - return rc; - } - - rc = lstcon_ndlist_stat(&grp->grp_ndl_list, timeout, result_up); - - lstcon_group_decref(grp); - - return rc; -} - -int -lstcon_nodes_stat(int count, struct lnet_process_id __user *ids_up, - int timeout, struct list_head __user *result_up) -{ - struct lstcon_ndlink *ndl; - struct lstcon_group *tmp; - struct lnet_process_id id; - int i; - int rc; - - rc = lstcon_group_alloc(NULL, &tmp); - if (rc) { - CERROR("Out of memory\n"); - return -ENOMEM; - } - - for (i = 0 ; i < count; i++) { - if (copy_from_user(&id, &ids_up[i], sizeof(id))) { - rc = -EFAULT; - break; - } - - /* add to tmp group */ - rc = lstcon_group_ndlink_find(tmp, id, &ndl, 2); - if (rc) { - CDEBUG((rc == -ENOMEM) ? D_ERROR : D_NET, - "Failed to find or create %s: %d\n", - libcfs_id2str(id), rc); - break; - } - } - - if (rc) { - lstcon_group_decref(tmp); - return rc; - } - - rc = lstcon_ndlist_stat(&tmp->grp_ndl_list, timeout, result_up); - - lstcon_group_decref(tmp); - - return rc; -} - -static int -lstcon_debug_ndlist(struct list_head *ndlist, - struct list_head *translist, - int timeout, struct list_head __user *result_up) -{ - struct lstcon_rpc_trans *trans; - int rc; - - rc = lstcon_rpc_trans_ndlist(ndlist, translist, LST_TRANS_SESQRY, - NULL, lstcon_sesrpc_condition, &trans); - if (rc) { - CERROR("Can't create transaction: %d\n", rc); - return rc; - } - - lstcon_rpc_trans_postwait(trans, LST_VALIDATE_TIMEOUT(timeout)); - - rc = lstcon_rpc_trans_interpreter(trans, result_up, - lstcon_sesrpc_readent); - lstcon_rpc_trans_destroy(trans); - - return rc; -} - -int -lstcon_session_debug(int timeout, struct list_head __user *result_up) -{ - return lstcon_debug_ndlist(&console_session.ses_ndl_list, - NULL, timeout, result_up); -} - -int 
-lstcon_batch_debug(int timeout, char *name, - int client, struct list_head __user *result_up) -{ - struct lstcon_batch *bat; - int rc; - - rc = lstcon_batch_find(name, &bat); - if (rc) - return -ENOENT; - - rc = lstcon_debug_ndlist(client ? &bat->bat_cli_list : - &bat->bat_srv_list, - NULL, timeout, result_up); - - return rc; -} - -int -lstcon_group_debug(int timeout, char *name, - struct list_head __user *result_up) -{ - struct lstcon_group *grp; - int rc; - - rc = lstcon_group_find(name, &grp); - if (rc) - return -ENOENT; - - rc = lstcon_debug_ndlist(&grp->grp_ndl_list, NULL, - timeout, result_up); - lstcon_group_decref(grp); - - return rc; -} - -int -lstcon_nodes_debug(int timeout, int count, - struct lnet_process_id __user *ids_up, - struct list_head __user *result_up) -{ - struct lnet_process_id id; - struct lstcon_ndlink *ndl; - struct lstcon_group *grp; - int i; - int rc; - - rc = lstcon_group_alloc(NULL, &grp); - if (rc) { - CDEBUG(D_NET, "Out of memory\n"); - return rc; - } - - for (i = 0; i < count; i++) { - if (copy_from_user(&id, &ids_up[i], sizeof(id))) { - rc = -EFAULT; - break; - } - - /* node is added to tmp group */ - rc = lstcon_group_ndlink_find(grp, id, &ndl, 1); - if (rc) { - CERROR("Can't create node link\n"); - break; - } - } - - if (rc) { - lstcon_group_decref(grp); - return rc; - } - - rc = lstcon_debug_ndlist(&grp->grp_ndl_list, NULL, - timeout, result_up); - - lstcon_group_decref(grp); - - return rc; -} - -int -lstcon_session_match(struct lst_sid sid) -{ - return (console_session.ses_id.ses_nid == sid.ses_nid && - console_session.ses_id.ses_stamp == sid.ses_stamp) ? 
1 : 0; -} - -static void -lstcon_new_session_id(struct lst_sid *sid) -{ - struct lnet_process_id id; - - LASSERT(console_session.ses_state == LST_SESSION_NONE); - - LNetGetId(1, &id); - sid->ses_nid = id.nid; - sid->ses_stamp = cfs_time_current(); -} - -int -lstcon_session_new(char *name, int key, unsigned int feats, - int timeout, int force, struct lst_sid __user *sid_up) -{ - int rc = 0; - int i; - - if (console_session.ses_state != LST_SESSION_NONE) { - /* session exists */ - if (!force) { - CNETERR("Session %s already exists\n", - console_session.ses_name); - return -EEXIST; - } - - rc = lstcon_session_end(); - - /* lstcon_session_end() only return local error */ - if (rc) - return rc; - } - - if (feats & ~LST_FEATS_MASK) { - CNETERR("Unknown session features %x\n", - (feats & ~LST_FEATS_MASK)); - return -EINVAL; - } - - for (i = 0; i < LST_GLOBAL_HASHSIZE; i++) - LASSERT(list_empty(&console_session.ses_ndl_hash[i])); - - lstcon_new_session_id(&console_session.ses_id); - - console_session.ses_key = key; - console_session.ses_state = LST_SESSION_ACTIVE; - console_session.ses_force = !!force; - console_session.ses_features = feats; - console_session.ses_feats_updated = 0; - console_session.ses_timeout = (timeout <= 0) ? 
- LST_CONSOLE_TIMEOUT : timeout; - - if (strlen(name) > sizeof(console_session.ses_name) - 1) - return -E2BIG; - strlcpy(console_session.ses_name, name, - sizeof(console_session.ses_name)); - - rc = lstcon_batch_add(LST_DEFAULT_BATCH); - if (rc) - return rc; - - rc = lstcon_rpc_pinger_start(); - if (rc) { - struct lstcon_batch *bat = NULL; - - lstcon_batch_find(LST_DEFAULT_BATCH, &bat); - lstcon_batch_destroy(bat); - - return rc; - } - - if (!copy_to_user(sid_up, &console_session.ses_id, - sizeof(struct lst_sid))) - return rc; - - lstcon_session_end(); - - return -EFAULT; -} - -int -lstcon_session_info(struct lst_sid __user *sid_up, int __user *key_up, - unsigned __user *featp, - struct lstcon_ndlist_ent __user *ndinfo_up, - char __user *name_up, int len) -{ - struct lstcon_ndlist_ent *entp; - struct lstcon_ndlink *ndl; - int rc = 0; - - if (console_session.ses_state != LST_SESSION_ACTIVE) - return -ESRCH; - - entp = kzalloc(sizeof(*entp), GFP_NOFS); - if (!entp) - return -ENOMEM; - - list_for_each_entry(ndl, &console_session.ses_ndl_list, ndl_link) - LST_NODE_STATE_COUNTER(ndl->ndl_node, entp); - - if (copy_to_user(sid_up, &console_session.ses_id, - sizeof(*sid_up)) || - copy_to_user(key_up, &console_session.ses_key, - sizeof(*key_up)) || - copy_to_user(featp, &console_session.ses_features, - sizeof(*featp)) || - copy_to_user(ndinfo_up, entp, sizeof(*entp)) || - copy_to_user(name_up, console_session.ses_name, len)) - rc = -EFAULT; - - kfree(entp); - - return rc; -} - -int -lstcon_session_end(void) -{ - struct lstcon_rpc_trans *trans; - struct lstcon_group *grp; - struct lstcon_batch *bat; - int rc = 0; - - LASSERT(console_session.ses_state == LST_SESSION_ACTIVE); - - rc = lstcon_rpc_trans_ndlist(&console_session.ses_ndl_list, - NULL, LST_TRANS_SESEND, NULL, - lstcon_sesrpc_condition, &trans); - if (rc) { - CERROR("Can't create transaction: %d\n", rc); - return rc; - } - - console_session.ses_shutdown = 1; - - lstcon_rpc_pinger_stop(); - - 
lstcon_rpc_trans_postwait(trans, LST_TRANS_TIMEOUT); - - lstcon_rpc_trans_destroy(trans); - /* User can do nothing even rpc failed, so go on */ - - /* waiting for orphan rpcs to die */ - lstcon_rpc_cleanup_wait(); - - console_session.ses_id = LST_INVALID_SID; - console_session.ses_state = LST_SESSION_NONE; - console_session.ses_key = 0; - console_session.ses_force = 0; - console_session.ses_feats_updated = 0; - - /* destroy all batches */ - while (!list_empty(&console_session.ses_bat_list)) { - bat = list_entry(console_session.ses_bat_list.next, - struct lstcon_batch, bat_link); - - lstcon_batch_destroy(bat); - } - - /* destroy all groups */ - while (!list_empty(&console_session.ses_grp_list)) { - grp = list_entry(console_session.ses_grp_list.next, - struct lstcon_group, grp_link); - LASSERT(grp->grp_ref == 1); - - lstcon_group_decref(grp); - } - - /* all nodes should be released */ - LASSERT(list_empty(&console_session.ses_ndl_list)); - - console_session.ses_shutdown = 0; - console_session.ses_expired = 0; - - return rc; -} - -int -lstcon_session_feats_check(unsigned int feats) -{ - int rc = 0; - - if (feats & ~LST_FEATS_MASK) { - CERROR("Can't support these features: %x\n", - (feats & ~LST_FEATS_MASK)); - return -EPROTO; - } - - spin_lock(&console_session.ses_rpc_lock); - - if (!console_session.ses_feats_updated) { - console_session.ses_feats_updated = 1; - console_session.ses_features = feats; - } - - if (console_session.ses_features != feats) - rc = -EPROTO; - - spin_unlock(&console_session.ses_rpc_lock); - - if (rc) { - CERROR("remote features %x do not match with session features %x of console\n", - feats, console_session.ses_features); - } - - return rc; -} - -static int -lstcon_acceptor_handle(struct srpc_server_rpc *rpc) -{ - struct srpc_msg *rep = &rpc->srpc_replymsg; - struct srpc_msg *req = &rpc->srpc_reqstbuf->buf_msg; - struct srpc_join_reqst *jreq = &req->msg_body.join_reqst; - struct srpc_join_reply *jrep = &rep->msg_body.join_reply; - struct 
lstcon_group *grp = NULL; - struct lstcon_ndlink *ndl; - int rc = 0; - - sfw_unpack_message(req); - - mutex_lock(&console_session.ses_mutex); - - jrep->join_sid = console_session.ses_id; - - if (console_session.ses_id.ses_nid == LNET_NID_ANY) { - jrep->join_status = ESRCH; - goto out; - } - - if (lstcon_session_feats_check(req->msg_ses_feats)) { - jrep->join_status = EPROTO; - goto out; - } - - if (jreq->join_sid.ses_nid != LNET_NID_ANY && - !lstcon_session_match(jreq->join_sid)) { - jrep->join_status = EBUSY; - goto out; - } - - if (lstcon_group_find(jreq->join_group, &grp)) { - rc = lstcon_group_alloc(jreq->join_group, &grp); - if (rc) { - CERROR("Out of memory\n"); - goto out; - } - - list_add_tail(&grp->grp_link, - &console_session.ses_grp_list); - lstcon_group_addref(grp); - } - - if (grp->grp_ref > 2) { - /* Group in using */ - jrep->join_status = EBUSY; - goto out; - } - - rc = lstcon_group_ndlink_find(grp, rpc->srpc_peer, &ndl, 0); - if (!rc) { - jrep->join_status = EEXIST; - goto out; - } - - rc = lstcon_group_ndlink_find(grp, rpc->srpc_peer, &ndl, 1); - if (rc) { - CERROR("Out of memory\n"); - goto out; - } - - ndl->ndl_node->nd_state = LST_NODE_ACTIVE; - ndl->ndl_node->nd_timeout = console_session.ses_timeout; - - if (!grp->grp_userland) - grp->grp_userland = 1; - - strlcpy(jrep->join_session, console_session.ses_name, - sizeof(jrep->join_session)); - jrep->join_timeout = console_session.ses_timeout; - jrep->join_status = 0; - -out: - rep->msg_ses_feats = console_session.ses_features; - if (grp) - lstcon_group_decref(grp); - - mutex_unlock(&console_session.ses_mutex); - - return rc; -} - -static struct srpc_service lstcon_acceptor_service; - -static void lstcon_init_acceptor_service(void) -{ - /* initialize selftest console acceptor service table */ - lstcon_acceptor_service.sv_name = "join session"; - lstcon_acceptor_service.sv_handler = lstcon_acceptor_handle; - lstcon_acceptor_service.sv_id = SRPC_SERVICE_JOIN; - lstcon_acceptor_service.sv_wi_total = 
SFW_FRWK_WI_MAX; -} - -static DECLARE_IOCTL_HANDLER(lstcon_ioctl_handler, lstcon_ioctl_entry); - -/* initialize console */ -int -lstcon_console_init(void) -{ - int i; - int rc; - - memset(&console_session, 0, sizeof(struct lstcon_session)); - - console_session.ses_id = LST_INVALID_SID; - console_session.ses_state = LST_SESSION_NONE; - console_session.ses_timeout = 0; - console_session.ses_force = 0; - console_session.ses_expired = 0; - console_session.ses_feats_updated = 0; - console_session.ses_features = LST_FEATS_MASK; - console_session.ses_laststamp = ktime_get_real_seconds(); - - mutex_init(&console_session.ses_mutex); - - INIT_LIST_HEAD(&console_session.ses_ndl_list); - INIT_LIST_HEAD(&console_session.ses_grp_list); - INIT_LIST_HEAD(&console_session.ses_bat_list); - INIT_LIST_HEAD(&console_session.ses_trans_list); - - console_session.ses_ndl_hash = - kmalloc(sizeof(struct list_head) * LST_GLOBAL_HASHSIZE, GFP_KERNEL); - if (!console_session.ses_ndl_hash) - return -ENOMEM; - - for (i = 0; i < LST_GLOBAL_HASHSIZE; i++) - INIT_LIST_HEAD(&console_session.ses_ndl_hash[i]); - - /* initialize acceptor service table */ - lstcon_init_acceptor_service(); - - rc = srpc_add_service(&lstcon_acceptor_service); - LASSERT(rc != -EBUSY); - if (rc) { - kfree(console_session.ses_ndl_hash); - return rc; - } - - rc = srpc_service_add_buffers(&lstcon_acceptor_service, - lstcon_acceptor_service.sv_wi_total); - if (rc) { - rc = -ENOMEM; - goto out; - } - - rc = libcfs_register_ioctl(&lstcon_ioctl_handler); - - if (!rc) { - lstcon_rpc_module_init(); - return 0; - } - -out: - srpc_shutdown_service(&lstcon_acceptor_service); - srpc_remove_service(&lstcon_acceptor_service); - - kfree(console_session.ses_ndl_hash); - - srpc_wait_service_shutdown(&lstcon_acceptor_service); - - return rc; -} - -int -lstcon_console_fini(void) -{ - int i; - - libcfs_deregister_ioctl(&lstcon_ioctl_handler); - - mutex_lock(&console_session.ses_mutex); - - srpc_shutdown_service(&lstcon_acceptor_service); - 
srpc_remove_service(&lstcon_acceptor_service); - - if (console_session.ses_state != LST_SESSION_NONE) - lstcon_session_end(); - - lstcon_rpc_module_fini(); - - mutex_unlock(&console_session.ses_mutex); - - LASSERT(list_empty(&console_session.ses_ndl_list)); - LASSERT(list_empty(&console_session.ses_grp_list)); - LASSERT(list_empty(&console_session.ses_bat_list)); - LASSERT(list_empty(&console_session.ses_trans_list)); - - for (i = 0; i < LST_NODE_HASHSIZE; i++) - LASSERT(list_empty(&console_session.ses_ndl_hash[i])); - - kfree(console_session.ses_ndl_hash); - - srpc_wait_service_shutdown(&lstcon_acceptor_service); - - return 0; -} diff --git a/drivers/staging/lustre/lnet/selftest/console.h b/drivers/staging/lustre/lnet/selftest/console.h deleted file mode 100644 index 3933ed4cca93..000000000000 --- a/drivers/staging/lustre/lnet/selftest/console.h +++ /dev/null @@ -1,244 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -/* - * GPL HEADER START - * - * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 only, - * as published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, but - * WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * General Public License version 2 for more details (a copy is included - * in the LICENSE file that accompanied this code). - * - * You should have received a copy of the GNU General Public License - * version 2 along with this program; If not, see - * http://www.gnu.org/licenses/gpl-2.0.html - * - * GPL HEADER END - */ -/* - * Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved. - * Use is subject to license terms. - * - * Copyright (c) 2011, 2012, Intel Corporation. 
- */ -/* - * This file is part of Lustre, http://www.lustre.org/ - * Lustre is a trademark of Sun Microsystems, Inc. - * - * lnet/selftest/console.h - * - * kernel structure for LST console - * - * Author: Liang Zhen <liangzhen@clusterfs.com> - */ - -#ifndef __LST_CONSOLE_H__ -#define __LST_CONSOLE_H__ - -#include <linux/libcfs/libcfs.h> -#include <linux/lnet/lib-types.h> -#include <uapi/linux/lnet/lnetst.h> -#include "selftest.h" -#include "conrpc.h" - -/* node descriptor */ -struct lstcon_node { - struct lnet_process_id nd_id; /* id of the node */ - int nd_ref; /* reference count */ - int nd_state; /* state of the node */ - int nd_timeout; /* session timeout */ - unsigned long nd_stamp; /* timestamp of last replied RPC */ - struct lstcon_rpc nd_ping; /* ping rpc */ -}; - -/* node link descriptor */ -struct lstcon_ndlink { - struct list_head ndl_link; /* chain on list */ - struct list_head ndl_hlink; /* chain on hash */ - struct lstcon_node *ndl_node; /* pointer to node */ -}; - -/* (alias of nodes) group descriptor */ -struct lstcon_group { - struct list_head grp_link; /* chain on global group list - */ - int grp_ref; /* reference count */ - int grp_userland; /* has userland nodes */ - int grp_nnode; /* # of nodes */ - char grp_name[LST_NAME_SIZE]; /* group name */ - - struct list_head grp_trans_list; /* transaction list */ - struct list_head grp_ndl_list; /* nodes list */ - struct list_head grp_ndl_hash[0]; /* hash table for nodes */ -}; - -#define LST_BATCH_IDLE 0xB0 /* idle batch */ -#define LST_BATCH_RUNNING 0xB1 /* running batch */ - -struct lstcon_tsb_hdr { - struct lst_bid tsb_id; /* batch ID */ - int tsb_index; /* test index */ -}; - -/* (tests ) batch descriptor */ -struct lstcon_batch { - struct lstcon_tsb_hdr bat_hdr; /* test_batch header */ - struct list_head bat_link; /* chain on session's batches list */ - int bat_ntest; /* # of test */ - int bat_state; /* state of the batch */ - int bat_arg; /* parameter for run|stop, timeout - * for run, force for 
stop - */ - char bat_name[LST_NAME_SIZE];/* name of batch */ - - struct list_head bat_test_list; /* list head of tests (struct lstcon_test) - */ - struct list_head bat_trans_list; /* list head of transaction */ - struct list_head bat_cli_list; /* list head of client nodes - * (struct lstcon_node) - */ - struct list_head *bat_cli_hash; /* hash table of client nodes */ - struct list_head bat_srv_list; /* list head of server nodes */ - struct list_head *bat_srv_hash; /* hash table of server nodes */ -}; - -/* a single test descriptor */ -struct lstcon_test { - struct lstcon_tsb_hdr tes_hdr; /* test batch header */ - struct list_head tes_link; /* chain on batch's tests list */ - struct lstcon_batch *tes_batch; /* pointer to batch */ - - int tes_type; /* type of the test, i.e: bulk, ping */ - int tes_stop_onerr; /* stop on error */ - int tes_oneside; /* one-sided test */ - int tes_concur; /* concurrency */ - int tes_loop; /* loop count */ - int tes_dist; /* nodes distribution of target group */ - int tes_span; /* nodes span of target group */ - int tes_cliidx; /* client index, used for RPC creating */ - - struct list_head tes_trans_list; /* transaction list */ - struct lstcon_group *tes_src_grp; /* group run the test */ - struct lstcon_group *tes_dst_grp; /* target group */ - - int tes_paramlen; /* test parameter length */ - char tes_param[0]; /* test parameter */ -}; - -#define LST_GLOBAL_HASHSIZE 503 /* global nodes hash table size */ -#define LST_NODE_HASHSIZE 239 /* node hash table (for batch or group) */ - -#define LST_SESSION_NONE 0x0 /* no session */ -#define LST_SESSION_ACTIVE 0x1 /* working session */ - -#define LST_CONSOLE_TIMEOUT 300 /* default console timeout */ - -struct lstcon_session { - struct mutex ses_mutex; /* only 1 thread in session */ - struct lst_sid ses_id; /* global session id */ - int ses_key; /* local session key */ - int ses_state; /* state of session */ - int ses_timeout; /* timeout in seconds */ - time64_t ses_laststamp; /* last operation 
stamp (seconds) - */ - unsigned int ses_features; /* tests features of the session - */ - unsigned int ses_feats_updated:1; /* features are synced with - * remote test nodes - */ - unsigned int ses_force:1; /* force creating */ - unsigned int ses_shutdown:1; /* session is shutting down */ - unsigned int ses_expired:1; /* console is timedout */ - __u64 ses_id_cookie; /* batch id cookie */ - char ses_name[LST_NAME_SIZE];/* session name */ - struct lstcon_rpc_trans *ses_ping; /* session pinger */ - struct stt_timer ses_ping_timer; /* timer for pinger */ - struct lstcon_trans_stat ses_trans_stat; /* transaction stats */ - - struct list_head ses_trans_list; /* global list of transaction */ - struct list_head ses_grp_list; /* global list of groups */ - struct list_head ses_bat_list; /* global list of batches */ - struct list_head ses_ndl_list; /* global list of nodes */ - struct list_head *ses_ndl_hash; /* hash table of nodes */ - - spinlock_t ses_rpc_lock; /* serialize */ - atomic_t ses_rpc_counter; /* # of initialized RPCs */ - struct list_head ses_rpc_freelist; /* idle console rpc */ -}; /* session descriptor */ - -extern struct lstcon_session console_session; - -static inline struct lstcon_trans_stat * -lstcon_trans_stat(void) -{ - return &console_session.ses_trans_stat; -} - -static inline struct list_head * -lstcon_id2hash(struct lnet_process_id id, struct list_head *hash) -{ - unsigned int idx = LNET_NIDADDR(id.nid) % LST_NODE_HASHSIZE; - - return &hash[idx]; -} - -int lstcon_ioctl_entry(unsigned int cmd, struct libcfs_ioctl_hdr *hdr); -int lstcon_console_init(void); -int lstcon_console_fini(void); -int lstcon_session_match(struct lst_sid sid); -int lstcon_session_new(char *name, int key, unsigned int version, - int timeout, int flags, struct lst_sid __user *sid_up); -int lstcon_session_info(struct lst_sid __user *sid_up, int __user *key, - unsigned __user *verp, struct lstcon_ndlist_ent __user *entp, - char __user *name_up, int len); -int 
lstcon_session_end(void); -int lstcon_session_debug(int timeout, struct list_head __user *result_up); -int lstcon_session_feats_check(unsigned int feats); -int lstcon_batch_debug(int timeout, char *name, - int client, struct list_head __user *result_up); -int lstcon_group_debug(int timeout, char *name, - struct list_head __user *result_up); -int lstcon_nodes_debug(int timeout, int nnd, - struct lnet_process_id __user *nds_up, - struct list_head __user *result_up); -int lstcon_group_add(char *name); -int lstcon_group_del(char *name); -int lstcon_group_clean(char *name, int args); -int lstcon_group_refresh(char *name, struct list_head __user *result_up); -int lstcon_nodes_add(char *name, int nnd, struct lnet_process_id __user *nds_up, - unsigned int *featp, struct list_head __user *result_up); -int lstcon_nodes_remove(char *name, int nnd, - struct lnet_process_id __user *nds_up, - struct list_head __user *result_up); -int lstcon_group_info(char *name, struct lstcon_ndlist_ent __user *gent_up, - int *index_p, int *ndent_p, - struct lstcon_node_ent __user *ndents_up); -int lstcon_group_list(int idx, int len, char __user *name_up); -int lstcon_batch_add(char *name); -int lstcon_batch_run(char *name, int timeout, - struct list_head __user *result_up); -int lstcon_batch_stop(char *name, int force, - struct list_head __user *result_up); -int lstcon_test_batch_query(char *name, int testidx, - int client, int timeout, - struct list_head __user *result_up); -int lstcon_batch_del(char *name); -int lstcon_batch_list(int idx, int namelen, char __user *name_up); -int lstcon_batch_info(char *name, struct lstcon_test_batch_ent __user *ent_up, - int server, int testidx, int *index_p, - int *ndent_p, struct lstcon_node_ent __user *dents_up); -int lstcon_group_stat(char *grp_name, int timeout, - struct list_head __user *result_up); -int lstcon_nodes_stat(int count, struct lnet_process_id __user *ids_up, - int timeout, struct list_head __user *result_up); -int lstcon_test_add(char 
*batch_name, int type, int loop, - int concur, int dist, int span, - char *src_name, char *dst_name, - void *param, int paramlen, int *retp, - struct list_head __user *result_up); -#endif diff --git a/drivers/staging/lustre/lnet/selftest/framework.c b/drivers/staging/lustre/lnet/selftest/framework.c deleted file mode 100644 index 0ca1e3a780ca..000000000000 --- a/drivers/staging/lustre/lnet/selftest/framework.c +++ /dev/null @@ -1,1786 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -/* - * GPL HEADER START - * - * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 only, - * as published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, but - * WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * General Public License version 2 for more details (a copy is included - * in the LICENSE file that accompanied this code). - * - * You should have received a copy of the GNU General Public License - * version 2 along with this program; If not, see - * http://www.gnu.org/licenses/gpl-2.0.html - * - * GPL HEADER END - */ -/* - * Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved. - * Use is subject to license terms. - * - * Copyright (c) 2012, Intel Corporation. - */ -/* - * This file is part of Lustre, http://www.lustre.org/ - * Lustre is a trademark of Sun Microsystems, Inc. 
- * - * lnet/selftest/framework.c - * - * Author: Isaac Huang <isaac@clusterfs.com> - * Author: Liang Zhen <liangzhen@clusterfs.com> - */ - -#define DEBUG_SUBSYSTEM S_LNET - -#include "selftest.h" - -struct lst_sid LST_INVALID_SID = {LNET_NID_ANY, -1}; - -static int session_timeout = 100; -module_param(session_timeout, int, 0444); -MODULE_PARM_DESC(session_timeout, "test session timeout in seconds (100 by default, 0 == never)"); - -static int rpc_timeout = 64; -module_param(rpc_timeout, int, 0644); -MODULE_PARM_DESC(rpc_timeout, "rpc timeout in seconds (64 by default, 0 == never)"); - -#define sfw_unpack_id(id) \ -do { \ - __swab64s(&(id).nid); \ - __swab32s(&(id).pid); \ -} while (0) - -#define sfw_unpack_sid(sid) \ -do { \ - __swab64s(&(sid).ses_nid); \ - __swab64s(&(sid).ses_stamp); \ -} while (0) - -#define sfw_unpack_fw_counters(fc) \ -do { \ - __swab32s(&(fc).running_ms); \ - __swab32s(&(fc).active_batches); \ - __swab32s(&(fc).zombie_sessions); \ - __swab32s(&(fc).brw_errors); \ - __swab32s(&(fc).ping_errors); \ -} while (0) - -#define sfw_unpack_rpc_counters(rc) \ -do { \ - __swab32s(&(rc).errors); \ - __swab32s(&(rc).rpcs_sent); \ - __swab32s(&(rc).rpcs_rcvd); \ - __swab32s(&(rc).rpcs_dropped); \ - __swab32s(&(rc).rpcs_expired); \ - __swab64s(&(rc).bulk_get); \ - __swab64s(&(rc).bulk_put); \ -} while (0) - -#define sfw_unpack_lnet_counters(lc) \ -do { \ - __swab32s(&(lc).errors); \ - __swab32s(&(lc).msgs_max); \ - __swab32s(&(lc).msgs_alloc); \ - __swab32s(&(lc).send_count); \ - __swab32s(&(lc).recv_count); \ - __swab32s(&(lc).drop_count); \ - __swab32s(&(lc).route_count); \ - __swab64s(&(lc).send_length); \ - __swab64s(&(lc).recv_length); \ - __swab64s(&(lc).drop_length); \ - __swab64s(&(lc).route_length); \ -} while (0) - -#define sfw_test_active(t) (atomic_read(&(t)->tsi_nactive)) -#define sfw_batch_active(b) (atomic_read(&(b)->bat_nactive)) - -static struct smoketest_framework { - struct list_head fw_zombie_rpcs; /* RPCs to be recycled */ - struct 
list_head fw_zombie_sessions; /* stopping sessions */ - struct list_head fw_tests; /* registered test cases */ - atomic_t fw_nzombies; /* # zombie sessions */ - spinlock_t fw_lock; /* serialise */ - struct sfw_session *fw_session; /* _the_ session */ - int fw_shuttingdown; /* shutdown in progress */ - struct srpc_server_rpc *fw_active_srpc;/* running RPC */ -} sfw_data; - -/* forward ref's */ -int sfw_stop_batch(struct sfw_batch *tsb, int force); -void sfw_destroy_session(struct sfw_session *sn); - -static inline struct sfw_test_case * -sfw_find_test_case(int id) -{ - struct sfw_test_case *tsc; - - LASSERT(id <= SRPC_SERVICE_MAX_ID); - LASSERT(id > SRPC_FRAMEWORK_SERVICE_MAX_ID); - - list_for_each_entry(tsc, &sfw_data.fw_tests, tsc_list) { - if (tsc->tsc_srv_service->sv_id == id) - return tsc; - } - - return NULL; -} - -static int -sfw_register_test(struct srpc_service *service, - struct sfw_test_client_ops *cliops) -{ - struct sfw_test_case *tsc; - - if (sfw_find_test_case(service->sv_id)) { - CERROR("Failed to register test %s (%d)\n", - service->sv_name, service->sv_id); - return -EEXIST; - } - - tsc = kzalloc(sizeof(struct sfw_test_case), GFP_NOFS); - if (!tsc) - return -ENOMEM; - - tsc->tsc_cli_ops = cliops; - tsc->tsc_srv_service = service; - - list_add_tail(&tsc->tsc_list, &sfw_data.fw_tests); - return 0; -} - -static void -sfw_add_session_timer(void) -{ - struct sfw_session *sn = sfw_data.fw_session; - struct stt_timer *timer = &sn->sn_timer; - - LASSERT(!sfw_data.fw_shuttingdown); - - if (!sn || !sn->sn_timeout) - return; - - LASSERT(!sn->sn_timer_active); - - sn->sn_timer_active = 1; - timer->stt_expires = ktime_get_real_seconds() + sn->sn_timeout; - stt_add_timer(timer); -} - -static int -sfw_del_session_timer(void) -{ - struct sfw_session *sn = sfw_data.fw_session; - - if (!sn || !sn->sn_timer_active) - return 0; - - LASSERT(sn->sn_timeout); - - if (stt_del_timer(&sn->sn_timer)) { /* timer defused */ - sn->sn_timer_active = 0; - return 0; - } - - return 
-EBUSY; /* racing with sfw_session_expired() */ -} - -static void -sfw_deactivate_session(void) -__must_hold(&sfw_data.fw_lock) -{ - struct sfw_session *sn = sfw_data.fw_session; - int nactive = 0; - struct sfw_batch *tsb; - struct sfw_test_case *tsc; - - if (!sn) - return; - - LASSERT(!sn->sn_timer_active); - - sfw_data.fw_session = NULL; - atomic_inc(&sfw_data.fw_nzombies); - list_add(&sn->sn_list, &sfw_data.fw_zombie_sessions); - - spin_unlock(&sfw_data.fw_lock); - - list_for_each_entry(tsc, &sfw_data.fw_tests, tsc_list) { - srpc_abort_service(tsc->tsc_srv_service); - } - - spin_lock(&sfw_data.fw_lock); - - list_for_each_entry(tsb, &sn->sn_batches, bat_list) { - if (sfw_batch_active(tsb)) { - nactive++; - sfw_stop_batch(tsb, 1); - } - } - - if (nactive) - return; /* wait for active batches to stop */ - - list_del_init(&sn->sn_list); - spin_unlock(&sfw_data.fw_lock); - - sfw_destroy_session(sn); - - spin_lock(&sfw_data.fw_lock); -} - -static void -sfw_session_expired(void *data) -{ - struct sfw_session *sn = data; - - spin_lock(&sfw_data.fw_lock); - - LASSERT(sn->sn_timer_active); - LASSERT(sn == sfw_data.fw_session); - - CWARN("Session expired! 
sid: %s-%llu, name: %s\n", - libcfs_nid2str(sn->sn_id.ses_nid), - sn->sn_id.ses_stamp, &sn->sn_name[0]); - - sn->sn_timer_active = 0; - sfw_deactivate_session(); - - spin_unlock(&sfw_data.fw_lock); -} - -static inline void -sfw_init_session(struct sfw_session *sn, struct lst_sid sid, - unsigned int features, const char *name) -{ - struct stt_timer *timer = &sn->sn_timer; - - memset(sn, 0, sizeof(struct sfw_session)); - INIT_LIST_HEAD(&sn->sn_list); - INIT_LIST_HEAD(&sn->sn_batches); - atomic_set(&sn->sn_refcount, 1); /* +1 for caller */ - atomic_set(&sn->sn_brw_errors, 0); - atomic_set(&sn->sn_ping_errors, 0); - strlcpy(&sn->sn_name[0], name, sizeof(sn->sn_name)); - - sn->sn_timer_active = 0; - sn->sn_id = sid; - sn->sn_features = features; - sn->sn_timeout = session_timeout; - sn->sn_started = cfs_time_current(); - - timer->stt_data = sn; - timer->stt_func = sfw_session_expired; - INIT_LIST_HEAD(&timer->stt_list); -} - -/* completion handler for incoming framework RPCs */ -static void -sfw_server_rpc_done(struct srpc_server_rpc *rpc) -{ - struct srpc_service *sv = rpc->srpc_scd->scd_svc; - int status = rpc->srpc_status; - - CDEBUG(D_NET, "Incoming framework RPC done: service %s, peer %s, status %s:%d\n", - sv->sv_name, libcfs_id2str(rpc->srpc_peer), - swi_state2str(rpc->srpc_wi.swi_state), - status); - - if (rpc->srpc_bulk) - sfw_free_pages(rpc); -} - -static void -sfw_client_rpc_fini(struct srpc_client_rpc *rpc) -{ - LASSERT(!rpc->crpc_bulk.bk_niov); - LASSERT(list_empty(&rpc->crpc_list)); - LASSERT(!atomic_read(&rpc->crpc_refcount)); - - CDEBUG(D_NET, "Outgoing framework RPC done: service %d, peer %s, status %s:%d:%d\n", - rpc->crpc_service, libcfs_id2str(rpc->crpc_dest), - swi_state2str(rpc->crpc_wi.swi_state), - rpc->crpc_aborted, rpc->crpc_status); - - spin_lock(&sfw_data.fw_lock); - - /* my callers must finish all RPCs before shutting me down */ - LASSERT(!sfw_data.fw_shuttingdown); - list_add(&rpc->crpc_list, &sfw_data.fw_zombie_rpcs); - - 
spin_unlock(&sfw_data.fw_lock); -} - -static struct sfw_batch * -sfw_find_batch(struct lst_bid bid) -{ - struct sfw_session *sn = sfw_data.fw_session; - struct sfw_batch *bat; - - LASSERT(sn); - - list_for_each_entry(bat, &sn->sn_batches, bat_list) { - if (bat->bat_id.bat_id == bid.bat_id) - return bat; - } - - return NULL; -} - -static struct sfw_batch * -sfw_bid2batch(struct lst_bid bid) -{ - struct sfw_session *sn = sfw_data.fw_session; - struct sfw_batch *bat; - - LASSERT(sn); - - bat = sfw_find_batch(bid); - if (bat) - return bat; - - bat = kzalloc(sizeof(struct sfw_batch), GFP_NOFS); - if (!bat) - return NULL; - - bat->bat_error = 0; - bat->bat_session = sn; - bat->bat_id = bid; - atomic_set(&bat->bat_nactive, 0); - INIT_LIST_HEAD(&bat->bat_tests); - - list_add_tail(&bat->bat_list, &sn->sn_batches); - return bat; -} - -static int -sfw_get_stats(struct srpc_stat_reqst *request, struct srpc_stat_reply *reply) -{ - struct sfw_session *sn = sfw_data.fw_session; - struct sfw_counters *cnt = &reply->str_fw; - struct sfw_batch *bat; - - reply->str_sid = !sn ? 
LST_INVALID_SID : sn->sn_id; - - if (request->str_sid.ses_nid == LNET_NID_ANY) { - reply->str_status = EINVAL; - return 0; - } - - if (!sn || !sfw_sid_equal(request->str_sid, sn->sn_id)) { - reply->str_status = ESRCH; - return 0; - } - - lnet_counters_get(&reply->str_lnet); - srpc_get_counters(&reply->str_rpc); - - /* - * send over the msecs since the session was started - * with 32 bits to send, this is ~49 days - */ - cnt->running_ms = jiffies_to_msecs(jiffies - sn->sn_started); - cnt->brw_errors = atomic_read(&sn->sn_brw_errors); - cnt->ping_errors = atomic_read(&sn->sn_ping_errors); - cnt->zombie_sessions = atomic_read(&sfw_data.fw_nzombies); - - cnt->active_batches = 0; - list_for_each_entry(bat, &sn->sn_batches, bat_list) { - if (atomic_read(&bat->bat_nactive) > 0) - cnt->active_batches++; - } - - reply->str_status = 0; - return 0; -} - -int -sfw_make_session(struct srpc_mksn_reqst *request, struct srpc_mksn_reply *reply) -{ - struct sfw_session *sn = sfw_data.fw_session; - struct srpc_msg *msg = container_of(request, struct srpc_msg, - msg_body.mksn_reqst); - int cplen = 0; - - if (request->mksn_sid.ses_nid == LNET_NID_ANY) { - reply->mksn_sid = !sn ? 
LST_INVALID_SID : sn->sn_id; - reply->mksn_status = EINVAL; - return 0; - } - - if (sn) { - reply->mksn_status = 0; - reply->mksn_sid = sn->sn_id; - reply->mksn_timeout = sn->sn_timeout; - - if (sfw_sid_equal(request->mksn_sid, sn->sn_id)) { - atomic_inc(&sn->sn_refcount); - return 0; - } - - if (!request->mksn_force) { - reply->mksn_status = EBUSY; - cplen = strlcpy(&reply->mksn_name[0], &sn->sn_name[0], - sizeof(reply->mksn_name)); - if (cplen >= sizeof(reply->mksn_name)) - return -E2BIG; - return 0; - } - } - - /* - * reject the request if it requires unknown features - * NB: old version will always accept all features because it's not - * aware of srpc_msg::msg_ses_feats, it's a defect but it's also - * harmless because it will return zero feature to console, and it's - * console's responsibility to make sure all nodes in a session have - * same feature mask. - */ - if (msg->msg_ses_feats & ~LST_FEATS_MASK) { - reply->mksn_status = EPROTO; - return 0; - } - - /* brand new or create by force */ - sn = kzalloc(sizeof(struct sfw_session), GFP_NOFS); - if (!sn) { - CERROR("dropping RPC mksn under memory pressure\n"); - return -ENOMEM; - } - - sfw_init_session(sn, request->mksn_sid, - msg->msg_ses_feats, &request->mksn_name[0]); - - spin_lock(&sfw_data.fw_lock); - - sfw_deactivate_session(); - LASSERT(!sfw_data.fw_session); - sfw_data.fw_session = sn; - - spin_unlock(&sfw_data.fw_lock); - - reply->mksn_status = 0; - reply->mksn_sid = sn->sn_id; - reply->mksn_timeout = sn->sn_timeout; - return 0; -} - -static int -sfw_remove_session(struct srpc_rmsn_reqst *request, - struct srpc_rmsn_reply *reply) -{ - struct sfw_session *sn = sfw_data.fw_session; - - reply->rmsn_sid = !sn ? LST_INVALID_SID : sn->sn_id; - - if (request->rmsn_sid.ses_nid == LNET_NID_ANY) { - reply->rmsn_status = EINVAL; - return 0; - } - - if (!sn || !sfw_sid_equal(request->rmsn_sid, sn->sn_id)) { - reply->rmsn_status = !sn ? 
ESRCH : EBUSY; - return 0; - } - - if (!atomic_dec_and_test(&sn->sn_refcount)) { - reply->rmsn_status = 0; - return 0; - } - - spin_lock(&sfw_data.fw_lock); - sfw_deactivate_session(); - spin_unlock(&sfw_data.fw_lock); - - reply->rmsn_status = 0; - reply->rmsn_sid = LST_INVALID_SID; - LASSERT(!sfw_data.fw_session); - return 0; -} - -static int -sfw_debug_session(struct srpc_debug_reqst *request, - struct srpc_debug_reply *reply) -{ - struct sfw_session *sn = sfw_data.fw_session; - - if (!sn) { - reply->dbg_status = ESRCH; - reply->dbg_sid = LST_INVALID_SID; - return 0; - } - - reply->dbg_status = 0; - reply->dbg_sid = sn->sn_id; - reply->dbg_timeout = sn->sn_timeout; - if (strlcpy(reply->dbg_name, &sn->sn_name[0], sizeof(reply->dbg_name)) - >= sizeof(reply->dbg_name)) - return -E2BIG; - - return 0; -} - -static void -sfw_test_rpc_fini(struct srpc_client_rpc *rpc) -{ - struct sfw_test_unit *tsu = rpc->crpc_priv; - struct sfw_test_instance *tsi = tsu->tsu_instance; - - /* Called with hold of tsi->tsi_lock */ - LASSERT(list_empty(&rpc->crpc_list)); - list_add(&rpc->crpc_list, &tsi->tsi_free_rpcs); -} - -static inline int -sfw_test_buffers(struct sfw_test_instance *tsi) -{ - struct sfw_test_case *tsc; - struct srpc_service *svc; - int nbuf; - - LASSERT(tsi); - tsc = sfw_find_test_case(tsi->tsi_service); - LASSERT(tsc); - svc = tsc->tsc_srv_service; - LASSERT(svc); - - nbuf = min(svc->sv_wi_total, tsi->tsi_loop) / svc->sv_ncpts; - return max(SFW_TEST_WI_MIN, nbuf + SFW_TEST_WI_EXTRA); -} - -static int -sfw_load_test(struct sfw_test_instance *tsi) -{ - struct sfw_test_case *tsc; - struct srpc_service *svc; - int nbuf; - int rc; - - LASSERT(tsi); - tsc = sfw_find_test_case(tsi->tsi_service); - nbuf = sfw_test_buffers(tsi); - LASSERT(tsc); - svc = tsc->tsc_srv_service; - - if (tsi->tsi_is_client) { - tsi->tsi_ops = tsc->tsc_cli_ops; - return 0; - } - - rc = srpc_service_add_buffers(svc, nbuf); - if (rc) { - CWARN("Failed to reserve enough buffers: service %s, %d needed: 
%d\n", - svc->sv_name, nbuf, rc); - /* - * NB: this error handler is not strictly correct, because - * it may release more buffers than already allocated, - * but it doesn't matter because request portal should - * be lazy portal and will grow buffers if necessary. - */ - srpc_service_remove_buffers(svc, nbuf); - return -ENOMEM; - } - - CDEBUG(D_NET, "Reserved %d buffers for test %s\n", - nbuf * (srpc_serv_is_framework(svc) ? - 2 : cfs_cpt_number(cfs_cpt_table)), svc->sv_name); - return 0; -} - -static void -sfw_unload_test(struct sfw_test_instance *tsi) -{ - struct sfw_test_case *tsc; - - LASSERT(tsi); - tsc = sfw_find_test_case(tsi->tsi_service); - LASSERT(tsc); - - if (tsi->tsi_is_client) - return; - - /* - * shrink buffers, because request portal is lazy portal - * which can grow buffers at runtime so we may leave - * some buffers behind, but never mind... - */ - srpc_service_remove_buffers(tsc->tsc_srv_service, - sfw_test_buffers(tsi)); -} - -static void -sfw_destroy_test_instance(struct sfw_test_instance *tsi) -{ - struct srpc_client_rpc *rpc; - struct sfw_test_unit *tsu; - - if (!tsi->tsi_is_client) - goto clean; - - tsi->tsi_ops->tso_fini(tsi); - - LASSERT(!tsi->tsi_stopping); - LASSERT(list_empty(&tsi->tsi_active_rpcs)); - LASSERT(!sfw_test_active(tsi)); - - while (!list_empty(&tsi->tsi_units)) { - tsu = list_entry(tsi->tsi_units.next, - struct sfw_test_unit, tsu_list); - list_del(&tsu->tsu_list); - kfree(tsu); - } - - while (!list_empty(&tsi->tsi_free_rpcs)) { - rpc = list_entry(tsi->tsi_free_rpcs.next, - struct srpc_client_rpc, crpc_list); - list_del(&rpc->crpc_list); - kfree(rpc); - } - -clean: - sfw_unload_test(tsi); - kfree(tsi); -} - -static void -sfw_destroy_batch(struct sfw_batch *tsb) -{ - struct sfw_test_instance *tsi; - - LASSERT(!sfw_batch_active(tsb)); - LASSERT(list_empty(&tsb->bat_list)); - - while (!list_empty(&tsb->bat_tests)) { - tsi = list_entry(tsb->bat_tests.next, - struct sfw_test_instance, tsi_list); - list_del_init(&tsi->tsi_list); 
- sfw_destroy_test_instance(tsi); - } - - kfree(tsb); -} - -void -sfw_destroy_session(struct sfw_session *sn) -{ - struct sfw_batch *batch; - - LASSERT(list_empty(&sn->sn_list)); - LASSERT(sn != sfw_data.fw_session); - - while (!list_empty(&sn->sn_batches)) { - batch = list_entry(sn->sn_batches.next, - struct sfw_batch, bat_list); - list_del_init(&batch->bat_list); - sfw_destroy_batch(batch); - } - - kfree(sn); - atomic_dec(&sfw_data.fw_nzombies); -} - -static void -sfw_unpack_addtest_req(struct srpc_msg *msg) -{ - struct srpc_test_reqst *req = &msg->msg_body.tes_reqst; - - LASSERT(msg->msg_type == SRPC_MSG_TEST_REQST); - LASSERT(req->tsr_is_client); - - if (msg->msg_magic == SRPC_MSG_MAGIC) - return; /* no flipping needed */ - - LASSERT(msg->msg_magic == __swab32(SRPC_MSG_MAGIC)); - - if (req->tsr_service == SRPC_SERVICE_BRW) { - if (!(msg->msg_ses_feats & LST_FEAT_BULK_LEN)) { - struct test_bulk_req *bulk = &req->tsr_u.bulk_v0; - - __swab32s(&bulk->blk_opc); - __swab32s(&bulk->blk_npg); - __swab32s(&bulk->blk_flags); - - } else { - struct test_bulk_req_v1 *bulk = &req->tsr_u.bulk_v1; - - __swab16s(&bulk->blk_opc); - __swab16s(&bulk->blk_flags); - __swab32s(&bulk->blk_offset); - __swab32s(&bulk->blk_len); - } - - return; - } - - if (req->tsr_service == SRPC_SERVICE_PING) { - struct test_ping_req *ping = &req->tsr_u.ping; - - __swab32s(&ping->png_size); - __swab32s(&ping->png_flags); - return; - } - - LBUG(); -} - -static int -sfw_add_test_instance(struct sfw_batch *tsb, struct srpc_server_rpc *rpc) -{ - struct srpc_msg *msg = &rpc->srpc_reqstbuf->buf_msg; - struct srpc_test_reqst *req = &msg->msg_body.tes_reqst; - struct srpc_bulk *bk = rpc->srpc_bulk; - int ndest = req->tsr_ndest; - struct sfw_test_unit *tsu; - struct sfw_test_instance *tsi; - int i; - int rc; - - tsi = kzalloc(sizeof(*tsi), GFP_NOFS); - if (!tsi) { - CERROR("Can't allocate test instance for batch: %llu\n", - tsb->bat_id.bat_id); - return -ENOMEM; - } - - spin_lock_init(&tsi->tsi_lock); - 
atomic_set(&tsi->tsi_nactive, 0); - INIT_LIST_HEAD(&tsi->tsi_units); - INIT_LIST_HEAD(&tsi->tsi_free_rpcs); - INIT_LIST_HEAD(&tsi->tsi_active_rpcs); - - tsi->tsi_stopping = 0; - tsi->tsi_batch = tsb; - tsi->tsi_loop = req->tsr_loop; - tsi->tsi_concur = req->tsr_concur; - tsi->tsi_service = req->tsr_service; - tsi->tsi_is_client = !!(req->tsr_is_client); - tsi->tsi_stoptsu_onerr = !!(req->tsr_stop_onerr); - - rc = sfw_load_test(tsi); - if (rc) { - kfree(tsi); - return rc; - } - - LASSERT(!sfw_batch_active(tsb)); - - if (!tsi->tsi_is_client) { - /* it's test server, just add it to tsb */ - list_add_tail(&tsi->tsi_list, &tsb->bat_tests); - return 0; - } - - LASSERT(bk); - LASSERT(bk->bk_niov * SFW_ID_PER_PAGE >= (unsigned int)ndest); - LASSERT((unsigned int)bk->bk_len >= - sizeof(struct lnet_process_id_packed) * ndest); - - sfw_unpack_addtest_req(msg); - memcpy(&tsi->tsi_u, &req->tsr_u, sizeof(tsi->tsi_u)); - - for (i = 0; i < ndest; i++) { - struct lnet_process_id_packed *dests; - struct lnet_process_id_packed id; - int j; - - dests = page_address(bk->bk_iovs[i / SFW_ID_PER_PAGE].bv_page); - LASSERT(dests); /* my pages are within KVM always */ - id = dests[i % SFW_ID_PER_PAGE]; - if (msg->msg_magic != SRPC_MSG_MAGIC) - sfw_unpack_id(id); - - for (j = 0; j < tsi->tsi_concur; j++) { - tsu = kzalloc(sizeof(struct sfw_test_unit), GFP_NOFS); - if (!tsu) { - rc = -ENOMEM; - CERROR("Can't allocate tsu for %d\n", - tsi->tsi_service); - goto error; - } - - tsu->tsu_dest.nid = id.nid; - tsu->tsu_dest.pid = id.pid; - tsu->tsu_instance = tsi; - tsu->tsu_private = NULL; - list_add_tail(&tsu->tsu_list, &tsi->tsi_units); - } - } - - rc = tsi->tsi_ops->tso_init(tsi); - if (!rc) { - list_add_tail(&tsi->tsi_list, &tsb->bat_tests); - return 0; - } - -error: - LASSERT(rc); - sfw_destroy_test_instance(tsi); - return rc; -} - -static void -sfw_test_unit_done(struct sfw_test_unit *tsu) -{ - struct sfw_test_instance *tsi = tsu->tsu_instance; - struct sfw_batch *tsb = tsi->tsi_batch; - 
struct sfw_session *sn = tsb->bat_session; - - LASSERT(sfw_test_active(tsi)); - - if (!atomic_dec_and_test(&tsi->tsi_nactive)) - return; - - /* the test instance is done */ - spin_lock(&tsi->tsi_lock); - - tsi->tsi_stopping = 0; - - spin_unlock(&tsi->tsi_lock); - - spin_lock(&sfw_data.fw_lock); - - if (!atomic_dec_and_test(&tsb->bat_nactive) || /* tsb still active */ - sn == sfw_data.fw_session) { /* sn also active */ - spin_unlock(&sfw_data.fw_lock); - return; - } - - LASSERT(!list_empty(&sn->sn_list)); /* I'm a zombie! */ - - list_for_each_entry(tsb, &sn->sn_batches, bat_list) { - if (sfw_batch_active(tsb)) { - spin_unlock(&sfw_data.fw_lock); - return; - } - } - - list_del_init(&sn->sn_list); - spin_unlock(&sfw_data.fw_lock); - - sfw_destroy_session(sn); -} - -static void -sfw_test_rpc_done(struct srpc_client_rpc *rpc) -{ - struct sfw_test_unit *tsu = rpc->crpc_priv; - struct sfw_test_instance *tsi = tsu->tsu_instance; - int done = 0; - - tsi->tsi_ops->tso_done_rpc(tsu, rpc); - - spin_lock(&tsi->tsi_lock); - - LASSERT(sfw_test_active(tsi)); - LASSERT(!list_empty(&rpc->crpc_list)); - - list_del_init(&rpc->crpc_list); - - /* batch is stopping or loop is done or get error */ - if (tsi->tsi_stopping || !tsu->tsu_loop || - (rpc->crpc_status && tsi->tsi_stoptsu_onerr)) - done = 1; - - /* dec ref for poster */ - srpc_client_rpc_decref(rpc); - - spin_unlock(&tsi->tsi_lock); - - if (!done) { - swi_schedule_workitem(&tsu->tsu_worker); - return; - } - - sfw_test_unit_done(tsu); -} - -int -sfw_create_test_rpc(struct sfw_test_unit *tsu, struct lnet_process_id peer, - unsigned int features, int nblk, int blklen, - struct srpc_client_rpc **rpcpp) -{ - struct srpc_client_rpc *rpc = NULL; - struct sfw_test_instance *tsi = tsu->tsu_instance; - - spin_lock(&tsi->tsi_lock); - - LASSERT(sfw_test_active(tsi)); - /* pick request from buffer */ - rpc = list_first_entry_or_null(&tsi->tsi_free_rpcs, - struct srpc_client_rpc, crpc_list); - if (rpc) { - LASSERT(nblk == 
rpc->crpc_bulk.bk_niov); - list_del_init(&rpc->crpc_list); - } - - spin_unlock(&tsi->tsi_lock); - - if (!rpc) { - rpc = srpc_create_client_rpc(peer, tsi->tsi_service, nblk, - blklen, sfw_test_rpc_done, - sfw_test_rpc_fini, tsu); - } else { - srpc_init_client_rpc(rpc, peer, tsi->tsi_service, nblk, - blklen, sfw_test_rpc_done, - sfw_test_rpc_fini, tsu); - } - - if (!rpc) { - CERROR("Can't create rpc for test %d\n", tsi->tsi_service); - return -ENOMEM; - } - - rpc->crpc_reqstmsg.msg_ses_feats = features; - *rpcpp = rpc; - - return 0; -} - -static void -sfw_run_test(struct swi_workitem *wi) -{ - struct sfw_test_unit *tsu = container_of(wi, struct sfw_test_unit, tsu_worker); - struct sfw_test_instance *tsi = tsu->tsu_instance; - struct srpc_client_rpc *rpc = NULL; - - if (tsi->tsi_ops->tso_prep_rpc(tsu, tsu->tsu_dest, &rpc)) { - LASSERT(!rpc); - goto test_done; - } - - LASSERT(rpc); - - spin_lock(&tsi->tsi_lock); - - if (tsi->tsi_stopping) { - list_add(&rpc->crpc_list, &tsi->tsi_free_rpcs); - spin_unlock(&tsi->tsi_lock); - goto test_done; - } - - if (tsu->tsu_loop > 0) - tsu->tsu_loop--; - - list_add_tail(&rpc->crpc_list, &tsi->tsi_active_rpcs); - spin_unlock(&tsi->tsi_lock); - - spin_lock(&rpc->crpc_lock); - rpc->crpc_timeout = rpc_timeout; - srpc_post_rpc(rpc); - spin_unlock(&rpc->crpc_lock); - return; - -test_done: - /* - * No one can schedule me now since: - * - previous RPC, if any, has done and - * - no new RPC is initiated. - * - my batch is still active; no one can run it again now. 
- * Cancel pending schedules and prevent future schedule attempts: - */ - sfw_test_unit_done(tsu); -} - -static int -sfw_run_batch(struct sfw_batch *tsb) -{ - struct swi_workitem *wi; - struct sfw_test_unit *tsu; - struct sfw_test_instance *tsi; - - if (sfw_batch_active(tsb)) { - CDEBUG(D_NET, "Batch already active: %llu (%d)\n", - tsb->bat_id.bat_id, atomic_read(&tsb->bat_nactive)); - return 0; - } - - list_for_each_entry(tsi, &tsb->bat_tests, tsi_list) { - if (!tsi->tsi_is_client) /* skip server instances */ - continue; - - LASSERT(!tsi->tsi_stopping); - LASSERT(!sfw_test_active(tsi)); - - atomic_inc(&tsb->bat_nactive); - - list_for_each_entry(tsu, &tsi->tsi_units, tsu_list) { - atomic_inc(&tsi->tsi_nactive); - tsu->tsu_loop = tsi->tsi_loop; - wi = &tsu->tsu_worker; - swi_init_workitem(wi, sfw_run_test, - lst_test_wq[lnet_cpt_of_nid(tsu->tsu_dest.nid)]); - swi_schedule_workitem(wi); - } - } - - return 0; -} - -int -sfw_stop_batch(struct sfw_batch *tsb, int force) -{ - struct sfw_test_instance *tsi; - struct srpc_client_rpc *rpc; - - if (!sfw_batch_active(tsb)) { - CDEBUG(D_NET, "Batch %llu inactive\n", tsb->bat_id.bat_id); - return 0; - } - - list_for_each_entry(tsi, &tsb->bat_tests, tsi_list) { - spin_lock(&tsi->tsi_lock); - - if (!tsi->tsi_is_client || - !sfw_test_active(tsi) || tsi->tsi_stopping) { - spin_unlock(&tsi->tsi_lock); - continue; - } - - tsi->tsi_stopping = 1; - - if (!force) { - spin_unlock(&tsi->tsi_lock); - continue; - } - - /* abort launched rpcs in the test */ - list_for_each_entry(rpc, &tsi->tsi_active_rpcs, crpc_list) { - spin_lock(&rpc->crpc_lock); - - srpc_abort_rpc(rpc, -EINTR); - - spin_unlock(&rpc->crpc_lock); - } - - spin_unlock(&tsi->tsi_lock); - } - - return 0; -} - -static int -sfw_query_batch(struct sfw_batch *tsb, int testidx, - struct srpc_batch_reply *reply) -{ - struct sfw_test_instance *tsi; - - if (testidx < 0) - return -EINVAL; - - if (!testidx) { - reply->bar_active = atomic_read(&tsb->bat_nactive); - return 0; - } - - 
list_for_each_entry(tsi, &tsb->bat_tests, tsi_list) { - if (testidx-- > 1) - continue; - - reply->bar_active = atomic_read(&tsi->tsi_nactive); - return 0; - } - - return -ENOENT; -} - -void -sfw_free_pages(struct srpc_server_rpc *rpc) -{ - srpc_free_bulk(rpc->srpc_bulk); - rpc->srpc_bulk = NULL; -} - -int -sfw_alloc_pages(struct srpc_server_rpc *rpc, int cpt, int npages, int len, - int sink) -{ - LASSERT(!rpc->srpc_bulk); - LASSERT(npages > 0 && npages <= LNET_MAX_IOV); - - rpc->srpc_bulk = srpc_alloc_bulk(cpt, 0, npages, len, sink); - if (!rpc->srpc_bulk) - return -ENOMEM; - - return 0; -} - -static int -sfw_add_test(struct srpc_server_rpc *rpc) -{ - struct sfw_session *sn = sfw_data.fw_session; - struct srpc_test_reply *reply = &rpc->srpc_replymsg.msg_body.tes_reply; - struct srpc_test_reqst *request; - int rc; - struct sfw_batch *bat; - - request = &rpc->srpc_reqstbuf->buf_msg.msg_body.tes_reqst; - reply->tsr_sid = !sn ? LST_INVALID_SID : sn->sn_id; - - if (!request->tsr_loop || - !request->tsr_concur || - request->tsr_sid.ses_nid == LNET_NID_ANY || - request->tsr_ndest > SFW_MAX_NDESTS || - (request->tsr_is_client && !request->tsr_ndest) || - request->tsr_concur > SFW_MAX_CONCUR || - request->tsr_service > SRPC_SERVICE_MAX_ID || - request->tsr_service <= SRPC_FRAMEWORK_SERVICE_MAX_ID) { - reply->tsr_status = EINVAL; - return 0; - } - - if (!sn || !sfw_sid_equal(request->tsr_sid, sn->sn_id) || - !sfw_find_test_case(request->tsr_service)) { - reply->tsr_status = ENOENT; - return 0; - } - - bat = sfw_bid2batch(request->tsr_bid); - if (!bat) { - CERROR("dropping RPC %s from %s under memory pressure\n", - rpc->srpc_scd->scd_svc->sv_name, - libcfs_id2str(rpc->srpc_peer)); - return -ENOMEM; - } - - if (sfw_batch_active(bat)) { - reply->tsr_status = EBUSY; - return 0; - } - - if (request->tsr_is_client && !rpc->srpc_bulk) { - /* rpc will be resumed later in sfw_bulk_ready */ - int npg = sfw_id_pages(request->tsr_ndest); - int len; - - if (!(sn->sn_features & 
LST_FEAT_BULK_LEN)) { - len = npg * PAGE_SIZE; - - } else { - len = sizeof(struct lnet_process_id_packed) * - request->tsr_ndest; - } - - return sfw_alloc_pages(rpc, CFS_CPT_ANY, npg, len, 1); - } - - rc = sfw_add_test_instance(bat, rpc); - CDEBUG(!rc ? D_NET : D_WARNING, - "%s test: sv %d %s, loop %d, concur %d, ndest %d\n", - !rc ? "Added" : "Failed to add", request->tsr_service, - request->tsr_is_client ? "client" : "server", - request->tsr_loop, request->tsr_concur, request->tsr_ndest); - - reply->tsr_status = (rc < 0) ? -rc : rc; - return 0; -} - -static int -sfw_control_batch(struct srpc_batch_reqst *request, - struct srpc_batch_reply *reply) -{ - struct sfw_session *sn = sfw_data.fw_session; - int rc = 0; - struct sfw_batch *bat; - - reply->bar_sid = !sn ? LST_INVALID_SID : sn->sn_id; - - if (!sn || !sfw_sid_equal(request->bar_sid, sn->sn_id)) { - reply->bar_status = ESRCH; - return 0; - } - - bat = sfw_find_batch(request->bar_bid); - if (!bat) { - reply->bar_status = ENOENT; - return 0; - } - - switch (request->bar_opc) { - case SRPC_BATCH_OPC_RUN: - rc = sfw_run_batch(bat); - break; - - case SRPC_BATCH_OPC_STOP: - rc = sfw_stop_batch(bat, request->bar_arg); - break; - - case SRPC_BATCH_OPC_QUERY: - rc = sfw_query_batch(bat, request->bar_testidx, reply); - break; - - default: - return -EINVAL; /* drop it */ - } - - reply->bar_status = (rc < 0) ? 
-rc : rc; - return 0; -} - -static int -sfw_handle_server_rpc(struct srpc_server_rpc *rpc) -{ - struct srpc_service *sv = rpc->srpc_scd->scd_svc; - struct srpc_msg *reply = &rpc->srpc_replymsg; - struct srpc_msg *request = &rpc->srpc_reqstbuf->buf_msg; - unsigned int features = LST_FEATS_MASK; - int rc = 0; - - LASSERT(!sfw_data.fw_active_srpc); - LASSERT(sv->sv_id <= SRPC_FRAMEWORK_SERVICE_MAX_ID); - - spin_lock(&sfw_data.fw_lock); - - if (sfw_data.fw_shuttingdown) { - spin_unlock(&sfw_data.fw_lock); - return -ESHUTDOWN; - } - - /* Remove timer to avoid racing with it or expiring active session */ - if (sfw_del_session_timer()) { - CERROR("dropping RPC %s from %s: racing with expiry timer\n", - sv->sv_name, libcfs_id2str(rpc->srpc_peer)); - spin_unlock(&sfw_data.fw_lock); - return -EAGAIN; - } - - sfw_data.fw_active_srpc = rpc; - spin_unlock(&sfw_data.fw_lock); - - sfw_unpack_message(request); - LASSERT(request->msg_type == srpc_service2request(sv->sv_id)); - - /* rpc module should have checked this */ - LASSERT(request->msg_version == SRPC_MSG_VERSION); - - if (sv->sv_id != SRPC_SERVICE_MAKE_SESSION && - sv->sv_id != SRPC_SERVICE_DEBUG) { - struct sfw_session *sn = sfw_data.fw_session; - - if (sn && - sn->sn_features != request->msg_ses_feats) { - CNETERR("Features of framework RPC don't match features of current session: %x/%x\n", - request->msg_ses_feats, sn->sn_features); - reply->msg_body.reply.status = EPROTO; - reply->msg_body.reply.sid = sn->sn_id; - goto out; - } - - } else if (request->msg_ses_feats & ~LST_FEATS_MASK) { - /* - * NB: at this point, old version will ignore features and - * create new session anyway, so console should be able - * to handle this - */ - reply->msg_body.reply.status = EPROTO; - goto out; - } - - switch (sv->sv_id) { - default: - LBUG(); - case SRPC_SERVICE_TEST: - rc = sfw_add_test(rpc); - break; - - case SRPC_SERVICE_BATCH: - rc = sfw_control_batch(&request->msg_body.bat_reqst, - &reply->msg_body.bat_reply); - break; - - case 
SRPC_SERVICE_QUERY_STAT: - rc = sfw_get_stats(&request->msg_body.stat_reqst, - &reply->msg_body.stat_reply); - break; - - case SRPC_SERVICE_DEBUG: - rc = sfw_debug_session(&request->msg_body.dbg_reqst, - &reply->msg_body.dbg_reply); - break; - - case SRPC_SERVICE_MAKE_SESSION: - rc = sfw_make_session(&request->msg_body.mksn_reqst, - &reply->msg_body.mksn_reply); - break; - - case SRPC_SERVICE_REMOVE_SESSION: - rc = sfw_remove_session(&request->msg_body.rmsn_reqst, - &reply->msg_body.rmsn_reply); - break; - } - - if (sfw_data.fw_session) - features = sfw_data.fw_session->sn_features; - out: - reply->msg_ses_feats = features; - rpc->srpc_done = sfw_server_rpc_done; - spin_lock(&sfw_data.fw_lock); - - if (!sfw_data.fw_shuttingdown) - sfw_add_session_timer(); - - sfw_data.fw_active_srpc = NULL; - spin_unlock(&sfw_data.fw_lock); - return rc; -} - -static int -sfw_bulk_ready(struct srpc_server_rpc *rpc, int status) -{ - struct srpc_service *sv = rpc->srpc_scd->scd_svc; - int rc; - - LASSERT(rpc->srpc_bulk); - LASSERT(sv->sv_id == SRPC_SERVICE_TEST); - LASSERT(!sfw_data.fw_active_srpc); - LASSERT(rpc->srpc_reqstbuf->buf_msg.msg_body.tes_reqst.tsr_is_client); - - spin_lock(&sfw_data.fw_lock); - - if (status) { - CERROR("Bulk transfer failed for RPC: service %s, peer %s, status %d\n", - sv->sv_name, libcfs_id2str(rpc->srpc_peer), status); - spin_unlock(&sfw_data.fw_lock); - return -EIO; - } - - if (sfw_data.fw_shuttingdown) { - spin_unlock(&sfw_data.fw_lock); - return -ESHUTDOWN; - } - - if (sfw_del_session_timer()) { - CERROR("dropping RPC %s from %s: racing with expiry timer\n", - sv->sv_name, libcfs_id2str(rpc->srpc_peer)); - spin_unlock(&sfw_data.fw_lock); - return -EAGAIN; - } - - sfw_data.fw_active_srpc = rpc; - spin_unlock(&sfw_data.fw_lock); - - rc = sfw_add_test(rpc); - - spin_lock(&sfw_data.fw_lock); - - if (!sfw_data.fw_shuttingdown) - sfw_add_session_timer(); - - sfw_data.fw_active_srpc = NULL; - spin_unlock(&sfw_data.fw_lock); - return rc; -} - -struct 
srpc_client_rpc * -sfw_create_rpc(struct lnet_process_id peer, int service, - unsigned int features, int nbulkiov, int bulklen, - void (*done)(struct srpc_client_rpc *), void *priv) -{ - struct srpc_client_rpc *rpc = NULL; - - spin_lock(&sfw_data.fw_lock); - - LASSERT(!sfw_data.fw_shuttingdown); - LASSERT(service <= SRPC_FRAMEWORK_SERVICE_MAX_ID); - - if (!nbulkiov && !list_empty(&sfw_data.fw_zombie_rpcs)) { - rpc = list_entry(sfw_data.fw_zombie_rpcs.next, - struct srpc_client_rpc, crpc_list); - list_del(&rpc->crpc_list); - - srpc_init_client_rpc(rpc, peer, service, 0, 0, - done, sfw_client_rpc_fini, priv); - } - - spin_unlock(&sfw_data.fw_lock); - - if (!rpc) { - rpc = srpc_create_client_rpc(peer, service, - nbulkiov, bulklen, done, - nbulkiov ? NULL : - sfw_client_rpc_fini, - priv); - } - - if (rpc) /* "session" is concept in framework */ - rpc->crpc_reqstmsg.msg_ses_feats = features; - - return rpc; -} - -void -sfw_unpack_message(struct srpc_msg *msg) -{ - if (msg->msg_magic == SRPC_MSG_MAGIC) - return; /* no flipping needed */ - - /* srpc module should guarantee I wouldn't get crap */ - LASSERT(msg->msg_magic == __swab32(SRPC_MSG_MAGIC)); - - if (msg->msg_type == SRPC_MSG_STAT_REQST) { - struct srpc_stat_reqst *req = &msg->msg_body.stat_reqst; - - __swab32s(&req->str_type); - __swab64s(&req->str_rpyid); - sfw_unpack_sid(req->str_sid); - return; - } - - if (msg->msg_type == SRPC_MSG_STAT_REPLY) { - struct srpc_stat_reply *rep = &msg->msg_body.stat_reply; - - __swab32s(&rep->str_status); - sfw_unpack_sid(rep->str_sid); - sfw_unpack_fw_counters(rep->str_fw); - sfw_unpack_rpc_counters(rep->str_rpc); - sfw_unpack_lnet_counters(rep->str_lnet); - return; - } - - if (msg->msg_type == SRPC_MSG_MKSN_REQST) { - struct srpc_mksn_reqst *req = &msg->msg_body.mksn_reqst; - - __swab64s(&req->mksn_rpyid); - __swab32s(&req->mksn_force); - sfw_unpack_sid(req->mksn_sid); - return; - } - - if (msg->msg_type == SRPC_MSG_MKSN_REPLY) { - struct srpc_mksn_reply *rep = 
&msg->msg_body.mksn_reply; - - __swab32s(&rep->mksn_status); - __swab32s(&rep->mksn_timeout); - sfw_unpack_sid(rep->mksn_sid); - return; - } - - if (msg->msg_type == SRPC_MSG_RMSN_REQST) { - struct srpc_rmsn_reqst *req = &msg->msg_body.rmsn_reqst; - - __swab64s(&req->rmsn_rpyid); - sfw_unpack_sid(req->rmsn_sid); - return; - } - - if (msg->msg_type == SRPC_MSG_RMSN_REPLY) { - struct srpc_rmsn_reply *rep = &msg->msg_body.rmsn_reply; - - __swab32s(&rep->rmsn_status); - sfw_unpack_sid(rep->rmsn_sid); - return; - } - - if (msg->msg_type == SRPC_MSG_DEBUG_REQST) { - struct srpc_debug_reqst *req = &msg->msg_body.dbg_reqst; - - __swab64s(&req->dbg_rpyid); - __swab32s(&req->dbg_flags); - sfw_unpack_sid(req->dbg_sid); - return; - } - - if (msg->msg_type == SRPC_MSG_DEBUG_REPLY) { - struct srpc_debug_reply *rep = &msg->msg_body.dbg_reply; - - __swab32s(&rep->dbg_nbatch); - __swab32s(&rep->dbg_timeout); - sfw_unpack_sid(rep->dbg_sid); - return; - } - - if (msg->msg_type == SRPC_MSG_BATCH_REQST) { - struct srpc_batch_reqst *req = &msg->msg_body.bat_reqst; - - __swab32s(&req->bar_opc); - __swab64s(&req->bar_rpyid); - __swab32s(&req->bar_testidx); - __swab32s(&req->bar_arg); - sfw_unpack_sid(req->bar_sid); - __swab64s(&req->bar_bid.bat_id); - return; - } - - if (msg->msg_type == SRPC_MSG_BATCH_REPLY) { - struct srpc_batch_reply *rep = &msg->msg_body.bat_reply; - - __swab32s(&rep->bar_status); - sfw_unpack_sid(rep->bar_sid); - return; - } - - if (msg->msg_type == SRPC_MSG_TEST_REQST) { - struct srpc_test_reqst *req = &msg->msg_body.tes_reqst; - - __swab64s(&req->tsr_rpyid); - __swab64s(&req->tsr_bulkid); - __swab32s(&req->tsr_loop); - __swab32s(&req->tsr_ndest); - __swab32s(&req->tsr_concur); - __swab32s(&req->tsr_service); - sfw_unpack_sid(req->tsr_sid); - __swab64s(&req->tsr_bid.bat_id); - return; - } - - if (msg->msg_type == SRPC_MSG_TEST_REPLY) { - struct srpc_test_reply *rep = &msg->msg_body.tes_reply; - - __swab32s(&rep->tsr_status); - sfw_unpack_sid(rep->tsr_sid); - return; 
- } - - if (msg->msg_type == SRPC_MSG_JOIN_REQST) { - struct srpc_join_reqst *req = &msg->msg_body.join_reqst; - - __swab64s(&req->join_rpyid); - sfw_unpack_sid(req->join_sid); - return; - } - - if (msg->msg_type == SRPC_MSG_JOIN_REPLY) { - struct srpc_join_reply *rep = &msg->msg_body.join_reply; - - __swab32s(&rep->join_status); - __swab32s(&rep->join_timeout); - sfw_unpack_sid(rep->join_sid); - return; - } - - LBUG(); -} - -void -sfw_abort_rpc(struct srpc_client_rpc *rpc) -{ - LASSERT(atomic_read(&rpc->crpc_refcount) > 0); - LASSERT(rpc->crpc_service <= SRPC_FRAMEWORK_SERVICE_MAX_ID); - - spin_lock(&rpc->crpc_lock); - srpc_abort_rpc(rpc, -EINTR); - spin_unlock(&rpc->crpc_lock); -} - -void -sfw_post_rpc(struct srpc_client_rpc *rpc) -{ - spin_lock(&rpc->crpc_lock); - - LASSERT(!rpc->crpc_closed); - LASSERT(!rpc->crpc_aborted); - LASSERT(list_empty(&rpc->crpc_list)); - LASSERT(!sfw_data.fw_shuttingdown); - - rpc->crpc_timeout = rpc_timeout; - srpc_post_rpc(rpc); - - spin_unlock(&rpc->crpc_lock); -} - -static struct srpc_service sfw_services[] = { - { - /* sv_id */ SRPC_SERVICE_DEBUG, - /* sv_name */ "debug", - 0 - }, - { - /* sv_id */ SRPC_SERVICE_QUERY_STAT, - /* sv_name */ "query stats", - 0 - }, - { - /* sv_id */ SRPC_SERVICE_MAKE_SESSION, - /* sv_name */ "make session", - 0 - }, - { - /* sv_id */ SRPC_SERVICE_REMOVE_SESSION, - /* sv_name */ "remove session", - 0 - }, - { - /* sv_id */ SRPC_SERVICE_BATCH, - /* sv_name */ "batch service", - 0 - }, - { - /* sv_id */ SRPC_SERVICE_TEST, - /* sv_name */ "test service", - 0 - }, - { - /* sv_id */ 0, - /* sv_name */ NULL, - 0 - } -}; - -int -sfw_startup(void) -{ - int i; - int rc; - int error; - struct srpc_service *sv; - struct sfw_test_case *tsc; - - if (session_timeout < 0) { - CERROR("Session timeout must be non-negative: %d\n", - session_timeout); - return -EINVAL; - } - - if (rpc_timeout < 0) { - CERROR("RPC timeout must be non-negative: %d\n", - rpc_timeout); - return -EINVAL; - } - - if (!session_timeout) - 
CWARN("Zero session_timeout specified - test sessions never expire.\n"); - - if (!rpc_timeout) - CWARN("Zero rpc_timeout specified - test RPC never expire.\n"); - - memset(&sfw_data, 0, sizeof(struct smoketest_framework)); - - sfw_data.fw_session = NULL; - sfw_data.fw_active_srpc = NULL; - spin_lock_init(&sfw_data.fw_lock); - atomic_set(&sfw_data.fw_nzombies, 0); - INIT_LIST_HEAD(&sfw_data.fw_tests); - INIT_LIST_HEAD(&sfw_data.fw_zombie_rpcs); - INIT_LIST_HEAD(&sfw_data.fw_zombie_sessions); - - brw_init_test_client(); - brw_init_test_service(); - rc = sfw_register_test(&brw_test_service, &brw_test_client); - LASSERT(!rc); - - ping_init_test_client(); - ping_init_test_service(); - rc = sfw_register_test(&ping_test_service, &ping_test_client); - LASSERT(!rc); - - error = 0; - list_for_each_entry(tsc, &sfw_data.fw_tests, tsc_list) { - sv = tsc->tsc_srv_service; - - rc = srpc_add_service(sv); - LASSERT(rc != -EBUSY); - if (rc) { - CWARN("Failed to add %s service: %d\n", - sv->sv_name, rc); - error = rc; - } - } - - for (i = 0; ; i++) { - sv = &sfw_services[i]; - if (!sv->sv_name) - break; - - sv->sv_bulk_ready = NULL; - sv->sv_handler = sfw_handle_server_rpc; - sv->sv_wi_total = SFW_FRWK_WI_MAX; - if (sv->sv_id == SRPC_SERVICE_TEST) - sv->sv_bulk_ready = sfw_bulk_ready; - - rc = srpc_add_service(sv); - LASSERT(rc != -EBUSY); - if (rc) { - CWARN("Failed to add %s service: %d\n", - sv->sv_name, rc); - error = rc; - } - - /* about to sfw_shutdown, no need to add buffer */ - if (error) - continue; - - rc = srpc_service_add_buffers(sv, sv->sv_wi_total); - if (rc) { - CWARN("Failed to reserve enough buffers: service %s, %d needed: %d\n", - sv->sv_name, sv->sv_wi_total, rc); - error = -ENOMEM; - } - } - - if (error) - sfw_shutdown(); - return error; -} - -void -sfw_shutdown(void) -{ - struct srpc_service *sv; - struct sfw_test_case *tsc; - int i; - - spin_lock(&sfw_data.fw_lock); - - sfw_data.fw_shuttingdown = 1; - lst_wait_until(!sfw_data.fw_active_srpc, sfw_data.fw_lock, - 
"waiting for active RPC to finish.\n"); - - if (sfw_del_session_timer()) - lst_wait_until(!sfw_data.fw_session, sfw_data.fw_lock, - "waiting for session timer to explode.\n"); - - sfw_deactivate_session(); - lst_wait_until(!atomic_read(&sfw_data.fw_nzombies), - sfw_data.fw_lock, - "waiting for %d zombie sessions to die.\n", - atomic_read(&sfw_data.fw_nzombies)); - - spin_unlock(&sfw_data.fw_lock); - - for (i = 0; ; i++) { - sv = &sfw_services[i]; - if (!sv->sv_name) - break; - - srpc_shutdown_service(sv); - srpc_remove_service(sv); - } - - list_for_each_entry(tsc, &sfw_data.fw_tests, tsc_list) { - sv = tsc->tsc_srv_service; - srpc_shutdown_service(sv); - srpc_remove_service(sv); - } - - while (!list_empty(&sfw_data.fw_zombie_rpcs)) { - struct srpc_client_rpc *rpc; - - rpc = list_entry(sfw_data.fw_zombie_rpcs.next, - struct srpc_client_rpc, crpc_list); - list_del(&rpc->crpc_list); - - kfree(rpc); - } - - for (i = 0; ; i++) { - sv = &sfw_services[i]; - if (!sv->sv_name) - break; - - srpc_wait_service_shutdown(sv); - } - - while (!list_empty(&sfw_data.fw_tests)) { - tsc = list_entry(sfw_data.fw_tests.next, - struct sfw_test_case, tsc_list); - - srpc_wait_service_shutdown(tsc->tsc_srv_service); - - list_del(&tsc->tsc_list); - kfree(tsc); - } -} diff --git a/drivers/staging/lustre/lnet/selftest/module.c b/drivers/staging/lustre/lnet/selftest/module.c deleted file mode 100644 index 7359aa56d9b3..000000000000 --- a/drivers/staging/lustre/lnet/selftest/module.c +++ /dev/null @@ -1,165 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -/* - * GPL HEADER START - * - * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 only, - * as published by the Free Software Foundation. 
- * - * This program is distributed in the hope that it will be useful, but - * WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * General Public License version 2 for more details (a copy is included - * in the LICENSE file that accompanied this code). - * - * You should have received a copy of the GNU General Public License - * version 2 along with this program; If not, see - * http://www.gnu.org/licenses/gpl-2.0.html - * - * GPL HEADER END - */ -/* - * Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved. - * Use is subject to license terms. - * - * Copyright (c) 2012, Intel Corporation. - */ -/* - * This file is part of Lustre, http://www.lustre.org/ - * Lustre is a trademark of Sun Microsystems, Inc. - */ - -#define DEBUG_SUBSYSTEM S_LNET - -#include "selftest.h" -#include "console.h" - -enum { - LST_INIT_NONE = 0, - LST_INIT_WI_SERIAL, - LST_INIT_WI_TEST, - LST_INIT_RPC, - LST_INIT_FW, - LST_INIT_CONSOLE -}; - -static int lst_init_step = LST_INIT_NONE; - -struct workqueue_struct *lst_serial_wq; -struct workqueue_struct **lst_test_wq; - -static void -lnet_selftest_exit(void) -{ - int i; - - switch (lst_init_step) { - case LST_INIT_CONSOLE: - lstcon_console_fini(); - /* fall through */ - case LST_INIT_FW: - sfw_shutdown(); - /* fall through */ - case LST_INIT_RPC: - srpc_shutdown(); - /* fall through */ - case LST_INIT_WI_TEST: - for (i = 0; - i < cfs_cpt_number(lnet_cpt_table()); i++) { - if (!lst_test_wq[i]) - continue; - destroy_workqueue(lst_test_wq[i]); - } - kvfree(lst_test_wq); - lst_test_wq = NULL; - /* fall through */ - case LST_INIT_WI_SERIAL: - destroy_workqueue(lst_serial_wq); - lst_serial_wq = NULL; - case LST_INIT_NONE: - break; - default: - LBUG(); - } -} - -static int -lnet_selftest_init(void) -{ - int nscheds; - int rc = -ENOMEM; - int i; - - lst_serial_wq = alloc_ordered_workqueue("lst_s", 0); - if (!lst_serial_wq) { - CERROR("Failed to 
create serial WI scheduler for LST\n"); - return -ENOMEM; - } - lst_init_step = LST_INIT_WI_SERIAL; - - nscheds = cfs_cpt_number(lnet_cpt_table()); - lst_test_wq = kvmalloc_array(nscheds, sizeof(lst_test_wq[0]), - GFP_KERNEL | __GFP_ZERO); - if (!lst_test_wq) { - rc = -ENOMEM; - goto error; - } - - lst_init_step = LST_INIT_WI_TEST; - for (i = 0; i < nscheds; i++) { - int nthrs = cfs_cpt_weight(lnet_cpt_table(), i); - struct workqueue_attrs attrs = {0}; - cpumask_var_t *mask = cfs_cpt_cpumask(lnet_cpt_table(), i); - - /* reserve at least one CPU for LND */ - nthrs = max(nthrs - 1, 1); - lst_test_wq[i] = alloc_workqueue("lst_t", WQ_UNBOUND, nthrs); - if (!lst_test_wq[i]) { - CWARN("Failed to create CPU partition affinity WI scheduler %d for LST\n", - i); - rc = -ENOMEM; - goto error; - } - - if (mask && alloc_cpumask_var(&attrs.cpumask, GFP_KERNEL)) { - cpumask_copy(attrs.cpumask, *mask); - apply_workqueue_attrs(lst_test_wq[i], &attrs); - free_cpumask_var(attrs.cpumask); - } - } - - rc = srpc_startup(); - if (rc) { - CERROR("LST can't startup rpc\n"); - goto error; - } - lst_init_step = LST_INIT_RPC; - - rc = sfw_startup(); - if (rc) { - CERROR("LST can't startup framework\n"); - goto error; - } - lst_init_step = LST_INIT_FW; - - rc = lstcon_console_init(); - if (rc) { - CERROR("LST can't startup console\n"); - goto error; - } - lst_init_step = LST_INIT_CONSOLE; - return 0; -error: - lnet_selftest_exit(); - return rc; -} - -MODULE_AUTHOR("OpenSFS, Inc. 
<http://www.lustre.org/>"); -MODULE_DESCRIPTION("LNet Selftest"); -MODULE_VERSION("2.7.0"); -MODULE_LICENSE("GPL"); - -module_init(lnet_selftest_init); -module_exit(lnet_selftest_exit); diff --git a/drivers/staging/lustre/lnet/selftest/ping_test.c b/drivers/staging/lustre/lnet/selftest/ping_test.c deleted file mode 100644 index f54bd630dbf8..000000000000 --- a/drivers/staging/lustre/lnet/selftest/ping_test.c +++ /dev/null @@ -1,228 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -/* - * GPL HEADER START - * - * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 only, - * as published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, but - * WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * General Public License version 2 for more details (a copy is included - * in the LICENSE file that accompanied this code). - * - * You should have received a copy of the GNU General Public License - * version 2 along with this program; If not, see - * http://www.gnu.org/licenses/gpl-2.0.html - * - * GPL HEADER END - */ -/* - * Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved. - * Use is subject to license terms. - * - * Copyright (c) 2012, Intel Corporation. - */ -/* - * This file is part of Lustre, http://www.lustre.org/ - * Lustre is a trademark of Sun Microsystems, Inc. 
- * - * lnet/selftest/conctl.c - * - * Test client & Server - * - * Author: Liang Zhen <liangzhen@clusterfs.com> - */ - -#include "selftest.h" - -#define LST_PING_TEST_MAGIC 0xbabeface - -static int ping_srv_workitems = SFW_TEST_WI_MAX; -module_param(ping_srv_workitems, int, 0644); -MODULE_PARM_DESC(ping_srv_workitems, "# PING server workitems"); - -struct lst_ping_data { - spinlock_t pnd_lock; /* serialize */ - int pnd_counter; /* sequence counter */ -}; - -static struct lst_ping_data lst_ping_data; - -static int -ping_client_init(struct sfw_test_instance *tsi) -{ - struct sfw_session *sn = tsi->tsi_batch->bat_session; - - LASSERT(tsi->tsi_is_client); - LASSERT(sn && !(sn->sn_features & ~LST_FEATS_MASK)); - - spin_lock_init(&lst_ping_data.pnd_lock); - lst_ping_data.pnd_counter = 0; - - return 0; -} - -static void -ping_client_fini(struct sfw_test_instance *tsi) -{ - struct sfw_session *sn = tsi->tsi_batch->bat_session; - int errors; - - LASSERT(sn); - LASSERT(tsi->tsi_is_client); - - errors = atomic_read(&sn->sn_ping_errors); - if (errors) - CWARN("%d pings have failed.\n", errors); - else - CDEBUG(D_NET, "Ping test finished OK.\n"); -} - -static int -ping_client_prep_rpc(struct sfw_test_unit *tsu, struct lnet_process_id dest, - struct srpc_client_rpc **rpc) -{ - struct srpc_ping_reqst *req; - struct sfw_test_instance *tsi = tsu->tsu_instance; - struct sfw_session *sn = tsi->tsi_batch->bat_session; - struct timespec64 ts; - int rc; - - LASSERT(sn); - LASSERT(!(sn->sn_features & ~LST_FEATS_MASK)); - - rc = sfw_create_test_rpc(tsu, dest, sn->sn_features, 0, 0, rpc); - if (rc) - return rc; - - req = &(*rpc)->crpc_reqstmsg.msg_body.ping_reqst; - - req->pnr_magic = LST_PING_TEST_MAGIC; - - spin_lock(&lst_ping_data.pnd_lock); - req->pnr_seq = lst_ping_data.pnd_counter++; - spin_unlock(&lst_ping_data.pnd_lock); - - ktime_get_real_ts64(&ts); - req->pnr_time_sec = ts.tv_sec; - req->pnr_time_usec = ts.tv_nsec / NSEC_PER_USEC; - - return rc; -} - -static void 
-ping_client_done_rpc(struct sfw_test_unit *tsu, struct srpc_client_rpc *rpc) -{ - struct sfw_test_instance *tsi = tsu->tsu_instance; - struct sfw_session *sn = tsi->tsi_batch->bat_session; - struct srpc_ping_reqst *reqst = &rpc->crpc_reqstmsg.msg_body.ping_reqst; - struct srpc_ping_reply *reply = &rpc->crpc_replymsg.msg_body.ping_reply; - struct timespec64 ts; - - LASSERT(sn); - - if (rpc->crpc_status) { - if (!tsi->tsi_stopping) /* rpc could have been aborted */ - atomic_inc(&sn->sn_ping_errors); - CERROR("Unable to ping %s (%d): %d\n", - libcfs_id2str(rpc->crpc_dest), - reqst->pnr_seq, rpc->crpc_status); - return; - } - - if (rpc->crpc_replymsg.msg_magic != SRPC_MSG_MAGIC) { - __swab32s(&reply->pnr_seq); - __swab32s(&reply->pnr_magic); - __swab32s(&reply->pnr_status); - } - - if (reply->pnr_magic != LST_PING_TEST_MAGIC) { - rpc->crpc_status = -EBADMSG; - atomic_inc(&sn->sn_ping_errors); - CERROR("Bad magic %u from %s, %u expected.\n", - reply->pnr_magic, libcfs_id2str(rpc->crpc_dest), - LST_PING_TEST_MAGIC); - return; - } - - if (reply->pnr_seq != reqst->pnr_seq) { - rpc->crpc_status = -EBADMSG; - atomic_inc(&sn->sn_ping_errors); - CERROR("Bad seq %u from %s, %u expected.\n", - reply->pnr_seq, libcfs_id2str(rpc->crpc_dest), - reqst->pnr_seq); - return; - } - - ktime_get_real_ts64(&ts); - CDEBUG(D_NET, "%d reply in %u usec\n", reply->pnr_seq, - (unsigned int)((ts.tv_sec - reqst->pnr_time_sec) * 1000000 + - (ts.tv_nsec / NSEC_PER_USEC - reqst->pnr_time_usec))); -} - -static int -ping_server_handle(struct srpc_server_rpc *rpc) -{ - struct srpc_service *sv = rpc->srpc_scd->scd_svc; - struct srpc_msg *reqstmsg = &rpc->srpc_reqstbuf->buf_msg; - struct srpc_msg *replymsg = &rpc->srpc_replymsg; - struct srpc_ping_reqst *req = &reqstmsg->msg_body.ping_reqst; - struct srpc_ping_reply *rep = &rpc->srpc_replymsg.msg_body.ping_reply; - - LASSERT(sv->sv_id == SRPC_SERVICE_PING); - - if (reqstmsg->msg_magic != SRPC_MSG_MAGIC) { - LASSERT(reqstmsg->msg_magic == 
__swab32(SRPC_MSG_MAGIC)); - - __swab32s(&req->pnr_seq); - __swab32s(&req->pnr_magic); - __swab64s(&req->pnr_time_sec); - __swab64s(&req->pnr_time_usec); - } - LASSERT(reqstmsg->msg_type == srpc_service2request(sv->sv_id)); - - if (req->pnr_magic != LST_PING_TEST_MAGIC) { - CERROR("Unexpected magic %08x from %s\n", - req->pnr_magic, libcfs_id2str(rpc->srpc_peer)); - return -EINVAL; - } - - rep->pnr_seq = req->pnr_seq; - rep->pnr_magic = LST_PING_TEST_MAGIC; - - if (reqstmsg->msg_ses_feats & ~LST_FEATS_MASK) { - replymsg->msg_ses_feats = LST_FEATS_MASK; - rep->pnr_status = EPROTO; - return 0; - } - - replymsg->msg_ses_feats = reqstmsg->msg_ses_feats; - - CDEBUG(D_NET, "Get ping %d from %s\n", - req->pnr_seq, libcfs_id2str(rpc->srpc_peer)); - return 0; -} - -struct sfw_test_client_ops ping_test_client; - -void ping_init_test_client(void) -{ - ping_test_client.tso_init = ping_client_init; - ping_test_client.tso_fini = ping_client_fini; - ping_test_client.tso_prep_rpc = ping_client_prep_rpc; - ping_test_client.tso_done_rpc = ping_client_done_rpc; -} - -struct srpc_service ping_test_service; - -void ping_init_test_service(void) -{ - ping_test_service.sv_id = SRPC_SERVICE_PING; - ping_test_service.sv_name = "ping_test"; - ping_test_service.sv_handler = ping_server_handle; - ping_test_service.sv_wi_total = ping_srv_workitems; -} diff --git a/drivers/staging/lustre/lnet/selftest/rpc.c b/drivers/staging/lustre/lnet/selftest/rpc.c deleted file mode 100644 index 9613b0a77007..000000000000 --- a/drivers/staging/lustre/lnet/selftest/rpc.c +++ /dev/null @@ -1,1682 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -/* - * GPL HEADER START - * - * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 only, - * as published by the Free Software Foundation. 
- * - * This program is distributed in the hope that it will be useful, but - * WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * General Public License version 2 for more details (a copy is included - * in the LICENSE file that accompanied this code). - * - * You should have received a copy of the GNU General Public License - * version 2 along with this program; If not, see - * http://www.gnu.org/licenses/gpl-2.0.html - * - * GPL HEADER END - */ -/* - * Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved. - * Use is subject to license terms. - * - * Copyright (c) 2012, 2015, Intel Corporation. - */ -/* - * This file is part of Lustre, http://www.lustre.org/ - * Lustre is a trademark of Sun Microsystems, Inc. - * - * lnet/selftest/rpc.c - * - * Author: Isaac Huang <isaac@clusterfs.com> - * - * 2012-05-13: Liang Zhen <liang@whamcloud.com> - * - percpt data for service to improve smp performance - * - code cleanup - */ - -#define DEBUG_SUBSYSTEM S_LNET - -#include "selftest.h" - -enum srpc_state { - SRPC_STATE_NONE, - SRPC_STATE_NI_INIT, - SRPC_STATE_EQ_INIT, - SRPC_STATE_RUNNING, - SRPC_STATE_STOPPING, -}; - -static struct smoketest_rpc { - spinlock_t rpc_glock; /* global lock */ - struct srpc_service *rpc_services[SRPC_SERVICE_MAX_ID + 1]; - struct lnet_handle_eq rpc_lnet_eq; /* _the_ LNet event queue */ - enum srpc_state rpc_state; - struct srpc_counters rpc_counters; - __u64 rpc_matchbits; /* matchbits counter */ -} srpc_data; - -static inline int -srpc_serv_portal(int svc_id) -{ - return svc_id < SRPC_FRAMEWORK_SERVICE_MAX_ID ? 
- SRPC_FRAMEWORK_REQUEST_PORTAL : SRPC_REQUEST_PORTAL; -} - -/* forward ref's */ -void srpc_handle_rpc(struct swi_workitem *wi); - -void srpc_get_counters(struct srpc_counters *cnt) -{ - spin_lock(&srpc_data.rpc_glock); - *cnt = srpc_data.rpc_counters; - spin_unlock(&srpc_data.rpc_glock); -} - -void srpc_set_counters(const struct srpc_counters *cnt) -{ - spin_lock(&srpc_data.rpc_glock); - srpc_data.rpc_counters = *cnt; - spin_unlock(&srpc_data.rpc_glock); -} - -static int -srpc_add_bulk_page(struct srpc_bulk *bk, struct page *pg, int i, int off, - int nob) -{ - LASSERT(off < PAGE_SIZE); - LASSERT(nob > 0 && nob <= PAGE_SIZE); - - bk->bk_iovs[i].bv_offset = off; - bk->bk_iovs[i].bv_page = pg; - bk->bk_iovs[i].bv_len = nob; - return nob; -} - -void -srpc_free_bulk(struct srpc_bulk *bk) -{ - int i; - struct page *pg; - - LASSERT(bk); - - for (i = 0; i < bk->bk_niov; i++) { - pg = bk->bk_iovs[i].bv_page; - if (!pg) - break; - - __free_page(pg); - } - - kfree(bk); -} - -struct srpc_bulk * -srpc_alloc_bulk(int cpt, unsigned int bulk_off, unsigned int bulk_npg, - unsigned int bulk_len, int sink) -{ - struct srpc_bulk *bk; - int i; - - LASSERT(bulk_npg > 0 && bulk_npg <= LNET_MAX_IOV); - - bk = kzalloc_cpt(offsetof(struct srpc_bulk, bk_iovs[bulk_npg]), - GFP_KERNEL, cpt); - if (!bk) { - CERROR("Can't allocate descriptor for %d pages\n", bulk_npg); - return NULL; - } - - memset(bk, 0, offsetof(struct srpc_bulk, bk_iovs[bulk_npg])); - bk->bk_sink = sink; - bk->bk_len = bulk_len; - bk->bk_niov = bulk_npg; - - for (i = 0; i < bulk_npg; i++) { - struct page *pg; - int nob; - - pg = alloc_pages_node(cfs_cpt_spread_node(lnet_cpt_table(), cpt), - GFP_KERNEL, 0); - if (!pg) { - CERROR("Can't allocate page %d of %d\n", i, bulk_npg); - srpc_free_bulk(bk); - return NULL; - } - - nob = min_t(unsigned int, bulk_off + bulk_len, PAGE_SIZE) - - bulk_off; - srpc_add_bulk_page(bk, pg, i, bulk_off, nob); - bulk_len -= nob; - bulk_off = 0; - } - - return bk; -} - -static inline __u64 
-srpc_next_id(void) -{ - __u64 id; - - spin_lock(&srpc_data.rpc_glock); - id = srpc_data.rpc_matchbits++; - spin_unlock(&srpc_data.rpc_glock); - return id; -} - -static void -srpc_init_server_rpc(struct srpc_server_rpc *rpc, - struct srpc_service_cd *scd, - struct srpc_buffer *buffer) -{ - memset(rpc, 0, sizeof(*rpc)); - swi_init_workitem(&rpc->srpc_wi, srpc_handle_rpc, - srpc_serv_is_framework(scd->scd_svc) ? - lst_serial_wq : lst_test_wq[scd->scd_cpt]); - - rpc->srpc_ev.ev_fired = 1; /* no event expected now */ - - rpc->srpc_scd = scd; - rpc->srpc_reqstbuf = buffer; - rpc->srpc_peer = buffer->buf_peer; - rpc->srpc_self = buffer->buf_self; - LNetInvalidateMDHandle(&rpc->srpc_replymdh); -} - -static void -srpc_service_fini(struct srpc_service *svc) -{ - struct srpc_service_cd *scd; - struct srpc_server_rpc *rpc; - struct srpc_buffer *buf; - struct list_head *q; - int i; - - if (!svc->sv_cpt_data) - return; - - cfs_percpt_for_each(scd, i, svc->sv_cpt_data) { - while (1) { - if (!list_empty(&scd->scd_buf_posted)) - q = &scd->scd_buf_posted; - else if (!list_empty(&scd->scd_buf_blocked)) - q = &scd->scd_buf_blocked; - else - break; - - while (!list_empty(q)) { - buf = list_entry(q->next, struct srpc_buffer, - buf_list); - list_del(&buf->buf_list); - kfree(buf); - } - } - - LASSERT(list_empty(&scd->scd_rpc_active)); - - while (!list_empty(&scd->scd_rpc_free)) { - rpc = list_entry(scd->scd_rpc_free.next, - struct srpc_server_rpc, - srpc_list); - list_del(&rpc->srpc_list); - kfree(rpc); - } - } - - cfs_percpt_free(svc->sv_cpt_data); - svc->sv_cpt_data = NULL; -} - -static int -srpc_service_nrpcs(struct srpc_service *svc) -{ - int nrpcs = svc->sv_wi_total / svc->sv_ncpts; - - return srpc_serv_is_framework(svc) ? 
- max(nrpcs, SFW_FRWK_WI_MIN) : max(nrpcs, SFW_TEST_WI_MIN); -} - -void srpc_add_buffer(struct swi_workitem *wi); - -static int -srpc_service_init(struct srpc_service *svc) -{ - struct srpc_service_cd *scd; - struct srpc_server_rpc *rpc; - int nrpcs; - int i; - int j; - - svc->sv_shuttingdown = 0; - - svc->sv_cpt_data = cfs_percpt_alloc(lnet_cpt_table(), - sizeof(**svc->sv_cpt_data)); - if (!svc->sv_cpt_data) - return -ENOMEM; - - svc->sv_ncpts = srpc_serv_is_framework(svc) ? - 1 : cfs_cpt_number(lnet_cpt_table()); - nrpcs = srpc_service_nrpcs(svc); - - cfs_percpt_for_each(scd, i, svc->sv_cpt_data) { - scd->scd_cpt = i; - scd->scd_svc = svc; - spin_lock_init(&scd->scd_lock); - INIT_LIST_HEAD(&scd->scd_rpc_free); - INIT_LIST_HEAD(&scd->scd_rpc_active); - INIT_LIST_HEAD(&scd->scd_buf_posted); - INIT_LIST_HEAD(&scd->scd_buf_blocked); - - scd->scd_ev.ev_data = scd; - scd->scd_ev.ev_type = SRPC_REQUEST_RCVD; - - /* - * NB: don't use lst_serial_wq for adding buffer, - * see details in srpc_service_add_buffers() - */ - swi_init_workitem(&scd->scd_buf_wi, - srpc_add_buffer, lst_test_wq[i]); - - if (i && srpc_serv_is_framework(svc)) { - /* - * NB: framework service only needs srpc_service_cd for - * one partition, but we allocate for all to make - * it easier to implement, it will waste a little - * memory but nobody should care about this - */ - continue; - } - - for (j = 0; j < nrpcs; j++) { - rpc = kzalloc_cpt(sizeof(*rpc), GFP_NOFS, i); - if (!rpc) { - srpc_service_fini(svc); - return -ENOMEM; - } - list_add(&rpc->srpc_list, &scd->scd_rpc_free); - } - } - - return 0; -} - -int -srpc_add_service(struct srpc_service *sv) -{ - int id = sv->sv_id; - - LASSERT(0 <= id && id <= SRPC_SERVICE_MAX_ID); - - if (srpc_service_init(sv)) - return -ENOMEM; - - spin_lock(&srpc_data.rpc_glock); - - LASSERT(srpc_data.rpc_state == SRPC_STATE_RUNNING); - - if (srpc_data.rpc_services[id]) { - spin_unlock(&srpc_data.rpc_glock); - goto failed; - } - - srpc_data.rpc_services[id] = sv; - 
spin_unlock(&srpc_data.rpc_glock); - - CDEBUG(D_NET, "Adding service: id %d, name %s\n", id, sv->sv_name); - return 0; - - failed: - srpc_service_fini(sv); - return -EBUSY; -} - -int -srpc_remove_service(struct srpc_service *sv) -{ - int id = sv->sv_id; - - spin_lock(&srpc_data.rpc_glock); - - if (srpc_data.rpc_services[id] != sv) { - spin_unlock(&srpc_data.rpc_glock); - return -ENOENT; - } - - srpc_data.rpc_services[id] = NULL; - spin_unlock(&srpc_data.rpc_glock); - return 0; -} - -static int -srpc_post_passive_rdma(int portal, int local, __u64 matchbits, void *buf, - int len, int options, struct lnet_process_id peer, - struct lnet_handle_md *mdh, struct srpc_event *ev) -{ - int rc; - struct lnet_md md; - struct lnet_handle_me meh; - - rc = LNetMEAttach(portal, peer, matchbits, 0, LNET_UNLINK, - local ? LNET_INS_LOCAL : LNET_INS_AFTER, &meh); - if (rc) { - CERROR("LNetMEAttach failed: %d\n", rc); - LASSERT(rc == -ENOMEM); - return -ENOMEM; - } - - md.threshold = 1; - md.user_ptr = ev; - md.start = buf; - md.length = len; - md.options = options; - md.eq_handle = srpc_data.rpc_lnet_eq; - - rc = LNetMDAttach(meh, md, LNET_UNLINK, mdh); - if (rc) { - CERROR("LNetMDAttach failed: %d\n", rc); - LASSERT(rc == -ENOMEM); - - rc = LNetMEUnlink(meh); - LASSERT(!rc); - return -ENOMEM; - } - - CDEBUG(D_NET, "Posted passive RDMA: peer %s, portal %d, matchbits %#llx\n", - libcfs_id2str(peer), portal, matchbits); - return 0; -} - -static int -srpc_post_active_rdma(int portal, __u64 matchbits, void *buf, int len, - int options, struct lnet_process_id peer, - lnet_nid_t self, struct lnet_handle_md *mdh, - struct srpc_event *ev) -{ - int rc; - struct lnet_md md; - - md.user_ptr = ev; - md.start = buf; - md.length = len; - md.eq_handle = srpc_data.rpc_lnet_eq; - md.threshold = options & LNET_MD_OP_GET ? 
2 : 1; - md.options = options & ~(LNET_MD_OP_PUT | LNET_MD_OP_GET); - - rc = LNetMDBind(md, LNET_UNLINK, mdh); - if (rc) { - CERROR("LNetMDBind failed: %d\n", rc); - LASSERT(rc == -ENOMEM); - return -ENOMEM; - } - - /* - * this is kind of an abuse of the LNET_MD_OP_{PUT,GET} options. - * they're only meaningful for MDs attached to an ME (i.e. passive - * buffers... - */ - if (options & LNET_MD_OP_PUT) { - rc = LNetPut(self, *mdh, LNET_NOACK_REQ, peer, - portal, matchbits, 0, 0); - } else { - LASSERT(options & LNET_MD_OP_GET); - - rc = LNetGet(self, *mdh, peer, portal, matchbits, 0); - } - - if (rc) { - CERROR("LNet%s(%s, %d, %lld) failed: %d\n", - options & LNET_MD_OP_PUT ? "Put" : "Get", - libcfs_id2str(peer), portal, matchbits, rc); - - /* - * The forthcoming unlink event will complete this operation - * with failure, so fall through and return success here. - */ - rc = LNetMDUnlink(*mdh); - LASSERT(!rc); - } else { - CDEBUG(D_NET, "Posted active RDMA: peer %s, portal %u, matchbits %#llx\n", - libcfs_id2str(peer), portal, matchbits); - } - return 0; -} - -static int -srpc_post_passive_rqtbuf(int service, int local, void *buf, int len, - struct lnet_handle_md *mdh, struct srpc_event *ev) -{ - struct lnet_process_id any = { 0 }; - - any.nid = LNET_NID_ANY; - any.pid = LNET_PID_ANY; - - return srpc_post_passive_rdma(srpc_serv_portal(service), - local, service, buf, len, - LNET_MD_OP_PUT, any, mdh, ev); -} - -static int -srpc_service_post_buffer(struct srpc_service_cd *scd, struct srpc_buffer *buf) -__must_hold(&scd->scd_lock) -{ - struct srpc_service *sv = scd->scd_svc; - struct srpc_msg *msg = &buf->buf_msg; - int rc; - - LNetInvalidateMDHandle(&buf->buf_mdh); - list_add(&buf->buf_list, &scd->scd_buf_posted); - scd->scd_buf_nposted++; - spin_unlock(&scd->scd_lock); - - rc = srpc_post_passive_rqtbuf(sv->sv_id, - !srpc_serv_is_framework(sv), - msg, sizeof(*msg), &buf->buf_mdh, - &scd->scd_ev); - - /* - * At this point, a RPC (new or delayed) may have arrived in - * 
msg and its event handler has been called. So we must add - * buf to scd_buf_posted _before_ dropping scd_lock - */ - spin_lock(&scd->scd_lock); - - if (!rc) { - if (!sv->sv_shuttingdown) - return 0; - - spin_unlock(&scd->scd_lock); - /* - * srpc_shutdown_service might have tried to unlink me - * when my buf_mdh was still invalid - */ - LNetMDUnlink(buf->buf_mdh); - spin_lock(&scd->scd_lock); - return 0; - } - - scd->scd_buf_nposted--; - if (sv->sv_shuttingdown) - return rc; /* don't allow to change scd_buf_posted */ - - list_del(&buf->buf_list); - spin_unlock(&scd->scd_lock); - - kfree(buf); - - spin_lock(&scd->scd_lock); - return rc; -} - -void -srpc_add_buffer(struct swi_workitem *wi) -{ - struct srpc_service_cd *scd = container_of(wi, struct srpc_service_cd, scd_buf_wi); - struct srpc_buffer *buf; - int rc = 0; - - /* - * it's called by workitem scheduler threads, these threads - * should have been set CPT affinity, so buffers will be posted - * on CPT local list of Portal - */ - spin_lock(&scd->scd_lock); - - while (scd->scd_buf_adjust > 0 && - !scd->scd_svc->sv_shuttingdown) { - scd->scd_buf_adjust--; /* consume it */ - scd->scd_buf_posting++; - - spin_unlock(&scd->scd_lock); - - buf = kzalloc(sizeof(*buf), GFP_NOFS); - if (!buf) { - CERROR("Failed to add new buf to service: %s\n", - scd->scd_svc->sv_name); - spin_lock(&scd->scd_lock); - rc = -ENOMEM; - break; - } - - spin_lock(&scd->scd_lock); - if (scd->scd_svc->sv_shuttingdown) { - spin_unlock(&scd->scd_lock); - kfree(buf); - - spin_lock(&scd->scd_lock); - rc = -ESHUTDOWN; - break; - } - - rc = srpc_service_post_buffer(scd, buf); - if (rc) - break; /* buf has been freed inside */ - - LASSERT(scd->scd_buf_posting > 0); - scd->scd_buf_posting--; - scd->scd_buf_total++; - scd->scd_buf_low = max(2, scd->scd_buf_total / 4); - } - - if (rc) { - scd->scd_buf_err_stamp = ktime_get_real_seconds(); - scd->scd_buf_err = rc; - - LASSERT(scd->scd_buf_posting > 0); - scd->scd_buf_posting--; - } - - 
spin_unlock(&scd->scd_lock); -} - -int -srpc_service_add_buffers(struct srpc_service *sv, int nbuffer) -{ - struct srpc_service_cd *scd; - int rc = 0; - int i; - - LASSERTF(nbuffer > 0, "nbuffer must be positive: %d\n", nbuffer); - - cfs_percpt_for_each(scd, i, sv->sv_cpt_data) { - spin_lock(&scd->scd_lock); - - scd->scd_buf_err = 0; - scd->scd_buf_err_stamp = 0; - scd->scd_buf_posting = 0; - scd->scd_buf_adjust = nbuffer; - /* start to post buffers */ - swi_schedule_workitem(&scd->scd_buf_wi); - spin_unlock(&scd->scd_lock); - - /* framework service only post buffer for one partition */ - if (srpc_serv_is_framework(sv)) - break; - } - - cfs_percpt_for_each(scd, i, sv->sv_cpt_data) { - spin_lock(&scd->scd_lock); - /* - * NB: srpc_service_add_buffers() can be called inside - * thread context of lst_serial_wq, and we don't normally - * allow to sleep inside thread context of WI scheduler - * because it will block current scheduler thread from doing - * anything else, even worse, it could deadlock if it's - * waiting on result from another WI of the same scheduler. - * However, it's safe at here because scd_buf_wi is scheduled - * by thread in a different WI scheduler (lst_test_wq), - * so we don't have any risk of deadlock, though this could - * block all WIs pending on lst_serial_wq for a moment - * which is not good but not fatal. 
- */ - lst_wait_until(scd->scd_buf_err || - (!scd->scd_buf_adjust && - !scd->scd_buf_posting), - scd->scd_lock, "waiting for adding buffer\n"); - - if (scd->scd_buf_err && !rc) - rc = scd->scd_buf_err; - - spin_unlock(&scd->scd_lock); - } - - return rc; -} - -void -srpc_service_remove_buffers(struct srpc_service *sv, int nbuffer) -{ - struct srpc_service_cd *scd; - int num; - int i; - - LASSERT(!sv->sv_shuttingdown); - - cfs_percpt_for_each(scd, i, sv->sv_cpt_data) { - spin_lock(&scd->scd_lock); - - num = scd->scd_buf_total + scd->scd_buf_posting; - scd->scd_buf_adjust -= min(nbuffer, num); - - spin_unlock(&scd->scd_lock); - } -} - -/* returns 1 if sv has finished, otherwise 0 */ -int -srpc_finish_service(struct srpc_service *sv) -{ - struct srpc_service_cd *scd; - struct srpc_server_rpc *rpc; - int i; - - LASSERT(sv->sv_shuttingdown); /* srpc_shutdown_service called */ - - cfs_percpt_for_each(scd, i, sv->sv_cpt_data) { - swi_cancel_workitem(&scd->scd_buf_wi); - - spin_lock(&scd->scd_lock); - - if (scd->scd_buf_nposted > 0) { - CDEBUG(D_NET, "waiting for %d posted buffers to unlink\n", - scd->scd_buf_nposted); - spin_unlock(&scd->scd_lock); - return 0; - } - - if (list_empty(&scd->scd_rpc_active)) { - spin_unlock(&scd->scd_lock); - continue; - } - - rpc = list_entry(scd->scd_rpc_active.next, - struct srpc_server_rpc, srpc_list); - CNETERR("Active RPC %p on shutdown: sv %s, peer %s, wi %s, ev fired %d type %d status %d lnet %d\n", - rpc, sv->sv_name, libcfs_id2str(rpc->srpc_peer), - swi_state2str(rpc->srpc_wi.swi_state), - rpc->srpc_ev.ev_fired, rpc->srpc_ev.ev_type, - rpc->srpc_ev.ev_status, rpc->srpc_ev.ev_lnet); - spin_unlock(&scd->scd_lock); - return 0; - } - - /* no lock needed from now on */ - srpc_service_fini(sv); - return 1; -} - -/* called with sv->sv_lock held */ -static void -srpc_service_recycle_buffer(struct srpc_service_cd *scd, - struct srpc_buffer *buf) -__must_hold(&scd->scd_lock) -{ - if (!scd->scd_svc->sv_shuttingdown && scd->scd_buf_adjust >= 0) 
{ - if (srpc_service_post_buffer(scd, buf)) { - CWARN("Failed to post %s buffer\n", - scd->scd_svc->sv_name); - } - return; - } - - /* service is shutting down, or we want to recycle some buffers */ - scd->scd_buf_total--; - - if (scd->scd_buf_adjust < 0) { - scd->scd_buf_adjust++; - if (scd->scd_buf_adjust < 0 && - !scd->scd_buf_total && !scd->scd_buf_posting) { - CDEBUG(D_INFO, - "Try to recycle %d buffers but nothing left\n", - scd->scd_buf_adjust); - scd->scd_buf_adjust = 0; - } - } - - spin_unlock(&scd->scd_lock); - kfree(buf); - spin_lock(&scd->scd_lock); -} - -void -srpc_abort_service(struct srpc_service *sv) -{ - struct srpc_service_cd *scd; - struct srpc_server_rpc *rpc; - int i; - - CDEBUG(D_NET, "Aborting service: id %d, name %s\n", - sv->sv_id, sv->sv_name); - - cfs_percpt_for_each(scd, i, sv->sv_cpt_data) { - spin_lock(&scd->scd_lock); - - /* - * schedule in-flight RPCs to notice the abort, NB: - * racing with incoming RPCs; complete fix should make test - * RPCs carry session ID in its headers - */ - list_for_each_entry(rpc, &scd->scd_rpc_active, srpc_list) { - rpc->srpc_aborted = 1; - swi_schedule_workitem(&rpc->srpc_wi); - } - - spin_unlock(&scd->scd_lock); - } -} - -void -srpc_shutdown_service(struct srpc_service *sv) -{ - struct srpc_service_cd *scd; - struct srpc_server_rpc *rpc; - struct srpc_buffer *buf; - int i; - - CDEBUG(D_NET, "Shutting down service: id %d, name %s\n", - sv->sv_id, sv->sv_name); - - cfs_percpt_for_each(scd, i, sv->sv_cpt_data) - spin_lock(&scd->scd_lock); - - sv->sv_shuttingdown = 1; /* i.e. 
no new active RPC */ - - cfs_percpt_for_each(scd, i, sv->sv_cpt_data) - spin_unlock(&scd->scd_lock); - - cfs_percpt_for_each(scd, i, sv->sv_cpt_data) { - spin_lock(&scd->scd_lock); - - /* schedule in-flight RPCs to notice the shutdown */ - list_for_each_entry(rpc, &scd->scd_rpc_active, srpc_list) - swi_schedule_workitem(&rpc->srpc_wi); - - spin_unlock(&scd->scd_lock); - - /* - * OK to traverse scd_buf_posted without lock, since no one - * touches scd_buf_posted now - */ - list_for_each_entry(buf, &scd->scd_buf_posted, buf_list) - LNetMDUnlink(buf->buf_mdh); - } -} - -static int -srpc_send_request(struct srpc_client_rpc *rpc) -{ - struct srpc_event *ev = &rpc->crpc_reqstev; - int rc; - - ev->ev_fired = 0; - ev->ev_data = rpc; - ev->ev_type = SRPC_REQUEST_SENT; - - rc = srpc_post_active_rdma(srpc_serv_portal(rpc->crpc_service), - rpc->crpc_service, &rpc->crpc_reqstmsg, - sizeof(struct srpc_msg), LNET_MD_OP_PUT, - rpc->crpc_dest, LNET_NID_ANY, - &rpc->crpc_reqstmdh, ev); - if (rc) { - LASSERT(rc == -ENOMEM); - ev->ev_fired = 1; /* no more event expected */ - } - return rc; -} - -static int -srpc_prepare_reply(struct srpc_client_rpc *rpc) -{ - struct srpc_event *ev = &rpc->crpc_replyev; - __u64 *id = &rpc->crpc_reqstmsg.msg_body.reqst.rpyid; - int rc; - - ev->ev_fired = 0; - ev->ev_data = rpc; - ev->ev_type = SRPC_REPLY_RCVD; - - *id = srpc_next_id(); - - rc = srpc_post_passive_rdma(SRPC_RDMA_PORTAL, 0, *id, - &rpc->crpc_replymsg, - sizeof(struct srpc_msg), - LNET_MD_OP_PUT, rpc->crpc_dest, - &rpc->crpc_replymdh, ev); - if (rc) { - LASSERT(rc == -ENOMEM); - ev->ev_fired = 1; /* no more event expected */ - } - return rc; -} - -static int -srpc_prepare_bulk(struct srpc_client_rpc *rpc) -{ - struct srpc_bulk *bk = &rpc->crpc_bulk; - struct srpc_event *ev = &rpc->crpc_bulkev; - __u64 *id = &rpc->crpc_reqstmsg.msg_body.reqst.bulkid; - int rc; - int opt; - - LASSERT(bk->bk_niov <= LNET_MAX_IOV); - - if (!bk->bk_niov) - return 0; /* nothing to do */ - - opt = bk->bk_sink ? 
LNET_MD_OP_PUT : LNET_MD_OP_GET; - opt |= LNET_MD_KIOV; - - ev->ev_fired = 0; - ev->ev_data = rpc; - ev->ev_type = SRPC_BULK_REQ_RCVD; - - *id = srpc_next_id(); - - rc = srpc_post_passive_rdma(SRPC_RDMA_PORTAL, 0, *id, - &bk->bk_iovs[0], bk->bk_niov, opt, - rpc->crpc_dest, &bk->bk_mdh, ev); - if (rc) { - LASSERT(rc == -ENOMEM); - ev->ev_fired = 1; /* no more event expected */ - } - return rc; -} - -static int -srpc_do_bulk(struct srpc_server_rpc *rpc) -{ - struct srpc_event *ev = &rpc->srpc_ev; - struct srpc_bulk *bk = rpc->srpc_bulk; - __u64 id = rpc->srpc_reqstbuf->buf_msg.msg_body.reqst.bulkid; - int rc; - int opt; - - LASSERT(bk); - - opt = bk->bk_sink ? LNET_MD_OP_GET : LNET_MD_OP_PUT; - opt |= LNET_MD_KIOV; - - ev->ev_fired = 0; - ev->ev_data = rpc; - ev->ev_type = bk->bk_sink ? SRPC_BULK_GET_RPLD : SRPC_BULK_PUT_SENT; - - rc = srpc_post_active_rdma(SRPC_RDMA_PORTAL, id, - &bk->bk_iovs[0], bk->bk_niov, opt, - rpc->srpc_peer, rpc->srpc_self, - &bk->bk_mdh, ev); - if (rc) - ev->ev_fired = 1; /* no more event expected */ - return rc; -} - -/* only called from srpc_handle_rpc */ -static void -srpc_server_rpc_done(struct srpc_server_rpc *rpc, int status) -{ - struct srpc_service_cd *scd = rpc->srpc_scd; - struct srpc_service *sv = scd->scd_svc; - struct srpc_buffer *buffer; - - LASSERT(status || rpc->srpc_wi.swi_state == SWI_STATE_DONE); - - rpc->srpc_status = status; - - CDEBUG_LIMIT(!status ? 
D_NET : D_NETERROR, - "Server RPC %p done: service %s, peer %s, status %s:%d\n", - rpc, sv->sv_name, libcfs_id2str(rpc->srpc_peer), - swi_state2str(rpc->srpc_wi.swi_state), status); - - if (status) { - spin_lock(&srpc_data.rpc_glock); - srpc_data.rpc_counters.rpcs_dropped++; - spin_unlock(&srpc_data.rpc_glock); - } - - if (rpc->srpc_done) - (*rpc->srpc_done) (rpc); - LASSERT(!rpc->srpc_bulk); - - spin_lock(&scd->scd_lock); - - if (rpc->srpc_reqstbuf) { - /* - * NB might drop sv_lock in srpc_service_recycle_buffer, but - * sv won't go away for scd_rpc_active must not be empty - */ - srpc_service_recycle_buffer(scd, rpc->srpc_reqstbuf); - rpc->srpc_reqstbuf = NULL; - } - - list_del(&rpc->srpc_list); /* from scd->scd_rpc_active */ - - /* - * No one can schedule me now since: - * - I'm not on scd_rpc_active. - * - all LNet events have been fired. - * Cancel pending schedules and prevent future schedule attempts: - */ - LASSERT(rpc->srpc_ev.ev_fired); - - if (!sv->sv_shuttingdown && !list_empty(&scd->scd_buf_blocked)) { - buffer = list_entry(scd->scd_buf_blocked.next, - struct srpc_buffer, buf_list); - list_del(&buffer->buf_list); - - srpc_init_server_rpc(rpc, scd, buffer); - list_add_tail(&rpc->srpc_list, &scd->scd_rpc_active); - swi_schedule_workitem(&rpc->srpc_wi); - } else { - list_add(&rpc->srpc_list, &scd->scd_rpc_free); - } - - spin_unlock(&scd->scd_lock); -} - -/* handles an incoming RPC */ -void -srpc_handle_rpc(struct swi_workitem *wi) -{ - struct srpc_server_rpc *rpc = container_of(wi, struct srpc_server_rpc, srpc_wi); - struct srpc_service_cd *scd = rpc->srpc_scd; - struct srpc_service *sv = scd->scd_svc; - struct srpc_event *ev = &rpc->srpc_ev; - int rc = 0; - - LASSERT(wi == &rpc->srpc_wi); - - spin_lock(&scd->scd_lock); - - if (sv->sv_shuttingdown || rpc->srpc_aborted) { - spin_unlock(&scd->scd_lock); - - if (rpc->srpc_bulk) - LNetMDUnlink(rpc->srpc_bulk->bk_mdh); - LNetMDUnlink(rpc->srpc_replymdh); - - if (ev->ev_fired) { /* no more event, OK to finish 
*/ - srpc_server_rpc_done(rpc, -ESHUTDOWN); - } - return; - } - - spin_unlock(&scd->scd_lock); - - switch (wi->swi_state) { - default: - LBUG(); - case SWI_STATE_NEWBORN: { - struct srpc_msg *msg; - struct srpc_generic_reply *reply; - - msg = &rpc->srpc_reqstbuf->buf_msg; - reply = &rpc->srpc_replymsg.msg_body.reply; - - if (!msg->msg_magic) { - /* moaned already in srpc_lnet_ev_handler */ - srpc_server_rpc_done(rpc, EBADMSG); - return; - } - - srpc_unpack_msg_hdr(msg); - if (msg->msg_version != SRPC_MSG_VERSION) { - CWARN("Version mismatch: %u, %u expected, from %s\n", - msg->msg_version, SRPC_MSG_VERSION, - libcfs_id2str(rpc->srpc_peer)); - reply->status = EPROTO; - /* drop through and send reply */ - } else { - reply->status = 0; - rc = (*sv->sv_handler)(rpc); - LASSERT(!reply->status || !rpc->srpc_bulk); - if (rc) { - srpc_server_rpc_done(rpc, rc); - return; - } - } - - wi->swi_state = SWI_STATE_BULK_STARTED; - - if (rpc->srpc_bulk) { - rc = srpc_do_bulk(rpc); - if (!rc) - return; /* wait for bulk */ - - LASSERT(ev->ev_fired); - ev->ev_status = rc; - } - } - /* fall through */ - case SWI_STATE_BULK_STARTED: - LASSERT(!rpc->srpc_bulk || ev->ev_fired); - - if (rpc->srpc_bulk) { - rc = ev->ev_status; - - if (sv->sv_bulk_ready) - rc = (*sv->sv_bulk_ready) (rpc, rc); - - if (rc) { - srpc_server_rpc_done(rpc, rc); - return; - } - } - - wi->swi_state = SWI_STATE_REPLY_SUBMITTED; - rc = srpc_send_reply(rpc); - if (!rc) - return; /* wait for reply */ - srpc_server_rpc_done(rpc, rc); - return; - - case SWI_STATE_REPLY_SUBMITTED: - if (!ev->ev_fired) { - CERROR("RPC %p: bulk %p, service %d\n", - rpc, rpc->srpc_bulk, sv->sv_id); - CERROR("Event: status %d, type %d, lnet %d\n", - ev->ev_status, ev->ev_type, ev->ev_lnet); - LASSERT(ev->ev_fired); - } - - wi->swi_state = SWI_STATE_DONE; - srpc_server_rpc_done(rpc, ev->ev_status); - return; - } -} - -static void -srpc_client_rpc_expired(void *data) -{ - struct srpc_client_rpc *rpc = data; - - CWARN("Client RPC expired: service 
%d, peer %s, timeout %d.\n", - rpc->crpc_service, libcfs_id2str(rpc->crpc_dest), - rpc->crpc_timeout); - - spin_lock(&rpc->crpc_lock); - - rpc->crpc_timeout = 0; - srpc_abort_rpc(rpc, -ETIMEDOUT); - - spin_unlock(&rpc->crpc_lock); - - spin_lock(&srpc_data.rpc_glock); - srpc_data.rpc_counters.rpcs_expired++; - spin_unlock(&srpc_data.rpc_glock); -} - -static void -srpc_add_client_rpc_timer(struct srpc_client_rpc *rpc) -{ - struct stt_timer *timer = &rpc->crpc_timer; - - if (!rpc->crpc_timeout) - return; - - INIT_LIST_HEAD(&timer->stt_list); - timer->stt_data = rpc; - timer->stt_func = srpc_client_rpc_expired; - timer->stt_expires = ktime_get_real_seconds() + rpc->crpc_timeout; - stt_add_timer(timer); -} - -/* - * Called with rpc->crpc_lock held. - * - * Upon exit the RPC expiry timer is not queued and the handler is not - * running on any CPU. - */ -static void -srpc_del_client_rpc_timer(struct srpc_client_rpc *rpc) -{ - /* timer not planted or already exploded */ - if (!rpc->crpc_timeout) - return; - - /* timer successfully defused */ - if (stt_del_timer(&rpc->crpc_timer)) - return; - - /* timer detonated, wait for it to explode */ - while (rpc->crpc_timeout) { - spin_unlock(&rpc->crpc_lock); - - schedule(); - - spin_lock(&rpc->crpc_lock); - } -} - -static void -srpc_client_rpc_done(struct srpc_client_rpc *rpc, int status) -{ - struct swi_workitem *wi = &rpc->crpc_wi; - - LASSERT(status || wi->swi_state == SWI_STATE_DONE); - - spin_lock(&rpc->crpc_lock); - - rpc->crpc_closed = 1; - if (!rpc->crpc_status) - rpc->crpc_status = status; - - srpc_del_client_rpc_timer(rpc); - - CDEBUG_LIMIT(!status ? D_NET : D_NETERROR, - "Client RPC done: service %d, peer %s, status %s:%d:%d\n", - rpc->crpc_service, libcfs_id2str(rpc->crpc_dest), - swi_state2str(wi->swi_state), rpc->crpc_aborted, status); - - /* - * No one can schedule me now since: - * - RPC timer has been defused. - * - all LNet events have been fired. 
- * - crpc_closed has been set, preventing srpc_abort_rpc from - * scheduling me. - * Cancel pending schedules and prevent future schedule attempts: - */ - LASSERT(!srpc_event_pending(rpc)); - - spin_unlock(&rpc->crpc_lock); - - (*rpc->crpc_done)(rpc); -} - -/* sends an outgoing RPC */ -void -srpc_send_rpc(struct swi_workitem *wi) -{ - int rc = 0; - struct srpc_client_rpc *rpc; - struct srpc_msg *reply; - int do_bulk; - - LASSERT(wi); - - rpc = container_of(wi, struct srpc_client_rpc, crpc_wi); - - LASSERT(rpc); - LASSERT(wi == &rpc->crpc_wi); - - reply = &rpc->crpc_replymsg; - do_bulk = rpc->crpc_bulk.bk_niov > 0; - - spin_lock(&rpc->crpc_lock); - - if (rpc->crpc_aborted) { - spin_unlock(&rpc->crpc_lock); - goto abort; - } - - spin_unlock(&rpc->crpc_lock); - - switch (wi->swi_state) { - default: - LBUG(); - case SWI_STATE_NEWBORN: - LASSERT(!srpc_event_pending(rpc)); - - rc = srpc_prepare_reply(rpc); - if (rc) { - srpc_client_rpc_done(rpc, rc); - return; - } - - rc = srpc_prepare_bulk(rpc); - if (rc) - break; - - wi->swi_state = SWI_STATE_REQUEST_SUBMITTED; - rc = srpc_send_request(rpc); - break; - - case SWI_STATE_REQUEST_SUBMITTED: - /* - * CAVEAT EMPTOR: rqtev, rpyev, and bulkev may come in any - * order; however, they're processed in a strict order: - * rqt, rpy, and bulk. 
- */ - if (!rpc->crpc_reqstev.ev_fired) - break; - - rc = rpc->crpc_reqstev.ev_status; - if (rc) - break; - - wi->swi_state = SWI_STATE_REQUEST_SENT; - /* perhaps more events */ - /* fall through */ - case SWI_STATE_REQUEST_SENT: { - enum srpc_msg_type type = srpc_service2reply(rpc->crpc_service); - - if (!rpc->crpc_replyev.ev_fired) - break; - - rc = rpc->crpc_replyev.ev_status; - if (rc) - break; - - srpc_unpack_msg_hdr(reply); - if (reply->msg_type != type || - (reply->msg_magic != SRPC_MSG_MAGIC && - reply->msg_magic != __swab32(SRPC_MSG_MAGIC))) { - CWARN("Bad message from %s: type %u (%d expected), magic %u (%d expected).\n", - libcfs_id2str(rpc->crpc_dest), - reply->msg_type, type, - reply->msg_magic, SRPC_MSG_MAGIC); - rc = -EBADMSG; - break; - } - - if (do_bulk && reply->msg_body.reply.status) { - CWARN("Remote error %d at %s, unlink bulk buffer in case peer didn't initiate bulk transfer\n", - reply->msg_body.reply.status, - libcfs_id2str(rpc->crpc_dest)); - LNetMDUnlink(rpc->crpc_bulk.bk_mdh); - } - - wi->swi_state = SWI_STATE_REPLY_RECEIVED; - } - /* fall through */ - case SWI_STATE_REPLY_RECEIVED: - if (do_bulk && !rpc->crpc_bulkev.ev_fired) - break; - - rc = do_bulk ? rpc->crpc_bulkev.ev_status : 0; - - /* - * Bulk buffer was unlinked due to remote error. Clear error - * since reply buffer still contains valid data. - * NB rpc->crpc_done shouldn't look into bulk data in case of - * remote error. 
- */ - if (do_bulk && rpc->crpc_bulkev.ev_lnet == LNET_EVENT_UNLINK && - !rpc->crpc_status && reply->msg_body.reply.status) - rc = 0; - - wi->swi_state = SWI_STATE_DONE; - srpc_client_rpc_done(rpc, rc); - return; - } - - if (rc) { - spin_lock(&rpc->crpc_lock); - srpc_abort_rpc(rpc, rc); - spin_unlock(&rpc->crpc_lock); - } - -abort: - if (rpc->crpc_aborted) { - LNetMDUnlink(rpc->crpc_reqstmdh); - LNetMDUnlink(rpc->crpc_replymdh); - LNetMDUnlink(rpc->crpc_bulk.bk_mdh); - - if (!srpc_event_pending(rpc)) { - srpc_client_rpc_done(rpc, -EINTR); - return; - } - } -} - -struct srpc_client_rpc * -srpc_create_client_rpc(struct lnet_process_id peer, int service, - int nbulkiov, int bulklen, - void (*rpc_done)(struct srpc_client_rpc *), - void (*rpc_fini)(struct srpc_client_rpc *), void *priv) -{ - struct srpc_client_rpc *rpc; - - rpc = kzalloc(offsetof(struct srpc_client_rpc, - crpc_bulk.bk_iovs[nbulkiov]), GFP_KERNEL); - if (!rpc) - return NULL; - - srpc_init_client_rpc(rpc, peer, service, nbulkiov, - bulklen, rpc_done, rpc_fini, priv); - return rpc; -} - -/* called with rpc->crpc_lock held */ -void -srpc_abort_rpc(struct srpc_client_rpc *rpc, int why) -{ - LASSERT(why); - - if (rpc->crpc_aborted || /* already aborted */ - rpc->crpc_closed) /* callback imminent */ - return; - - CDEBUG(D_NET, "Aborting RPC: service %d, peer %s, state %s, why %d\n", - rpc->crpc_service, libcfs_id2str(rpc->crpc_dest), - swi_state2str(rpc->crpc_wi.swi_state), why); - - rpc->crpc_aborted = 1; - rpc->crpc_status = why; - swi_schedule_workitem(&rpc->crpc_wi); -} - -/* called with rpc->crpc_lock held */ -void -srpc_post_rpc(struct srpc_client_rpc *rpc) -{ - LASSERT(!rpc->crpc_aborted); - LASSERT(srpc_data.rpc_state == SRPC_STATE_RUNNING); - - CDEBUG(D_NET, "Posting RPC: peer %s, service %d, timeout %d\n", - libcfs_id2str(rpc->crpc_dest), rpc->crpc_service, - rpc->crpc_timeout); - - srpc_add_client_rpc_timer(rpc); - swi_schedule_workitem(&rpc->crpc_wi); -} - -int -srpc_send_reply(struct 
srpc_server_rpc *rpc) -{ - struct srpc_event *ev = &rpc->srpc_ev; - struct srpc_msg *msg = &rpc->srpc_replymsg; - struct srpc_buffer *buffer = rpc->srpc_reqstbuf; - struct srpc_service_cd *scd = rpc->srpc_scd; - struct srpc_service *sv = scd->scd_svc; - __u64 rpyid; - int rc; - - LASSERT(buffer); - rpyid = buffer->buf_msg.msg_body.reqst.rpyid; - - spin_lock(&scd->scd_lock); - - if (!sv->sv_shuttingdown && !srpc_serv_is_framework(sv)) { - /* - * Repost buffer before replying since test client - * might send me another RPC once it gets the reply - */ - if (srpc_service_post_buffer(scd, buffer)) - CWARN("Failed to repost %s buffer\n", sv->sv_name); - rpc->srpc_reqstbuf = NULL; - } - - spin_unlock(&scd->scd_lock); - - ev->ev_fired = 0; - ev->ev_data = rpc; - ev->ev_type = SRPC_REPLY_SENT; - - msg->msg_magic = SRPC_MSG_MAGIC; - msg->msg_version = SRPC_MSG_VERSION; - msg->msg_type = srpc_service2reply(sv->sv_id); - - rc = srpc_post_active_rdma(SRPC_RDMA_PORTAL, rpyid, msg, - sizeof(*msg), LNET_MD_OP_PUT, - rpc->srpc_peer, rpc->srpc_self, - &rpc->srpc_replymdh, ev); - if (rc) - ev->ev_fired = 1; /* no more event expected */ - return rc; -} - -/* when in kernel always called with LNET_LOCK() held, and in thread context */ -static void -srpc_lnet_ev_handler(struct lnet_event *ev) -{ - struct srpc_service_cd *scd; - struct srpc_event *rpcev = ev->md.user_ptr; - struct srpc_client_rpc *crpc; - struct srpc_server_rpc *srpc; - struct srpc_buffer *buffer; - struct srpc_service *sv; - struct srpc_msg *msg; - enum srpc_msg_type type; - - LASSERT(!in_interrupt()); - - if (ev->status) { - __u32 errors; - - spin_lock(&srpc_data.rpc_glock); - if (ev->status != -ECANCELED) /* cancellation is not error */ - srpc_data.rpc_counters.errors++; - errors = srpc_data.rpc_counters.errors; - spin_unlock(&srpc_data.rpc_glock); - - CNETERR("LNet event status %d type %d, RPC errors %u\n", - ev->status, ev->type, errors); - } - - rpcev->ev_lnet = ev->type; - - switch (rpcev->ev_type) { - default: - 
CERROR("Unknown event: status %d, type %d, lnet %d\n", - rpcev->ev_status, rpcev->ev_type, rpcev->ev_lnet); - LBUG(); - case SRPC_REQUEST_SENT: - if (!ev->status && ev->type != LNET_EVENT_UNLINK) { - spin_lock(&srpc_data.rpc_glock); - srpc_data.rpc_counters.rpcs_sent++; - spin_unlock(&srpc_data.rpc_glock); - } - /* fall through */ - case SRPC_REPLY_RCVD: - case SRPC_BULK_REQ_RCVD: - crpc = rpcev->ev_data; - - if (rpcev != &crpc->crpc_reqstev && - rpcev != &crpc->crpc_replyev && - rpcev != &crpc->crpc_bulkev) { - CERROR("rpcev %p, crpc %p, reqstev %p, replyev %p, bulkev %p\n", - rpcev, crpc, &crpc->crpc_reqstev, - &crpc->crpc_replyev, &crpc->crpc_bulkev); - CERROR("Bad event: status %d, type %d, lnet %d\n", - rpcev->ev_status, rpcev->ev_type, rpcev->ev_lnet); - LBUG(); - } - - spin_lock(&crpc->crpc_lock); - - LASSERT(!rpcev->ev_fired); - rpcev->ev_fired = 1; - rpcev->ev_status = (ev->type == LNET_EVENT_UNLINK) ? - -EINTR : ev->status; - swi_schedule_workitem(&crpc->crpc_wi); - - spin_unlock(&crpc->crpc_lock); - break; - - case SRPC_REQUEST_RCVD: - scd = rpcev->ev_data; - sv = scd->scd_svc; - - LASSERT(rpcev == &scd->scd_ev); - - spin_lock(&scd->scd_lock); - - LASSERT(ev->unlinked); - LASSERT(ev->type == LNET_EVENT_PUT || - ev->type == LNET_EVENT_UNLINK); - LASSERT(ev->type != LNET_EVENT_UNLINK || - sv->sv_shuttingdown); - - buffer = container_of(ev->md.start, struct srpc_buffer, buf_msg); - buffer->buf_peer = ev->initiator; - buffer->buf_self = ev->target.nid; - - LASSERT(scd->scd_buf_nposted > 0); - scd->scd_buf_nposted--; - - if (sv->sv_shuttingdown) { - /* - * Leave buffer on scd->scd_buf_nposted since - * srpc_finish_service needs to traverse it. 
- */ - spin_unlock(&scd->scd_lock); - break; - } - - if (scd->scd_buf_err_stamp && - scd->scd_buf_err_stamp < ktime_get_real_seconds()) { - /* re-enable adding buffer */ - scd->scd_buf_err_stamp = 0; - scd->scd_buf_err = 0; - } - - if (!scd->scd_buf_err && /* adding buffer is enabled */ - !scd->scd_buf_adjust && - scd->scd_buf_nposted < scd->scd_buf_low) { - scd->scd_buf_adjust = max(scd->scd_buf_total / 2, - SFW_TEST_WI_MIN); - swi_schedule_workitem(&scd->scd_buf_wi); - } - - list_del(&buffer->buf_list); /* from scd->scd_buf_posted */ - msg = &buffer->buf_msg; - type = srpc_service2request(sv->sv_id); - - if (ev->status || ev->mlength != sizeof(*msg) || - (msg->msg_type != type && - msg->msg_type != __swab32(type)) || - (msg->msg_magic != SRPC_MSG_MAGIC && - msg->msg_magic != __swab32(SRPC_MSG_MAGIC))) { - CERROR("Dropping RPC (%s) from %s: status %d mlength %d type %u magic %u.\n", - sv->sv_name, libcfs_id2str(ev->initiator), - ev->status, ev->mlength, - msg->msg_type, msg->msg_magic); - - /* - * NB can't call srpc_service_recycle_buffer here since - * it may call LNetM[DE]Attach. 
The invalid magic tells - * srpc_handle_rpc to drop this RPC - */ - msg->msg_magic = 0; - } - - if (!list_empty(&scd->scd_rpc_free)) { - srpc = list_entry(scd->scd_rpc_free.next, - struct srpc_server_rpc, - srpc_list); - list_del(&srpc->srpc_list); - - srpc_init_server_rpc(srpc, scd, buffer); - list_add_tail(&srpc->srpc_list, - &scd->scd_rpc_active); - swi_schedule_workitem(&srpc->srpc_wi); - } else { - list_add_tail(&buffer->buf_list, - &scd->scd_buf_blocked); - } - - spin_unlock(&scd->scd_lock); - - spin_lock(&srpc_data.rpc_glock); - srpc_data.rpc_counters.rpcs_rcvd++; - spin_unlock(&srpc_data.rpc_glock); - break; - - case SRPC_BULK_GET_RPLD: - LASSERT(ev->type == LNET_EVENT_SEND || - ev->type == LNET_EVENT_REPLY || - ev->type == LNET_EVENT_UNLINK); - - if (!ev->unlinked) - break; /* wait for final event */ - /* fall through */ - case SRPC_BULK_PUT_SENT: - if (!ev->status && ev->type != LNET_EVENT_UNLINK) { - spin_lock(&srpc_data.rpc_glock); - - if (rpcev->ev_type == SRPC_BULK_GET_RPLD) - srpc_data.rpc_counters.bulk_get += ev->mlength; - else - srpc_data.rpc_counters.bulk_put += ev->mlength; - - spin_unlock(&srpc_data.rpc_glock); - } - /* fall through */ - case SRPC_REPLY_SENT: - srpc = rpcev->ev_data; - scd = srpc->srpc_scd; - - LASSERT(rpcev == &srpc->srpc_ev); - - spin_lock(&scd->scd_lock); - - rpcev->ev_fired = 1; - rpcev->ev_status = (ev->type == LNET_EVENT_UNLINK) ? 
- -EINTR : ev->status; - swi_schedule_workitem(&srpc->srpc_wi); - - spin_unlock(&scd->scd_lock); - break; - } -} - -int -srpc_startup(void) -{ - int rc; - - memset(&srpc_data, 0, sizeof(struct smoketest_rpc)); - spin_lock_init(&srpc_data.rpc_glock); - - /* 1 second pause to avoid timestamp reuse */ - set_current_state(TASK_UNINTERRUPTIBLE); - schedule_timeout(HZ); - srpc_data.rpc_matchbits = ((__u64)ktime_get_real_seconds()) << 48; - - srpc_data.rpc_state = SRPC_STATE_NONE; - - rc = LNetNIInit(LNET_PID_LUSTRE); - if (rc < 0) { - CERROR("LNetNIInit() has failed: %d\n", rc); - return rc; - } - - srpc_data.rpc_state = SRPC_STATE_NI_INIT; - - LNetInvalidateEQHandle(&srpc_data.rpc_lnet_eq); - rc = LNetEQAlloc(0, srpc_lnet_ev_handler, &srpc_data.rpc_lnet_eq); - if (rc) { - CERROR("LNetEQAlloc() has failed: %d\n", rc); - goto bail; - } - - rc = LNetSetLazyPortal(SRPC_FRAMEWORK_REQUEST_PORTAL); - LASSERT(!rc); - rc = LNetSetLazyPortal(SRPC_REQUEST_PORTAL); - LASSERT(!rc); - - srpc_data.rpc_state = SRPC_STATE_EQ_INIT; - - rc = stt_startup(); - -bail: - if (rc) - srpc_shutdown(); - else - srpc_data.rpc_state = SRPC_STATE_RUNNING; - - return rc; -} - -void -srpc_shutdown(void) -{ - int i; - int rc; - int state; - - state = srpc_data.rpc_state; - srpc_data.rpc_state = SRPC_STATE_STOPPING; - - switch (state) { - default: - LBUG(); - case SRPC_STATE_RUNNING: - spin_lock(&srpc_data.rpc_glock); - - for (i = 0; i <= SRPC_SERVICE_MAX_ID; i++) { - struct srpc_service *sv = srpc_data.rpc_services[i]; - - LASSERTF(!sv, "service not empty: id %d, name %s\n", - i, sv->sv_name); - } - - spin_unlock(&srpc_data.rpc_glock); - - stt_shutdown(); - /* fall through */ - case SRPC_STATE_EQ_INIT: - rc = LNetClearLazyPortal(SRPC_FRAMEWORK_REQUEST_PORTAL); - rc = LNetClearLazyPortal(SRPC_REQUEST_PORTAL); - LASSERT(!rc); - rc = LNetEQFree(srpc_data.rpc_lnet_eq); - LASSERT(!rc); /* the EQ should have no user by now */ - /* fall through */ - case SRPC_STATE_NI_INIT: - LNetNIFini(); - } -} diff --git 
a/drivers/staging/lustre/lnet/selftest/rpc.h b/drivers/staging/lustre/lnet/selftest/rpc.h deleted file mode 100644 index 465b5b534423..000000000000 --- a/drivers/staging/lustre/lnet/selftest/rpc.h +++ /dev/null @@ -1,295 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -/* - * GPL HEADER START - * - * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 only, - * as published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, but - * WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * General Public License version 2 for more details (a copy is included - * in the LICENSE file that accompanied this code). - * - * You should have received a copy of the GNU General Public License - * version 2 along with this program; If not, see - * http://www.gnu.org/licenses/gpl-2.0.html - * - * GPL HEADER END - */ -/* - * Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved. - * Use is subject to license terms. - * - * Copyright (c) 2012, Intel Corporation. - */ -/* - * This file is part of Lustre, http://www.lustre.org/ - * Lustre is a trademark of Sun Microsystems, Inc. 
- */ - -#ifndef __SELFTEST_RPC_H__ -#define __SELFTEST_RPC_H__ - -#include <uapi/linux/lnet/lnetst.h> - -/* - * LST wired structures - * - * XXX: *REPLY == *REQST + 1 - */ -enum srpc_msg_type { - SRPC_MSG_MKSN_REQST = 0, - SRPC_MSG_MKSN_REPLY = 1, - SRPC_MSG_RMSN_REQST = 2, - SRPC_MSG_RMSN_REPLY = 3, - SRPC_MSG_BATCH_REQST = 4, - SRPC_MSG_BATCH_REPLY = 5, - SRPC_MSG_STAT_REQST = 6, - SRPC_MSG_STAT_REPLY = 7, - SRPC_MSG_TEST_REQST = 8, - SRPC_MSG_TEST_REPLY = 9, - SRPC_MSG_DEBUG_REQST = 10, - SRPC_MSG_DEBUG_REPLY = 11, - SRPC_MSG_BRW_REQST = 12, - SRPC_MSG_BRW_REPLY = 13, - SRPC_MSG_PING_REQST = 14, - SRPC_MSG_PING_REPLY = 15, - SRPC_MSG_JOIN_REQST = 16, - SRPC_MSG_JOIN_REPLY = 17, -}; - -/* CAVEAT EMPTOR: - * All srpc_*_reqst_t's 1st field must be matchbits of reply buffer, - * and 2nd field matchbits of bulk buffer if any. - * - * All srpc_*_reply_t's 1st field must be a __u32 status, and 2nd field - * session id if needed. - */ -struct srpc_generic_reqst { - __u64 rpyid; /* reply buffer matchbits */ - __u64 bulkid; /* bulk buffer matchbits */ -} WIRE_ATTR; - -struct srpc_generic_reply { - __u32 status; - struct lst_sid sid; -} WIRE_ATTR; - -/* FRAMEWORK RPCs */ -struct srpc_mksn_reqst { - __u64 mksn_rpyid; /* reply buffer matchbits */ - struct lst_sid mksn_sid; /* session id */ - __u32 mksn_force; /* use brute force */ - char mksn_name[LST_NAME_SIZE]; -} WIRE_ATTR; /* make session request */ - -struct srpc_mksn_reply { - __u32 mksn_status; /* session status */ - struct lst_sid mksn_sid; /* session id */ - __u32 mksn_timeout; /* session timeout */ - char mksn_name[LST_NAME_SIZE]; -} WIRE_ATTR; /* make session reply */ - -struct srpc_rmsn_reqst { - __u64 rmsn_rpyid; /* reply buffer matchbits */ - struct lst_sid rmsn_sid; /* session id */ -} WIRE_ATTR; /* remove session request */ - -struct srpc_rmsn_reply { - __u32 rmsn_status; - struct lst_sid rmsn_sid; /* session id */ -} WIRE_ATTR; /* remove session reply */ - -struct srpc_join_reqst { - __u64 join_rpyid; /* 
reply buffer matchbits */ - struct lst_sid join_sid; /* session id to join */ - char join_group[LST_NAME_SIZE]; /* group name */ -} WIRE_ATTR; - -struct srpc_join_reply { - __u32 join_status; /* returned status */ - struct lst_sid join_sid; /* session id */ - __u32 join_timeout; /* # seconds' inactivity to - * expire - */ - char join_session[LST_NAME_SIZE]; /* session name */ -} WIRE_ATTR; - -struct srpc_debug_reqst { - __u64 dbg_rpyid; /* reply buffer matchbits */ - struct lst_sid dbg_sid; /* session id */ - __u32 dbg_flags; /* bitmap of debug */ -} WIRE_ATTR; - -struct srpc_debug_reply { - __u32 dbg_status; /* returned code */ - struct lst_sid dbg_sid; /* session id */ - __u32 dbg_timeout; /* session timeout */ - __u32 dbg_nbatch; /* # of batches in the node */ - char dbg_name[LST_NAME_SIZE]; /* session name */ -} WIRE_ATTR; - -#define SRPC_BATCH_OPC_RUN 1 -#define SRPC_BATCH_OPC_STOP 2 -#define SRPC_BATCH_OPC_QUERY 3 - -struct srpc_batch_reqst { - __u64 bar_rpyid; /* reply buffer matchbits */ - struct lst_sid bar_sid; /* session id */ - struct lst_bid bar_bid; /* batch id */ - __u32 bar_opc; /* create/start/stop batch */ - __u32 bar_testidx; /* index of test */ - __u32 bar_arg; /* parameters */ -} WIRE_ATTR; - -struct srpc_batch_reply { - __u32 bar_status; /* status of request */ - struct lst_sid bar_sid; /* session id */ - __u32 bar_active; /* # of active tests in batch/test */ - __u32 bar_time; /* remained time */ -} WIRE_ATTR; - -struct srpc_stat_reqst { - __u64 str_rpyid; /* reply buffer matchbits */ - struct lst_sid str_sid; /* session id */ - __u32 str_type; /* type of stat */ -} WIRE_ATTR; - -struct srpc_stat_reply { - __u32 str_status; - struct lst_sid str_sid; - struct sfw_counters str_fw; - struct srpc_counters str_rpc; - struct lnet_counters str_lnet; -} WIRE_ATTR; - -struct test_bulk_req { - __u32 blk_opc; /* bulk operation code */ - __u32 blk_npg; /* # of pages */ - __u32 blk_flags; /* reserved flags */ -} WIRE_ATTR; - -struct test_bulk_req_v1 { - 
__u16 blk_opc; /* bulk operation code */ - __u16 blk_flags; /* data check flags */ - __u32 blk_len; /* data length */ - __u32 blk_offset; /* offset */ -} WIRE_ATTR; - -struct test_ping_req { - __u32 png_size; /* size of ping message */ - __u32 png_flags; /* reserved flags */ -} WIRE_ATTR; - -struct srpc_test_reqst { - __u64 tsr_rpyid; /* reply buffer matchbits */ - __u64 tsr_bulkid; /* bulk buffer matchbits */ - struct lst_sid tsr_sid; /* session id */ - struct lst_bid tsr_bid; /* batch id */ - __u32 tsr_service; /* test type: bulk|ping|... */ - __u32 tsr_loop; /* test client loop count or - * # server buffers needed - */ - __u32 tsr_concur; /* concurrency of test */ - __u8 tsr_is_client; /* is test client or not */ - __u8 tsr_stop_onerr; /* stop on error */ - __u32 tsr_ndest; /* # of dest nodes */ - - union { - struct test_ping_req ping; - struct test_bulk_req bulk_v0; - struct test_bulk_req_v1 bulk_v1; - } tsr_u; -} WIRE_ATTR; - -struct srpc_test_reply { - __u32 tsr_status; /* returned code */ - struct lst_sid tsr_sid; -} WIRE_ATTR; - -/* TEST RPCs */ -struct srpc_ping_reqst { - __u64 pnr_rpyid; - __u32 pnr_magic; - __u32 pnr_seq; - __u64 pnr_time_sec; - __u64 pnr_time_usec; -} WIRE_ATTR; - -struct srpc_ping_reply { - __u32 pnr_status; - __u32 pnr_magic; - __u32 pnr_seq; -} WIRE_ATTR; - -struct srpc_brw_reqst { - __u64 brw_rpyid; /* reply buffer matchbits */ - __u64 brw_bulkid; /* bulk buffer matchbits */ - __u32 brw_rw; /* read or write */ - __u32 brw_len; /* bulk data len */ - __u32 brw_flags; /* bulk data patterns */ -} WIRE_ATTR; /* bulk r/w request */ - -struct srpc_brw_reply { - __u32 brw_status; -} WIRE_ATTR; /* bulk r/w reply */ - -#define SRPC_MSG_MAGIC 0xeeb0f00d -#define SRPC_MSG_VERSION 1 - -struct srpc_msg { - __u32 msg_magic; /* magic number */ - __u32 msg_version; /* message version number */ - __u32 msg_type; /* type of message body: srpc_msg_type */ - __u32 msg_reserved0; - __u32 msg_reserved1; - __u32 msg_ses_feats; /* test session features */ - 
union { - struct srpc_generic_reqst reqst; - struct srpc_generic_reply reply; - - struct srpc_mksn_reqst mksn_reqst; - struct srpc_mksn_reply mksn_reply; - struct srpc_rmsn_reqst rmsn_reqst; - struct srpc_rmsn_reply rmsn_reply; - struct srpc_debug_reqst dbg_reqst; - struct srpc_debug_reply dbg_reply; - struct srpc_batch_reqst bat_reqst; - struct srpc_batch_reply bat_reply; - struct srpc_stat_reqst stat_reqst; - struct srpc_stat_reply stat_reply; - struct srpc_test_reqst tes_reqst; - struct srpc_test_reply tes_reply; - struct srpc_join_reqst join_reqst; - struct srpc_join_reply join_reply; - - struct srpc_ping_reqst ping_reqst; - struct srpc_ping_reply ping_reply; - struct srpc_brw_reqst brw_reqst; - struct srpc_brw_reply brw_reply; - } msg_body; -} WIRE_ATTR; - -static inline void -srpc_unpack_msg_hdr(struct srpc_msg *msg) -{ - if (msg->msg_magic == SRPC_MSG_MAGIC) - return; /* no flipping needed */ - - /* - * We do not swap the magic number here as it is needed to - * determine whether the body needs to be swapped. - */ - /* __swab32s(&msg->msg_magic); */ - __swab32s(&msg->msg_type); - __swab32s(&msg->msg_version); - __swab32s(&msg->msg_ses_feats); - __swab32s(&msg->msg_reserved0); - __swab32s(&msg->msg_reserved1); -} - -#endif /* __SELFTEST_RPC_H__ */ diff --git a/drivers/staging/lustre/lnet/selftest/selftest.h b/drivers/staging/lustre/lnet/selftest/selftest.h deleted file mode 100644 index 05466b85e1c0..000000000000 --- a/drivers/staging/lustre/lnet/selftest/selftest.h +++ /dev/null @@ -1,623 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -/* - * GPL HEADER START - * - * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 only, - * as published by the Free Software Foundation. 
- * - * This program is distributed in the hope that it will be useful, but - * WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * General Public License version 2 for more details (a copy is included - * in the LICENSE file that accompanied this code). - * - * You should have received a copy of the GNU General Public License - * version 2 along with this program; If not, see - * http://www.gnu.org/licenses/gpl-2.0.html - * - * GPL HEADER END - */ -/* - * Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved. - * Use is subject to license terms. - * - * Copyright (c) 2012, Intel Corporation. - */ -/* - * This file is part of Lustre, http://www.lustre.org/ - * Lustre is a trademark of Sun Microsystems, Inc. - * - * lnet/selftest/selftest.h - * - * Author: Isaac Huang <isaac@clusterfs.com> - */ -#ifndef __SELFTEST_SELFTEST_H__ -#define __SELFTEST_SELFTEST_H__ - -#define LNET_ONLY - -#include <linux/libcfs/libcfs.h> -#include <linux/lnet/lib-lnet.h> -#include <linux/lnet/lib-types.h> -#include <uapi/linux/lnet/lnetst.h> - -#include "rpc.h" -#include "timer.h" - -#ifndef MADE_WITHOUT_COMPROMISE -#define MADE_WITHOUT_COMPROMISE -#endif - -#define SWI_STATE_NEWBORN 0 -#define SWI_STATE_REPLY_SUBMITTED 1 -#define SWI_STATE_REPLY_SENT 2 -#define SWI_STATE_REQUEST_SUBMITTED 3 -#define SWI_STATE_REQUEST_SENT 4 -#define SWI_STATE_REPLY_RECEIVED 5 -#define SWI_STATE_BULK_STARTED 6 -#define SWI_STATE_DONE 10 - -/* forward refs */ -struct srpc_service; -struct srpc_service_cd; -struct sfw_test_unit; -struct sfw_test_instance; - -/* services below SRPC_FRAMEWORK_SERVICE_MAX_ID are framework - * services, e.g. create/modify session. 
- */ -#define SRPC_SERVICE_DEBUG 0 -#define SRPC_SERVICE_MAKE_SESSION 1 -#define SRPC_SERVICE_REMOVE_SESSION 2 -#define SRPC_SERVICE_BATCH 3 -#define SRPC_SERVICE_TEST 4 -#define SRPC_SERVICE_QUERY_STAT 5 -#define SRPC_SERVICE_JOIN 6 -#define SRPC_FRAMEWORK_SERVICE_MAX_ID 10 -/* other services start from SRPC_FRAMEWORK_SERVICE_MAX_ID+1 */ -#define SRPC_SERVICE_BRW 11 -#define SRPC_SERVICE_PING 12 -#define SRPC_SERVICE_MAX_ID 12 - -#define SRPC_REQUEST_PORTAL 50 -/* a lazy portal for framework RPC requests */ -#define SRPC_FRAMEWORK_REQUEST_PORTAL 51 -/* all reply/bulk RDMAs go to this portal */ -#define SRPC_RDMA_PORTAL 52 - -static inline enum srpc_msg_type -srpc_service2request(int service) -{ - switch (service) { - default: - LBUG(); - case SRPC_SERVICE_DEBUG: - return SRPC_MSG_DEBUG_REQST; - - case SRPC_SERVICE_MAKE_SESSION: - return SRPC_MSG_MKSN_REQST; - - case SRPC_SERVICE_REMOVE_SESSION: - return SRPC_MSG_RMSN_REQST; - - case SRPC_SERVICE_BATCH: - return SRPC_MSG_BATCH_REQST; - - case SRPC_SERVICE_TEST: - return SRPC_MSG_TEST_REQST; - - case SRPC_SERVICE_QUERY_STAT: - return SRPC_MSG_STAT_REQST; - - case SRPC_SERVICE_BRW: - return SRPC_MSG_BRW_REQST; - - case SRPC_SERVICE_PING: - return SRPC_MSG_PING_REQST; - - case SRPC_SERVICE_JOIN: - return SRPC_MSG_JOIN_REQST; - } -} - -static inline enum srpc_msg_type -srpc_service2reply(int service) -{ - return srpc_service2request(service) + 1; -} - -enum srpc_event_type { - SRPC_BULK_REQ_RCVD = 1, /* passive bulk request(PUT sink/GET source) - * received - */ - SRPC_BULK_PUT_SENT = 2, /* active bulk PUT sent (source) */ - SRPC_BULK_GET_RPLD = 3, /* active bulk GET replied (sink) */ - SRPC_REPLY_RCVD = 4, /* incoming reply received */ - SRPC_REPLY_SENT = 5, /* outgoing reply sent */ - SRPC_REQUEST_RCVD = 6, /* incoming request received */ - SRPC_REQUEST_SENT = 7, /* outgoing request sent */ -}; - -/* RPC event */ -struct srpc_event { - enum srpc_event_type ev_type; /* what's up */ - enum lnet_event_kind ev_lnet; /* 
LNet event type */ - int ev_fired; /* LNet event fired? */ - int ev_status; /* LNet event status */ - void *ev_data; /* owning server/client RPC */ -}; - -/* bulk descriptor */ -struct srpc_bulk { - int bk_len; /* len of bulk data */ - struct lnet_handle_md bk_mdh; - int bk_sink; /* sink/source */ - int bk_niov; /* # iov in bk_iovs */ - struct bio_vec bk_iovs[0]; -}; - -/* message buffer descriptor */ -struct srpc_buffer { - struct list_head buf_list; /* chain on srpc_service::*_msgq */ - struct srpc_msg buf_msg; - struct lnet_handle_md buf_mdh; - lnet_nid_t buf_self; - struct lnet_process_id buf_peer; -}; - -struct swi_workitem; -typedef void (*swi_action_t) (struct swi_workitem *); - -struct swi_workitem { - struct workqueue_struct *swi_wq; - struct work_struct swi_work; - swi_action_t swi_action; - int swi_state; -}; - -/* server-side state of a RPC */ -struct srpc_server_rpc { - /* chain on srpc_service::*_rpcq */ - struct list_head srpc_list; - struct srpc_service_cd *srpc_scd; - struct swi_workitem srpc_wi; - struct srpc_event srpc_ev; /* bulk/reply event */ - lnet_nid_t srpc_self; - struct lnet_process_id srpc_peer; - struct srpc_msg srpc_replymsg; - struct lnet_handle_md srpc_replymdh; - struct srpc_buffer *srpc_reqstbuf; - struct srpc_bulk *srpc_bulk; - - unsigned int srpc_aborted; /* being given up */ - int srpc_status; - void (*srpc_done)(struct srpc_server_rpc *); -}; - -/* client-side state of a RPC */ -struct srpc_client_rpc { - struct list_head crpc_list; /* chain on user's lists */ - spinlock_t crpc_lock; /* serialize */ - int crpc_service; - atomic_t crpc_refcount; - int crpc_timeout; /* # seconds to wait for reply */ - struct stt_timer crpc_timer; - struct swi_workitem crpc_wi; - struct lnet_process_id crpc_dest; - - void (*crpc_done)(struct srpc_client_rpc *); - void (*crpc_fini)(struct srpc_client_rpc *); - int crpc_status; /* completion status */ - void *crpc_priv; /* caller data */ - - /* state flags */ - unsigned int crpc_aborted:1; /* being 
given up */ - unsigned int crpc_closed:1; /* completed */ - - /* RPC events */ - struct srpc_event crpc_bulkev; /* bulk event */ - struct srpc_event crpc_reqstev; /* request event */ - struct srpc_event crpc_replyev; /* reply event */ - - /* bulk, request(reqst), and reply exchanged on wire */ - struct srpc_msg crpc_reqstmsg; - struct srpc_msg crpc_replymsg; - struct lnet_handle_md crpc_reqstmdh; - struct lnet_handle_md crpc_replymdh; - struct srpc_bulk crpc_bulk; -}; - -#define srpc_client_rpc_size(rpc) \ -offsetof(struct srpc_client_rpc, crpc_bulk.bk_iovs[(rpc)->crpc_bulk.bk_niov]) - -#define srpc_client_rpc_addref(rpc) \ -do { \ - CDEBUG(D_NET, "RPC[%p] -> %s (%d)++\n", \ - (rpc), libcfs_id2str((rpc)->crpc_dest), \ - atomic_read(&(rpc)->crpc_refcount)); \ - LASSERT(atomic_read(&(rpc)->crpc_refcount) > 0); \ - atomic_inc(&(rpc)->crpc_refcount); \ -} while (0) - -#define srpc_client_rpc_decref(rpc) \ -do { \ - CDEBUG(D_NET, "RPC[%p] -> %s (%d)--\n", \ - (rpc), libcfs_id2str((rpc)->crpc_dest), \ - atomic_read(&(rpc)->crpc_refcount)); \ - LASSERT(atomic_read(&(rpc)->crpc_refcount) > 0); \ - if (atomic_dec_and_test(&(rpc)->crpc_refcount)) \ - srpc_destroy_client_rpc(rpc); \ -} while (0) - -#define srpc_event_pending(rpc) (!(rpc)->crpc_bulkev.ev_fired || \ - !(rpc)->crpc_reqstev.ev_fired || \ - !(rpc)->crpc_replyev.ev_fired) - -/* CPU partition data of srpc service */ -struct srpc_service_cd { - /** serialize */ - spinlock_t scd_lock; - /** backref to service */ - struct srpc_service *scd_svc; - /** event buffer */ - struct srpc_event scd_ev; - /** free RPC descriptors */ - struct list_head scd_rpc_free; - /** in-flight RPCs */ - struct list_head scd_rpc_active; - /** workitem for posting buffer */ - struct swi_workitem scd_buf_wi; - /** CPT id */ - int scd_cpt; - /** error code for scd_buf_wi */ - int scd_buf_err; - /** timestamp for scd_buf_err */ - time64_t scd_buf_err_stamp; - /** total # request buffers */ - int scd_buf_total; - /** # posted request buffers */ - 
int scd_buf_nposted; - /** in progress of buffer posting */ - int scd_buf_posting; - /** allocate more buffers if scd_buf_nposted < scd_buf_low */ - int scd_buf_low; - /** increase/decrease some buffers */ - int scd_buf_adjust; - /** posted message buffers */ - struct list_head scd_buf_posted; - /** blocked for RPC descriptor */ - struct list_head scd_buf_blocked; -}; - -/* number of server workitems (mini-thread) for testing service */ -#define SFW_TEST_WI_MIN 256 -#define SFW_TEST_WI_MAX 2048 -/* extra buffers for tolerating buggy peers, or unbalanced number - * of peers between partitions - */ -#define SFW_TEST_WI_EXTRA 64 - -/* number of server workitems (mini-thread) for framework service */ -#define SFW_FRWK_WI_MIN 16 -#define SFW_FRWK_WI_MAX 256 - -struct srpc_service { - int sv_id; /* service id */ - const char *sv_name; /* human readable name */ - int sv_wi_total; /* total server workitems */ - int sv_shuttingdown; - int sv_ncpts; - /* percpt data for srpc_service */ - struct srpc_service_cd **sv_cpt_data; - /* Service callbacks: - * - sv_handler: process incoming RPC request - * - sv_bulk_ready: notify bulk data - */ - int (*sv_handler)(struct srpc_server_rpc *); - int (*sv_bulk_ready)(struct srpc_server_rpc *, int); -}; - -struct sfw_session { - struct list_head sn_list; /* chain on fw_zombie_sessions */ - struct lst_sid sn_id; /* unique identifier */ - unsigned int sn_timeout; /* # seconds' inactivity to expire */ - int sn_timer_active; - unsigned int sn_features; - struct stt_timer sn_timer; - struct list_head sn_batches; /* list of batches */ - char sn_name[LST_NAME_SIZE]; - atomic_t sn_refcount; - atomic_t sn_brw_errors; - atomic_t sn_ping_errors; - unsigned long sn_started; -}; - -#define sfw_sid_equal(sid0, sid1) ((sid0).ses_nid == (sid1).ses_nid && \ - (sid0).ses_stamp == (sid1).ses_stamp) - -struct sfw_batch { - struct list_head bat_list; /* chain on sn_batches */ - struct lst_bid bat_id; /* batch id */ - int bat_error; /* error code of batch */ 
- struct sfw_session *bat_session; /* batch's session */ - atomic_t bat_nactive; /* # of active tests */ - struct list_head bat_tests; /* test instances */ -}; - -struct sfw_test_client_ops { - int (*tso_init)(struct sfw_test_instance *tsi); /* initialize test - * client - */ - void (*tso_fini)(struct sfw_test_instance *tsi); /* finalize test - * client - */ - int (*tso_prep_rpc)(struct sfw_test_unit *tsu, - struct lnet_process_id dest, - struct srpc_client_rpc **rpc); /* prep a tests rpc */ - void (*tso_done_rpc)(struct sfw_test_unit *tsu, - struct srpc_client_rpc *rpc); /* done a test rpc */ -}; - -struct sfw_test_instance { - struct list_head tsi_list; /* chain on batch */ - int tsi_service; /* test type */ - struct sfw_batch *tsi_batch; /* batch */ - struct sfw_test_client_ops *tsi_ops; /* test client operation - */ - - /* public parameter for all test units */ - unsigned int tsi_is_client:1; /* is test client */ - unsigned int tsi_stoptsu_onerr:1; /* stop tsu on error */ - int tsi_concur; /* concurrency */ - int tsi_loop; /* loop count */ - - /* status of test instance */ - spinlock_t tsi_lock; /* serialize */ - unsigned int tsi_stopping:1; /* test is stopping */ - atomic_t tsi_nactive; /* # of active test - * unit - */ - struct list_head tsi_units; /* test units */ - struct list_head tsi_free_rpcs; /* free rpcs */ - struct list_head tsi_active_rpcs; /* active rpcs */ - - union { - struct test_ping_req ping; /* ping parameter */ - struct test_bulk_req bulk_v0; /* bulk parameter */ - struct test_bulk_req_v1 bulk_v1; /* bulk v1 parameter */ - } tsi_u; -}; - -/* - * XXX: trailing (PAGE_SIZE % sizeof(struct lnet_process_id)) bytes at the end - * of pages are not used - */ -#define SFW_MAX_CONCUR LST_MAX_CONCUR -#define SFW_ID_PER_PAGE (PAGE_SIZE / sizeof(struct lnet_process_id_packed)) -#define SFW_MAX_NDESTS (LNET_MAX_IOV * SFW_ID_PER_PAGE) -#define sfw_id_pages(n) (((n) + SFW_ID_PER_PAGE - 1) / SFW_ID_PER_PAGE) - -struct sfw_test_unit { - struct list_head 
tsu_list; /* chain on lst_test_instance */ - struct lnet_process_id tsu_dest; /* id of dest node */ - int tsu_loop; /* loop count of the test */ - struct sfw_test_instance *tsu_instance; /* pointer to test instance */ - void *tsu_private; /* private data */ - struct swi_workitem tsu_worker; /* workitem of the test unit */ -}; - -struct sfw_test_case { - struct list_head tsc_list; /* chain on fw_tests */ - struct srpc_service *tsc_srv_service; /* test service */ - struct sfw_test_client_ops *tsc_cli_ops; /* ops of test client */ -}; - -struct srpc_client_rpc * -sfw_create_rpc(struct lnet_process_id peer, int service, - unsigned int features, int nbulkiov, int bulklen, - void (*done)(struct srpc_client_rpc *), void *priv); -int sfw_create_test_rpc(struct sfw_test_unit *tsu, - struct lnet_process_id peer, unsigned int features, - int nblk, int blklen, struct srpc_client_rpc **rpc); -void sfw_abort_rpc(struct srpc_client_rpc *rpc); -void sfw_post_rpc(struct srpc_client_rpc *rpc); -void sfw_client_rpc_done(struct srpc_client_rpc *rpc); -void sfw_unpack_message(struct srpc_msg *msg); -void sfw_free_pages(struct srpc_server_rpc *rpc); -void sfw_add_bulk_page(struct srpc_bulk *bk, struct page *pg, int i); -int sfw_alloc_pages(struct srpc_server_rpc *rpc, int cpt, int npages, int len, - int sink); -int sfw_make_session(struct srpc_mksn_reqst *request, - struct srpc_mksn_reply *reply); - -struct srpc_client_rpc * -srpc_create_client_rpc(struct lnet_process_id peer, int service, - int nbulkiov, int bulklen, - void (*rpc_done)(struct srpc_client_rpc *), - void (*rpc_fini)(struct srpc_client_rpc *), void *priv); -void srpc_post_rpc(struct srpc_client_rpc *rpc); -void srpc_abort_rpc(struct srpc_client_rpc *rpc, int why); -void srpc_free_bulk(struct srpc_bulk *bk); -struct srpc_bulk *srpc_alloc_bulk(int cpt, unsigned int off, - unsigned int bulk_npg, unsigned int bulk_len, - int sink); -void srpc_send_rpc(struct swi_workitem *wi); -int srpc_send_reply(struct srpc_server_rpc 
*rpc); -int srpc_add_service(struct srpc_service *sv); -int srpc_remove_service(struct srpc_service *sv); -void srpc_shutdown_service(struct srpc_service *sv); -void srpc_abort_service(struct srpc_service *sv); -int srpc_finish_service(struct srpc_service *sv); -int srpc_service_add_buffers(struct srpc_service *sv, int nbuffer); -void srpc_service_remove_buffers(struct srpc_service *sv, int nbuffer); -void srpc_get_counters(struct srpc_counters *cnt); -void srpc_set_counters(const struct srpc_counters *cnt); - -extern struct workqueue_struct *lst_serial_wq; -extern struct workqueue_struct **lst_test_wq; - -static inline int -srpc_serv_is_framework(struct srpc_service *svc) -{ - return svc->sv_id < SRPC_FRAMEWORK_SERVICE_MAX_ID; -} - -static void -swi_wi_action(struct work_struct *wi) -{ - struct swi_workitem *swi; - - swi = container_of(wi, struct swi_workitem, swi_work); - - swi->swi_action(swi); -} - -static inline void -swi_init_workitem(struct swi_workitem *swi, - swi_action_t action, struct workqueue_struct *wq) -{ - swi->swi_wq = wq; - swi->swi_action = action; - swi->swi_state = SWI_STATE_NEWBORN; - INIT_WORK(&swi->swi_work, swi_wi_action); -} - -static inline void -swi_schedule_workitem(struct swi_workitem *wi) -{ - queue_work(wi->swi_wq, &wi->swi_work); -} - -static inline int -swi_cancel_workitem(struct swi_workitem *swi) -{ - return cancel_work_sync(&swi->swi_work); -} - -int sfw_startup(void); -int srpc_startup(void); -void sfw_shutdown(void); -void srpc_shutdown(void); - -static inline void -srpc_destroy_client_rpc(struct srpc_client_rpc *rpc) -{ - LASSERT(rpc); - LASSERT(!srpc_event_pending(rpc)); - LASSERT(!atomic_read(&rpc->crpc_refcount)); - - if (!rpc->crpc_fini) - kfree(rpc); - else - (*rpc->crpc_fini)(rpc); -} - -static inline void -srpc_init_client_rpc(struct srpc_client_rpc *rpc, struct lnet_process_id peer, - int service, int nbulkiov, int bulklen, - void (*rpc_done)(struct srpc_client_rpc *), - void (*rpc_fini)(struct srpc_client_rpc *), 
void *priv) -{ - LASSERT(nbulkiov <= LNET_MAX_IOV); - - memset(rpc, 0, offsetof(struct srpc_client_rpc, - crpc_bulk.bk_iovs[nbulkiov])); - - INIT_LIST_HEAD(&rpc->crpc_list); - swi_init_workitem(&rpc->crpc_wi, srpc_send_rpc, - lst_test_wq[lnet_cpt_of_nid(peer.nid)]); - spin_lock_init(&rpc->crpc_lock); - atomic_set(&rpc->crpc_refcount, 1); /* 1 ref for caller */ - - rpc->crpc_dest = peer; - rpc->crpc_priv = priv; - rpc->crpc_service = service; - rpc->crpc_bulk.bk_len = bulklen; - rpc->crpc_bulk.bk_niov = nbulkiov; - rpc->crpc_done = rpc_done; - rpc->crpc_fini = rpc_fini; - LNetInvalidateMDHandle(&rpc->crpc_reqstmdh); - LNetInvalidateMDHandle(&rpc->crpc_replymdh); - LNetInvalidateMDHandle(&rpc->crpc_bulk.bk_mdh); - - /* no event is expected at this point */ - rpc->crpc_bulkev.ev_fired = 1; - rpc->crpc_reqstev.ev_fired = 1; - rpc->crpc_replyev.ev_fired = 1; - - rpc->crpc_reqstmsg.msg_magic = SRPC_MSG_MAGIC; - rpc->crpc_reqstmsg.msg_version = SRPC_MSG_VERSION; - rpc->crpc_reqstmsg.msg_type = srpc_service2request(service); -} - -static inline const char * -swi_state2str(int state) -{ -#define STATE2STR(x) case x: return #x - switch (state) { - default: - LBUG(); - STATE2STR(SWI_STATE_NEWBORN); - STATE2STR(SWI_STATE_REPLY_SUBMITTED); - STATE2STR(SWI_STATE_REPLY_SENT); - STATE2STR(SWI_STATE_REQUEST_SUBMITTED); - STATE2STR(SWI_STATE_REQUEST_SENT); - STATE2STR(SWI_STATE_REPLY_RECEIVED); - STATE2STR(SWI_STATE_BULK_STARTED); - STATE2STR(SWI_STATE_DONE); - } -#undef STATE2STR -} - -#define selftest_wait_events() \ - do { \ - set_current_state(TASK_UNINTERRUPTIBLE); \ - schedule_timeout(HZ / 10); \ - } while (0) - -#define lst_wait_until(cond, lock, fmt, ...) \ -do { \ - int __I = 2; \ - while (!(cond)) { \ - CDEBUG(is_power_of_2(++__I) ? 
D_WARNING : D_NET, \ - fmt, ## __VA_ARGS__); \ - spin_unlock(&(lock)); \ - \ - selftest_wait_events(); \ - \ - spin_lock(&(lock)); \ - } \ -} while (0) - -static inline void -srpc_wait_service_shutdown(struct srpc_service *sv) -{ - int i = 2; - - LASSERT(sv->sv_shuttingdown); - - while (!srpc_finish_service(sv)) { - i++; - CDEBUG(((i & -i) == i) ? D_WARNING : D_NET, - "Waiting for %s service to shutdown...\n", - sv->sv_name); - selftest_wait_events(); - } -} - -extern struct sfw_test_client_ops brw_test_client; -void brw_init_test_client(void); - -extern struct srpc_service brw_test_service; -void brw_init_test_service(void); - -extern struct sfw_test_client_ops ping_test_client; -void ping_init_test_client(void); - -extern struct srpc_service ping_test_service; -void ping_init_test_service(void); - -#endif /* __SELFTEST_SELFTEST_H__ */ diff --git a/drivers/staging/lustre/lnet/selftest/timer.c b/drivers/staging/lustre/lnet/selftest/timer.c deleted file mode 100644 index 1b2c5fc81358..000000000000 --- a/drivers/staging/lustre/lnet/selftest/timer.c +++ /dev/null @@ -1,244 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -/* - * GPL HEADER START - * - * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 only, - * as published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, but - * WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * General Public License version 2 for more details (a copy is included - * in the LICENSE file that accompanied this code). 
- * - * You should have received a copy of the GNU General Public License - * version 2 along with this program; If not, see - * http://www.gnu.org/licenses/gpl-2.0.html - * - * GPL HEADER END - */ -/* - * Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved. - * Use is subject to license terms. - * - * Copyright (c) 2011, 2012, Intel Corporation. - */ -/* - * This file is part of Lustre, http://www.lustre.org/ - * Lustre is a trademark of Sun Microsystems, Inc. - * - * lnet/selftest/timer.c - * - * Author: Isaac Huang <isaac@clusterfs.com> - */ - -#define DEBUG_SUBSYSTEM S_LNET - -#include "selftest.h" - -/* - * Timers are implemented as a sorted queue of expiry times. The queue - * is slotted, with each slot holding timers which expire in a - * 2**STTIMER_MINPOLL (8) second period. The timers in each slot are - * sorted by increasing expiry time. The number of slots is 2**7 (128), - * to cover a time period of 1024 seconds into the future before wrapping. - */ -#define STTIMER_MINPOLL 3 /* log2 min poll interval (8 s) */ -#define STTIMER_SLOTTIME BIT(STTIMER_MINPOLL) -#define STTIMER_SLOTTIMEMASK (~(STTIMER_SLOTTIME - 1)) -#define STTIMER_NSLOTS BIT(7) -#define STTIMER_SLOT(t) (&stt_data.stt_hash[(((t) >> STTIMER_MINPOLL) & \ - (STTIMER_NSLOTS - 1))]) - -static struct st_timer_data { - spinlock_t stt_lock; - unsigned long stt_prev_slot; /* start time of the slot processed - * previously - */ - struct list_head stt_hash[STTIMER_NSLOTS]; - int stt_shuttingdown; - wait_queue_head_t stt_waitq; - int stt_nthreads; -} stt_data; - -void -stt_add_timer(struct stt_timer *timer) -{ - struct list_head *pos; - - spin_lock(&stt_data.stt_lock); - - LASSERT(stt_data.stt_nthreads > 0); - LASSERT(!stt_data.stt_shuttingdown); - LASSERT(timer->stt_func); - LASSERT(list_empty(&timer->stt_list)); - LASSERT(timer->stt_expires > ktime_get_real_seconds()); - - /* a simple insertion sort */ - list_for_each_prev(pos, STTIMER_SLOT(timer->stt_expires)) { - struct 
stt_timer *old = list_entry(pos, struct stt_timer, - stt_list); - - if (timer->stt_expires >= old->stt_expires) - break; - } - list_add(&timer->stt_list, pos); - - spin_unlock(&stt_data.stt_lock); -} - -/* - * The function returns whether it has deactivated a pending timer or not. - * (ie. del_timer() of an inactive timer returns 0, del_timer() of an - * active timer returns 1.) - * - * CAVEAT EMPTOR: - * When 0 is returned, it is possible that timer->stt_func _is_ running on - * another CPU. - */ -int -stt_del_timer(struct stt_timer *timer) -{ - int ret = 0; - - spin_lock(&stt_data.stt_lock); - - LASSERT(stt_data.stt_nthreads > 0); - LASSERT(!stt_data.stt_shuttingdown); - - if (!list_empty(&timer->stt_list)) { - ret = 1; - list_del_init(&timer->stt_list); - } - - spin_unlock(&stt_data.stt_lock); - return ret; -} - -/* called with stt_data.stt_lock held */ -static int -stt_expire_list(struct list_head *slot, time64_t now) -{ - int expired = 0; - struct stt_timer *timer; - - while (!list_empty(slot)) { - timer = list_entry(slot->next, struct stt_timer, stt_list); - - if (timer->stt_expires > now) - break; - - list_del_init(&timer->stt_list); - spin_unlock(&stt_data.stt_lock); - - expired++; - (*timer->stt_func) (timer->stt_data); - - spin_lock(&stt_data.stt_lock); - } - - return expired; -} - -static int -stt_check_timers(unsigned long *last) -{ - int expired = 0; - time64_t now; - unsigned long this_slot; - - now = ktime_get_real_seconds(); - this_slot = now & STTIMER_SLOTTIMEMASK; - - spin_lock(&stt_data.stt_lock); - - while (cfs_time_aftereq(this_slot, *last)) { - expired += stt_expire_list(STTIMER_SLOT(this_slot), now); - this_slot = cfs_time_sub(this_slot, STTIMER_SLOTTIME); - } - - *last = now & STTIMER_SLOTTIMEMASK; - spin_unlock(&stt_data.stt_lock); - return expired; -} - -static int -stt_timer_main(void *arg) -{ - int rc = 0; - - while (!stt_data.stt_shuttingdown) { - stt_check_timers(&stt_data.stt_prev_slot); - - rc = wait_event_timeout(stt_data.stt_waitq, 
- stt_data.stt_shuttingdown, - STTIMER_SLOTTIME * HZ); - } - - spin_lock(&stt_data.stt_lock); - stt_data.stt_nthreads--; - spin_unlock(&stt_data.stt_lock); - return rc; -} - -static int -stt_start_timer_thread(void) -{ - struct task_struct *task; - - LASSERT(!stt_data.stt_shuttingdown); - - task = kthread_run(stt_timer_main, NULL, "st_timer"); - if (IS_ERR(task)) - return PTR_ERR(task); - - spin_lock(&stt_data.stt_lock); - stt_data.stt_nthreads++; - spin_unlock(&stt_data.stt_lock); - return 0; -} - -int -stt_startup(void) -{ - int rc = 0; - int i; - - stt_data.stt_shuttingdown = 0; - stt_data.stt_prev_slot = ktime_get_real_seconds() & STTIMER_SLOTTIMEMASK; - - spin_lock_init(&stt_data.stt_lock); - for (i = 0; i < STTIMER_NSLOTS; i++) - INIT_LIST_HEAD(&stt_data.stt_hash[i]); - - stt_data.stt_nthreads = 0; - init_waitqueue_head(&stt_data.stt_waitq); - rc = stt_start_timer_thread(); - if (rc) - CERROR("Can't spawn timer thread: %d\n", rc); - - return rc; -} - -void -stt_shutdown(void) -{ - int i; - - spin_lock(&stt_data.stt_lock); - - for (i = 0; i < STTIMER_NSLOTS; i++) - LASSERT(list_empty(&stt_data.stt_hash[i])); - - stt_data.stt_shuttingdown = 1; - - wake_up(&stt_data.stt_waitq); - lst_wait_until(!stt_data.stt_nthreads, stt_data.stt_lock, - "waiting for %d threads to terminate\n", - stt_data.stt_nthreads); - - spin_unlock(&stt_data.stt_lock); -} diff --git a/drivers/staging/lustre/lnet/selftest/timer.h b/drivers/staging/lustre/lnet/selftest/timer.h deleted file mode 100644 index 7f0ef9bd0cda..000000000000 --- a/drivers/staging/lustre/lnet/selftest/timer.h +++ /dev/null @@ -1,50 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -/* - * GPL HEADER START - * - * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 only, - * as published by the Free Software Foundation. 
- * - * This program is distributed in the hope that it will be useful, but - * WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * General Public License version 2 for more details (a copy is included - * in the LICENSE file that accompanied this code). - * - * You should have received a copy of the GNU General Public License - * version 2 along with this program; If not, see - * http://www.gnu.org/licenses/gpl-2.0.html - * - * GPL HEADER END - */ -/* - * Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved. - * Use is subject to license terms. - */ -/* - * This file is part of Lustre, http://www.lustre.org/ - * Lustre is a trademark of Sun Microsystems, Inc. - * - * lnet/selftest/timer.h - * - * Author: Isaac Huang <isaac@clusterfs.com> - */ -#ifndef __SELFTEST_TIMER_H__ -#define __SELFTEST_TIMER_H__ - -struct stt_timer { - struct list_head stt_list; - time64_t stt_expires; - void (*stt_func)(void *); - void *stt_data; -}; - -void stt_add_timer(struct stt_timer *timer); -int stt_del_timer(struct stt_timer *timer); -int stt_startup(void); -void stt_shutdown(void); - -#endif /* __SELFTEST_TIMER_H__ */ |