Diffstat (limited to 'drivers/staging/lustre/lnet')
-rw-r--r--  drivers/staging/lustre/lnet/Kconfig | 46
-rw-r--r--  drivers/staging/lustre/lnet/Makefile | 1
-rw-r--r--  drivers/staging/lustre/lnet/klnds/Makefile | 1
-rw-r--r--  drivers/staging/lustre/lnet/klnds/o2iblnd/Makefile | 5
-rw-r--r--  drivers/staging/lustre/lnet/klnds/o2iblnd/o2iblnd.c | 2952
-rw-r--r--  drivers/staging/lustre/lnet/klnds/o2iblnd/o2iblnd.h | 1038
-rw-r--r--  drivers/staging/lustre/lnet/klnds/o2iblnd/o2iblnd_cb.c | 3751
-rw-r--r--  drivers/staging/lustre/lnet/klnds/o2iblnd/o2iblnd_modparams.c | 287
-rw-r--r--  drivers/staging/lustre/lnet/klnds/socklnd/Makefile | 6
-rw-r--r--  drivers/staging/lustre/lnet/klnds/socklnd/socklnd.c | 2918
-rw-r--r--  drivers/staging/lustre/lnet/klnds/socklnd/socklnd.h | 705
-rw-r--r--  drivers/staging/lustre/lnet/klnds/socklnd/socklnd_cb.c | 2592
-rw-r--r--  drivers/staging/lustre/lnet/klnds/socklnd/socklnd_lib.c | 533
-rw-r--r--  drivers/staging/lustre/lnet/klnds/socklnd/socklnd_modparams.c | 184
-rw-r--r--  drivers/staging/lustre/lnet/klnds/socklnd/socklnd_proto.c | 810
-rw-r--r--  drivers/staging/lustre/lnet/libcfs/Makefile | 19
-rw-r--r--  drivers/staging/lustre/lnet/libcfs/debug.c | 458
-rw-r--r--  drivers/staging/lustre/lnet/libcfs/fail.c | 142
-rw-r--r--  drivers/staging/lustre/lnet/libcfs/hash.c | 2064
-rw-r--r--  drivers/staging/lustre/lnet/libcfs/libcfs_cpu.c | 228
-rw-r--r--  drivers/staging/lustre/lnet/libcfs/libcfs_lock.c | 152
-rw-r--r--  drivers/staging/lustre/lnet/libcfs/libcfs_mem.c | 167
-rw-r--r--  drivers/staging/lustre/lnet/libcfs/libcfs_string.c | 556
-rw-r--r--  drivers/staging/lustre/lnet/libcfs/linux/linux-cpu.c | 1079
-rw-r--r--  drivers/staging/lustre/lnet/libcfs/linux/linux-crypto-adler.c | 139
-rw-r--r--  drivers/staging/lustre/lnet/libcfs/linux/linux-crypto.c | 443
-rw-r--r--  drivers/staging/lustre/lnet/libcfs/linux/linux-crypto.h | 30
-rw-r--r--  drivers/staging/lustre/lnet/libcfs/linux/linux-debug.c | 145
-rw-r--r--  drivers/staging/lustre/lnet/libcfs/linux/linux-module.c | 197
-rw-r--r--  drivers/staging/lustre/lnet/libcfs/linux/linux-tracefile.c | 257
-rw-r--r--  drivers/staging/lustre/lnet/libcfs/module.c | 604
-rw-r--r--  drivers/staging/lustre/lnet/libcfs/tracefile.c | 1191
-rw-r--r--  drivers/staging/lustre/lnet/libcfs/tracefile.h | 263
-rw-r--r--  drivers/staging/lustre/lnet/lnet/Makefile | 10
-rw-r--r--  drivers/staging/lustre/lnet/lnet/acceptor.c | 501
-rw-r--r--  drivers/staging/lustre/lnet/lnet/api-ni.c | 2307
-rw-r--r--  drivers/staging/lustre/lnet/lnet/config.c | 1234
-rw-r--r--  drivers/staging/lustre/lnet/lnet/lib-eq.c | 426
-rw-r--r--  drivers/staging/lustre/lnet/lnet/lib-md.c | 463
-rw-r--r--  drivers/staging/lustre/lnet/lnet/lib-me.c | 274
-rw-r--r--  drivers/staging/lustre/lnet/lnet/lib-move.c | 2388
-rw-r--r--  drivers/staging/lustre/lnet/lnet/lib-msg.c | 625
-rw-r--r--  drivers/staging/lustre/lnet/lnet/lib-ptl.c | 987
-rw-r--r--  drivers/staging/lustre/lnet/lnet/lib-socket.c | 586
-rw-r--r--  drivers/staging/lustre/lnet/lnet/lo.c | 105
-rw-r--r--  drivers/staging/lustre/lnet/lnet/module.c | 223
-rw-r--r--  drivers/staging/lustre/lnet/lnet/net_fault.c | 1023
-rw-r--r--  drivers/staging/lustre/lnet/lnet/nidstrings.c | 1258
-rw-r--r--  drivers/staging/lustre/lnet/lnet/peer.c | 456
-rw-r--r--  drivers/staging/lustre/lnet/lnet/router.c | 1800
-rw-r--r--  drivers/staging/lustre/lnet/lnet/router_proc.c | 909
-rw-r--r--  drivers/staging/lustre/lnet/selftest/Makefile | 7
-rw-r--r--  drivers/staging/lustre/lnet/selftest/brw_test.c | 526
-rw-r--r--  drivers/staging/lustre/lnet/selftest/conctl.c | 799
-rw-r--r--  drivers/staging/lustre/lnet/selftest/conrpc.c | 1397
-rw-r--r--  drivers/staging/lustre/lnet/selftest/conrpc.h | 143
-rw-r--r--  drivers/staging/lustre/lnet/selftest/console.c | 2101
-rw-r--r--  drivers/staging/lustre/lnet/selftest/console.h | 244
-rw-r--r--  drivers/staging/lustre/lnet/selftest/framework.c | 1786
-rw-r--r--  drivers/staging/lustre/lnet/selftest/module.c | 165
-rw-r--r--  drivers/staging/lustre/lnet/selftest/ping_test.c | 228
-rw-r--r--  drivers/staging/lustre/lnet/selftest/rpc.c | 1682
-rw-r--r--  drivers/staging/lustre/lnet/selftest/rpc.h | 295
-rw-r--r--  drivers/staging/lustre/lnet/selftest/selftest.h | 623
-rw-r--r--  drivers/staging/lustre/lnet/selftest/timer.c | 244
-rw-r--r--  drivers/staging/lustre/lnet/selftest/timer.h | 50
66 files changed, 0 insertions, 49828 deletions
diff --git a/drivers/staging/lustre/lnet/Kconfig b/drivers/staging/lustre/lnet/Kconfig
deleted file mode 100644
index ad049e6f24e4..000000000000
--- a/drivers/staging/lustre/lnet/Kconfig
+++ /dev/null
@@ -1,46 +0,0 @@
-config LNET
- tristate "Lustre networking subsystem (LNet)"
- depends on INET
- help
- The Lustre network layer, also known as LNet, is a networking
- abstraction layer API that was initially created to allow the Lustre
- filesystem to utilize very different networks, such as TCP and
- InfiniBand verbs, in a uniform way. In the case of Lustre routers,
- only the LNet layer is required. Other projects are also looking
- into using LNet as their networking API.
-
-config LNET_MAX_PAYLOAD
- int "Lustre lnet max transfer payload (default 1MB)"
- depends on LNET
- default "1048576"
- help
- This option defines the maximum payload size, in bytes, that LNet
- can put into its transport.
-
- If unsure, use the default.
-
-config LNET_SELFTEST
- tristate "Lustre networking self testing"
- depends on LNET
- help
- Choose Y here if you want to do LNet self-testing. To compile this
- as a kernel module, choose M here: the module will be called
- lnet_selftest.
-
- If unsure, say N.
-
- See also http://wiki.lustre.org/
-
-config LNET_XPRT_IB
- tristate "LNET infiniband support"
- depends on LNET && PCI && INFINIBAND && INFINIBAND_ADDR_TRANS
- default LNET && INFINIBAND
- help
- This option allows LNet users to use InfiniBand as an
- RDMA-enabled transport.
-
- To compile this as a kernel module, choose M here and it will be
- called ko2iblnd.
-
- If unsure, say N.
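
With the entries above, a build that wants LNet, the self-test module, and the InfiniBand transport as modules would end up with a .config fragment along these lines (an illustrative excerpt, not part of this patch; the symbols come from the Kconfig entries just shown):

CONFIG_LNET=m
CONFIG_LNET_MAX_PAYLOAD=1048576
CONFIG_LNET_SELFTEST=m
CONFIG_LNET_XPRT_IB=m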
diff --git a/drivers/staging/lustre/lnet/Makefile b/drivers/staging/lustre/lnet/Makefile
deleted file mode 100644
index 0a380fe88ce8..000000000000
--- a/drivers/staging/lustre/lnet/Makefile
+++ /dev/null
@@ -1 +0,0 @@
-obj-$(CONFIG_LNET) += libcfs/ lnet/ klnds/ selftest/
diff --git a/drivers/staging/lustre/lnet/klnds/Makefile b/drivers/staging/lustre/lnet/klnds/Makefile
deleted file mode 100644
index c23e4f67f837..000000000000
--- a/drivers/staging/lustre/lnet/klnds/Makefile
+++ /dev/null
@@ -1 +0,0 @@
-obj-$(CONFIG_LNET) += o2iblnd/ socklnd/
diff --git a/drivers/staging/lustre/lnet/klnds/o2iblnd/Makefile b/drivers/staging/lustre/lnet/klnds/o2iblnd/Makefile
deleted file mode 100644
index 4affe1d79948..000000000000
--- a/drivers/staging/lustre/lnet/klnds/o2iblnd/Makefile
+++ /dev/null
@@ -1,5 +0,0 @@
-subdir-ccflags-y += -I$(srctree)/drivers/staging/lustre/include
-subdir-ccflags-y += -I$(srctree)/drivers/staging/lustre/lustre/include
-
-obj-$(CONFIG_LNET_XPRT_IB) += ko2iblnd.o
-ko2iblnd-y := o2iblnd.o o2iblnd_cb.o o2iblnd_modparams.o
diff --git a/drivers/staging/lustre/lnet/klnds/o2iblnd/o2iblnd.c b/drivers/staging/lustre/lnet/klnds/o2iblnd/o2iblnd.c
deleted file mode 100644
index 7ae2955c4db6..000000000000
--- a/drivers/staging/lustre/lnet/klnds/o2iblnd/o2iblnd.c
+++ /dev/null
@@ -1,2952 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/*
- * GPL HEADER START
- *
- * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 only,
- * as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * General Public License version 2 for more details (a copy is included
- * in the LICENSE file that accompanied this code).
- *
- * You should have received a copy of the GNU General Public License
- * version 2 along with this program; If not, see
- * http://www.gnu.org/licenses/gpl-2.0.html
- *
- * GPL HEADER END
- */
-/*
- * Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved.
- * Use is subject to license terms.
- *
- * Copyright (c) 2011, 2015, Intel Corporation.
- */
-/*
- * This file is part of Lustre, http://www.lustre.org/
- * Lustre is a trademark of Sun Microsystems, Inc.
- *
- * lnet/klnds/o2iblnd/o2iblnd.c
- *
- * Author: Eric Barton <eric@bartonsoftware.com>
- */
-
-#include <asm/div64.h>
-#include <asm/page.h>
-#include "o2iblnd.h"
-
-static struct lnet_lnd the_o2iblnd;
-
-struct kib_data kiblnd_data;
-
-static __u32 kiblnd_cksum(void *ptr, int nob)
-{
- char *c = ptr;
- __u32 sum = 0;
-
- while (nob-- > 0)
- sum = ((sum << 1) | (sum >> 31)) + *c++;
-
- /* ensure I don't return 0 (== no checksum) */
- return !sum ? 1 : sum;
-}
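
kiblnd_cksum() above is a plain rotate-left-and-add checksum over the raw message bytes, with a result of 0 remapped to 1 because 0 is reserved to mean "no checksum". A standalone userspace sketch of the same arithmetic (illustrative only, not part of the driver) is:

#include <stdint.h>
#include <stdio.h>

/* Rotate-left-and-add checksum, as in kiblnd_cksum(); 0 means
 * "no checksum", so a zero result is remapped to 1. */
static uint32_t demo_cksum(const void *ptr, int nob)
{
	const char *c = ptr;
	uint32_t sum = 0;

	while (nob-- > 0)
		sum = ((sum << 1) | (sum >> 31)) + *c++;

	return sum ? sum : 1;
}

int main(void)
{
	const char msg[] = "lnet";

	printf("cksum = %08x\n", demo_cksum(msg, sizeof(msg) - 1));
	return 0;
}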
-
-static char *kiblnd_msgtype2str(int type)
-{
- switch (type) {
- case IBLND_MSG_CONNREQ:
- return "CONNREQ";
-
- case IBLND_MSG_CONNACK:
- return "CONNACK";
-
- case IBLND_MSG_NOOP:
- return "NOOP";
-
- case IBLND_MSG_IMMEDIATE:
- return "IMMEDIATE";
-
- case IBLND_MSG_PUT_REQ:
- return "PUT_REQ";
-
- case IBLND_MSG_PUT_NAK:
- return "PUT_NAK";
-
- case IBLND_MSG_PUT_ACK:
- return "PUT_ACK";
-
- case IBLND_MSG_PUT_DONE:
- return "PUT_DONE";
-
- case IBLND_MSG_GET_REQ:
- return "GET_REQ";
-
- case IBLND_MSG_GET_DONE:
- return "GET_DONE";
-
- default:
- return "???";
- }
-}
-
-static int kiblnd_msgtype2size(int type)
-{
- const int hdr_size = offsetof(struct kib_msg, ibm_u);
-
- switch (type) {
- case IBLND_MSG_CONNREQ:
- case IBLND_MSG_CONNACK:
- return hdr_size + sizeof(struct kib_connparams);
-
- case IBLND_MSG_NOOP:
- return hdr_size;
-
- case IBLND_MSG_IMMEDIATE:
- return offsetof(struct kib_msg, ibm_u.immediate.ibim_payload[0]);
-
- case IBLND_MSG_PUT_REQ:
- return hdr_size + sizeof(struct kib_putreq_msg);
-
- case IBLND_MSG_PUT_ACK:
- return hdr_size + sizeof(struct kib_putack_msg);
-
- case IBLND_MSG_GET_REQ:
- return hdr_size + sizeof(struct kib_get_msg);
-
- case IBLND_MSG_PUT_NAK:
- case IBLND_MSG_PUT_DONE:
- case IBLND_MSG_GET_DONE:
- return hdr_size + sizeof(struct kib_completion_msg);
- default:
- return -1;
- }
-}
-
-static int kiblnd_unpack_rd(struct kib_msg *msg, int flip)
-{
- struct kib_rdma_desc *rd;
- int msg_size;
- int nob;
- int n;
- int i;
-
- LASSERT(msg->ibm_type == IBLND_MSG_GET_REQ ||
- msg->ibm_type == IBLND_MSG_PUT_ACK);
-
- rd = msg->ibm_type == IBLND_MSG_GET_REQ ?
- &msg->ibm_u.get.ibgm_rd :
- &msg->ibm_u.putack.ibpam_rd;
-
- if (flip) {
- __swab32s(&rd->rd_key);
- __swab32s(&rd->rd_nfrags);
- }
-
- n = rd->rd_nfrags;
-
- nob = offsetof(struct kib_msg, ibm_u) +
- kiblnd_rd_msg_size(rd, msg->ibm_type, n);
-
- if (msg->ibm_nob < nob) {
- CERROR("Short %s: %d(%d)\n",
- kiblnd_msgtype2str(msg->ibm_type), msg->ibm_nob, nob);
- return 1;
- }
-
- msg_size = kiblnd_rd_size(rd);
- if (msg_size <= 0 || msg_size > LNET_MAX_PAYLOAD) {
- CERROR("Bad msg_size: %d, should be 0 < n <= %d\n",
- msg_size, LNET_MAX_PAYLOAD);
- return 1;
- }
-
- if (!flip)
- return 0;
-
- for (i = 0; i < n; i++) {
- __swab32s(&rd->rd_frags[i].rf_nob);
- __swab64s(&rd->rd_frags[i].rf_addr);
- }
-
- return 0;
-}
-
-void kiblnd_pack_msg(struct lnet_ni *ni, struct kib_msg *msg, int version,
- int credits, lnet_nid_t dstnid, __u64 dststamp)
-{
- struct kib_net *net = ni->ni_data;
-
- /*
- * CAVEAT EMPTOR! all message fields not set here should have been
- * initialised previously.
- */
- msg->ibm_magic = IBLND_MSG_MAGIC;
- msg->ibm_version = version;
- /* ibm_type */
- msg->ibm_credits = credits;
- /* ibm_nob */
- msg->ibm_cksum = 0;
- msg->ibm_srcnid = ni->ni_nid;
- msg->ibm_srcstamp = net->ibn_incarnation;
- msg->ibm_dstnid = dstnid;
- msg->ibm_dststamp = dststamp;
-
- if (*kiblnd_tunables.kib_cksum) {
- /* NB ibm_cksum zero while computing cksum */
- msg->ibm_cksum = kiblnd_cksum(msg, msg->ibm_nob);
- }
-}
-
-int kiblnd_unpack_msg(struct kib_msg *msg, int nob)
-{
- const int hdr_size = offsetof(struct kib_msg, ibm_u);
- __u32 msg_cksum;
- __u16 version;
- int msg_nob;
- int flip;
-
- /* 6 bytes are enough to have received magic + version */
- if (nob < 6) {
- CERROR("Short message: %d\n", nob);
- return -EPROTO;
- }
-
- if (msg->ibm_magic == IBLND_MSG_MAGIC) {
- flip = 0;
- } else if (msg->ibm_magic == __swab32(IBLND_MSG_MAGIC)) {
- flip = 1;
- } else {
- CERROR("Bad magic: %08x\n", msg->ibm_magic);
- return -EPROTO;
- }
-
- version = flip ? __swab16(msg->ibm_version) : msg->ibm_version;
- if (version != IBLND_MSG_VERSION &&
- version != IBLND_MSG_VERSION_1) {
- CERROR("Bad version: %x\n", version);
- return -EPROTO;
- }
-
- if (nob < hdr_size) {
- CERROR("Short message: %d\n", nob);
- return -EPROTO;
- }
-
- msg_nob = flip ? __swab32(msg->ibm_nob) : msg->ibm_nob;
- if (msg_nob > nob) {
- CERROR("Short message: got %d, wanted %d\n", nob, msg_nob);
- return -EPROTO;
- }
-
- /*
- * checksum must be computed with ibm_cksum zero and BEFORE anything
- * gets flipped
- */
- msg_cksum = flip ? __swab32(msg->ibm_cksum) : msg->ibm_cksum;
- msg->ibm_cksum = 0;
- if (msg_cksum &&
- msg_cksum != kiblnd_cksum(msg, msg_nob)) {
- CERROR("Bad checksum\n");
- return -EPROTO;
- }
-
- msg->ibm_cksum = msg_cksum;
-
- if (flip) {
- /* leave magic unflipped as a clue to peer endianness */
- msg->ibm_version = version;
- BUILD_BUG_ON(sizeof(msg->ibm_type) != 1);
- BUILD_BUG_ON(sizeof(msg->ibm_credits) != 1);
- msg->ibm_nob = msg_nob;
- __swab64s(&msg->ibm_srcnid);
- __swab64s(&msg->ibm_srcstamp);
- __swab64s(&msg->ibm_dstnid);
- __swab64s(&msg->ibm_dststamp);
- }
-
- if (msg->ibm_srcnid == LNET_NID_ANY) {
- CERROR("Bad src nid: %s\n", libcfs_nid2str(msg->ibm_srcnid));
- return -EPROTO;
- }
-
- if (msg_nob < kiblnd_msgtype2size(msg->ibm_type)) {
- CERROR("Short %s: %d(%d)\n", kiblnd_msgtype2str(msg->ibm_type),
- msg_nob, kiblnd_msgtype2size(msg->ibm_type));
- return -EPROTO;
- }
-
- switch (msg->ibm_type) {
- default:
- CERROR("Unknown message type %x\n", msg->ibm_type);
- return -EPROTO;
-
- case IBLND_MSG_NOOP:
- case IBLND_MSG_IMMEDIATE:
- case IBLND_MSG_PUT_REQ:
- break;
-
- case IBLND_MSG_PUT_ACK:
- case IBLND_MSG_GET_REQ:
- if (kiblnd_unpack_rd(msg, flip))
- return -EPROTO;
- break;
-
- case IBLND_MSG_PUT_NAK:
- case IBLND_MSG_PUT_DONE:
- case IBLND_MSG_GET_DONE:
- if (flip)
- __swab32s(&msg->ibm_u.completion.ibcm_status);
- break;
-
- case IBLND_MSG_CONNREQ:
- case IBLND_MSG_CONNACK:
- if (flip) {
- __swab16s(&msg->ibm_u.connparams.ibcp_queue_depth);
- __swab16s(&msg->ibm_u.connparams.ibcp_max_frags);
- __swab32s(&msg->ibm_u.connparams.ibcp_max_msg_size);
- }
- break;
- }
- return 0;
-}
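
kiblnd_unpack_msg() deliberately leaves ibm_magic unflipped "as a clue to peer endianness": a receiver that sees the byte-swapped magic knows the peer has the opposite byte order and that every multi-byte header field must be swapped. The standalone sketch below isolates that handshake; the constant and names are illustrative, not the driver's:

#include <stdbool.h>
#include <stdint.h>

#define DEMO_MAGIC 0x11223344u	/* placeholder, not IBLND_MSG_MAGIC */

static uint32_t demo_swab32(uint32_t v)
{
	return (v >> 24) | ((v >> 8) & 0x0000ff00u) |
	       ((v << 8) & 0x00ff0000u) | (v << 24);
}

/* Decide whether an incoming header needs byte swapping.
 * Returns 0 on success and sets *flip; -1 if the magic is unknown. */
static int demo_check_magic(uint32_t magic, bool *flip)
{
	if (magic == DEMO_MAGIC) {
		*flip = false;		/* peer has our byte order */
		return 0;
	}
	if (magic == demo_swab32(DEMO_MAGIC)) {
		*flip = true;		/* swap every multi-byte field */
		return 0;
	}
	return -1;			/* not a protocol message */
}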
-
-int kiblnd_create_peer(struct lnet_ni *ni, struct kib_peer **peerp,
- lnet_nid_t nid)
-{
- struct kib_peer *peer;
- struct kib_net *net = ni->ni_data;
- int cpt = lnet_cpt_of_nid(nid);
- unsigned long flags;
-
- LASSERT(net);
- LASSERT(nid != LNET_NID_ANY);
-
- peer = kzalloc_cpt(sizeof(*peer), GFP_NOFS, cpt);
- if (!peer) {
- CERROR("Cannot allocate peer\n");
- return -ENOMEM;
- }
-
- peer->ibp_ni = ni;
- peer->ibp_nid = nid;
- peer->ibp_error = 0;
- peer->ibp_last_alive = 0;
- peer->ibp_max_frags = kiblnd_cfg_rdma_frags(peer->ibp_ni);
- peer->ibp_queue_depth = ni->ni_peertxcredits;
- atomic_set(&peer->ibp_refcount, 1); /* 1 ref for caller */
-
- INIT_LIST_HEAD(&peer->ibp_list); /* not in the peer table yet */
- INIT_LIST_HEAD(&peer->ibp_conns);
- INIT_LIST_HEAD(&peer->ibp_tx_queue);
-
- write_lock_irqsave(&kiblnd_data.kib_global_lock, flags);
-
- /* always called with a ref on ni, which prevents ni being shutdown */
- LASSERT(!net->ibn_shutdown);
-
- /* npeers only grows with the global lock held */
- atomic_inc(&net->ibn_npeers);
-
- write_unlock_irqrestore(&kiblnd_data.kib_global_lock, flags);
-
- *peerp = peer;
- return 0;
-}
-
-void kiblnd_destroy_peer(struct kib_peer *peer)
-{
- struct kib_net *net = peer->ibp_ni->ni_data;
-
- LASSERT(net);
- LASSERT(!atomic_read(&peer->ibp_refcount));
- LASSERT(!kiblnd_peer_active(peer));
- LASSERT(kiblnd_peer_idle(peer));
- LASSERT(list_empty(&peer->ibp_tx_queue));
-
- kfree(peer);
-
- /*
- * NB a peer's connections keep a reference on their peer until
- * they are destroyed, so we can be assured that _all_ state to do
- * with this peer has been cleaned up when its refcount drops to
- * zero.
- */
- atomic_dec(&net->ibn_npeers);
-}
-
-struct kib_peer *kiblnd_find_peer_locked(lnet_nid_t nid)
-{
- /*
- * the caller is responsible for accounting the additional reference
- * that this creates
- */
- struct list_head *peer_list = kiblnd_nid2peerlist(nid);
- struct list_head *tmp;
- struct kib_peer *peer;
-
- list_for_each(tmp, peer_list) {
- peer = list_entry(tmp, struct kib_peer, ibp_list);
- LASSERT(!kiblnd_peer_idle(peer));
-
- if (peer->ibp_nid != nid)
- continue;
-
- CDEBUG(D_NET, "got peer [%p] -> %s (%d) version: %x\n",
- peer, libcfs_nid2str(nid),
- atomic_read(&peer->ibp_refcount),
- peer->ibp_version);
- return peer;
- }
- return NULL;
-}
-
-void kiblnd_unlink_peer_locked(struct kib_peer *peer)
-{
- LASSERT(list_empty(&peer->ibp_conns));
-
- LASSERT(kiblnd_peer_active(peer));
- list_del_init(&peer->ibp_list);
- /* lose peerlist's ref */
- kiblnd_peer_decref(peer);
-}
-
-static int kiblnd_get_peer_info(struct lnet_ni *ni, int index,
- lnet_nid_t *nidp, int *count)
-{
- struct kib_peer *peer;
- struct list_head *ptmp;
- int i;
- unsigned long flags;
-
- read_lock_irqsave(&kiblnd_data.kib_global_lock, flags);
-
- for (i = 0; i < kiblnd_data.kib_peer_hash_size; i++) {
- list_for_each(ptmp, &kiblnd_data.kib_peers[i]) {
- peer = list_entry(ptmp, struct kib_peer, ibp_list);
- LASSERT(!kiblnd_peer_idle(peer));
-
- if (peer->ibp_ni != ni)
- continue;
-
- if (index-- > 0)
- continue;
-
- *nidp = peer->ibp_nid;
- *count = atomic_read(&peer->ibp_refcount);
-
- read_unlock_irqrestore(&kiblnd_data.kib_global_lock,
- flags);
- return 0;
- }
- }
-
- read_unlock_irqrestore(&kiblnd_data.kib_global_lock, flags);
- return -ENOENT;
-}
-
-static void kiblnd_del_peer_locked(struct kib_peer *peer)
-{
- struct list_head *ctmp;
- struct list_head *cnxt;
- struct kib_conn *conn;
-
- if (list_empty(&peer->ibp_conns)) {
- kiblnd_unlink_peer_locked(peer);
- } else {
- list_for_each_safe(ctmp, cnxt, &peer->ibp_conns) {
- conn = list_entry(ctmp, struct kib_conn, ibc_list);
-
- kiblnd_close_conn_locked(conn, 0);
- }
- /* NB closing peer's last conn unlinked it. */
- }
- /*
- * NB peer now unlinked; might even be freed if the peer table had the
- * last ref on it.
- */
-}
-
-static int kiblnd_del_peer(struct lnet_ni *ni, lnet_nid_t nid)
-{
- LIST_HEAD(zombies);
- struct list_head *ptmp;
- struct list_head *pnxt;
- struct kib_peer *peer;
- int lo;
- int hi;
- int i;
- unsigned long flags;
- int rc = -ENOENT;
-
- write_lock_irqsave(&kiblnd_data.kib_global_lock, flags);
-
- if (nid != LNET_NID_ANY) {
- lo = kiblnd_nid2peerlist(nid) - kiblnd_data.kib_peers;
- hi = kiblnd_nid2peerlist(nid) - kiblnd_data.kib_peers;
- } else {
- lo = 0;
- hi = kiblnd_data.kib_peer_hash_size - 1;
- }
-
- for (i = lo; i <= hi; i++) {
- list_for_each_safe(ptmp, pnxt, &kiblnd_data.kib_peers[i]) {
- peer = list_entry(ptmp, struct kib_peer, ibp_list);
- LASSERT(!kiblnd_peer_idle(peer));
-
- if (peer->ibp_ni != ni)
- continue;
-
- if (!(nid == LNET_NID_ANY || peer->ibp_nid == nid))
- continue;
-
- if (!list_empty(&peer->ibp_tx_queue)) {
- LASSERT(list_empty(&peer->ibp_conns));
-
- list_splice_init(&peer->ibp_tx_queue,
- &zombies);
- }
-
- kiblnd_del_peer_locked(peer);
- rc = 0; /* matched something */
- }
- }
-
- write_unlock_irqrestore(&kiblnd_data.kib_global_lock, flags);
-
- kiblnd_txlist_done(ni, &zombies, -EIO);
-
- return rc;
-}
-
-static struct kib_conn *kiblnd_get_conn_by_idx(struct lnet_ni *ni, int index)
-{
- struct kib_peer *peer;
- struct list_head *ptmp;
- struct kib_conn *conn;
- struct list_head *ctmp;
- int i;
- unsigned long flags;
-
- read_lock_irqsave(&kiblnd_data.kib_global_lock, flags);
-
- for (i = 0; i < kiblnd_data.kib_peer_hash_size; i++) {
- list_for_each(ptmp, &kiblnd_data.kib_peers[i]) {
- peer = list_entry(ptmp, struct kib_peer, ibp_list);
- LASSERT(!kiblnd_peer_idle(peer));
-
- if (peer->ibp_ni != ni)
- continue;
-
- list_for_each(ctmp, &peer->ibp_conns) {
- if (index-- > 0)
- continue;
-
- conn = list_entry(ctmp, struct kib_conn,
- ibc_list);
- kiblnd_conn_addref(conn);
- read_unlock_irqrestore(
- &kiblnd_data.kib_global_lock,
- flags);
- return conn;
- }
- }
- }
-
- read_unlock_irqrestore(&kiblnd_data.kib_global_lock, flags);
- return NULL;
-}
-
-int kiblnd_translate_mtu(int value)
-{
- switch (value) {
- default:
- return -1;
- case 0:
- return 0;
- case 256:
- return IB_MTU_256;
- case 512:
- return IB_MTU_512;
- case 1024:
- return IB_MTU_1024;
- case 2048:
- return IB_MTU_2048;
- case 4096:
- return IB_MTU_4096;
- }
-}
-
-static void kiblnd_setup_mtu_locked(struct rdma_cm_id *cmid)
-{
- int mtu;
-
- /* XXX There is no path record for iWARP, set by netdev->change_mtu? */
- if (!cmid->route.path_rec)
- return;
-
- mtu = kiblnd_translate_mtu(*kiblnd_tunables.kib_ib_mtu);
- LASSERT(mtu >= 0);
- if (mtu)
- cmid->route.path_rec->mtu = mtu;
-}
-
-static int kiblnd_get_completion_vector(struct kib_conn *conn, int cpt)
-{
- cpumask_var_t *mask;
- int vectors;
- int off;
- int i;
- lnet_nid_t nid = conn->ibc_peer->ibp_nid;
-
- vectors = conn->ibc_cmid->device->num_comp_vectors;
- if (vectors <= 1)
- return 0;
-
- mask = cfs_cpt_cpumask(lnet_cpt_table(), cpt);
- if (!mask)
- return 0;
-
- /* hash NID to CPU id in this partition... */
- off = do_div(nid, cpumask_weight(*mask));
- for_each_cpu(i, *mask) {
- if (!off--)
- return i % vectors;
- }
-
- LBUG();
- return 1;
-}
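
kiblnd_get_completion_vector() spreads connections across the device's completion vectors by hashing the peer NID onto the CPUs of the connection's partition, so CQ interrupts from different peers tend to land on different cores. A minimal userspace sketch of the spreading idea (hypothetical names; plain modulo in place of the kernel's do_div() and cpumask walk) is:

#include <stdint.h>

/* Map a 64-bit peer id onto one of nvectors completion vectors,
 * spreading peers across the ncpus CPUs of a partition first. */
static int demo_pick_vector(uint64_t nid, int ncpus, int nvectors)
{
	if (nvectors <= 1 || ncpus <= 0)
		return 0;

	return (int)((nid % (uint64_t)ncpus) % (uint64_t)nvectors);
}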
-
-struct kib_conn *kiblnd_create_conn(struct kib_peer *peer, struct rdma_cm_id *cmid,
- int state, int version)
-{
- /*
- * CAVEAT EMPTOR:
- * If the new conn is created successfully it takes over the caller's
- * ref on 'peer'. It also "owns" 'cmid' and destroys it when it itself
- * is destroyed. On failure, the caller's ref on 'peer' remains and
- * she must dispose of 'cmid'. (Actually I'd block forever if I tried
- * to destroy 'cmid' here since I'm called from the CM which still has
- * its ref on 'cmid').
- */
- rwlock_t *glock = &kiblnd_data.kib_global_lock;
- struct kib_net *net = peer->ibp_ni->ni_data;
- struct kib_dev *dev;
- struct ib_qp_init_attr *init_qp_attr;
- struct kib_sched_info *sched;
- struct ib_cq_init_attr cq_attr = {};
- struct kib_conn *conn;
- struct ib_cq *cq;
- unsigned long flags;
- int cpt;
- int rc;
- int i;
-
- LASSERT(net);
- LASSERT(!in_interrupt());
-
- dev = net->ibn_dev;
-
- cpt = lnet_cpt_of_nid(peer->ibp_nid);
- sched = kiblnd_data.kib_scheds[cpt];
-
- LASSERT(sched->ibs_nthreads > 0);
-
- init_qp_attr = kzalloc_cpt(sizeof(*init_qp_attr), GFP_NOFS, cpt);
- if (!init_qp_attr) {
- CERROR("Can't allocate qp_attr for %s\n",
- libcfs_nid2str(peer->ibp_nid));
- goto failed_0;
- }
-
- conn = kzalloc_cpt(sizeof(*conn), GFP_NOFS, cpt);
- if (!conn) {
- CERROR("Can't allocate connection for %s\n",
- libcfs_nid2str(peer->ibp_nid));
- goto failed_1;
- }
-
- conn->ibc_state = IBLND_CONN_INIT;
- conn->ibc_version = version;
- conn->ibc_peer = peer; /* I take the caller's ref */
- cmid->context = conn; /* for future CM callbacks */
- conn->ibc_cmid = cmid;
- conn->ibc_max_frags = peer->ibp_max_frags;
- conn->ibc_queue_depth = peer->ibp_queue_depth;
-
- INIT_LIST_HEAD(&conn->ibc_early_rxs);
- INIT_LIST_HEAD(&conn->ibc_tx_noops);
- INIT_LIST_HEAD(&conn->ibc_tx_queue);
- INIT_LIST_HEAD(&conn->ibc_tx_queue_rsrvd);
- INIT_LIST_HEAD(&conn->ibc_tx_queue_nocred);
- INIT_LIST_HEAD(&conn->ibc_active_txs);
- spin_lock_init(&conn->ibc_lock);
-
- conn->ibc_connvars = kzalloc_cpt(sizeof(*conn->ibc_connvars), GFP_NOFS, cpt);
- if (!conn->ibc_connvars) {
- CERROR("Can't allocate in-progress connection state\n");
- goto failed_2;
- }
-
- write_lock_irqsave(glock, flags);
- if (dev->ibd_failover) {
- write_unlock_irqrestore(glock, flags);
- CERROR("%s: failover in progress\n", dev->ibd_ifname);
- goto failed_2;
- }
-
- if (dev->ibd_hdev->ibh_ibdev != cmid->device) {
- /* wakeup failover thread and teardown connection */
- if (kiblnd_dev_can_failover(dev)) {
- list_add_tail(&dev->ibd_fail_list,
- &kiblnd_data.kib_failed_devs);
- wake_up(&kiblnd_data.kib_failover_waitq);
- }
-
- write_unlock_irqrestore(glock, flags);
- CERROR("cmid HCA(%s), kib_dev(%s) need failover\n",
- cmid->device->name, dev->ibd_ifname);
- goto failed_2;
- }
-
- kiblnd_hdev_addref_locked(dev->ibd_hdev);
- conn->ibc_hdev = dev->ibd_hdev;
-
- kiblnd_setup_mtu_locked(cmid);
-
- write_unlock_irqrestore(glock, flags);
-
- conn->ibc_rxs = kzalloc_cpt(IBLND_RX_MSGS(conn) * sizeof(struct kib_rx),
- GFP_NOFS, cpt);
- if (!conn->ibc_rxs) {
- CERROR("Cannot allocate RX buffers\n");
- goto failed_2;
- }
-
- rc = kiblnd_alloc_pages(&conn->ibc_rx_pages, cpt,
- IBLND_RX_MSG_PAGES(conn));
- if (rc)
- goto failed_2;
-
- kiblnd_map_rx_descs(conn);
-
- cq_attr.cqe = IBLND_CQ_ENTRIES(conn);
- cq_attr.comp_vector = kiblnd_get_completion_vector(conn, cpt);
- cq = ib_create_cq(cmid->device,
- kiblnd_cq_completion, kiblnd_cq_event, conn,
- &cq_attr);
- if (IS_ERR(cq)) {
- CERROR("Failed to create CQ with %d CQEs: %ld\n",
- IBLND_CQ_ENTRIES(conn), PTR_ERR(cq));
- goto failed_2;
- }
-
- conn->ibc_cq = cq;
-
- rc = ib_req_notify_cq(cq, IB_CQ_NEXT_COMP);
- if (rc) {
- CERROR("Can't request completion notification: %d\n", rc);
- goto failed_2;
- }
-
- init_qp_attr->event_handler = kiblnd_qp_event;
- init_qp_attr->qp_context = conn;
- init_qp_attr->cap.max_send_wr = IBLND_SEND_WRS(conn);
- init_qp_attr->cap.max_recv_wr = IBLND_RECV_WRS(conn);
- init_qp_attr->cap.max_send_sge = 1;
- init_qp_attr->cap.max_recv_sge = 1;
- init_qp_attr->sq_sig_type = IB_SIGNAL_REQ_WR;
- init_qp_attr->qp_type = IB_QPT_RC;
- init_qp_attr->send_cq = cq;
- init_qp_attr->recv_cq = cq;
-
- conn->ibc_sched = sched;
-
- rc = rdma_create_qp(cmid, conn->ibc_hdev->ibh_pd, init_qp_attr);
- if (rc) {
- CERROR("Can't create QP: %d, send_wr: %d, recv_wr: %d\n",
- rc, init_qp_attr->cap.max_send_wr,
- init_qp_attr->cap.max_recv_wr);
- goto failed_2;
- }
-
- kfree(init_qp_attr);
-
- /* 1 ref for caller and each rxmsg */
- atomic_set(&conn->ibc_refcount, 1 + IBLND_RX_MSGS(conn));
- conn->ibc_nrx = IBLND_RX_MSGS(conn);
-
- /* post receives */
- for (i = 0; i < IBLND_RX_MSGS(conn); i++) {
- rc = kiblnd_post_rx(&conn->ibc_rxs[i],
- IBLND_POSTRX_NO_CREDIT);
- if (rc) {
- CERROR("Can't post rxmsg: %d\n", rc);
-
- /* Make posted receives complete */
- kiblnd_abort_receives(conn);
-
- /*
- * correct # of posted buffers
- * NB locking needed now I'm racing with completion
- */
- spin_lock_irqsave(&sched->ibs_lock, flags);
- conn->ibc_nrx -= IBLND_RX_MSGS(conn) - i;
- spin_unlock_irqrestore(&sched->ibs_lock, flags);
-
- /*
- * cmid will be destroyed by the CM (OFED) after cm_callback
- * returns, so we can't refer to it anymore
- * (in kiblnd_connd()->kiblnd_destroy_conn())
- */
- rdma_destroy_qp(conn->ibc_cmid);
- conn->ibc_cmid = NULL;
-
- /* Drop my own and unused rxbuffer refcounts */
- while (i++ <= IBLND_RX_MSGS(conn))
- kiblnd_conn_decref(conn);
-
- return NULL;
- }
- }
-
- /* Init successful! */
- LASSERT(state == IBLND_CONN_ACTIVE_CONNECT ||
- state == IBLND_CONN_PASSIVE_WAIT);
- conn->ibc_state = state;
-
- /* 1 more conn */
- atomic_inc(&net->ibn_nconns);
- return conn;
-
- failed_2:
- kiblnd_destroy_conn(conn);
- kfree(conn);
- failed_1:
- kfree(init_qp_attr);
- failed_0:
- return NULL;
-}
-
-void kiblnd_destroy_conn(struct kib_conn *conn)
-{
- struct rdma_cm_id *cmid = conn->ibc_cmid;
- struct kib_peer *peer = conn->ibc_peer;
- int rc;
-
- LASSERT(!in_interrupt());
- LASSERT(!atomic_read(&conn->ibc_refcount));
- LASSERT(list_empty(&conn->ibc_early_rxs));
- LASSERT(list_empty(&conn->ibc_tx_noops));
- LASSERT(list_empty(&conn->ibc_tx_queue));
- LASSERT(list_empty(&conn->ibc_tx_queue_rsrvd));
- LASSERT(list_empty(&conn->ibc_tx_queue_nocred));
- LASSERT(list_empty(&conn->ibc_active_txs));
- LASSERT(!conn->ibc_noops_posted);
- LASSERT(!conn->ibc_nsends_posted);
-
- switch (conn->ibc_state) {
- default:
- /* conn must be completely disengaged from the network */
- LBUG();
-
- case IBLND_CONN_DISCONNECTED:
- /* connvars should have been freed already */
- LASSERT(!conn->ibc_connvars);
- break;
-
- case IBLND_CONN_INIT:
- break;
- }
-
- /* conn->ibc_cmid might be destroyed by CM already */
- if (cmid && cmid->qp)
- rdma_destroy_qp(cmid);
-
- if (conn->ibc_cq) {
- rc = ib_destroy_cq(conn->ibc_cq);
- if (rc)
- CWARN("Error destroying CQ: %d\n", rc);
- }
-
- if (conn->ibc_rx_pages)
- kiblnd_unmap_rx_descs(conn);
-
- kfree(conn->ibc_rxs);
- kfree(conn->ibc_connvars);
-
- if (conn->ibc_hdev)
- kiblnd_hdev_decref(conn->ibc_hdev);
-
- /* See CAVEAT EMPTOR above in kiblnd_create_conn */
- if (conn->ibc_state != IBLND_CONN_INIT) {
- struct kib_net *net = peer->ibp_ni->ni_data;
-
- kiblnd_peer_decref(peer);
- rdma_destroy_id(cmid);
- atomic_dec(&net->ibn_nconns);
- }
-}
-
-int kiblnd_close_peer_conns_locked(struct kib_peer *peer, int why)
-{
- struct kib_conn *conn;
- struct list_head *ctmp;
- struct list_head *cnxt;
- int count = 0;
-
- list_for_each_safe(ctmp, cnxt, &peer->ibp_conns) {
- conn = list_entry(ctmp, struct kib_conn, ibc_list);
-
- CDEBUG(D_NET, "Closing conn -> %s, version: %x, reason: %d\n",
- libcfs_nid2str(peer->ibp_nid),
- conn->ibc_version, why);
-
- kiblnd_close_conn_locked(conn, why);
- count++;
- }
-
- return count;
-}
-
-int kiblnd_close_stale_conns_locked(struct kib_peer *peer,
- int version, __u64 incarnation)
-{
- struct kib_conn *conn;
- struct list_head *ctmp;
- struct list_head *cnxt;
- int count = 0;
-
- list_for_each_safe(ctmp, cnxt, &peer->ibp_conns) {
- conn = list_entry(ctmp, struct kib_conn, ibc_list);
-
- if (conn->ibc_version == version &&
- conn->ibc_incarnation == incarnation)
- continue;
-
- CDEBUG(D_NET,
- "Closing stale conn -> %s version: %x, incarnation:%#llx(%x, %#llx)\n",
- libcfs_nid2str(peer->ibp_nid),
- conn->ibc_version, conn->ibc_incarnation,
- version, incarnation);
-
- kiblnd_close_conn_locked(conn, -ESTALE);
- count++;
- }
-
- return count;
-}
-
-static int kiblnd_close_matching_conns(struct lnet_ni *ni, lnet_nid_t nid)
-{
- struct kib_peer *peer;
- struct list_head *ptmp;
- struct list_head *pnxt;
- int lo;
- int hi;
- int i;
- unsigned long flags;
- int count = 0;
-
- write_lock_irqsave(&kiblnd_data.kib_global_lock, flags);
-
- if (nid != LNET_NID_ANY) {
- lo = kiblnd_nid2peerlist(nid) - kiblnd_data.kib_peers;
- hi = kiblnd_nid2peerlist(nid) - kiblnd_data.kib_peers;
- } else {
- lo = 0;
- hi = kiblnd_data.kib_peer_hash_size - 1;
- }
-
- for (i = lo; i <= hi; i++) {
- list_for_each_safe(ptmp, pnxt, &kiblnd_data.kib_peers[i]) {
- peer = list_entry(ptmp, struct kib_peer, ibp_list);
- LASSERT(!kiblnd_peer_idle(peer));
-
- if (peer->ibp_ni != ni)
- continue;
-
- if (!(nid == LNET_NID_ANY || nid == peer->ibp_nid))
- continue;
-
- count += kiblnd_close_peer_conns_locked(peer, 0);
- }
- }
-
- write_unlock_irqrestore(&kiblnd_data.kib_global_lock, flags);
-
- /* wildcards always succeed */
- if (nid == LNET_NID_ANY)
- return 0;
-
- return !count ? -ENOENT : 0;
-}
-
-static int kiblnd_ctl(struct lnet_ni *ni, unsigned int cmd, void *arg)
-{
- struct libcfs_ioctl_data *data = arg;
- int rc = -EINVAL;
-
- switch (cmd) {
- case IOC_LIBCFS_GET_PEER: {
- lnet_nid_t nid = 0;
- int count = 0;
-
- rc = kiblnd_get_peer_info(ni, data->ioc_count,
- &nid, &count);
- data->ioc_nid = nid;
- data->ioc_count = count;
- break;
- }
-
- case IOC_LIBCFS_DEL_PEER: {
- rc = kiblnd_del_peer(ni, data->ioc_nid);
- break;
- }
- case IOC_LIBCFS_GET_CONN: {
- struct kib_conn *conn;
-
- rc = 0;
- conn = kiblnd_get_conn_by_idx(ni, data->ioc_count);
- if (!conn) {
- rc = -ENOENT;
- break;
- }
-
- LASSERT(conn->ibc_cmid);
- data->ioc_nid = conn->ibc_peer->ibp_nid;
- if (!conn->ibc_cmid->route.path_rec)
- data->ioc_u32[0] = 0; /* iWarp has no path MTU */
- else
- data->ioc_u32[0] =
- ib_mtu_enum_to_int(conn->ibc_cmid->route.path_rec->mtu);
- kiblnd_conn_decref(conn);
- break;
- }
- case IOC_LIBCFS_CLOSE_CONNECTION: {
- rc = kiblnd_close_matching_conns(ni, data->ioc_nid);
- break;
- }
-
- default:
- break;
- }
-
- return rc;
-}
-
-static void kiblnd_query(struct lnet_ni *ni, lnet_nid_t nid,
- unsigned long *when)
-{
- unsigned long last_alive = 0;
- unsigned long now = cfs_time_current();
- rwlock_t *glock = &kiblnd_data.kib_global_lock;
- struct kib_peer *peer;
- unsigned long flags;
-
- read_lock_irqsave(glock, flags);
-
- peer = kiblnd_find_peer_locked(nid);
- if (peer)
- last_alive = peer->ibp_last_alive;
-
- read_unlock_irqrestore(glock, flags);
-
- if (last_alive)
- *when = last_alive;
-
- /*
- * peer is not persistent in hash, trigger peer creation
- * and connection establishment with a NULL tx
- */
- if (!peer)
- kiblnd_launch_tx(ni, NULL, nid);
-
- CDEBUG(D_NET, "Peer %s %p, alive %ld secs ago\n",
- libcfs_nid2str(nid), peer,
- last_alive ? cfs_duration_sec(now - last_alive) : -1);
-}
-
-static void kiblnd_free_pages(struct kib_pages *p)
-{
- int npages = p->ibp_npages;
- int i;
-
- for (i = 0; i < npages; i++) {
- if (p->ibp_pages[i])
- __free_page(p->ibp_pages[i]);
- }
-
- kfree(p);
-}
-
-int kiblnd_alloc_pages(struct kib_pages **pp, int cpt, int npages)
-{
- struct kib_pages *p;
- int i;
-
- p = kzalloc_cpt(offsetof(struct kib_pages, ibp_pages[npages]),
- GFP_NOFS, cpt);
- if (!p) {
- CERROR("Can't allocate descriptor for %d pages\n", npages);
- return -ENOMEM;
- }
-
- p->ibp_npages = npages;
-
- for (i = 0; i < npages; i++) {
- p->ibp_pages[i] = alloc_pages_node(
- cfs_cpt_spread_node(lnet_cpt_table(), cpt),
- GFP_NOFS, 0);
- if (!p->ibp_pages[i]) {
- CERROR("Can't allocate page %d of %d\n", i, npages);
- kiblnd_free_pages(p);
- return -ENOMEM;
- }
- }
-
- *pp = p;
- return 0;
-}
-
-void kiblnd_unmap_rx_descs(struct kib_conn *conn)
-{
- struct kib_rx *rx;
- int i;
-
- LASSERT(conn->ibc_rxs);
- LASSERT(conn->ibc_hdev);
-
- for (i = 0; i < IBLND_RX_MSGS(conn); i++) {
- rx = &conn->ibc_rxs[i];
-
- LASSERT(rx->rx_nob >= 0); /* not posted */
-
- kiblnd_dma_unmap_single(conn->ibc_hdev->ibh_ibdev,
- KIBLND_UNMAP_ADDR(rx, rx_msgunmap,
- rx->rx_msgaddr),
- IBLND_MSG_SIZE, DMA_FROM_DEVICE);
- }
-
- kiblnd_free_pages(conn->ibc_rx_pages);
-
- conn->ibc_rx_pages = NULL;
-}
-
-void kiblnd_map_rx_descs(struct kib_conn *conn)
-{
- struct kib_rx *rx;
- struct page *pg;
- int pg_off;
- int ipg;
- int i;
-
- for (pg_off = ipg = i = 0; i < IBLND_RX_MSGS(conn); i++) {
- pg = conn->ibc_rx_pages->ibp_pages[ipg];
- rx = &conn->ibc_rxs[i];
-
- rx->rx_conn = conn;
- rx->rx_msg = (struct kib_msg *)(((char *)page_address(pg)) + pg_off);
-
- rx->rx_msgaddr = kiblnd_dma_map_single(conn->ibc_hdev->ibh_ibdev,
- rx->rx_msg,
- IBLND_MSG_SIZE,
- DMA_FROM_DEVICE);
- LASSERT(!kiblnd_dma_mapping_error(conn->ibc_hdev->ibh_ibdev,
- rx->rx_msgaddr));
- KIBLND_UNMAP_ADDR_SET(rx, rx_msgunmap, rx->rx_msgaddr);
-
- CDEBUG(D_NET, "rx %d: %p %#llx(%#llx)\n",
- i, rx->rx_msg, rx->rx_msgaddr,
- (__u64)(page_to_phys(pg) + pg_off));
-
- pg_off += IBLND_MSG_SIZE;
- LASSERT(pg_off <= PAGE_SIZE);
-
- if (pg_off == PAGE_SIZE) {
- pg_off = 0;
- ipg++;
- LASSERT(ipg <= IBLND_RX_MSG_PAGES(conn));
- }
- }
-}
-
-static void kiblnd_unmap_tx_pool(struct kib_tx_pool *tpo)
-{
- struct kib_hca_dev *hdev = tpo->tpo_hdev;
- struct kib_tx *tx;
- int i;
-
- LASSERT(!tpo->tpo_pool.po_allocated);
-
- if (!hdev)
- return;
-
- for (i = 0; i < tpo->tpo_pool.po_size; i++) {
- tx = &tpo->tpo_tx_descs[i];
- kiblnd_dma_unmap_single(hdev->ibh_ibdev,
- KIBLND_UNMAP_ADDR(tx, tx_msgunmap,
- tx->tx_msgaddr),
- IBLND_MSG_SIZE, DMA_TO_DEVICE);
- }
-
- kiblnd_hdev_decref(hdev);
- tpo->tpo_hdev = NULL;
-}
-
-static struct kib_hca_dev *kiblnd_current_hdev(struct kib_dev *dev)
-{
- struct kib_hca_dev *hdev;
- unsigned long flags;
- int i = 0;
-
- read_lock_irqsave(&kiblnd_data.kib_global_lock, flags);
- while (dev->ibd_failover) {
- read_unlock_irqrestore(&kiblnd_data.kib_global_lock, flags);
- if (!(i++ % 50))
- CDEBUG(D_NET, "%s: Wait for failover\n",
- dev->ibd_ifname);
- set_current_state(TASK_INTERRUPTIBLE);
- schedule_timeout(HZ / 100);
-
- read_lock_irqsave(&kiblnd_data.kib_global_lock, flags);
- }
-
- kiblnd_hdev_addref_locked(dev->ibd_hdev);
- hdev = dev->ibd_hdev;
-
- read_unlock_irqrestore(&kiblnd_data.kib_global_lock, flags);
-
- return hdev;
-}
-
-static void kiblnd_map_tx_pool(struct kib_tx_pool *tpo)
-{
- struct kib_pages *txpgs = tpo->tpo_tx_pages;
- struct kib_pool *pool = &tpo->tpo_pool;
- struct kib_net *net = pool->po_owner->ps_net;
- struct kib_dev *dev;
- struct page *page;
- struct kib_tx *tx;
- int page_offset;
- int ipage;
- int i;
-
- LASSERT(net);
-
- dev = net->ibn_dev;
-
- /* pre-mapped messages are not bigger than 1 page */
- BUILD_BUG_ON(IBLND_MSG_SIZE > PAGE_SIZE);
-
- /* No fancy arithmetic when we do the buffer calculations */
- BUILD_BUG_ON(PAGE_SIZE % IBLND_MSG_SIZE);
-
- tpo->tpo_hdev = kiblnd_current_hdev(dev);
-
- for (ipage = page_offset = i = 0; i < pool->po_size; i++) {
- page = txpgs->ibp_pages[ipage];
- tx = &tpo->tpo_tx_descs[i];
-
- tx->tx_msg = (struct kib_msg *)(((char *)page_address(page)) +
- page_offset);
-
- tx->tx_msgaddr = kiblnd_dma_map_single(
- tpo->tpo_hdev->ibh_ibdev, tx->tx_msg,
- IBLND_MSG_SIZE, DMA_TO_DEVICE);
- LASSERT(!kiblnd_dma_mapping_error(tpo->tpo_hdev->ibh_ibdev,
- tx->tx_msgaddr));
- KIBLND_UNMAP_ADDR_SET(tx, tx_msgunmap, tx->tx_msgaddr);
-
- list_add(&tx->tx_list, &pool->po_free_list);
-
- page_offset += IBLND_MSG_SIZE;
- LASSERT(page_offset <= PAGE_SIZE);
-
- if (page_offset == PAGE_SIZE) {
- page_offset = 0;
- ipage++;
- LASSERT(ipage <= txpgs->ibp_npages);
- }
- }
-}
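
Both the RX and TX mapping loops pack fixed-size IBLND_MSG_SIZE message slots into pages, and the BUILD_BUG_ON(PAGE_SIZE % IBLND_MSG_SIZE) above guarantees that a slot can never straddle a page boundary. A small sketch of the slot-to-page arithmetic, with illustrative constants, is:

/* With SLOT_SIZE dividing PAGE_SIZE evenly, slot i always lies
 * entirely within one page; constants here are illustrative. */
#define DEMO_PAGE_SIZE	4096
#define DEMO_SLOT_SIZE	512	/* must divide DEMO_PAGE_SIZE */

static void demo_slot_location(int i, int *page_idx, int *offset)
{
	int slots_per_page = DEMO_PAGE_SIZE / DEMO_SLOT_SIZE;

	*page_idx = i / slots_per_page;
	*offset = (i % slots_per_page) * DEMO_SLOT_SIZE;
}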
-
-static void kiblnd_destroy_fmr_pool(struct kib_fmr_pool *fpo)
-{
- LASSERT(!fpo->fpo_map_count);
-
- if (fpo->fpo_is_fmr) {
- if (fpo->fmr.fpo_fmr_pool)
- ib_destroy_fmr_pool(fpo->fmr.fpo_fmr_pool);
- } else {
- struct kib_fast_reg_descriptor *frd, *tmp;
- int i = 0;
-
- list_for_each_entry_safe(frd, tmp, &fpo->fast_reg.fpo_pool_list,
- frd_list) {
- list_del(&frd->frd_list);
- ib_dereg_mr(frd->frd_mr);
- kfree(frd);
- i++;
- }
- if (i < fpo->fast_reg.fpo_pool_size)
- CERROR("FastReg pool still has %d regions registered\n",
- fpo->fast_reg.fpo_pool_size - i);
- }
-
- if (fpo->fpo_hdev)
- kiblnd_hdev_decref(fpo->fpo_hdev);
-
- kfree(fpo);
-}
-
-static void kiblnd_destroy_fmr_pool_list(struct list_head *head)
-{
- struct kib_fmr_pool *fpo, *tmp;
-
- list_for_each_entry_safe(fpo, tmp, head, fpo_list) {
- list_del(&fpo->fpo_list);
- kiblnd_destroy_fmr_pool(fpo);
- }
-}
-
-static int
-kiblnd_fmr_pool_size(struct lnet_ioctl_config_o2iblnd_tunables *tunables,
- int ncpts)
-{
- int size = tunables->lnd_fmr_pool_size / ncpts;
-
- return max(IBLND_FMR_POOL, size);
-}
-
-static int
-kiblnd_fmr_flush_trigger(struct lnet_ioctl_config_o2iblnd_tunables *tunables,
- int ncpts)
-{
- int size = tunables->lnd_fmr_flush_trigger / ncpts;
-
- return max(IBLND_FMR_POOL_FLUSH, size);
-}
-
-static int kiblnd_alloc_fmr_pool(struct kib_fmr_poolset *fps, struct kib_fmr_pool *fpo)
-{
- struct ib_fmr_pool_param param = {
- .max_pages_per_fmr = LNET_MAX_PAYLOAD / PAGE_SIZE,
- .page_shift = PAGE_SHIFT,
- .access = (IB_ACCESS_LOCAL_WRITE |
- IB_ACCESS_REMOTE_WRITE),
- .pool_size = fps->fps_pool_size,
- .dirty_watermark = fps->fps_flush_trigger,
- .flush_function = NULL,
- .flush_arg = NULL,
- .cache = !!fps->fps_cache };
- int rc = 0;
-
- fpo->fmr.fpo_fmr_pool = ib_create_fmr_pool(fpo->fpo_hdev->ibh_pd,
- &param);
- if (IS_ERR(fpo->fmr.fpo_fmr_pool)) {
- rc = PTR_ERR(fpo->fmr.fpo_fmr_pool);
- if (rc != -ENOSYS)
- CERROR("Failed to create FMR pool: %d\n", rc);
- else
- CERROR("FMRs are not supported\n");
- }
-
- return rc;
-}
-
-static int kiblnd_alloc_freg_pool(struct kib_fmr_poolset *fps, struct kib_fmr_pool *fpo)
-{
- struct kib_fast_reg_descriptor *frd, *tmp;
- int i, rc;
-
- INIT_LIST_HEAD(&fpo->fast_reg.fpo_pool_list);
- fpo->fast_reg.fpo_pool_size = 0;
- for (i = 0; i < fps->fps_pool_size; i++) {
- frd = kzalloc_cpt(sizeof(*frd), GFP_NOFS, fps->fps_cpt);
- if (!frd) {
- CERROR("Failed to allocate a new fast_reg descriptor\n");
- rc = -ENOMEM;
- goto out;
- }
-
- frd->frd_mr = ib_alloc_mr(fpo->fpo_hdev->ibh_pd,
- IB_MR_TYPE_MEM_REG,
- LNET_MAX_PAYLOAD / PAGE_SIZE);
- if (IS_ERR(frd->frd_mr)) {
- rc = PTR_ERR(frd->frd_mr);
- CERROR("Failed to allocate ib_alloc_mr: %d\n", rc);
- frd->frd_mr = NULL;
- goto out_middle;
- }
-
- frd->frd_valid = true;
-
- list_add_tail(&frd->frd_list, &fpo->fast_reg.fpo_pool_list);
- fpo->fast_reg.fpo_pool_size++;
- }
-
- return 0;
-
-out_middle:
- if (frd->frd_mr)
- ib_dereg_mr(frd->frd_mr);
- kfree(frd);
-
-out:
- list_for_each_entry_safe(frd, tmp, &fpo->fast_reg.fpo_pool_list,
- frd_list) {
- list_del(&frd->frd_list);
- ib_dereg_mr(frd->frd_mr);
- kfree(frd);
- }
-
- return rc;
-}
-
-static int kiblnd_create_fmr_pool(struct kib_fmr_poolset *fps,
- struct kib_fmr_pool **pp_fpo)
-{
- struct kib_dev *dev = fps->fps_net->ibn_dev;
- struct ib_device_attr *dev_attr;
- struct kib_fmr_pool *fpo;
- int rc;
-
- fpo = kzalloc_cpt(sizeof(*fpo), GFP_NOFS, fps->fps_cpt);
- if (!fpo)
- return -ENOMEM;
-
- fpo->fpo_hdev = kiblnd_current_hdev(dev);
- dev_attr = &fpo->fpo_hdev->ibh_ibdev->attrs;
-
- /* Check for FMR or FastReg support */
- fpo->fpo_is_fmr = 0;
- if (fpo->fpo_hdev->ibh_ibdev->alloc_fmr &&
- fpo->fpo_hdev->ibh_ibdev->dealloc_fmr &&
- fpo->fpo_hdev->ibh_ibdev->map_phys_fmr &&
- fpo->fpo_hdev->ibh_ibdev->unmap_fmr) {
- LCONSOLE_INFO("Using FMR for registration\n");
- fpo->fpo_is_fmr = 1;
- } else if (dev_attr->device_cap_flags & IB_DEVICE_MEM_MGT_EXTENSIONS) {
- LCONSOLE_INFO("Using FastReg for registration\n");
- } else {
- rc = -ENOSYS;
- LCONSOLE_ERROR_MSG(rc, "IB device supports neither FMRs nor FastRegs, can't register memory\n");
- goto out_fpo;
- }
-
- if (fpo->fpo_is_fmr)
- rc = kiblnd_alloc_fmr_pool(fps, fpo);
- else
- rc = kiblnd_alloc_freg_pool(fps, fpo);
- if (rc)
- goto out_fpo;
-
- fpo->fpo_deadline = cfs_time_shift(IBLND_POOL_DEADLINE);
- fpo->fpo_owner = fps;
- *pp_fpo = fpo;
-
- return 0;
-
-out_fpo:
- kiblnd_hdev_decref(fpo->fpo_hdev);
- kfree(fpo);
- return rc;
-}
-
-static void kiblnd_fail_fmr_poolset(struct kib_fmr_poolset *fps,
- struct list_head *zombies)
-{
- if (!fps->fps_net) /* initialized? */
- return;
-
- spin_lock(&fps->fps_lock);
-
- while (!list_empty(&fps->fps_pool_list)) {
- struct kib_fmr_pool *fpo = list_entry(fps->fps_pool_list.next,
- struct kib_fmr_pool, fpo_list);
- fpo->fpo_failed = 1;
- list_del(&fpo->fpo_list);
- if (!fpo->fpo_map_count)
- list_add(&fpo->fpo_list, zombies);
- else
- list_add(&fpo->fpo_list, &fps->fps_failed_pool_list);
- }
-
- spin_unlock(&fps->fps_lock);
-}
-
-static void kiblnd_fini_fmr_poolset(struct kib_fmr_poolset *fps)
-{
- if (fps->fps_net) { /* initialized? */
- kiblnd_destroy_fmr_pool_list(&fps->fps_failed_pool_list);
- kiblnd_destroy_fmr_pool_list(&fps->fps_pool_list);
- }
-}
-
-static int
-kiblnd_init_fmr_poolset(struct kib_fmr_poolset *fps, int cpt, int ncpts,
- struct kib_net *net,
- struct lnet_ioctl_config_o2iblnd_tunables *tunables)
-{
- struct kib_fmr_pool *fpo;
- int rc;
-
- memset(fps, 0, sizeof(*fps));
-
- fps->fps_net = net;
- fps->fps_cpt = cpt;
-
- fps->fps_pool_size = kiblnd_fmr_pool_size(tunables, ncpts);
- fps->fps_flush_trigger = kiblnd_fmr_flush_trigger(tunables, ncpts);
- fps->fps_cache = tunables->lnd_fmr_cache;
-
- spin_lock_init(&fps->fps_lock);
- INIT_LIST_HEAD(&fps->fps_pool_list);
- INIT_LIST_HEAD(&fps->fps_failed_pool_list);
-
- rc = kiblnd_create_fmr_pool(fps, &fpo);
- if (!rc)
- list_add_tail(&fpo->fpo_list, &fps->fps_pool_list);
-
- return rc;
-}
-
-static int kiblnd_fmr_pool_is_idle(struct kib_fmr_pool *fpo, unsigned long now)
-{
- if (fpo->fpo_map_count) /* still in use */
- return 0;
- if (fpo->fpo_failed)
- return 1;
- return cfs_time_aftereq(now, fpo->fpo_deadline);
-}
-
-static int
-kiblnd_map_tx_pages(struct kib_tx *tx, struct kib_rdma_desc *rd)
-{
- __u64 *pages = tx->tx_pages;
- struct kib_hca_dev *hdev;
- int npages;
- int size;
- int i;
-
- hdev = tx->tx_pool->tpo_hdev;
-
- for (i = 0, npages = 0; i < rd->rd_nfrags; i++) {
- for (size = 0; size < rd->rd_frags[i].rf_nob;
- size += hdev->ibh_page_size) {
- pages[npages++] = (rd->rd_frags[i].rf_addr &
- hdev->ibh_page_mask) + size;
- }
- }
-
- return npages;
-}
-
-void kiblnd_fmr_pool_unmap(struct kib_fmr *fmr, int status)
-{
- LIST_HEAD(zombies);
- struct kib_fmr_pool *fpo = fmr->fmr_pool;
- struct kib_fmr_poolset *fps;
- unsigned long now = cfs_time_current();
- struct kib_fmr_pool *tmp;
- int rc;
-
- if (!fpo)
- return;
-
- fps = fpo->fpo_owner;
- if (fpo->fpo_is_fmr) {
- if (fmr->fmr_pfmr) {
- rc = ib_fmr_pool_unmap(fmr->fmr_pfmr);
- LASSERT(!rc);
- fmr->fmr_pfmr = NULL;
- }
-
- if (status) {
- rc = ib_flush_fmr_pool(fpo->fmr.fpo_fmr_pool);
- LASSERT(!rc);
- }
- } else {
- struct kib_fast_reg_descriptor *frd = fmr->fmr_frd;
-
- if (frd) {
- frd->frd_valid = false;
- spin_lock(&fps->fps_lock);
- list_add_tail(&frd->frd_list, &fpo->fast_reg.fpo_pool_list);
- spin_unlock(&fps->fps_lock);
- fmr->fmr_frd = NULL;
- }
- }
- fmr->fmr_pool = NULL;
-
- spin_lock(&fps->fps_lock);
- fpo->fpo_map_count--; /* decref the pool */
-
- list_for_each_entry_safe(fpo, tmp, &fps->fps_pool_list, fpo_list) {
- /* the first pool is persistent */
- if (fps->fps_pool_list.next == &fpo->fpo_list)
- continue;
-
- if (kiblnd_fmr_pool_is_idle(fpo, now)) {
- list_move(&fpo->fpo_list, &zombies);
- fps->fps_version++;
- }
- }
- spin_unlock(&fps->fps_lock);
-
- if (!list_empty(&zombies))
- kiblnd_destroy_fmr_pool_list(&zombies);
-}
-
-int kiblnd_fmr_pool_map(struct kib_fmr_poolset *fps, struct kib_tx *tx,
- struct kib_rdma_desc *rd, __u32 nob, __u64 iov,
- struct kib_fmr *fmr)
-{
- __u64 *pages = tx->tx_pages;
- bool is_rx = (rd != tx->tx_rd);
- bool tx_pages_mapped = false;
- struct kib_fmr_pool *fpo;
- int npages = 0;
- __u64 version;
- int rc;
-
- again:
- spin_lock(&fps->fps_lock);
- version = fps->fps_version;
- list_for_each_entry(fpo, &fps->fps_pool_list, fpo_list) {
- fpo->fpo_deadline = cfs_time_shift(IBLND_POOL_DEADLINE);
- fpo->fpo_map_count++;
-
- if (fpo->fpo_is_fmr) {
- struct ib_pool_fmr *pfmr;
-
- spin_unlock(&fps->fps_lock);
-
- if (!tx_pages_mapped) {
- npages = kiblnd_map_tx_pages(tx, rd);
- tx_pages_mapped = 1;
- }
-
- pfmr = ib_fmr_pool_map_phys(fpo->fmr.fpo_fmr_pool,
- pages, npages, iov);
- if (likely(!IS_ERR(pfmr))) {
- fmr->fmr_key = is_rx ? pfmr->fmr->rkey :
- pfmr->fmr->lkey;
- fmr->fmr_frd = NULL;
- fmr->fmr_pfmr = pfmr;
- fmr->fmr_pool = fpo;
- return 0;
- }
- rc = PTR_ERR(pfmr);
- } else {
- if (!list_empty(&fpo->fast_reg.fpo_pool_list)) {
- struct kib_fast_reg_descriptor *frd;
- struct ib_reg_wr *wr;
- struct ib_mr *mr;
- int n;
-
- frd = list_first_entry(&fpo->fast_reg.fpo_pool_list,
- struct kib_fast_reg_descriptor,
- frd_list);
- list_del(&frd->frd_list);
- spin_unlock(&fps->fps_lock);
-
- mr = frd->frd_mr;
-
- if (!frd->frd_valid) {
- __u32 key = is_rx ? mr->rkey : mr->lkey;
- struct ib_send_wr *inv_wr;
-
- inv_wr = &frd->frd_inv_wr;
- memset(inv_wr, 0, sizeof(*inv_wr));
- inv_wr->opcode = IB_WR_LOCAL_INV;
- inv_wr->wr_id = IBLND_WID_MR;
- inv_wr->ex.invalidate_rkey = key;
-
- /* Bump the key */
- key = ib_inc_rkey(key);
- ib_update_fast_reg_key(mr, key);
- }
-
- n = ib_map_mr_sg(mr, tx->tx_frags,
- tx->tx_nfrags, NULL, PAGE_SIZE);
- if (unlikely(n != tx->tx_nfrags)) {
- CERROR("Failed to map mr %d/%d elements\n",
- n, tx->tx_nfrags);
- return n < 0 ? n : -EINVAL;
- }
-
- mr->iova = iov;
-
- /* Prepare FastReg WR */
- wr = &frd->frd_fastreg_wr;
- memset(wr, 0, sizeof(*wr));
- wr->wr.opcode = IB_WR_REG_MR;
- wr->wr.wr_id = IBLND_WID_MR;
- wr->wr.num_sge = 0;
- wr->wr.send_flags = 0;
- wr->mr = mr;
- wr->key = is_rx ? mr->rkey : mr->lkey;
- wr->access = (IB_ACCESS_LOCAL_WRITE |
- IB_ACCESS_REMOTE_WRITE);
-
- fmr->fmr_key = is_rx ? mr->rkey : mr->lkey;
- fmr->fmr_frd = frd;
- fmr->fmr_pfmr = NULL;
- fmr->fmr_pool = fpo;
- return 0;
- }
- spin_unlock(&fps->fps_lock);
- rc = -EBUSY;
- }
-
- spin_lock(&fps->fps_lock);
- fpo->fpo_map_count--;
- if (rc != -EAGAIN) {
- spin_unlock(&fps->fps_lock);
- return rc;
- }
-
- /* EAGAIN and ... */
- if (version != fps->fps_version) {
- spin_unlock(&fps->fps_lock);
- goto again;
- }
- }
-
- if (fps->fps_increasing) {
- spin_unlock(&fps->fps_lock);
- CDEBUG(D_NET, "Another thread is allocating new FMR pool, waiting for her to complete\n");
- schedule();
- goto again;
- }
-
- if (time_before(cfs_time_current(), fps->fps_next_retry)) {
- /* someone failed recently */
- spin_unlock(&fps->fps_lock);
- return -EAGAIN;
- }
-
- fps->fps_increasing = 1;
- spin_unlock(&fps->fps_lock);
-
- CDEBUG(D_NET, "Allocate new FMR pool\n");
- rc = kiblnd_create_fmr_pool(fps, &fpo);
- spin_lock(&fps->fps_lock);
- fps->fps_increasing = 0;
- if (!rc) {
- fps->fps_version++;
- list_add_tail(&fpo->fpo_list, &fps->fps_pool_list);
- } else {
- fps->fps_next_retry = cfs_time_shift(IBLND_POOL_RETRY);
- }
- spin_unlock(&fps->fps_lock);
-
- goto again;
-}
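
kiblnd_fmr_pool_map() samples fps_version under fps_lock and, after a transient mapping failure, retries from the top whenever the version has moved, because a changed version means another thread has grown or pruned the pool list in the meantime. A hedged userspace sketch of that versioned-retry idiom (hypothetical names; a pthread spinlock stands in for the kernel spinlock) is:

#include <pthread.h>

struct demo_poolset {
	pthread_spinlock_t lock;
	unsigned long version;	/* bumped whenever the pool list changes */
};

/* Retry try_once() as long as the pool list keeps changing under us;
 * try_once() returns 0 on success, -1 on a transient failure. */
static int demo_versioned_map(struct demo_poolset *ps,
			      int (*try_once)(struct demo_poolset *))
{
	unsigned long version;
	int rc;

again:
	pthread_spin_lock(&ps->lock);
	version = ps->version;
	pthread_spin_unlock(&ps->lock);

	rc = try_once(ps);
	if (rc == 0)
		return 0;

	pthread_spin_lock(&ps->lock);
	if (version != ps->version) {
		/* another thread changed the pools: retry from scratch */
		pthread_spin_unlock(&ps->lock);
		goto again;
	}
	pthread_spin_unlock(&ps->lock);
	return rc;		/* genuine failure */
}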
-
-static void kiblnd_fini_pool(struct kib_pool *pool)
-{
- LASSERT(list_empty(&pool->po_free_list));
- LASSERT(!pool->po_allocated);
-
- CDEBUG(D_NET, "Finalize %s pool\n", pool->po_owner->ps_name);
-}
-
-static void kiblnd_init_pool(struct kib_poolset *ps, struct kib_pool *pool, int size)
-{
- CDEBUG(D_NET, "Initialize %s pool\n", ps->ps_name);
-
- memset(pool, 0, sizeof(*pool));
- INIT_LIST_HEAD(&pool->po_free_list);
- pool->po_deadline = cfs_time_shift(IBLND_POOL_DEADLINE);
- pool->po_owner = ps;
- pool->po_size = size;
-}
-
-static void kiblnd_destroy_pool_list(struct list_head *head)
-{
- struct kib_pool *pool;
-
- while (!list_empty(head)) {
- pool = list_entry(head->next, struct kib_pool, po_list);
- list_del(&pool->po_list);
-
- LASSERT(pool->po_owner);
- pool->po_owner->ps_pool_destroy(pool);
- }
-}
-
-static void kiblnd_fail_poolset(struct kib_poolset *ps, struct list_head *zombies)
-{
- if (!ps->ps_net) /* initialized? */
- return;
-
- spin_lock(&ps->ps_lock);
- while (!list_empty(&ps->ps_pool_list)) {
- struct kib_pool *po = list_entry(ps->ps_pool_list.next,
- struct kib_pool, po_list);
- po->po_failed = 1;
- list_del(&po->po_list);
- if (!po->po_allocated)
- list_add(&po->po_list, zombies);
- else
- list_add(&po->po_list, &ps->ps_failed_pool_list);
- }
- spin_unlock(&ps->ps_lock);
-}
-
-static void kiblnd_fini_poolset(struct kib_poolset *ps)
-{
- if (ps->ps_net) { /* initialized? */
- kiblnd_destroy_pool_list(&ps->ps_failed_pool_list);
- kiblnd_destroy_pool_list(&ps->ps_pool_list);
- }
-}
-
-static int kiblnd_init_poolset(struct kib_poolset *ps, int cpt,
- struct kib_net *net, char *name, int size,
- kib_ps_pool_create_t po_create,
- kib_ps_pool_destroy_t po_destroy,
- kib_ps_node_init_t nd_init,
- kib_ps_node_fini_t nd_fini)
-{
- struct kib_pool *pool;
- int rc;
-
- memset(ps, 0, sizeof(*ps));
-
- ps->ps_cpt = cpt;
- ps->ps_net = net;
- ps->ps_pool_create = po_create;
- ps->ps_pool_destroy = po_destroy;
- ps->ps_node_init = nd_init;
- ps->ps_node_fini = nd_fini;
- ps->ps_pool_size = size;
- if (strlcpy(ps->ps_name, name, sizeof(ps->ps_name))
- >= sizeof(ps->ps_name))
- return -E2BIG;
- spin_lock_init(&ps->ps_lock);
- INIT_LIST_HEAD(&ps->ps_pool_list);
- INIT_LIST_HEAD(&ps->ps_failed_pool_list);
-
- rc = ps->ps_pool_create(ps, size, &pool);
- if (!rc)
- list_add(&pool->po_list, &ps->ps_pool_list);
- else
- CERROR("Failed to create the first pool for %s\n", ps->ps_name);
-
- return rc;
-}
-
-static int kiblnd_pool_is_idle(struct kib_pool *pool, unsigned long now)
-{
- if (pool->po_allocated) /* still in use */
- return 0;
- if (pool->po_failed)
- return 1;
- return cfs_time_aftereq(now, pool->po_deadline);
-}
-
-void kiblnd_pool_free_node(struct kib_pool *pool, struct list_head *node)
-{
- LIST_HEAD(zombies);
- struct kib_poolset *ps = pool->po_owner;
- struct kib_pool *tmp;
- unsigned long now = cfs_time_current();
-
- spin_lock(&ps->ps_lock);
-
- if (ps->ps_node_fini)
- ps->ps_node_fini(pool, node);
-
- LASSERT(pool->po_allocated > 0);
- list_add(node, &pool->po_free_list);
- pool->po_allocated--;
-
- list_for_each_entry_safe(pool, tmp, &ps->ps_pool_list, po_list) {
- /* the first pool is persistent */
- if (ps->ps_pool_list.next == &pool->po_list)
- continue;
-
- if (kiblnd_pool_is_idle(pool, now))
- list_move(&pool->po_list, &zombies);
- }
- spin_unlock(&ps->ps_lock);
-
- if (!list_empty(&zombies))
- kiblnd_destroy_pool_list(&zombies);
-}
-
-struct list_head *kiblnd_pool_alloc_node(struct kib_poolset *ps)
-{
- struct list_head *node;
- struct kib_pool *pool;
- unsigned int interval = 1;
- unsigned long time_before;
- unsigned int trips = 0;
- int rc;
-
- again:
- spin_lock(&ps->ps_lock);
- list_for_each_entry(pool, &ps->ps_pool_list, po_list) {
- if (list_empty(&pool->po_free_list))
- continue;
-
- pool->po_allocated++;
- pool->po_deadline = cfs_time_shift(IBLND_POOL_DEADLINE);
- node = pool->po_free_list.next;
- list_del(node);
-
- if (ps->ps_node_init) {
- /* still hold the lock */
- ps->ps_node_init(pool, node);
- }
- spin_unlock(&ps->ps_lock);
- return node;
- }
-
- /* no available tx pool and ... */
- if (ps->ps_increasing) {
- /* another thread is allocating a new pool */
- spin_unlock(&ps->ps_lock);
- trips++;
- CDEBUG(D_NET, "Another thread is allocating new %s pool, waiting %d HZs for her to complete. trips = %d\n",
- ps->ps_name, interval, trips);
-
- set_current_state(TASK_INTERRUPTIBLE);
- schedule_timeout(interval);
- if (interval < HZ)
- interval *= 2;
-
- goto again;
- }
-
- if (time_before(cfs_time_current(), ps->ps_next_retry)) {
- /* someone failed recently */
- spin_unlock(&ps->ps_lock);
- return NULL;
- }
-
- ps->ps_increasing = 1;
- spin_unlock(&ps->ps_lock);
-
- CDEBUG(D_NET, "%s pool exhausted, allocate new pool\n", ps->ps_name);
- time_before = cfs_time_current();
- rc = ps->ps_pool_create(ps, ps->ps_pool_size, &pool);
- CDEBUG(D_NET, "ps_pool_create took %lu HZ to complete",
- cfs_time_current() - time_before);
-
- spin_lock(&ps->ps_lock);
- ps->ps_increasing = 0;
- if (!rc) {
- list_add_tail(&pool->po_list, &ps->ps_pool_list);
- } else {
- ps->ps_next_retry = cfs_time_shift(IBLND_POOL_RETRY);
- CERROR("Can't allocate new %s pool because out of memory\n",
- ps->ps_name);
- }
- spin_unlock(&ps->ps_lock);
-
- goto again;
-}
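
While one thread grows a pool set (ps_increasing), other allocators in kiblnd_pool_alloc_node() sleep for a doubling interval, capped at HZ, before rescanning the free lists. A minimal sketch of that backoff (illustrative, with milliseconds standing in for jiffies) is:

#include <unistd.h>

/* Sleep for the current interval, then double it up to cap_ms,
 * mirroring the "interval *= 2 until HZ" backoff above. */
static void demo_backoff_wait(unsigned int *interval_ms, unsigned int cap_ms)
{
	usleep(*interval_ms * 1000);
	if (*interval_ms < cap_ms)
		*interval_ms *= 2;
}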
-
-static void kiblnd_destroy_tx_pool(struct kib_pool *pool)
-{
- struct kib_tx_pool *tpo = container_of(pool, struct kib_tx_pool, tpo_pool);
- int i;
-
- LASSERT(!pool->po_allocated);
-
- if (tpo->tpo_tx_pages) {
- kiblnd_unmap_tx_pool(tpo);
- kiblnd_free_pages(tpo->tpo_tx_pages);
- }
-
- if (!tpo->tpo_tx_descs)
- goto out;
-
- for (i = 0; i < pool->po_size; i++) {
- struct kib_tx *tx = &tpo->tpo_tx_descs[i];
-
- list_del(&tx->tx_list);
- kfree(tx->tx_pages);
- kfree(tx->tx_frags);
- kfree(tx->tx_wrq);
- kfree(tx->tx_sge);
- kfree(tx->tx_rd);
- }
-
- kfree(tpo->tpo_tx_descs);
-out:
- kiblnd_fini_pool(pool);
- kfree(tpo);
-}
-
-static int kiblnd_tx_pool_size(int ncpts)
-{
- int ntx = *kiblnd_tunables.kib_ntx / ncpts;
-
- return max(IBLND_TX_POOL, ntx);
-}
-
-static int kiblnd_create_tx_pool(struct kib_poolset *ps, int size,
- struct kib_pool **pp_po)
-{
- int i;
- int npg;
- struct kib_pool *pool;
- struct kib_tx_pool *tpo;
-
- tpo = kzalloc_cpt(sizeof(*tpo), GFP_NOFS, ps->ps_cpt);
- if (!tpo) {
- CERROR("Failed to allocate TX pool\n");
- return -ENOMEM;
- }
-
- pool = &tpo->tpo_pool;
- kiblnd_init_pool(ps, pool, size);
- tpo->tpo_tx_descs = NULL;
- tpo->tpo_tx_pages = NULL;
-
- npg = DIV_ROUND_UP(size * IBLND_MSG_SIZE, PAGE_SIZE);
- if (kiblnd_alloc_pages(&tpo->tpo_tx_pages, ps->ps_cpt, npg)) {
- CERROR("Can't allocate tx pages: %d\n", npg);
- kfree(tpo);
- return -ENOMEM;
- }
-
- tpo->tpo_tx_descs = kzalloc_cpt(size * sizeof(struct kib_tx),
- GFP_NOFS, ps->ps_cpt);
- if (!tpo->tpo_tx_descs) {
- CERROR("Can't allocate %d tx descriptors\n", size);
- ps->ps_pool_destroy(pool);
- return -ENOMEM;
- }
-
- memset(tpo->tpo_tx_descs, 0, size * sizeof(struct kib_tx));
-
- for (i = 0; i < size; i++) {
- struct kib_tx *tx = &tpo->tpo_tx_descs[i];
-
- tx->tx_pool = tpo;
- if (ps->ps_net->ibn_fmr_ps) {
- tx->tx_pages = kzalloc_cpt(LNET_MAX_IOV * sizeof(*tx->tx_pages),
- GFP_NOFS, ps->ps_cpt);
- if (!tx->tx_pages)
- break;
- }
-
- tx->tx_frags = kzalloc_cpt((1 + IBLND_MAX_RDMA_FRAGS) *
- sizeof(*tx->tx_frags),
- GFP_NOFS, ps->ps_cpt);
- if (!tx->tx_frags)
- break;
-
- sg_init_table(tx->tx_frags, IBLND_MAX_RDMA_FRAGS + 1);
-
- tx->tx_wrq = kzalloc_cpt((1 + IBLND_MAX_RDMA_FRAGS) *
- sizeof(*tx->tx_wrq),
- GFP_NOFS, ps->ps_cpt);
- if (!tx->tx_wrq)
- break;
-
- tx->tx_sge = kzalloc_cpt((1 + IBLND_MAX_RDMA_FRAGS) *
- sizeof(*tx->tx_sge),
- GFP_NOFS, ps->ps_cpt);
- if (!tx->tx_sge)
- break;
-
- tx->tx_rd = kzalloc_cpt(offsetof(struct kib_rdma_desc,
- rd_frags[IBLND_MAX_RDMA_FRAGS]),
- GFP_NOFS, ps->ps_cpt);
- if (!tx->tx_rd)
- break;
- }
-
- if (i == size) {
- kiblnd_map_tx_pool(tpo);
- *pp_po = pool;
- return 0;
- }
-
- ps->ps_pool_destroy(pool);
- return -ENOMEM;
-}
-
-static void kiblnd_tx_init(struct kib_pool *pool, struct list_head *node)
-{
- struct kib_tx_poolset *tps = container_of(pool->po_owner,
- struct kib_tx_poolset,
- tps_poolset);
- struct kib_tx *tx = list_entry(node, struct kib_tx, tx_list);
-
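- /* tag each tx with a unique, monotonically increasing completion cookie */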
- tx->tx_cookie = tps->tps_next_tx_cookie++;
-}
-
-static void kiblnd_net_fini_pools(struct kib_net *net)
-{
- int i;
-
- cfs_cpt_for_each(i, lnet_cpt_table()) {
- struct kib_tx_poolset *tps;
- struct kib_fmr_poolset *fps;
-
- if (net->ibn_tx_ps) {
- tps = net->ibn_tx_ps[i];
- kiblnd_fini_poolset(&tps->tps_poolset);
- }
-
- if (net->ibn_fmr_ps) {
- fps = net->ibn_fmr_ps[i];
- kiblnd_fini_fmr_poolset(fps);
- }
- }
-
- if (net->ibn_tx_ps) {
- cfs_percpt_free(net->ibn_tx_ps);
- net->ibn_tx_ps = NULL;
- }
-
- if (net->ibn_fmr_ps) {
- cfs_percpt_free(net->ibn_fmr_ps);
- net->ibn_fmr_ps = NULL;
- }
-}
-
-static int kiblnd_net_init_pools(struct kib_net *net, struct lnet_ni *ni,
- __u32 *cpts, int ncpts)
-{
- struct lnet_ioctl_config_o2iblnd_tunables *tunables;
- int cpt;
- int rc;
- int i;
-
- tunables = &ni->ni_lnd_tunables->lt_tun_u.lt_o2ib;
-
- if (tunables->lnd_fmr_pool_size < *kiblnd_tunables.kib_ntx / 4) {
- CERROR("Can't set fmr pool size (%d) < ntx / 4(%d)\n",
- tunables->lnd_fmr_pool_size,
- *kiblnd_tunables.kib_ntx / 4);
- rc = -EINVAL;
- goto failed;
- }
-
- /*
- * The TX pool must be created after the FMR pool;
- * see LU-2268 for details.
- */
- LASSERT(!net->ibn_tx_ps);
-
- /*
- * premapping can fail if ibd_nmr > 1, so we always create
- * an FMR pool and map on demand if premapping failed.
- *
- * cfs_percpt_alloc creates an array of struct kib_fmr_poolset.
- * The number of struct kib_fmr_poolsets created is equal to the
- * number of CPTs that exist, i.e. net->ibn_fmr_ps[cpt].
- */
- net->ibn_fmr_ps = cfs_percpt_alloc(lnet_cpt_table(),
- sizeof(struct kib_fmr_poolset));
- if (!net->ibn_fmr_ps) {
- CERROR("Failed to allocate FMR pool array\n");
- rc = -ENOMEM;
- goto failed;
- }
-
- for (i = 0; i < ncpts; i++) {
- cpt = !cpts ? i : cpts[i];
- rc = kiblnd_init_fmr_poolset(net->ibn_fmr_ps[cpt], cpt, ncpts,
- net, tunables);
- if (rc) {
- CERROR("Can't initialize FMR pool for CPT %d: %d\n",
- cpt, rc);
- goto failed;
- }
- }
-
- if (i > 0)
- LASSERT(i == ncpts);
-
- /*
- * cfs_percpt_alloc creates an array of struct kib_tx_poolset.
- * The number of struct kib_tx_poolsets created is equal to the
- * number of CPTs that exist, i.e. net->ibn_tx_ps[cpt].
- */
- net->ibn_tx_ps = cfs_percpt_alloc(lnet_cpt_table(),
- sizeof(struct kib_tx_poolset));
- if (!net->ibn_tx_ps) {
- CERROR("Failed to allocate tx pool array\n");
- rc = -ENOMEM;
- goto failed;
- }
-
- for (i = 0; i < ncpts; i++) {
- cpt = !cpts ? i : cpts[i];
- rc = kiblnd_init_poolset(&net->ibn_tx_ps[cpt]->tps_poolset,
- cpt, net, "TX",
- kiblnd_tx_pool_size(ncpts),
- kiblnd_create_tx_pool,
- kiblnd_destroy_tx_pool,
- kiblnd_tx_init, NULL);
- if (rc) {
- CERROR("Can't initialize TX pool for CPT %d: %d\n",
- cpt, rc);
- goto failed;
- }
- }
-
- return 0;
- failed:
- kiblnd_net_fini_pools(net);
- LASSERT(rc);
- return rc;
-}
-
-static int kiblnd_hdev_get_attr(struct kib_hca_dev *hdev)
-{
- /*
- * It's safe to assume an HCA can handle a page size
- * matching that of the native system.
- */
- hdev->ibh_page_shift = PAGE_SHIFT;
- hdev->ibh_page_size = 1 << PAGE_SHIFT;
- hdev->ibh_page_mask = ~((__u64)hdev->ibh_page_size - 1);
-
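- /*
- * Only HCAs advertising a full 64-bit max MR size are usable;
- * anything smaller is rejected below.
- */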
- hdev->ibh_mr_size = hdev->ibh_ibdev->attrs.max_mr_size;
- if (hdev->ibh_mr_size == ~0ULL) {
- hdev->ibh_mr_shift = 64;
- return 0;
- }
-
- CERROR("Invalid mr size: %#llx\n", hdev->ibh_mr_size);
- return -EINVAL;
-}
-
-void kiblnd_hdev_destroy(struct kib_hca_dev *hdev)
-{
- if (hdev->ibh_pd)
- ib_dealloc_pd(hdev->ibh_pd);
-
- if (hdev->ibh_cmid)
- rdma_destroy_id(hdev->ibh_cmid);
-
- kfree(hdev);
-}
-
-/* DUMMY */
-static int kiblnd_dummy_callback(struct rdma_cm_id *cmid,
- struct rdma_cm_event *event)
-{
- return 0;
-}
-
-static int kiblnd_dev_need_failover(struct kib_dev *dev)
-{
- struct rdma_cm_id *cmid;
- struct sockaddr_in srcaddr;
- struct sockaddr_in dstaddr;
- int rc;
-
- if (!dev->ibd_hdev || /* initializing */
- !dev->ibd_hdev->ibh_cmid || /* listener is dead */
- *kiblnd_tunables.kib_dev_failover > 1) /* debugging */
- return 1;
-
- /*
- * XXX: it's UGLY, but I don't have a better way to find
- * ib-bonding HCA failover because:
- *
- * a. no reliable CM event for HCA failover...
- * b. no OFED API to get ib_device for current net_device...
- *
- * We have only two choices at this point:
- *
- * a. rdma_bind_addr(), it will conflict with listener cmid
- * b. rdma_resolve_addr() to zero addr
- */
- cmid = kiblnd_rdma_create_id(kiblnd_dummy_callback, dev, RDMA_PS_TCP,
- IB_QPT_RC);
- if (IS_ERR(cmid)) {
- rc = PTR_ERR(cmid);
- CERROR("Failed to create cmid for failover: %d\n", rc);
- return rc;
- }
-
- memset(&srcaddr, 0, sizeof(srcaddr));
- srcaddr.sin_family = AF_INET;
- srcaddr.sin_addr.s_addr = htonl(dev->ibd_ifip);
-
- memset(&dstaddr, 0, sizeof(dstaddr));
- dstaddr.sin_family = AF_INET;
- rc = rdma_resolve_addr(cmid, (struct sockaddr *)&srcaddr,
- (struct sockaddr *)&dstaddr, 1);
- if (rc || !cmid->device) {
- CERROR("Failed to bind %s:%pI4h to device(%p): %d\n",
- dev->ibd_ifname, &dev->ibd_ifip,
- cmid->device, rc);
- rdma_destroy_id(cmid);
- return rc;
- }
-
- rc = dev->ibd_hdev->ibh_ibdev != cmid->device; /* true for failover */
- rdma_destroy_id(cmid);
-
- return rc;
-}
-
-int kiblnd_dev_failover(struct kib_dev *dev)
-{
- LIST_HEAD(zombie_tpo);
- LIST_HEAD(zombie_ppo);
- LIST_HEAD(zombie_fpo);
- struct rdma_cm_id *cmid = NULL;
- struct kib_hca_dev *hdev = NULL;
- struct ib_pd *pd;
- struct kib_net *net;
- struct sockaddr_in addr;
- unsigned long flags;
- int rc = 0;
- int i;
-
- LASSERT(*kiblnd_tunables.kib_dev_failover > 1 ||
- dev->ibd_can_failover || !dev->ibd_hdev);
-
- rc = kiblnd_dev_need_failover(dev);
- if (rc <= 0)
- goto out;
-
- if (dev->ibd_hdev &&
- dev->ibd_hdev->ibh_cmid) {
- /*
- * XXX it's not good to close the old listener here,
- * because creating the new listener may still fail.
- * But we have to close it now, otherwise rdma_bind_addr
- * will return EADDRINUSE... How crap!
- */
- write_lock_irqsave(&kiblnd_data.kib_global_lock, flags);
-
- cmid = dev->ibd_hdev->ibh_cmid;
- /*
- * make the next call to kiblnd_dev_need_failover()
- * return 1 for me
- */
- dev->ibd_hdev->ibh_cmid = NULL;
- write_unlock_irqrestore(&kiblnd_data.kib_global_lock, flags);
-
- rdma_destroy_id(cmid);
- }
-
- cmid = kiblnd_rdma_create_id(kiblnd_cm_callback, dev, RDMA_PS_TCP,
- IB_QPT_RC);
- if (IS_ERR(cmid)) {
- rc = PTR_ERR(cmid);
- CERROR("Failed to create cmid for failover: %d\n", rc);
- goto out;
- }
-
- memset(&addr, 0, sizeof(addr));
- addr.sin_family = AF_INET;
- addr.sin_addr.s_addr = htonl(dev->ibd_ifip);
- addr.sin_port = htons(*kiblnd_tunables.kib_service);
-
- /* Bind to failover device or port */
- rc = rdma_bind_addr(cmid, (struct sockaddr *)&addr);
- if (rc || !cmid->device) {
- CERROR("Failed to bind %s:%pI4h to device(%p): %d\n",
- dev->ibd_ifname, &dev->ibd_ifip,
- cmid->device, rc);
- rdma_destroy_id(cmid);
- goto out;
- }
-
- hdev = kzalloc(sizeof(*hdev), GFP_NOFS);
- if (!hdev) {
- CERROR("Failed to allocate kib_hca_dev\n");
- rdma_destroy_id(cmid);
- rc = -ENOMEM;
- goto out;
- }
-
- atomic_set(&hdev->ibh_ref, 1);
- hdev->ibh_dev = dev;
- hdev->ibh_cmid = cmid;
- hdev->ibh_ibdev = cmid->device;
-
- pd = ib_alloc_pd(cmid->device, 0);
- if (IS_ERR(pd)) {
- rc = PTR_ERR(pd);
- CERROR("Can't allocate PD: %d\n", rc);
- goto out;
- }
-
- hdev->ibh_pd = pd;
-
- rc = rdma_listen(cmid, 0);
- if (rc) {
- CERROR("Can't start new listener: %d\n", rc);
- goto out;
- }
-
- rc = kiblnd_hdev_get_attr(hdev);
- if (rc) {
- CERROR("Can't get device attributes: %d\n", rc);
- goto out;
- }
-
- write_lock_irqsave(&kiblnd_data.kib_global_lock, flags);
-
- swap(dev->ibd_hdev, hdev); /* take over the refcount */
-
- list_for_each_entry(net, &dev->ibd_nets, ibn_list) {
- cfs_cpt_for_each(i, lnet_cpt_table()) {
- kiblnd_fail_poolset(&net->ibn_tx_ps[i]->tps_poolset,
- &zombie_tpo);
-
- if (net->ibn_fmr_ps)
- kiblnd_fail_fmr_poolset(net->ibn_fmr_ps[i],
- &zombie_fpo);
- }
- }
-
- write_unlock_irqrestore(&kiblnd_data.kib_global_lock, flags);
- out:
- if (!list_empty(&zombie_tpo))
- kiblnd_destroy_pool_list(&zombie_tpo);
- if (!list_empty(&zombie_ppo))
- kiblnd_destroy_pool_list(&zombie_ppo);
- if (!list_empty(&zombie_fpo))
- kiblnd_destroy_fmr_pool_list(&zombie_fpo);
- if (hdev)
- kiblnd_hdev_decref(hdev);
-
- if (rc)
- dev->ibd_failed_failover++;
- else
- dev->ibd_failed_failover = 0;
-
- return rc;
-}
-
-void kiblnd_destroy_dev(struct kib_dev *dev)
-{
- LASSERT(!dev->ibd_nnets);
- LASSERT(list_empty(&dev->ibd_nets));
-
- list_del(&dev->ibd_fail_list);
- list_del(&dev->ibd_list);
-
- if (dev->ibd_hdev)
- kiblnd_hdev_decref(dev->ibd_hdev);
-
- kfree(dev);
-}
-
-static struct kib_dev *kiblnd_create_dev(char *ifname)
-{
- struct net_device *netdev;
- struct kib_dev *dev;
- __u32 netmask;
- __u32 ip;
- int up;
- int rc;
-
- rc = lnet_ipif_query(ifname, &up, &ip, &netmask);
- if (rc) {
- CERROR("Can't query IPoIB interface %s: %d\n",
- ifname, rc);
- return NULL;
- }
-
- if (!up) {
- CERROR("Can't query IPoIB interface %s: it's down\n", ifname);
- return NULL;
- }
-
- dev = kzalloc(sizeof(*dev), GFP_NOFS);
- if (!dev)
- return NULL;
-
- netdev = dev_get_by_name(&init_net, ifname);
- if (!netdev) {
- dev->ibd_can_failover = 0;
- } else {
- dev->ibd_can_failover = !!(netdev->flags & IFF_MASTER);
- dev_put(netdev);
- }
-
- INIT_LIST_HEAD(&dev->ibd_nets);
- INIT_LIST_HEAD(&dev->ibd_list); /* not yet in kib_devs */
- INIT_LIST_HEAD(&dev->ibd_fail_list);
- dev->ibd_ifip = ip;
- strcpy(&dev->ibd_ifname[0], ifname);
-
- /* initialize the device */
- rc = kiblnd_dev_failover(dev);
- if (rc) {
- CERROR("Can't initialize device: %d\n", rc);
- kfree(dev);
- return NULL;
- }
-
- list_add_tail(&dev->ibd_list, &kiblnd_data.kib_devs);
- return dev;
-}
-
-static void kiblnd_base_shutdown(void)
-{
- struct kib_sched_info *sched;
- int i;
-
- LASSERT(list_empty(&kiblnd_data.kib_devs));
-
- switch (kiblnd_data.kib_init) {
- default:
- LBUG();
-
- case IBLND_INIT_ALL:
- case IBLND_INIT_DATA:
- LASSERT(kiblnd_data.kib_peers);
- for (i = 0; i < kiblnd_data.kib_peer_hash_size; i++)
- LASSERT(list_empty(&kiblnd_data.kib_peers[i]));
- LASSERT(list_empty(&kiblnd_data.kib_connd_zombies));
- LASSERT(list_empty(&kiblnd_data.kib_connd_conns));
- LASSERT(list_empty(&kiblnd_data.kib_reconn_list));
- LASSERT(list_empty(&kiblnd_data.kib_reconn_wait));
-
- /* flag threads to terminate; wake and wait for them to die */
- kiblnd_data.kib_shutdown = 1;
-
- /*
- * NB: we really want to stop scheduler threads net by net
- * instead of the whole module; this should be improved
- * with dynamic LNet configuration.
- */
- cfs_percpt_for_each(sched, i, kiblnd_data.kib_scheds)
- wake_up_all(&sched->ibs_waitq);
-
- wake_up_all(&kiblnd_data.kib_connd_waitq);
- wake_up_all(&kiblnd_data.kib_failover_waitq);
-
- i = 2;
- while (atomic_read(&kiblnd_data.kib_nthreads)) {
- i++;
- /* power of 2 ? */
- CDEBUG(((i & (-i)) == i) ? D_WARNING : D_NET,
- "Waiting for %d threads to terminate\n",
- atomic_read(&kiblnd_data.kib_nthreads));
- set_current_state(TASK_UNINTERRUPTIBLE);
- schedule_timeout(HZ);
- }
-
- /* fall through */
-
- case IBLND_INIT_NOTHING:
- break;
- }
-
- kvfree(kiblnd_data.kib_peers);
-
- if (kiblnd_data.kib_scheds)
- cfs_percpt_free(kiblnd_data.kib_scheds);
-
- kiblnd_data.kib_init = IBLND_INIT_NOTHING;
- module_put(THIS_MODULE);
-}
-
-static void kiblnd_shutdown(struct lnet_ni *ni)
-{
- struct kib_net *net = ni->ni_data;
- rwlock_t *g_lock = &kiblnd_data.kib_global_lock;
- int i;
- unsigned long flags;
-
- LASSERT(kiblnd_data.kib_init == IBLND_INIT_ALL);
-
- if (!net)
- goto out;
-
- write_lock_irqsave(g_lock, flags);
- net->ibn_shutdown = 1;
- write_unlock_irqrestore(g_lock, flags);
-
- switch (net->ibn_init) {
- default:
- LBUG();
-
- case IBLND_INIT_ALL:
- /* nuke all existing peers within this net */
- kiblnd_del_peer(ni, LNET_NID_ANY);
-
- /* Wait for all peer state to clean up */
- i = 2;
- while (atomic_read(&net->ibn_npeers)) {
- i++;
- CDEBUG(((i & (-i)) == i) ? D_WARNING : D_NET, /* 2**n? */
- "%s: waiting for %d peers to disconnect\n",
- libcfs_nid2str(ni->ni_nid),
- atomic_read(&net->ibn_npeers));
- set_current_state(TASK_UNINTERRUPTIBLE);
- schedule_timeout(HZ);
- }
-
- kiblnd_net_fini_pools(net);
-
- write_lock_irqsave(g_lock, flags);
- LASSERT(net->ibn_dev->ibd_nnets > 0);
- net->ibn_dev->ibd_nnets--;
- list_del(&net->ibn_list);
- write_unlock_irqrestore(g_lock, flags);
-
- /* fall through */
-
- case IBLND_INIT_NOTHING:
- LASSERT(!atomic_read(&net->ibn_nconns));
-
- if (net->ibn_dev && !net->ibn_dev->ibd_nnets)
- kiblnd_destroy_dev(net->ibn_dev);
-
- break;
- }
-
- net->ibn_init = IBLND_INIT_NOTHING;
- ni->ni_data = NULL;
-
- kfree(net);
-
-out:
- if (list_empty(&kiblnd_data.kib_devs))
- kiblnd_base_shutdown();
-}
-
-static int kiblnd_base_startup(void)
-{
- struct kib_sched_info *sched;
- int rc;
- int i;
-
- LASSERT(kiblnd_data.kib_init == IBLND_INIT_NOTHING);
-
- try_module_get(THIS_MODULE);
- /* zero pointers, flags etc */
- memset(&kiblnd_data, 0, sizeof(kiblnd_data));
-
- rwlock_init(&kiblnd_data.kib_global_lock);
-
- INIT_LIST_HEAD(&kiblnd_data.kib_devs);
- INIT_LIST_HEAD(&kiblnd_data.kib_failed_devs);
-
- kiblnd_data.kib_peer_hash_size = IBLND_PEER_HASH_SIZE;
- kiblnd_data.kib_peers = kvmalloc_array(kiblnd_data.kib_peer_hash_size,
- sizeof(struct list_head),
- GFP_KERNEL);
- if (!kiblnd_data.kib_peers)
- goto failed;
- for (i = 0; i < kiblnd_data.kib_peer_hash_size; i++)
- INIT_LIST_HEAD(&kiblnd_data.kib_peers[i]);
-
- spin_lock_init(&kiblnd_data.kib_connd_lock);
- INIT_LIST_HEAD(&kiblnd_data.kib_connd_conns);
- INIT_LIST_HEAD(&kiblnd_data.kib_connd_zombies);
- INIT_LIST_HEAD(&kiblnd_data.kib_reconn_list);
- INIT_LIST_HEAD(&kiblnd_data.kib_reconn_wait);
-
- init_waitqueue_head(&kiblnd_data.kib_connd_waitq);
- init_waitqueue_head(&kiblnd_data.kib_failover_waitq);
-
- kiblnd_data.kib_scheds = cfs_percpt_alloc(lnet_cpt_table(),
- sizeof(*sched));
- if (!kiblnd_data.kib_scheds)
- goto failed;
-
- cfs_percpt_for_each(sched, i, kiblnd_data.kib_scheds) {
- int nthrs;
-
- spin_lock_init(&sched->ibs_lock);
- INIT_LIST_HEAD(&sched->ibs_conns);
- init_waitqueue_head(&sched->ibs_waitq);
-
- nthrs = cfs_cpt_weight(lnet_cpt_table(), i);
- if (*kiblnd_tunables.kib_nscheds > 0) {
- nthrs = min(nthrs, *kiblnd_tunables.kib_nscheds);
- } else {
- /*
- * cap at half of the CPUs; the other half is reserved
- * for upper-layer modules
- */
- nthrs = min(max(IBLND_N_SCHED, nthrs >> 1), nthrs);
- }
-
- sched->ibs_nthreads_max = nthrs;
- sched->ibs_cpt = i;
- }
-
- kiblnd_data.kib_error_qpa.qp_state = IB_QPS_ERR;
-
- /* lists/ptrs/locks initialised */
- kiblnd_data.kib_init = IBLND_INIT_DATA;
- /*****************************************************/
-
- rc = kiblnd_thread_start(kiblnd_connd, NULL, "kiblnd_connd");
- if (rc) {
- CERROR("Can't spawn o2iblnd connd: %d\n", rc);
- goto failed;
- }
-
- if (*kiblnd_tunables.kib_dev_failover)
- rc = kiblnd_thread_start(kiblnd_failover_thread, NULL,
- "kiblnd_failover");
-
- if (rc) {
- CERROR("Can't spawn o2iblnd failover thread: %d\n", rc);
- goto failed;
- }
-
- /* flag everything initialised */
- kiblnd_data.kib_init = IBLND_INIT_ALL;
- /*****************************************************/
-
- return 0;
-
- failed:
- kiblnd_base_shutdown();
- return -ENETDOWN;
-}
-
-static int kiblnd_start_schedulers(struct kib_sched_info *sched)
-{
- int rc = 0;
- int nthrs;
- int i;
-
- if (!sched->ibs_nthreads) {
- if (*kiblnd_tunables.kib_nscheds > 0) {
- nthrs = sched->ibs_nthreads_max;
- } else {
- nthrs = cfs_cpt_weight(lnet_cpt_table(),
- sched->ibs_cpt);
- nthrs = min(max(IBLND_N_SCHED, nthrs >> 1), nthrs);
- nthrs = min(IBLND_N_SCHED_HIGH, nthrs);
- }
- } else {
- LASSERT(sched->ibs_nthreads <= sched->ibs_nthreads_max);
- /* add one more thread if there is a new interface */
- nthrs = sched->ibs_nthreads < sched->ibs_nthreads_max;
- }
-
- for (i = 0; i < nthrs; i++) {
- long id;
- char name[20];
-
- id = KIB_THREAD_ID(sched->ibs_cpt, sched->ibs_nthreads + i);
- snprintf(name, sizeof(name), "kiblnd_sd_%02ld_%02ld",
- KIB_THREAD_CPT(id), KIB_THREAD_TID(id));
- rc = kiblnd_thread_start(kiblnd_scheduler, (void *)id, name);
- if (!rc)
- continue;
-
- CERROR("Can't spawn thread %d for scheduler[%d]: %d\n",
- sched->ibs_cpt, sched->ibs_nthreads + i, rc);
- break;
- }
-
- sched->ibs_nthreads += i;
- return rc;
-}
-
-static int kiblnd_dev_start_threads(struct kib_dev *dev, int newdev, __u32 *cpts,
- int ncpts)
-{
- int cpt;
- int rc;
- int i;
-
- for (i = 0; i < ncpts; i++) {
- struct kib_sched_info *sched;
-
- cpt = !cpts ? i : cpts[i];
- sched = kiblnd_data.kib_scheds[cpt];
-
- if (!newdev && sched->ibs_nthreads > 0)
- continue;
-
- rc = kiblnd_start_schedulers(sched);
- if (rc) {
- CERROR("Failed to start scheduler threads for %s\n",
- dev->ibd_ifname);
- return rc;
- }
- }
- return 0;
-}
-
-static struct kib_dev *kiblnd_dev_search(char *ifname)
-{
- struct kib_dev *alias = NULL;
- struct kib_dev *dev;
- char *colon;
- char *colon2;
-
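- /*
- * an alias is named "base:alias"; compare base names so an
- * alias can share its master's kib_dev
- */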
- colon = strchr(ifname, ':');
- list_for_each_entry(dev, &kiblnd_data.kib_devs, ibd_list) {
- if (!strcmp(&dev->ibd_ifname[0], ifname))
- return dev;
-
- if (alias)
- continue;
-
- colon2 = strchr(dev->ibd_ifname, ':');
- if (colon)
- *colon = 0;
- if (colon2)
- *colon2 = 0;
-
- if (!strcmp(&dev->ibd_ifname[0], ifname))
- alias = dev;
-
- if (colon)
- *colon = ':';
- if (colon2)
- *colon2 = ':';
- }
- return alias;
-}
-
-static int kiblnd_startup(struct lnet_ni *ni)
-{
- char *ifname;
- struct kib_dev *ibdev = NULL;
- struct kib_net *net;
- struct timespec64 tv;
- unsigned long flags;
- int rc;
- int newdev;
-
- LASSERT(ni->ni_lnd == &the_o2iblnd);
-
- if (kiblnd_data.kib_init == IBLND_INIT_NOTHING) {
- rc = kiblnd_base_startup();
- if (rc)
- return rc;
- }
-
- net = kzalloc(sizeof(*net), GFP_NOFS);
- ni->ni_data = net;
- if (!net)
- goto net_failed;
-
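- /*
- * the incarnation stamps this net instance (microseconds since
- * the epoch) so peers can detect a restart and reject stale
- * connections
- */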
- ktime_get_real_ts64(&tv);
- net->ibn_incarnation = tv.tv_sec * USEC_PER_SEC +
- tv.tv_nsec / NSEC_PER_USEC;
-
- rc = kiblnd_tunables_setup(ni);
- if (rc)
- goto net_failed;
-
- if (ni->ni_interfaces[0]) {
- /* Use the IPoIB interface specified in 'networks=' */
-
- BUILD_BUG_ON(LNET_MAX_INTERFACES <= 1);
- if (ni->ni_interfaces[1]) {
- CERROR("Multiple interfaces not supported\n");
- goto failed;
- }
-
- ifname = ni->ni_interfaces[0];
- } else {
- ifname = *kiblnd_tunables.kib_default_ipif;
- }
-
- if (strlen(ifname) >= sizeof(ibdev->ibd_ifname)) {
- CERROR("IPoIB interface name too long: %s\n", ifname);
- goto failed;
- }
-
- ibdev = kiblnd_dev_search(ifname);
-
- newdev = !ibdev;
- /* hmm...create kib_dev even for alias */
- if (!ibdev || strcmp(&ibdev->ibd_ifname[0], ifname))
- ibdev = kiblnd_create_dev(ifname);
-
- if (!ibdev)
- goto failed;
-
- net->ibn_dev = ibdev;
- ni->ni_nid = LNET_MKNID(LNET_NIDNET(ni->ni_nid), ibdev->ibd_ifip);
-
- rc = kiblnd_dev_start_threads(ibdev, newdev,
- ni->ni_cpts, ni->ni_ncpts);
- if (rc)
- goto failed;
-
- rc = kiblnd_net_init_pools(net, ni, ni->ni_cpts, ni->ni_ncpts);
- if (rc) {
- CERROR("Failed to initialize NI pools: %d\n", rc);
- goto failed;
- }
-
- write_lock_irqsave(&kiblnd_data.kib_global_lock, flags);
- ibdev->ibd_nnets++;
- list_add_tail(&net->ibn_list, &ibdev->ibd_nets);
- write_unlock_irqrestore(&kiblnd_data.kib_global_lock, flags);
-
- net->ibn_init = IBLND_INIT_ALL;
-
- return 0;
-
-failed:
- if (!net->ibn_dev && ibdev)
- kiblnd_destroy_dev(ibdev);
-
-net_failed:
- kiblnd_shutdown(ni);
-
- CDEBUG(D_NET, "%s failed\n", __func__);
- return -ENETDOWN;
-}
-
-static struct lnet_lnd the_o2iblnd = {
- .lnd_type = O2IBLND,
- .lnd_startup = kiblnd_startup,
- .lnd_shutdown = kiblnd_shutdown,
- .lnd_ctl = kiblnd_ctl,
- .lnd_query = kiblnd_query,
- .lnd_send = kiblnd_send,
- .lnd_recv = kiblnd_recv,
-};
-
-static void __exit ko2iblnd_exit(void)
-{
- lnet_unregister_lnd(&the_o2iblnd);
-}
-
-static int __init ko2iblnd_init(void)
-{
- BUILD_BUG_ON(sizeof(struct kib_msg) > IBLND_MSG_SIZE);
- BUILD_BUG_ON(offsetof(struct kib_msg,
- ibm_u.get.ibgm_rd.rd_frags[IBLND_MAX_RDMA_FRAGS])
- > IBLND_MSG_SIZE);
- BUILD_BUG_ON(offsetof(struct kib_msg,
- ibm_u.putack.ibpam_rd.rd_frags[IBLND_MAX_RDMA_FRAGS])
- > IBLND_MSG_SIZE);
-
- kiblnd_tunables_init();
-
- lnet_register_lnd(&the_o2iblnd);
-
- return 0;
-}
-
-MODULE_AUTHOR("OpenSFS, Inc. <http://www.lustre.org/>");
-MODULE_DESCRIPTION("OpenIB gen2 LNet Network Driver");
-MODULE_VERSION("2.7.0");
-MODULE_LICENSE("GPL");
-
-module_init(ko2iblnd_init);
-module_exit(ko2iblnd_exit);
diff --git a/drivers/staging/lustre/lnet/klnds/o2iblnd/o2iblnd.h b/drivers/staging/lustre/lnet/klnds/o2iblnd/o2iblnd.h
deleted file mode 100644
index b18911d09e9a..000000000000
--- a/drivers/staging/lustre/lnet/klnds/o2iblnd/o2iblnd.h
+++ /dev/null
@@ -1,1038 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/*
- * GPL HEADER START
- *
- * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 only,
- * as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * General Public License version 2 for more details (a copy is included
- * in the LICENSE file that accompanied this code).
- *
- * You should have received a copy of the GNU General Public License
- * version 2 along with this program; If not, see
- * http://www.gnu.org/licenses/gpl-2.0.html
- *
- * GPL HEADER END
- */
-/*
- * Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved.
- * Use is subject to license terms.
- *
- * Copyright (c) 2011, 2015, Intel Corporation.
- */
-/*
- * This file is part of Lustre, http://www.lustre.org/
- * Lustre is a trademark of Sun Microsystems, Inc.
- *
- * lnet/klnds/o2iblnd/o2iblnd.h
- *
- * Author: Eric Barton <eric@bartonsoftware.com>
- */
-
-#include <linux/module.h>
-#include <linux/kernel.h>
-#include <linux/mm.h>
-#include <linux/string.h>
-#include <linux/stat.h>
-#include <linux/errno.h>
-#include <linux/unistd.h>
-#include <linux/uio.h>
-#include <linux/uaccess.h>
-
-#include <linux/io.h>
-
-#include <linux/fs.h>
-#include <linux/file.h>
-#include <linux/list.h>
-#include <linux/kmod.h>
-#include <linux/sysctl.h>
-#include <linux/pci.h>
-
-#include <net/sock.h>
-#include <linux/in.h>
-
-#include <rdma/rdma_cm.h>
-#include <rdma/ib_cm.h>
-#include <rdma/ib_verbs.h>
-#include <rdma/ib_fmr_pool.h>
-
-#define DEBUG_SUBSYSTEM S_LND
-
-#include <linux/libcfs/libcfs.h>
-#include <linux/lnet/lib-lnet.h>
-
-#define IBLND_PEER_HASH_SIZE 101 /* # peer lists */
-/* # scheduler loops before reschedule */
-#define IBLND_RESCHED 100
-
-#define IBLND_N_SCHED 2
-#define IBLND_N_SCHED_HIGH 4
-
-struct kib_tunables {
- int *kib_dev_failover; /* HCA failover */
- unsigned int *kib_service; /* IB service number */
- int *kib_min_reconnect_interval; /* first failed connection retry... */
- int *kib_max_reconnect_interval; /* exponentially increasing to this */
- int *kib_cksum; /* checksum struct kib_msg? */
- int *kib_timeout; /* comms timeout (seconds) */
- int *kib_keepalive; /* keepalive timeout (seconds) */
- int *kib_ntx; /* # tx descs */
- char **kib_default_ipif; /* default IPoIB interface */
- int *kib_retry_count;
- int *kib_rnr_retry_count;
- int *kib_ib_mtu; /* IB MTU */
- int *kib_require_priv_port; /* accept only privileged ports */
- int *kib_use_priv_port; /* use privileged port for active connect */
- int *kib_nscheds; /* # threads on each CPT */
-};
-
-extern struct kib_tunables kiblnd_tunables;
-
-#define IBLND_MSG_QUEUE_SIZE_V1 8 /* V1 only : # messages/RDMAs in-flight */
-#define IBLND_CREDIT_HIGHWATER_V1 7 /* V1 only : when to eagerly return credits */
-
-#define IBLND_CREDITS_DEFAULT 8 /* default # of peer credits */
-#define IBLND_CREDITS_MAX ((typeof(((struct kib_msg *)0)->ibm_credits)) - 1) /* Max # of peer credits */
-
-/* when to eagerly return credits */
-#define IBLND_CREDITS_HIGHWATER(t, v) ((v) == IBLND_MSG_VERSION_1 ? \
- IBLND_CREDIT_HIGHWATER_V1 : \
- (t)->lnd_peercredits_hiw)
-
-#define kiblnd_rdma_create_id(cb, dev, ps, qpt) rdma_create_id(current->nsproxy->net_ns, \
- cb, dev, \
- ps, qpt)
-
-/* 2 OOB shall suffice for 1 keepalive and 1 returning credits */
-#define IBLND_OOB_CAPABLE(v) ((v) != IBLND_MSG_VERSION_1)
-#define IBLND_OOB_MSGS(v) (IBLND_OOB_CAPABLE(v) ? 2 : 0)
-
-#define IBLND_FRAG_SHIFT (PAGE_SHIFT - 12) /* frag size on wire is in 4K units */
-#define IBLND_MSG_SIZE (4 << 10) /* max size of queued messages (inc hdr) */
-#define IBLND_MAX_RDMA_FRAGS (LNET_MAX_PAYLOAD >> 12) /* max # of fragments supported, in 4K units */
-
-/************************/
-/* derived constants... */
-/* Pools (shared by connections on each CPT) */
-/* These pools can grow at runtime, so there's no need for a very large value */
-#define IBLND_TX_POOL 256
-#define IBLND_FMR_POOL 256
-#define IBLND_FMR_POOL_FLUSH 192
-
-#define IBLND_RX_MSGS(c) \
- ((c->ibc_queue_depth) * 2 + IBLND_OOB_MSGS(c->ibc_version))
-#define IBLND_RX_MSG_BYTES(c) (IBLND_RX_MSGS(c) * IBLND_MSG_SIZE)
-#define IBLND_RX_MSG_PAGES(c) \
- ((IBLND_RX_MSG_BYTES(c) + PAGE_SIZE - 1) / PAGE_SIZE)
-
-/* WRs and CQEs (per connection) */
-#define IBLND_RECV_WRS(c) IBLND_RX_MSGS(c)
-#define IBLND_SEND_WRS(c) \
- (((c->ibc_max_frags + 1) << IBLND_FRAG_SHIFT) * \
- kiblnd_concurrent_sends(c->ibc_version, c->ibc_peer->ibp_ni))
-#define IBLND_CQ_ENTRIES(c) (IBLND_RECV_WRS(c) + IBLND_SEND_WRS(c))
-
-struct kib_hca_dev;
-
-/* o2iblnd can run over aliased interface */
-#ifdef IFALIASZ
-#define KIB_IFNAME_SIZE IFALIASZ
-#else
-#define KIB_IFNAME_SIZE 256
-#endif
-
-struct kib_dev {
- struct list_head ibd_list; /* chain on kib_devs */
- struct list_head ibd_fail_list; /* chain on kib_failed_devs */
- __u32 ibd_ifip; /* IPoIB interface IP */
-
- /* IPoIB interface name */
- char ibd_ifname[KIB_IFNAME_SIZE];
- int ibd_nnets; /* # nets extant */
-
- unsigned long ibd_next_failover;
- int ibd_failed_failover; /* # failover failures */
- unsigned int ibd_failover; /* failover in progress */
- unsigned int ibd_can_failover; /* IPoIB interface is a bonding master */
- struct list_head ibd_nets;
- struct kib_hca_dev *ibd_hdev;
-};
-
-struct kib_hca_dev {
- struct rdma_cm_id *ibh_cmid; /* listener cmid */
- struct ib_device *ibh_ibdev; /* IB device */
- int ibh_page_shift; /* page shift of current HCA */
- int ibh_page_size; /* page size of current HCA */
- __u64 ibh_page_mask; /* page mask of current HCA */
- int ibh_mr_shift; /* bits shift of max MR size */
- __u64 ibh_mr_size; /* size of MR */
- struct ib_pd *ibh_pd; /* PD */
- struct kib_dev *ibh_dev; /* owner */
- atomic_t ibh_ref; /* refcount */
-};
-
-/** # of seconds to keep pool alive */
-#define IBLND_POOL_DEADLINE 300
-/** # of seconds to retry if allocation failed */
-#define IBLND_POOL_RETRY 1
-
-struct kib_pages {
- int ibp_npages; /* # pages */
- struct page *ibp_pages[0]; /* page array */
-};
-
-struct kib_pool;
-struct kib_poolset;
-
-typedef int (*kib_ps_pool_create_t)(struct kib_poolset *ps,
- int inc, struct kib_pool **pp_po);
-typedef void (*kib_ps_pool_destroy_t)(struct kib_pool *po);
-typedef void (*kib_ps_node_init_t)(struct kib_pool *po, struct list_head *node);
-typedef void (*kib_ps_node_fini_t)(struct kib_pool *po, struct list_head *node);
-
-struct kib_net;
-
-#define IBLND_POOL_NAME_LEN 32
-
-struct kib_poolset {
- spinlock_t ps_lock; /* serialize */
- struct kib_net *ps_net; /* network it belongs to */
- char ps_name[IBLND_POOL_NAME_LEN]; /* pool set name */
- struct list_head ps_pool_list; /* list of pools */
- struct list_head ps_failed_pool_list;/* failed pool list */
- unsigned long ps_next_retry; /* timestamp for retry after a failed allocation */
- int ps_increasing; /* is allocating new pool */
- int ps_pool_size; /* new pool size */
- int ps_cpt; /* CPT id */
-
- kib_ps_pool_create_t ps_pool_create; /* create a new pool */
- kib_ps_pool_destroy_t ps_pool_destroy; /* destroy a pool */
- kib_ps_node_init_t ps_node_init; /* initialize new allocated node */
- kib_ps_node_fini_t ps_node_fini; /* finalize node */
-};
-
-struct kib_pool {
- struct list_head po_list; /* chain on pool list */
- struct list_head po_free_list; /* pre-allocated node */
- struct kib_poolset *po_owner; /* pool_set of this pool */
- unsigned long po_deadline; /* deadline of this pool */
- int po_allocated; /* # of elements in use */
- int po_failed; /* pool is created on failed HCA */
- int po_size; /* # of pre-allocated elements */
-};
-
-struct kib_tx_poolset {
- struct kib_poolset tps_poolset; /* pool-set */
- __u64 tps_next_tx_cookie; /* cookie of TX */
-};
-
-struct kib_tx_pool {
- struct kib_pool tpo_pool; /* pool */
- struct kib_hca_dev *tpo_hdev; /* device for this pool */
- struct kib_tx *tpo_tx_descs; /* all the tx descriptors */
- struct kib_pages *tpo_tx_pages; /* premapped tx msg pages */
-};
-
-struct kib_fmr_poolset {
- spinlock_t fps_lock; /* serialize */
- struct kib_net *fps_net; /* IB network */
- struct list_head fps_pool_list; /* FMR pool list */
- struct list_head fps_failed_pool_list;/* FMR pool list */
- __u64 fps_version; /* validity stamp */
- int fps_cpt; /* CPT id */
- int fps_pool_size;
- int fps_flush_trigger;
- int fps_cache;
- int fps_increasing; /* is allocating new pool */
- unsigned long fps_next_retry; /* timestamp for retry after a failed allocation */
-};
-
-struct kib_fast_reg_descriptor { /* For fast registration */
- struct list_head frd_list;
- struct ib_send_wr frd_inv_wr;
- struct ib_reg_wr frd_fastreg_wr;
- struct ib_mr *frd_mr;
- bool frd_valid;
-};
-
-struct kib_fmr_pool {
- struct list_head fpo_list; /* chain on pool list */
- struct kib_hca_dev *fpo_hdev; /* device for this pool */
- struct kib_fmr_poolset *fpo_owner; /* owner of this pool */
- union {
- struct {
- struct ib_fmr_pool *fpo_fmr_pool; /* IB FMR pool */
- } fmr;
- struct { /* For fast registration */
- struct list_head fpo_pool_list;
- int fpo_pool_size;
- } fast_reg;
- };
- unsigned long fpo_deadline; /* deadline of this pool */
- int fpo_failed; /* fmr pool is failed */
- int fpo_map_count; /* # of mapped FMR */
- int fpo_is_fmr;
-};
-
-struct kib_fmr {
- struct kib_fmr_pool *fmr_pool; /* pool of FMR */
- struct ib_pool_fmr *fmr_pfmr; /* IB pool fmr */
- struct kib_fast_reg_descriptor *fmr_frd;
- u32 fmr_key;
-};
-
-struct kib_net {
- struct list_head ibn_list; /* chain on struct kib_dev::ibd_nets */
- __u64 ibn_incarnation;/* my epoch */
- int ibn_init; /* initialisation state */
- int ibn_shutdown; /* shutting down? */
-
- atomic_t ibn_npeers; /* # peers extant */
- atomic_t ibn_nconns; /* # connections extant */
-
- struct kib_tx_poolset **ibn_tx_ps; /* tx pool-set */
- struct kib_fmr_poolset **ibn_fmr_ps; /* fmr pool-set */
-
- struct kib_dev *ibn_dev; /* underlying IB device */
-};
-
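-/* scheduler thread id: CPT number in the high bits, per-CPT thread
- * index in the low KIB_THREAD_SHIFT bits
- */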
-#define KIB_THREAD_SHIFT 16
-#define KIB_THREAD_ID(cpt, tid) ((cpt) << KIB_THREAD_SHIFT | (tid))
-#define KIB_THREAD_CPT(id) ((id) >> KIB_THREAD_SHIFT)
-#define KIB_THREAD_TID(id) ((id) & ((1UL << KIB_THREAD_SHIFT) - 1))
-
-struct kib_sched_info {
- spinlock_t ibs_lock; /* serialise */
- wait_queue_head_t ibs_waitq; /* schedulers sleep here */
- struct list_head ibs_conns; /* conns to check for rx completions */
- int ibs_nthreads; /* number of scheduler threads */
- int ibs_nthreads_max; /* max allowed scheduler threads */
- int ibs_cpt; /* CPT id */
-};
-
-struct kib_data {
- int kib_init; /* initialisation state */
- int kib_shutdown; /* shut down? */
- struct list_head kib_devs; /* IB devices extant */
- struct list_head kib_failed_devs; /* list head of failed devices */
- wait_queue_head_t kib_failover_waitq; /* schedulers sleep here */
- atomic_t kib_nthreads; /* # live threads */
- rwlock_t kib_global_lock; /* stabilize net/dev/peer/conn ops */
- struct list_head *kib_peers; /* hash table of all my known peers */
- int kib_peer_hash_size; /* size of kib_peers */
- void *kib_connd; /* the connd task (serialisation assertions) */
- struct list_head kib_connd_conns; /* connections to setup/teardown */
- struct list_head kib_connd_zombies; /* connections with zero refcount */
- /* connections to reconnect */
- struct list_head kib_reconn_list;
- /* peers wait for reconnection */
- struct list_head kib_reconn_wait;
- /**
- * The second that peers are pulled out from \a kib_reconn_wait
- * for reconnection.
- */
- time64_t kib_reconn_sec;
-
- wait_queue_head_t kib_connd_waitq; /* connection daemon sleeps here */
- spinlock_t kib_connd_lock; /* serialise */
- struct ib_qp_attr kib_error_qpa; /* QP->ERROR */
- struct kib_sched_info **kib_scheds; /* percpt data for schedulers */
-};
-
-#define IBLND_INIT_NOTHING 0
-#define IBLND_INIT_DATA 1
-#define IBLND_INIT_ALL 2
-
-/************************************************************************
- * IB Wire message format.
- * These are sent in sender's byte order (i.e. receiver flips).
- */
-
-struct kib_connparams {
- __u16 ibcp_queue_depth;
- __u16 ibcp_max_frags;
- __u32 ibcp_max_msg_size;
-} WIRE_ATTR;
-
-struct kib_immediate_msg {
- struct lnet_hdr ibim_hdr; /* portals header */
- char ibim_payload[0]; /* piggy-backed payload */
-} WIRE_ATTR;
-
-struct kib_rdma_frag {
- __u32 rf_nob; /* # bytes this frag */
- __u64 rf_addr; /* CAVEAT EMPTOR: misaligned!! */
-} WIRE_ATTR;
-
-struct kib_rdma_desc {
- __u32 rd_key; /* local/remote key */
- __u32 rd_nfrags; /* # fragments */
- struct kib_rdma_frag rd_frags[0]; /* buffer frags */
-} WIRE_ATTR;
-
-struct kib_putreq_msg {
- struct lnet_hdr ibprm_hdr; /* portals header */
- __u64 ibprm_cookie; /* opaque completion cookie */
-} WIRE_ATTR;
-
-struct kib_putack_msg {
- __u64 ibpam_src_cookie; /* reflected completion cookie */
- __u64 ibpam_dst_cookie; /* opaque completion cookie */
- struct kib_rdma_desc ibpam_rd; /* sender's sink buffer */
-} WIRE_ATTR;
-
-struct kib_get_msg {
- struct lnet_hdr ibgm_hdr; /* portals header */
- __u64 ibgm_cookie; /* opaque completion cookie */
- struct kib_rdma_desc ibgm_rd; /* rdma descriptor */
-} WIRE_ATTR;
-
-struct kib_completion_msg {
- __u64 ibcm_cookie; /* opaque completion cookie */
- __s32 ibcm_status; /* < 0 failure: >= 0 length */
-} WIRE_ATTR;
-
-struct kib_msg {
- /* First 2 fields fixed FOR ALL TIME */
- __u32 ibm_magic; /* I'm an ibnal message */
- __u16 ibm_version; /* this is my version number */
-
- __u8 ibm_type; /* msg type */
- __u8 ibm_credits; /* returned credits */
- __u32 ibm_nob; /* # bytes in whole message */
- __u32 ibm_cksum; /* checksum (0 == no checksum) */
- __u64 ibm_srcnid; /* sender's NID */
- __u64 ibm_srcstamp; /* sender's incarnation */
- __u64 ibm_dstnid; /* destination's NID */
- __u64 ibm_dststamp; /* destination's incarnation */
-
- union {
- struct kib_connparams connparams;
- struct kib_immediate_msg immediate;
- struct kib_putreq_msg putreq;
- struct kib_putack_msg putack;
- struct kib_get_msg get;
- struct kib_completion_msg completion;
- } WIRE_ATTR ibm_u;
-} WIRE_ATTR;
-
-#define IBLND_MSG_MAGIC LNET_PROTO_IB_MAGIC /* unique magic */
-
-#define IBLND_MSG_VERSION_1 0x11
-#define IBLND_MSG_VERSION_2 0x12
-#define IBLND_MSG_VERSION IBLND_MSG_VERSION_2
-
-#define IBLND_MSG_CONNREQ 0xc0 /* connection request */
-#define IBLND_MSG_CONNACK 0xc1 /* connection acknowledge */
-#define IBLND_MSG_NOOP 0xd0 /* nothing (just credits) */
-#define IBLND_MSG_IMMEDIATE 0xd1 /* immediate */
-#define IBLND_MSG_PUT_REQ 0xd2 /* putreq (src->sink) */
-#define IBLND_MSG_PUT_NAK 0xd3 /* completion (sink->src) */
-#define IBLND_MSG_PUT_ACK 0xd4 /* putack (sink->src) */
-#define IBLND_MSG_PUT_DONE 0xd5 /* completion (src->sink) */
-#define IBLND_MSG_GET_REQ 0xd6 /* getreq (sink->src) */
-#define IBLND_MSG_GET_DONE 0xd7 /* completion (src->sink: all OK) */
-
-struct kib_rej {
- __u32 ibr_magic; /* sender's magic */
- __u16 ibr_version; /* sender's version */
- __u8 ibr_why; /* reject reason */
- __u8 ibr_padding; /* padding */
- __u64 ibr_incarnation; /* incarnation of peer */
- struct kib_connparams ibr_cp; /* connection parameters */
-} WIRE_ATTR;
-
-/* connection rejection reasons */
-#define IBLND_REJECT_CONN_RACE 1 /* You lost connection race */
-#define IBLND_REJECT_NO_RESOURCES 2 /* Out of memory/conns etc */
-#define IBLND_REJECT_FATAL 3 /* Anything else */
-#define IBLND_REJECT_CONN_UNCOMPAT 4 /* incompatible version peer */
-#define IBLND_REJECT_CONN_STALE 5 /* stale peer */
-/* peer's rdma frags don't match mine */
-#define IBLND_REJECT_RDMA_FRAGS 6
-/* peer's msg queue size doesn't match mine */
-#define IBLND_REJECT_MSG_QUEUE_SIZE 7
-
-/***********************************************************************/
-
-struct kib_rx { /* receive message */
- struct list_head rx_list; /* queue for attention */
- struct kib_conn *rx_conn; /* owning conn */
- int rx_nob; /* # bytes received (-1 while posted) */
- enum ib_wc_status rx_status; /* completion status */
- struct kib_msg *rx_msg; /* message buffer (host vaddr) */
- __u64 rx_msgaddr; /* message buffer (I/O addr) */
- DECLARE_PCI_UNMAP_ADDR(rx_msgunmap); /* for dma_unmap_single() */
- struct ib_recv_wr rx_wrq; /* receive work item... */
- struct ib_sge rx_sge; /* ...and its memory */
-};
-
-#define IBLND_POSTRX_DONT_POST 0 /* don't post */
-#define IBLND_POSTRX_NO_CREDIT 1 /* post: no credits */
-#define IBLND_POSTRX_PEER_CREDIT 2 /* post: give peer back 1 credit */
-#define IBLND_POSTRX_RSRVD_CREDIT 3 /* post: give self back 1 reserved credit */
-
-struct kib_tx { /* transmit message */
- struct list_head tx_list; /* queue on idle_txs ibc_tx_queue etc. */
- struct kib_tx_pool *tx_pool; /* pool I'm from */
- struct kib_conn *tx_conn; /* owning conn */
- short tx_sending; /* # tx callbacks outstanding */
- short tx_queued; /* queued for sending */
- short tx_waiting; /* waiting for peer */
- int tx_status; /* LNET completion status */
- unsigned long tx_deadline; /* completion deadline */
- __u64 tx_cookie; /* completion cookie */
- struct lnet_msg *tx_lntmsg[2]; /* lnet msgs to finalize on completion */
- struct kib_msg *tx_msg; /* message buffer (host vaddr) */
- __u64 tx_msgaddr; /* message buffer (I/O addr) */
- DECLARE_PCI_UNMAP_ADDR(tx_msgunmap); /* for dma_unmap_single() */
- int tx_nwrq; /* # send work items */
- struct ib_rdma_wr *tx_wrq; /* send work items... */
- struct ib_sge *tx_sge; /* ...and their memory */
- struct kib_rdma_desc *tx_rd; /* rdma descriptor */
- int tx_nfrags; /* # entries in... */
- struct scatterlist *tx_frags; /* dma_map_sg descriptor */
- __u64 *tx_pages; /* rdma phys page addrs */
- struct kib_fmr fmr; /* FMR */
- int tx_dmadir; /* dma direction */
-};
-
-struct kib_connvars {
- struct kib_msg cv_msg; /* connection-in-progress variables */
-};
-
-struct kib_conn {
- struct kib_sched_info *ibc_sched; /* scheduler information */
- struct kib_peer *ibc_peer; /* owning peer */
- struct kib_hca_dev *ibc_hdev; /* HCA bound on */
- struct list_head ibc_list; /* stash on peer's conn list */
- struct list_head ibc_sched_list; /* schedule for attention */
- __u16 ibc_version; /* version of connection */
- /* reconnect later */
- __u16 ibc_reconnect:1;
- __u64 ibc_incarnation; /* which instance of the peer */
- atomic_t ibc_refcount; /* # users */
- int ibc_state; /* what's happening */
- int ibc_nsends_posted; /* # uncompleted sends */
- int ibc_noops_posted; /* # uncompleted NOOPs */
- int ibc_credits; /* # credits I have */
- int ibc_outstanding_credits; /* # credits to return */
- int ibc_reserved_credits; /* # ACK/DONE msg credits */
- int ibc_comms_error; /* set on comms error */
- /* connections queue depth */
- __u16 ibc_queue_depth;
- /* connections max frags */
- __u16 ibc_max_frags;
- unsigned int ibc_nrx:16; /* receive buffers owned */
- unsigned int ibc_scheduled:1; /* scheduled for attention */
- unsigned int ibc_ready:1; /* CQ callback fired */
- unsigned long ibc_last_send; /* time of last send */
- struct list_head ibc_connd_list; /* link chain for */
- /* kiblnd_check_conns only */
- struct list_head ibc_early_rxs; /* rxs completed before ESTABLISHED */
- struct list_head ibc_tx_noops; /* IBLND_MSG_NOOPs for */
- /* IBLND_MSG_VERSION_1 */
- struct list_head ibc_tx_queue; /* sends that need a credit */
- struct list_head ibc_tx_queue_nocred; /* sends that don't need a */
- /* credit */
- struct list_head ibc_tx_queue_rsrvd; /* sends that need to */
- /* reserve an ACK/DONE msg */
- struct list_head ibc_active_txs; /* active tx awaiting completion */
- spinlock_t ibc_lock; /* serialise */
- struct kib_rx *ibc_rxs; /* the rx descs */
- struct kib_pages *ibc_rx_pages; /* premapped rx msg pages */
-
- struct rdma_cm_id *ibc_cmid; /* CM id */
- struct ib_cq *ibc_cq; /* completion queue */
-
- struct kib_connvars *ibc_connvars; /* in-progress connection state */
-};
-
-#define IBLND_CONN_INIT 0 /* being initialised */
-#define IBLND_CONN_ACTIVE_CONNECT 1 /* active sending req */
-#define IBLND_CONN_PASSIVE_WAIT 2 /* passive waiting for rtu */
-#define IBLND_CONN_ESTABLISHED 3 /* connection established */
-#define IBLND_CONN_CLOSING 4 /* being closed */
-#define IBLND_CONN_DISCONNECTED 5 /* disconnected */
-
-struct kib_peer {
- struct list_head ibp_list; /* stash on global peer list */
- lnet_nid_t ibp_nid; /* who's on the other end(s) */
- struct lnet_ni *ibp_ni; /* LNet interface */
- struct list_head ibp_conns; /* all active connections */
- struct list_head ibp_tx_queue; /* msgs waiting for a conn */
- __u64 ibp_incarnation; /* incarnation of peer */
- /* when (in jiffies) I was last alive */
- unsigned long ibp_last_alive;
- /* # users */
- atomic_t ibp_refcount;
- /* version of peer */
- __u16 ibp_version;
- /* current passive connection attempts */
- unsigned short ibp_accepting;
- /* current active connection attempts */
- unsigned short ibp_connecting;
- /* reconnect this peer later */
- unsigned short ibp_reconnecting:1;
- /* counter of how many times we triggered a conn race */
- unsigned char ibp_races;
- /* # consecutive reconnection attempts to this peer */
- unsigned int ibp_reconnected;
- /* errno on closing this peer */
- int ibp_error;
- /* max map_on_demand */
- __u16 ibp_max_frags;
- /* max_peer_credits */
- __u16 ibp_queue_depth;
-};
-
-extern struct kib_data kiblnd_data;
-
-void kiblnd_hdev_destroy(struct kib_hca_dev *hdev);
-
-int kiblnd_msg_queue_size(int version, struct lnet_ni *ni);
-
-/* max # of fragments configured by user */
-static inline int
-kiblnd_cfg_rdma_frags(struct lnet_ni *ni)
-{
- struct lnet_ioctl_config_o2iblnd_tunables *tunables;
- int mod;
-
- tunables = &ni->ni_lnd_tunables->lt_tun_u.lt_o2ib;
- mod = tunables->lnd_map_on_demand;
- return mod ? mod : IBLND_MAX_RDMA_FRAGS >> IBLND_FRAG_SHIFT;
-}
-
-static inline int
-kiblnd_rdma_frags(int version, struct lnet_ni *ni)
-{
- return version == IBLND_MSG_VERSION_1 ?
- (IBLND_MAX_RDMA_FRAGS >> IBLND_FRAG_SHIFT) :
- kiblnd_cfg_rdma_frags(ni);
-}
-
-static inline int
-kiblnd_concurrent_sends(int version, struct lnet_ni *ni)
-{
- struct lnet_ioctl_config_o2iblnd_tunables *tunables;
- int concurrent_sends;
-
- tunables = &ni->ni_lnd_tunables->lt_tun_u.lt_o2ib;
- concurrent_sends = tunables->lnd_concurrent_sends;
-
- if (version == IBLND_MSG_VERSION_1) {
- if (concurrent_sends > IBLND_MSG_QUEUE_SIZE_V1 * 2)
- return IBLND_MSG_QUEUE_SIZE_V1 * 2;
-
- if (concurrent_sends < IBLND_MSG_QUEUE_SIZE_V1 / 2)
- return IBLND_MSG_QUEUE_SIZE_V1 / 2;
- }
-
- return concurrent_sends;
-}
-
-static inline void
-kiblnd_hdev_addref_locked(struct kib_hca_dev *hdev)
-{
- LASSERT(atomic_read(&hdev->ibh_ref) > 0);
- atomic_inc(&hdev->ibh_ref);
-}
-
-static inline void
-kiblnd_hdev_decref(struct kib_hca_dev *hdev)
-{
- LASSERT(atomic_read(&hdev->ibh_ref) > 0);
- if (atomic_dec_and_test(&hdev->ibh_ref))
- kiblnd_hdev_destroy(hdev);
-}
-
-static inline int
-kiblnd_dev_can_failover(struct kib_dev *dev)
-{
- if (!list_empty(&dev->ibd_fail_list)) /* already scheduled */
- return 0;
-
- if (!*kiblnd_tunables.kib_dev_failover) /* disabled */
- return 0;
-
- if (*kiblnd_tunables.kib_dev_failover > 1) /* force failover */
- return 1;
-
- return dev->ibd_can_failover;
-}
-
-#define kiblnd_conn_addref(conn) \
-do { \
- CDEBUG(D_NET, "conn[%p] (%d)++\n", \
- (conn), atomic_read(&(conn)->ibc_refcount)); \
- atomic_inc(&(conn)->ibc_refcount); \
-} while (0)
-
-#define kiblnd_conn_decref(conn) \
-do { \
- unsigned long flags; \
- \
- CDEBUG(D_NET, "conn[%p] (%d)--\n", \
- (conn), atomic_read(&(conn)->ibc_refcount)); \
- LASSERT_ATOMIC_POS(&(conn)->ibc_refcount); \
- if (atomic_dec_and_test(&(conn)->ibc_refcount)) { \
- spin_lock_irqsave(&kiblnd_data.kib_connd_lock, flags); \
- list_add_tail(&(conn)->ibc_list, \
- &kiblnd_data.kib_connd_zombies); \
- wake_up(&kiblnd_data.kib_connd_waitq); \
- spin_unlock_irqrestore(&kiblnd_data.kib_connd_lock, flags);\
- } \
-} while (0)
-
-#define kiblnd_peer_addref(peer) \
-do { \
- CDEBUG(D_NET, "peer[%p] -> %s (%d)++\n", \
- (peer), libcfs_nid2str((peer)->ibp_nid), \
- atomic_read(&(peer)->ibp_refcount)); \
- atomic_inc(&(peer)->ibp_refcount); \
-} while (0)
-
-#define kiblnd_peer_decref(peer) \
-do { \
- CDEBUG(D_NET, "peer[%p] -> %s (%d)--\n", \
- (peer), libcfs_nid2str((peer)->ibp_nid), \
- atomic_read(&(peer)->ibp_refcount)); \
- LASSERT_ATOMIC_POS(&(peer)->ibp_refcount); \
- if (atomic_dec_and_test(&(peer)->ibp_refcount)) \
- kiblnd_destroy_peer(peer); \
-} while (0)
-
-static inline bool
-kiblnd_peer_connecting(struct kib_peer *peer)
-{
- return peer->ibp_connecting ||
- peer->ibp_reconnecting ||
- peer->ibp_accepting;
-}
-
-static inline bool
-kiblnd_peer_idle(struct kib_peer *peer)
-{
- return !kiblnd_peer_connecting(peer) && list_empty(&peer->ibp_conns);
-}
-
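-/* hash a NID to its peer list: simple modulo on the low 32 bits of the NID */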
-static inline struct list_head *
-kiblnd_nid2peerlist(lnet_nid_t nid)
-{
- unsigned int hash =
- ((unsigned int)nid) % kiblnd_data.kib_peer_hash_size;
-
- return &kiblnd_data.kib_peers[hash];
-}
-
-static inline int
-kiblnd_peer_active(struct kib_peer *peer)
-{
- /* Am I in the peer hash table? */
- return !list_empty(&peer->ibp_list);
-}
-
-static inline struct kib_conn *
-kiblnd_get_conn_locked(struct kib_peer *peer)
-{
- LASSERT(!list_empty(&peer->ibp_conns));
-
- /* just return the first connection */
- return list_entry(peer->ibp_conns.next, struct kib_conn, ibc_list);
-}
-
-static inline int
-kiblnd_send_keepalive(struct kib_conn *conn)
-{
- return (*kiblnd_tunables.kib_keepalive > 0) &&
- cfs_time_after(jiffies, conn->ibc_last_send +
- msecs_to_jiffies(*kiblnd_tunables.kib_keepalive *
- MSEC_PER_SEC));
-}
-
-static inline int
-kiblnd_need_noop(struct kib_conn *conn)
-{
- struct lnet_ioctl_config_o2iblnd_tunables *tunables;
- struct lnet_ni *ni = conn->ibc_peer->ibp_ni;
-
- LASSERT(conn->ibc_state >= IBLND_CONN_ESTABLISHED);
- tunables = &ni->ni_lnd_tunables->lt_tun_u.lt_o2ib;
-
- if (conn->ibc_outstanding_credits <
- IBLND_CREDITS_HIGHWATER(tunables, conn->ibc_version) &&
- !kiblnd_send_keepalive(conn))
- return 0; /* No need to send NOOP */
-
- if (IBLND_OOB_CAPABLE(conn->ibc_version)) {
- if (!list_empty(&conn->ibc_tx_queue_nocred))
- return 0; /* NOOP can be piggybacked */
-
- /* No tx to piggyback NOOP onto or no credit to send a tx */
- return (list_empty(&conn->ibc_tx_queue) ||
- !conn->ibc_credits);
- }
-
- if (!list_empty(&conn->ibc_tx_noops) || /* NOOP already queued */
- !list_empty(&conn->ibc_tx_queue_nocred) || /* piggyback NOOP */
- !conn->ibc_credits) /* no credit */
- return 0;
-
- if (conn->ibc_credits == 1 && /* last credit reserved for */
- !conn->ibc_outstanding_credits) /* giving back credits */
- return 0;
-
- /* No tx to piggyback NOOP onto or no credit to send a tx */
- return (list_empty(&conn->ibc_tx_queue) || conn->ibc_credits == 1);
-}
-
-static inline void
-kiblnd_abort_receives(struct kib_conn *conn)
-{
- ib_modify_qp(conn->ibc_cmid->qp,
- &kiblnd_data.kib_error_qpa, IB_QP_STATE);
-}
-
-static inline const char *
-kiblnd_queue2str(struct kib_conn *conn, struct list_head *q)
-{
- if (q == &conn->ibc_tx_queue)
- return "tx_queue";
-
- if (q == &conn->ibc_tx_queue_rsrvd)
- return "tx_queue_rsrvd";
-
- if (q == &conn->ibc_tx_queue_nocred)
- return "tx_queue_nocred";
-
- if (q == &conn->ibc_active_txs)
- return "active_txs";
-
- LBUG();
- return NULL;
-}
-
-/* CAVEAT EMPTOR: We rely on descriptor alignment to allow us to use the */
-/* lowest bits of the work request id to stash the work item type. */
-
-#define IBLND_WID_INVAL 0
-#define IBLND_WID_TX 1
-#define IBLND_WID_RX 2
-#define IBLND_WID_RDMA 3
-#define IBLND_WID_MR 4
-#define IBLND_WID_MASK 7UL
-
-static inline __u64
-kiblnd_ptr2wreqid(void *ptr, int type)
-{
- unsigned long lptr = (unsigned long)ptr;
-
- LASSERT(!(lptr & IBLND_WID_MASK));
- LASSERT(!(type & ~IBLND_WID_MASK));
- return (__u64)(lptr | type);
-}
-
-static inline void *
-kiblnd_wreqid2ptr(__u64 wreqid)
-{
- return (void *)(((unsigned long)wreqid) & ~IBLND_WID_MASK);
-}
-
-static inline int
-kiblnd_wreqid2type(__u64 wreqid)
-{
- return wreqid & IBLND_WID_MASK;
-}
-
-static inline void
-kiblnd_set_conn_state(struct kib_conn *conn, int state)
-{
- conn->ibc_state = state;
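- /* make the state change visible to other CPUs before proceeding */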
- mb();
-}
-
-static inline void
-kiblnd_init_msg(struct kib_msg *msg, int type, int body_nob)
-{
- msg->ibm_type = type;
- msg->ibm_nob = offsetof(struct kib_msg, ibm_u) + body_nob;
-}
-
-static inline int
-kiblnd_rd_size(struct kib_rdma_desc *rd)
-{
- int i;
- int size;
-
- for (i = size = 0; i < rd->rd_nfrags; i++)
- size += rd->rd_frags[i].rf_nob;
-
- return size;
-}
-
-static inline __u64
-kiblnd_rd_frag_addr(struct kib_rdma_desc *rd, int index)
-{
- return rd->rd_frags[index].rf_addr;
-}
-
-static inline __u32
-kiblnd_rd_frag_size(struct kib_rdma_desc *rd, int index)
-{
- return rd->rd_frags[index].rf_nob;
-}
-
-static inline __u32
-kiblnd_rd_frag_key(struct kib_rdma_desc *rd, int index)
-{
- return rd->rd_key;
-}
-
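-/* consume 'nob' bytes from frag 'index'; once a frag is fully consumed,
- * advance to the next one and return the (possibly updated) frag index
- */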
-static inline int
-kiblnd_rd_consume_frag(struct kib_rdma_desc *rd, int index, __u32 nob)
-{
- if (nob < rd->rd_frags[index].rf_nob) {
- rd->rd_frags[index].rf_addr += nob;
- rd->rd_frags[index].rf_nob -= nob;
- } else {
- index++;
- }
-
- return index;
-}
-
-static inline int
-kiblnd_rd_msg_size(struct kib_rdma_desc *rd, int msgtype, int n)
-{
- LASSERT(msgtype == IBLND_MSG_GET_REQ ||
- msgtype == IBLND_MSG_PUT_ACK);
-
- return msgtype == IBLND_MSG_GET_REQ ?
- offsetof(struct kib_get_msg, ibgm_rd.rd_frags[n]) :
- offsetof(struct kib_putack_msg, ibpam_rd.rd_frags[n]);
-}
-
-static inline __u64
-kiblnd_dma_mapping_error(struct ib_device *dev, u64 dma_addr)
-{
- return ib_dma_mapping_error(dev, dma_addr);
-}
-
-static inline __u64 kiblnd_dma_map_single(struct ib_device *dev,
- void *msg, size_t size,
- enum dma_data_direction direction)
-{
- return ib_dma_map_single(dev, msg, size, direction);
-}
-
-static inline void kiblnd_dma_unmap_single(struct ib_device *dev,
- __u64 addr, size_t size,
- enum dma_data_direction direction)
-{
- ib_dma_unmap_single(dev, addr, size, direction);
-}
-
-#define KIBLND_UNMAP_ADDR_SET(p, m, a) do {} while (0)
-#define KIBLND_UNMAP_ADDR(p, m, a) (a)
-
-static inline int kiblnd_dma_map_sg(struct ib_device *dev,
- struct scatterlist *sg, int nents,
- enum dma_data_direction direction)
-{
- return ib_dma_map_sg(dev, sg, nents, direction);
-}
-
-static inline void kiblnd_dma_unmap_sg(struct ib_device *dev,
- struct scatterlist *sg, int nents,
- enum dma_data_direction direction)
-{
- ib_dma_unmap_sg(dev, sg, nents, direction);
-}
-
-static inline __u64 kiblnd_sg_dma_address(struct ib_device *dev,
- struct scatterlist *sg)
-{
- return ib_sg_dma_address(dev, sg);
-}
-
-static inline unsigned int kiblnd_sg_dma_len(struct ib_device *dev,
- struct scatterlist *sg)
-{
- return ib_sg_dma_len(dev, sg);
-}
-
-/* XXX We use KIBLND_CONN_PARAM(e) as a writable buffer; that's not strictly */
-/* right because OFED 1.2 declares it const, so to use it we have to add a */
-/* (void *) cast to overcome the "const" */
-
-#define KIBLND_CONN_PARAM(e) ((e)->param.conn.private_data)
-#define KIBLND_CONN_PARAM_LEN(e) ((e)->param.conn.private_data_len)
-
-void kiblnd_map_rx_descs(struct kib_conn *conn);
-void kiblnd_unmap_rx_descs(struct kib_conn *conn);
-void kiblnd_pool_free_node(struct kib_pool *pool, struct list_head *node);
-struct list_head *kiblnd_pool_alloc_node(struct kib_poolset *ps);
-
-int kiblnd_fmr_pool_map(struct kib_fmr_poolset *fps, struct kib_tx *tx,
- struct kib_rdma_desc *rd, __u32 nob, __u64 iov,
- struct kib_fmr *fmr);
-void kiblnd_fmr_pool_unmap(struct kib_fmr *fmr, int status);
-
-int kiblnd_tunables_setup(struct lnet_ni *ni);
-void kiblnd_tunables_init(void);
-
-int kiblnd_connd(void *arg);
-int kiblnd_scheduler(void *arg);
-int kiblnd_thread_start(int (*fn)(void *arg), void *arg, char *name);
-int kiblnd_failover_thread(void *arg);
-
-int kiblnd_alloc_pages(struct kib_pages **pp, int cpt, int npages);
-
-int kiblnd_cm_callback(struct rdma_cm_id *cmid,
- struct rdma_cm_event *event);
-int kiblnd_translate_mtu(int value);
-
-int kiblnd_dev_failover(struct kib_dev *dev);
-int kiblnd_create_peer(struct lnet_ni *ni, struct kib_peer **peerp,
- lnet_nid_t nid);
-void kiblnd_destroy_peer(struct kib_peer *peer);
-bool kiblnd_reconnect_peer(struct kib_peer *peer);
-void kiblnd_destroy_dev(struct kib_dev *dev);
-void kiblnd_unlink_peer_locked(struct kib_peer *peer);
-struct kib_peer *kiblnd_find_peer_locked(lnet_nid_t nid);
-int kiblnd_close_stale_conns_locked(struct kib_peer *peer,
- int version, __u64 incarnation);
-int kiblnd_close_peer_conns_locked(struct kib_peer *peer, int why);
-
-struct kib_conn *kiblnd_create_conn(struct kib_peer *peer,
- struct rdma_cm_id *cmid,
- int state, int version);
-void kiblnd_destroy_conn(struct kib_conn *conn);
-void kiblnd_close_conn(struct kib_conn *conn, int error);
-void kiblnd_close_conn_locked(struct kib_conn *conn, int error);
-
-void kiblnd_launch_tx(struct lnet_ni *ni, struct kib_tx *tx, lnet_nid_t nid);
-void kiblnd_txlist_done(struct lnet_ni *ni, struct list_head *txlist,
- int status);
-
-void kiblnd_qp_event(struct ib_event *event, void *arg);
-void kiblnd_cq_event(struct ib_event *event, void *arg);
-void kiblnd_cq_completion(struct ib_cq *cq, void *arg);
-
-void kiblnd_pack_msg(struct lnet_ni *ni, struct kib_msg *msg, int version,
- int credits, lnet_nid_t dstnid, __u64 dststamp);
-int kiblnd_unpack_msg(struct kib_msg *msg, int nob);
-int kiblnd_post_rx(struct kib_rx *rx, int credit);
-
-int kiblnd_send(struct lnet_ni *ni, void *private, struct lnet_msg *lntmsg);
-int kiblnd_recv(struct lnet_ni *ni, void *private, struct lnet_msg *lntmsg,
- int delayed, struct iov_iter *to, unsigned int rlen);
diff --git a/drivers/staging/lustre/lnet/klnds/o2iblnd/o2iblnd_cb.c b/drivers/staging/lustre/lnet/klnds/o2iblnd/o2iblnd_cb.c
deleted file mode 100644
index 6690a6cd4e34..000000000000
--- a/drivers/staging/lustre/lnet/klnds/o2iblnd/o2iblnd_cb.c
+++ /dev/null
@@ -1,3751 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/*
- * GPL HEADER START
- *
- * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 only,
- * as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * General Public License version 2 for more details (a copy is included
- * in the LICENSE file that accompanied this code).
- *
- * You should have received a copy of the GNU General Public License
- * version 2 along with this program; If not, see
- * http://www.gnu.org/licenses/gpl-2.0.html
- *
- * GPL HEADER END
- */
-/*
- * Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved.
- * Use is subject to license terms.
- *
- * Copyright (c) 2012, 2015, Intel Corporation.
- */
-/*
- * This file is part of Lustre, http://www.lustre.org/
- * Lustre is a trademark of Sun Microsystems, Inc.
- *
- * lnet/klnds/o2iblnd/o2iblnd_cb.c
- *
- * Author: Eric Barton <eric@bartonsoftware.com>
- */
-
-#include "o2iblnd.h"
-
-#define MAX_CONN_RACES_BEFORE_ABORT 20
-
-static void kiblnd_peer_alive(struct kib_peer *peer);
-static void kiblnd_peer_connect_failed(struct kib_peer *peer, int active, int error);
-static void kiblnd_init_tx_msg(struct lnet_ni *ni, struct kib_tx *tx,
- int type, int body_nob);
-static int kiblnd_init_rdma(struct kib_conn *conn, struct kib_tx *tx, int type,
- int resid, struct kib_rdma_desc *dstrd,
- __u64 dstcookie);
-static void kiblnd_queue_tx_locked(struct kib_tx *tx, struct kib_conn *conn);
-static void kiblnd_queue_tx(struct kib_tx *tx, struct kib_conn *conn);
-static void kiblnd_unmap_tx(struct lnet_ni *ni, struct kib_tx *tx);
-static void kiblnd_check_sends_locked(struct kib_conn *conn);
-
-static void
-kiblnd_tx_done(struct lnet_ni *ni, struct kib_tx *tx)
-{
- struct lnet_msg *lntmsg[2];
- struct kib_net *net = ni->ni_data;
- int rc;
- int i;
-
- LASSERT(net);
- LASSERT(!in_interrupt());
- LASSERT(!tx->tx_queued); /* mustn't be queued for sending */
- LASSERT(!tx->tx_sending); /* mustn't be awaiting sent callback */
- LASSERT(!tx->tx_waiting); /* mustn't be awaiting peer response */
- LASSERT(tx->tx_pool);
-
- kiblnd_unmap_tx(ni, tx);
-
- /* tx may have up to 2 lnet msgs to finalise */
- lntmsg[0] = tx->tx_lntmsg[0];
- tx->tx_lntmsg[0] = NULL;
- lntmsg[1] = tx->tx_lntmsg[1];
- tx->tx_lntmsg[1] = NULL;
- rc = tx->tx_status;
-
- if (tx->tx_conn) {
- LASSERT(ni == tx->tx_conn->ibc_peer->ibp_ni);
-
- kiblnd_conn_decref(tx->tx_conn);
- tx->tx_conn = NULL;
- }
-
- tx->tx_nwrq = 0;
- tx->tx_status = 0;
-
- kiblnd_pool_free_node(&tx->tx_pool->tpo_pool, &tx->tx_list);
-
- /* delay finalize until my descs have been freed */
- for (i = 0; i < 2; i++) {
- if (!lntmsg[i])
- continue;
-
- lnet_finalize(ni, lntmsg[i], rc);
- }
-}
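
Note the ordering above: the lnet_msg pointers are stashed and cleared, the tx
is returned to its pool, and only then is lnet_finalize() called, so a finalize
callback that triggers a new send can reuse the descriptor immediately. The
same release-before-callback ordering in miniature (a hedged sketch: struct
desc, desc_free() and the callback signature are invented for illustration):

	#include <stddef.h>

	struct desc {
		void (*done)(void *);	/* user completion callback */
		void *arg;
	};

	static void desc_free(struct desc *d)
	{
		(void)d;		/* return 'd' to its pool; elided */
	}

	static void desc_complete(struct desc *d)
	{
		void (*done)(void *) = d->done;	/* stash before reuse */
		void *arg = d->arg;

		d->done = NULL;
		d->arg = NULL;
		desc_free(d);		/* descriptor reusable again... */
		if (done)
			done(arg);	/* ...before the callback runs */
	}
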
-
-void
-kiblnd_txlist_done(struct lnet_ni *ni, struct list_head *txlist, int status)
-{
- struct kib_tx *tx;
-
- while (!list_empty(txlist)) {
- tx = list_entry(txlist->next, struct kib_tx, tx_list);
-
- list_del(&tx->tx_list);
- /* complete now */
- tx->tx_waiting = 0;
- tx->tx_status = status;
- kiblnd_tx_done(ni, tx);
- }
-}
-
-static struct kib_tx *
-kiblnd_get_idle_tx(struct lnet_ni *ni, lnet_nid_t target)
-{
- struct kib_net *net = (struct kib_net *)ni->ni_data;
- struct list_head *node;
- struct kib_tx *tx;
- struct kib_tx_poolset *tps;
-
- tps = net->ibn_tx_ps[lnet_cpt_of_nid(target)];
- node = kiblnd_pool_alloc_node(&tps->tps_poolset);
- if (!node)
- return NULL;
- tx = list_entry(node, struct kib_tx, tx_list);
-
- LASSERT(!tx->tx_nwrq);
- LASSERT(!tx->tx_queued);
- LASSERT(!tx->tx_sending);
- LASSERT(!tx->tx_waiting);
- LASSERT(!tx->tx_status);
- LASSERT(!tx->tx_conn);
- LASSERT(!tx->tx_lntmsg[0]);
- LASSERT(!tx->tx_lntmsg[1]);
- LASSERT(!tx->tx_nfrags);
-
- return tx;
-}
-
-static void
-kiblnd_drop_rx(struct kib_rx *rx)
-{
- struct kib_conn *conn = rx->rx_conn;
- struct kib_sched_info *sched = conn->ibc_sched;
- unsigned long flags;
-
- spin_lock_irqsave(&sched->ibs_lock, flags);
- LASSERT(conn->ibc_nrx > 0);
- conn->ibc_nrx--;
- spin_unlock_irqrestore(&sched->ibs_lock, flags);
-
- kiblnd_conn_decref(conn);
-}
-
-int
-kiblnd_post_rx(struct kib_rx *rx, int credit)
-{
- struct kib_conn *conn = rx->rx_conn;
- struct kib_net *net = conn->ibc_peer->ibp_ni->ni_data;
- struct ib_recv_wr *bad_wrq = NULL;
- int rc;
-
- LASSERT(net);
- LASSERT(!in_interrupt());
- LASSERT(credit == IBLND_POSTRX_NO_CREDIT ||
- credit == IBLND_POSTRX_PEER_CREDIT ||
- credit == IBLND_POSTRX_RSRVD_CREDIT);
-
- rx->rx_sge.lkey = conn->ibc_hdev->ibh_pd->local_dma_lkey;
- rx->rx_sge.addr = rx->rx_msgaddr;
- rx->rx_sge.length = IBLND_MSG_SIZE;
-
- rx->rx_wrq.next = NULL;
- rx->rx_wrq.sg_list = &rx->rx_sge;
- rx->rx_wrq.num_sge = 1;
- rx->rx_wrq.wr_id = kiblnd_ptr2wreqid(rx, IBLND_WID_RX);
-
- LASSERT(conn->ibc_state >= IBLND_CONN_INIT);
- LASSERT(rx->rx_nob >= 0); /* not posted */
-
- if (conn->ibc_state > IBLND_CONN_ESTABLISHED) {
- kiblnd_drop_rx(rx); /* No more posts for this rx */
- return 0;
- }
-
- rx->rx_nob = -1; /* flag posted */
-
- /* NB: need an extra reference after ib_post_recv because we don't
- * own this rx (and rx::rx_conn) anymore, LU-5678.
- */
- kiblnd_conn_addref(conn);
- rc = ib_post_recv(conn->ibc_cmid->qp, &rx->rx_wrq, &bad_wrq);
- if (unlikely(rc)) {
- CERROR("Can't post rx for %s: %d, bad_wrq: %p\n",
- libcfs_nid2str(conn->ibc_peer->ibp_nid), rc, bad_wrq);
- rx->rx_nob = 0;
- }
-
- if (conn->ibc_state < IBLND_CONN_ESTABLISHED) /* Initial post */
- goto out;
-
- if (unlikely(rc)) {
- kiblnd_close_conn(conn, rc);
- kiblnd_drop_rx(rx); /* No more posts for this rx */
- goto out;
- }
-
- if (credit == IBLND_POSTRX_NO_CREDIT)
- goto out;
-
- spin_lock(&conn->ibc_lock);
- if (credit == IBLND_POSTRX_PEER_CREDIT)
- conn->ibc_outstanding_credits++;
- else
- conn->ibc_reserved_credits++;
- kiblnd_check_sends_locked(conn);
- spin_unlock(&conn->ibc_lock);
-
-out:
- kiblnd_conn_decref(conn);
- return rc;
-}
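
The extra reference taken around ib_post_recv() above is a general ownership
pattern: once the buffer is handed to the hardware, its completion can race
with this thread, so the poster pins the enclosing object for the duration of
its own accesses. A minimal userspace sketch of the same pattern (struct conn,
conn_put() and hw_post() are all hypothetical, not the LND's API):

	#include <stdatomic.h>
	#include <stdlib.h>

	struct conn {
		atomic_int refs;	/* object freed when this hits zero */
	};

	static void conn_put(struct conn *c)
	{
		if (atomic_fetch_sub(&c->refs, 1) == 1)
			free(c);
	}

	/* Post a buffer another thread may complete (and drop) at any time */
	static int post_buffer(struct conn *c, int (*hw_post)(struct conn *))
	{
		int rc;

		atomic_fetch_add(&c->refs, 1);	/* pin 'c' for our accesses */
		rc = hw_post(c);	/* buffer no longer ours after this */
		/* still safe to touch 'c' even if the completion ran */
		conn_put(c);		/* drop the pin taken above */
		return rc;
	}
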
-
-static struct kib_tx *
-kiblnd_find_waiting_tx_locked(struct kib_conn *conn, int txtype, __u64 cookie)
-{
- struct list_head *tmp;
-
- list_for_each(tmp, &conn->ibc_active_txs) {
- struct kib_tx *tx = list_entry(tmp, struct kib_tx, tx_list);
-
- LASSERT(!tx->tx_queued);
- LASSERT(tx->tx_sending || tx->tx_waiting);
-
- if (tx->tx_cookie != cookie)
- continue;
-
- if (tx->tx_waiting &&
- tx->tx_msg->ibm_type == txtype)
- return tx;
-
- CWARN("Bad completion: %swaiting, type %x (wanted %x)\n",
- tx->tx_waiting ? "" : "NOT ",
- tx->tx_msg->ibm_type, txtype);
- }
- return NULL;
-}
-
-static void
-kiblnd_handle_completion(struct kib_conn *conn, int txtype, int status, __u64 cookie)
-{
- struct kib_tx *tx;
- struct lnet_ni *ni = conn->ibc_peer->ibp_ni;
- int idle;
-
- spin_lock(&conn->ibc_lock);
-
- tx = kiblnd_find_waiting_tx_locked(conn, txtype, cookie);
- if (!tx) {
- spin_unlock(&conn->ibc_lock);
-
- CWARN("Unmatched completion type %x cookie %#llx from %s\n",
- txtype, cookie, libcfs_nid2str(conn->ibc_peer->ibp_nid));
- kiblnd_close_conn(conn, -EPROTO);
- return;
- }
-
- if (!tx->tx_status) { /* success so far */
- if (status < 0) /* failed? */
- tx->tx_status = status;
- else if (txtype == IBLND_MSG_GET_REQ)
- lnet_set_reply_msg_len(ni, tx->tx_lntmsg[1], status);
- }
-
- tx->tx_waiting = 0;
-
- idle = !tx->tx_queued && !tx->tx_sending;
- if (idle)
- list_del(&tx->tx_list);
-
- spin_unlock(&conn->ibc_lock);
-
- if (idle)
- kiblnd_tx_done(ni, tx);
-}
-
-static void
-kiblnd_send_completion(struct kib_conn *conn, int type, int status, __u64 cookie)
-{
- struct lnet_ni *ni = conn->ibc_peer->ibp_ni;
- struct kib_tx *tx = kiblnd_get_idle_tx(ni, conn->ibc_peer->ibp_nid);
-
- if (!tx) {
- CERROR("Can't get tx for completion %x for %s\n",
- type, libcfs_nid2str(conn->ibc_peer->ibp_nid));
- return;
- }
-
- tx->tx_msg->ibm_u.completion.ibcm_status = status;
- tx->tx_msg->ibm_u.completion.ibcm_cookie = cookie;
- kiblnd_init_tx_msg(ni, tx, type, sizeof(struct kib_completion_msg));
-
- kiblnd_queue_tx(tx, conn);
-}
-
-static void
-kiblnd_handle_rx(struct kib_rx *rx)
-{
- struct kib_msg *msg = rx->rx_msg;
- struct kib_conn *conn = rx->rx_conn;
- struct lnet_ni *ni = conn->ibc_peer->ibp_ni;
- int credits = msg->ibm_credits;
- struct kib_tx *tx;
- int rc = 0;
- int rc2;
- int post_credit;
-
- LASSERT(conn->ibc_state >= IBLND_CONN_ESTABLISHED);
-
- CDEBUG(D_NET, "Received %x[%d] from %s\n",
- msg->ibm_type, credits,
- libcfs_nid2str(conn->ibc_peer->ibp_nid));
-
- if (credits) {
- /* Have I received credits that will let me send? */
- spin_lock(&conn->ibc_lock);
-
- if (conn->ibc_credits + credits >
- conn->ibc_queue_depth) {
- rc2 = conn->ibc_credits;
- spin_unlock(&conn->ibc_lock);
-
- CERROR("Bad credits from %s: %d + %d > %d\n",
- libcfs_nid2str(conn->ibc_peer->ibp_nid),
- rc2, credits, conn->ibc_queue_depth);
-
- kiblnd_close_conn(conn, -EPROTO);
- kiblnd_post_rx(rx, IBLND_POSTRX_NO_CREDIT);
- return;
- }
-
- conn->ibc_credits += credits;
-
- /* This ensures the credit taken by NOOP can be returned */
- if (msg->ibm_type == IBLND_MSG_NOOP &&
- !IBLND_OOB_CAPABLE(conn->ibc_version)) /* v1 only */
- conn->ibc_outstanding_credits++;
-
- kiblnd_check_sends_locked(conn);
- spin_unlock(&conn->ibc_lock);
- }
-
- switch (msg->ibm_type) {
- default:
- CERROR("Bad IBLND message type %x from %s\n",
- msg->ibm_type, libcfs_nid2str(conn->ibc_peer->ibp_nid));
- post_credit = IBLND_POSTRX_NO_CREDIT;
- rc = -EPROTO;
- break;
-
- case IBLND_MSG_NOOP:
- if (IBLND_OOB_CAPABLE(conn->ibc_version)) {
- post_credit = IBLND_POSTRX_NO_CREDIT;
- break;
- }
-
- if (credits) /* credit already posted */
- post_credit = IBLND_POSTRX_NO_CREDIT;
- else /* a keepalive NOOP */
- post_credit = IBLND_POSTRX_PEER_CREDIT;
- break;
-
- case IBLND_MSG_IMMEDIATE:
- post_credit = IBLND_POSTRX_DONT_POST;
- rc = lnet_parse(ni, &msg->ibm_u.immediate.ibim_hdr,
- msg->ibm_srcnid, rx, 0);
- if (rc < 0) /* repost on error */
- post_credit = IBLND_POSTRX_PEER_CREDIT;
- break;
-
- case IBLND_MSG_PUT_REQ:
- post_credit = IBLND_POSTRX_DONT_POST;
- rc = lnet_parse(ni, &msg->ibm_u.putreq.ibprm_hdr,
- msg->ibm_srcnid, rx, 1);
- if (rc < 0) /* repost on error */
- post_credit = IBLND_POSTRX_PEER_CREDIT;
- break;
-
- case IBLND_MSG_PUT_NAK:
- CWARN("PUT_NACK from %s\n",
- libcfs_nid2str(conn->ibc_peer->ibp_nid));
- post_credit = IBLND_POSTRX_RSRVD_CREDIT;
- kiblnd_handle_completion(conn, IBLND_MSG_PUT_REQ,
- msg->ibm_u.completion.ibcm_status,
- msg->ibm_u.completion.ibcm_cookie);
- break;
-
- case IBLND_MSG_PUT_ACK:
- post_credit = IBLND_POSTRX_RSRVD_CREDIT;
-
- spin_lock(&conn->ibc_lock);
- tx = kiblnd_find_waiting_tx_locked(conn, IBLND_MSG_PUT_REQ,
- msg->ibm_u.putack.ibpam_src_cookie);
- if (tx)
- list_del(&tx->tx_list);
- spin_unlock(&conn->ibc_lock);
-
- if (!tx) {
- CERROR("Unmatched PUT_ACK from %s\n",
- libcfs_nid2str(conn->ibc_peer->ibp_nid));
- rc = -EPROTO;
- break;
- }
-
- LASSERT(tx->tx_waiting);
- /*
- * CAVEAT EMPTOR: I could be racing with tx_complete, but...
- * (a) I can overwrite tx_msg since my peer has received it!
- * (b) tx_waiting being set tells tx_complete() it isn't done.
- */
- tx->tx_nwrq = 0; /* overwrite PUT_REQ */
-
- rc2 = kiblnd_init_rdma(conn, tx, IBLND_MSG_PUT_DONE,
- kiblnd_rd_size(&msg->ibm_u.putack.ibpam_rd),
- &msg->ibm_u.putack.ibpam_rd,
- msg->ibm_u.putack.ibpam_dst_cookie);
- if (rc2 < 0)
- CERROR("Can't setup rdma for PUT to %s: %d\n",
- libcfs_nid2str(conn->ibc_peer->ibp_nid), rc2);
-
- spin_lock(&conn->ibc_lock);
- tx->tx_waiting = 0; /* clear waiting and queue atomically */
- kiblnd_queue_tx_locked(tx, conn);
- spin_unlock(&conn->ibc_lock);
- break;
-
- case IBLND_MSG_PUT_DONE:
- post_credit = IBLND_POSTRX_PEER_CREDIT;
- kiblnd_handle_completion(conn, IBLND_MSG_PUT_ACK,
- msg->ibm_u.completion.ibcm_status,
- msg->ibm_u.completion.ibcm_cookie);
- break;
-
- case IBLND_MSG_GET_REQ:
- post_credit = IBLND_POSTRX_DONT_POST;
- rc = lnet_parse(ni, &msg->ibm_u.get.ibgm_hdr,
- msg->ibm_srcnid, rx, 1);
- if (rc < 0) /* repost on error */
- post_credit = IBLND_POSTRX_PEER_CREDIT;
- break;
-
- case IBLND_MSG_GET_DONE:
- post_credit = IBLND_POSTRX_RSRVD_CREDIT;
- kiblnd_handle_completion(conn, IBLND_MSG_GET_REQ,
- msg->ibm_u.completion.ibcm_status,
- msg->ibm_u.completion.ibcm_cookie);
- break;
- }
-
- if (rc < 0) /* protocol error */
- kiblnd_close_conn(conn, rc);
-
- if (post_credit != IBLND_POSTRX_DONT_POST)
- kiblnd_post_rx(rx, post_credit);
-}
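
The credit arithmetic above is the whole flow-control protocol: every message
carries back the credits its sender consumed, and the running balance may never
exceed the negotiated queue depth, or the peer is cheating. A self-contained
model of that bound check (QUEUE_DEPTH and the names are illustrative only):

	#include <assert.h>
	#include <stdio.h>

	#define QUEUE_DEPTH 8	/* negotiated per-connection depth */

	/* 0 on success, -1 if the peer returned more credits than it held */
	static int return_credits(int *credits, int returned)
	{
		if (*credits + returned > QUEUE_DEPTH)
			return -1;	/* protocol error: close the conn */
		*credits += returned;
		return 0;
	}

	int main(void)
	{
		int credits = 6;

		assert(return_credits(&credits, 2) == 0);  /* 6 + 2 == depth */
		assert(return_credits(&credits, 1) == -1); /* would exceed it */
		printf("credits = %d\n", credits);	   /* prints 8 */
		return 0;
	}
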
-
-static void
-kiblnd_rx_complete(struct kib_rx *rx, int status, int nob)
-{
- struct kib_msg *msg = rx->rx_msg;
- struct kib_conn *conn = rx->rx_conn;
- struct lnet_ni *ni = conn->ibc_peer->ibp_ni;
- struct kib_net *net = ni->ni_data;
- int rc;
- int err = -EIO;
-
- LASSERT(net);
- LASSERT(rx->rx_nob < 0); /* was posted */
- rx->rx_nob = 0; /* isn't now */
-
- if (conn->ibc_state > IBLND_CONN_ESTABLISHED)
- goto ignore;
-
- if (status != IB_WC_SUCCESS) {
- CNETERR("Rx from %s failed: %d\n",
- libcfs_nid2str(conn->ibc_peer->ibp_nid), status);
- goto failed;
- }
-
- LASSERT(nob >= 0);
- rx->rx_nob = nob;
-
- rc = kiblnd_unpack_msg(msg, rx->rx_nob);
- if (rc) {
- CERROR("Error %d unpacking rx from %s\n",
- rc, libcfs_nid2str(conn->ibc_peer->ibp_nid));
- goto failed;
- }
-
- if (msg->ibm_srcnid != conn->ibc_peer->ibp_nid ||
- msg->ibm_dstnid != ni->ni_nid ||
- msg->ibm_srcstamp != conn->ibc_incarnation ||
- msg->ibm_dststamp != net->ibn_incarnation) {
- CERROR("Stale rx from %s\n",
- libcfs_nid2str(conn->ibc_peer->ibp_nid));
- err = -ESTALE;
- goto failed;
- }
-
- /* set time last known alive */
- kiblnd_peer_alive(conn->ibc_peer);
-
- /* racing with connection establishment/teardown! */
-
- if (conn->ibc_state < IBLND_CONN_ESTABLISHED) {
- rwlock_t *g_lock = &kiblnd_data.kib_global_lock;
- unsigned long flags;
-
- write_lock_irqsave(g_lock, flags);
- /* must check holding global lock to eliminate race */
- if (conn->ibc_state < IBLND_CONN_ESTABLISHED) {
- list_add_tail(&rx->rx_list, &conn->ibc_early_rxs);
- write_unlock_irqrestore(g_lock, flags);
- return;
- }
- write_unlock_irqrestore(g_lock, flags);
- }
- kiblnd_handle_rx(rx);
- return;
-
- failed:
- CDEBUG(D_NET, "rx %p conn %p\n", rx, conn);
- kiblnd_close_conn(conn, err);
- ignore:
- kiblnd_drop_rx(rx); /* Don't re-post rx. */
-}
-
-static struct page *
-kiblnd_kvaddr_to_page(unsigned long vaddr)
-{
- struct page *page;
-
- if (is_vmalloc_addr((void *)vaddr)) {
- page = vmalloc_to_page((void *)vaddr);
- LASSERT(page);
- return page;
- }
-#ifdef CONFIG_HIGHMEM
- if (vaddr >= PKMAP_BASE &&
- vaddr < (PKMAP_BASE + LAST_PKMAP * PAGE_SIZE)) {
- /* No highmem pages; only used for bulk (kiov) I/O */
- CERROR("Can't find page for address in highmem\n");
- LBUG();
- }
-#endif
- page = virt_to_page(vaddr);
- LASSERT(page);
- return page;
-}
-
-static int
-kiblnd_fmr_map_tx(struct kib_net *net, struct kib_tx *tx, struct kib_rdma_desc *rd, __u32 nob)
-{
- struct kib_hca_dev *hdev;
- struct kib_fmr_poolset *fps;
- int cpt;
- int rc;
-
- LASSERT(tx->tx_pool);
- LASSERT(tx->tx_pool->tpo_pool.po_owner);
-
- hdev = tx->tx_pool->tpo_hdev;
- cpt = tx->tx_pool->tpo_pool.po_owner->ps_cpt;
-
- fps = net->ibn_fmr_ps[cpt];
- rc = kiblnd_fmr_pool_map(fps, tx, rd, nob, 0, &tx->fmr);
- if (rc) {
- CERROR("Can't map %u bytes: %d\n", nob, rc);
- return rc;
- }
-
- /*
- * If rd is not tx_rd, it's going to get sent to a peer, who will need
- * the rkey
- */
- rd->rd_key = tx->fmr.fmr_key;
- rd->rd_frags[0].rf_addr &= ~hdev->ibh_page_mask;
- rd->rd_frags[0].rf_nob = nob;
- rd->rd_nfrags = 1;
-
- return 0;
-}
-
-static void kiblnd_unmap_tx(struct lnet_ni *ni, struct kib_tx *tx)
-{
- struct kib_net *net = ni->ni_data;
-
- LASSERT(net);
-
- if (net->ibn_fmr_ps)
- kiblnd_fmr_pool_unmap(&tx->fmr, tx->tx_status);
-
- if (tx->tx_nfrags) {
- kiblnd_dma_unmap_sg(tx->tx_pool->tpo_hdev->ibh_ibdev,
- tx->tx_frags, tx->tx_nfrags, tx->tx_dmadir);
- tx->tx_nfrags = 0;
- }
-}
-
-static int kiblnd_map_tx(struct lnet_ni *ni, struct kib_tx *tx,
- struct kib_rdma_desc *rd, int nfrags)
-{
- struct kib_net *net = ni->ni_data;
- struct kib_hca_dev *hdev = net->ibn_dev->ibd_hdev;
- __u32 nob;
- int i;
-
- /*
- * If rd is not tx_rd, it's going to get sent to a peer and I'm the
- * RDMA sink
- */
- tx->tx_dmadir = (rd != tx->tx_rd) ? DMA_FROM_DEVICE : DMA_TO_DEVICE;
- tx->tx_nfrags = nfrags;
-
- rd->rd_nfrags = kiblnd_dma_map_sg(hdev->ibh_ibdev, tx->tx_frags,
- tx->tx_nfrags, tx->tx_dmadir);
-
- for (i = 0, nob = 0; i < rd->rd_nfrags; i++) {
- rd->rd_frags[i].rf_nob = kiblnd_sg_dma_len(
- hdev->ibh_ibdev, &tx->tx_frags[i]);
- rd->rd_frags[i].rf_addr = kiblnd_sg_dma_address(
- hdev->ibh_ibdev, &tx->tx_frags[i]);
- nob += rd->rd_frags[i].rf_nob;
- }
-
- if (net->ibn_fmr_ps)
- return kiblnd_fmr_map_tx(net, tx, rd, nob);
-
- return -EINVAL;
-}
-
-static int
-kiblnd_setup_rd_iov(struct lnet_ni *ni, struct kib_tx *tx,
- struct kib_rdma_desc *rd, unsigned int niov,
- const struct kvec *iov, int offset, int nob)
-{
- struct kib_net *net = ni->ni_data;
- struct page *page;
- struct scatterlist *sg;
- unsigned long vaddr;
- int fragnob;
- int page_offset;
-
- LASSERT(nob > 0);
- LASSERT(niov > 0);
- LASSERT(net);
-
- while (offset >= iov->iov_len) {
- offset -= iov->iov_len;
- niov--;
- iov++;
- LASSERT(niov > 0);
- }
-
- sg = tx->tx_frags;
- do {
- LASSERT(niov > 0);
-
- vaddr = ((unsigned long)iov->iov_base) + offset;
- page_offset = vaddr & (PAGE_SIZE - 1);
- page = kiblnd_kvaddr_to_page(vaddr);
- if (!page) {
- CERROR("Can't find page\n");
- return -EFAULT;
- }
-
- fragnob = min((int)(iov->iov_len - offset), nob);
- fragnob = min(fragnob, (int)PAGE_SIZE - page_offset);
-
- sg_set_page(sg, page, fragnob, page_offset);
- sg = sg_next(sg);
- if (!sg) {
- CERROR("lacking enough sg entries to map tx\n");
- return -EFAULT;
- }
-
- if (offset + fragnob < iov->iov_len) {
- offset += fragnob;
- } else {
- offset = 0;
- iov++;
- niov--;
- }
- nob -= fragnob;
- } while (nob > 0);
-
- return kiblnd_map_tx(ni, tx, rd, sg - tx->tx_frags);
-}
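
The loop above does two jobs: it first consumes 'offset' bytes of leading iovec
entries, then emits fragments clipped both to the current entry and to the
containing page. The clipping alone can be checked in isolation (PAGE_SIZE and
frag_size() here are stand-ins, not the kernel's):

	#include <stdio.h>

	#define PAGE_SIZE 4096

	/* Largest fragment inside both the iovec entry and the current page */
	static int frag_size(unsigned long vaddr, int entry_left, int nob_left)
	{
		int page_left = PAGE_SIZE - (int)(vaddr & (PAGE_SIZE - 1));
		int frag = entry_left < nob_left ? entry_left : nob_left;

		return frag < page_left ? frag : page_left;
	}

	int main(void)
	{
		printf("%d\n", frag_size(3900, 5000, 100)); /* 100: fits page */
		printf("%d\n", frag_size(3900, 5000, 500)); /* 196: page clip */
		return 0;
	}
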
-
-static int
-kiblnd_setup_rd_kiov(struct lnet_ni *ni, struct kib_tx *tx,
- struct kib_rdma_desc *rd, int nkiov,
- const struct bio_vec *kiov, int offset, int nob)
-{
- struct kib_net *net = ni->ni_data;
- struct scatterlist *sg;
- int fragnob;
-
- CDEBUG(D_NET, "niov %d offset %d nob %d\n", nkiov, offset, nob);
-
- LASSERT(nob > 0);
- LASSERT(nkiov > 0);
- LASSERT(net);
-
- while (offset >= kiov->bv_len) {
- offset -= kiov->bv_len;
- nkiov--;
- kiov++;
- LASSERT(nkiov > 0);
- }
-
- sg = tx->tx_frags;
- do {
- LASSERT(nkiov > 0);
-
- fragnob = min((int)(kiov->bv_len - offset), nob);
-
- sg_set_page(sg, kiov->bv_page, fragnob,
- kiov->bv_offset + offset);
- sg = sg_next(sg);
- if (!sg) {
- CERROR("lacking enough sg entries to map tx\n");
- return -EFAULT;
- }
-
- offset = 0;
- kiov++;
- nkiov--;
- nob -= fragnob;
- } while (nob > 0);
-
- return kiblnd_map_tx(ni, tx, rd, sg - tx->tx_frags);
-}
-
-static int
-kiblnd_post_tx_locked(struct kib_conn *conn, struct kib_tx *tx, int credit)
- __must_hold(&conn->ibc_lock)
-{
- struct kib_msg *msg = tx->tx_msg;
- struct kib_peer *peer = conn->ibc_peer;
- struct lnet_ni *ni = peer->ibp_ni;
- int ver = conn->ibc_version;
- int rc;
- int done;
-
- LASSERT(tx->tx_queued);
- /* We rely on this for QP sizing */
- LASSERT(tx->tx_nwrq > 0);
-
- LASSERT(!credit || credit == 1);
- LASSERT(conn->ibc_outstanding_credits >= 0);
- LASSERT(conn->ibc_outstanding_credits <= conn->ibc_queue_depth);
- LASSERT(conn->ibc_credits >= 0);
- LASSERT(conn->ibc_credits <= conn->ibc_queue_depth);
-
- if (conn->ibc_nsends_posted == kiblnd_concurrent_sends(ver, ni)) {
- /* tx completions outstanding... */
- CDEBUG(D_NET, "%s: posted enough\n",
- libcfs_nid2str(peer->ibp_nid));
- return -EAGAIN;
- }
-
- if (credit && !conn->ibc_credits) { /* no credits */
- CDEBUG(D_NET, "%s: no credits\n",
- libcfs_nid2str(peer->ibp_nid));
- return -EAGAIN;
- }
-
- if (credit && !IBLND_OOB_CAPABLE(ver) &&
- conn->ibc_credits == 1 && /* last credit reserved */
- msg->ibm_type != IBLND_MSG_NOOP) { /* for NOOP */
- CDEBUG(D_NET, "%s: not using last credit\n",
- libcfs_nid2str(peer->ibp_nid));
- return -EAGAIN;
- }
-
- /* NB don't drop ibc_lock before bumping tx_sending */
- list_del(&tx->tx_list);
- tx->tx_queued = 0;
-
- if (msg->ibm_type == IBLND_MSG_NOOP &&
- (!kiblnd_need_noop(conn) || /* redundant NOOP */
- (IBLND_OOB_CAPABLE(ver) && /* posted enough NOOP */
- conn->ibc_noops_posted == IBLND_OOB_MSGS(ver)))) {
- /*
- * OK to drop when posted enough NOOPs, since
- * kiblnd_check_sends_locked will queue NOOP again when
- * posted NOOPs complete
- */
- spin_unlock(&conn->ibc_lock);
- kiblnd_tx_done(peer->ibp_ni, tx);
- spin_lock(&conn->ibc_lock);
- CDEBUG(D_NET, "%s(%d): redundant or enough NOOP\n",
- libcfs_nid2str(peer->ibp_nid),
- conn->ibc_noops_posted);
- return 0;
- }
-
- kiblnd_pack_msg(peer->ibp_ni, msg, ver, conn->ibc_outstanding_credits,
- peer->ibp_nid, conn->ibc_incarnation);
-
- conn->ibc_credits -= credit;
- conn->ibc_outstanding_credits = 0;
- conn->ibc_nsends_posted++;
- if (msg->ibm_type == IBLND_MSG_NOOP)
- conn->ibc_noops_posted++;
-
- /*
- * CAVEAT EMPTOR! This tx could be the PUT_DONE of an RDMA
- * PUT. If so, it was first queued here as a PUT_REQ, sent and
- * stashed on ibc_active_txs, matched by an incoming PUT_ACK,
- * and then re-queued here. It's (just) possible that
- * tx_sending is non-zero if we've not done the tx_complete()
- * from the first send; hence the ++ rather than = below.
- */
- tx->tx_sending++;
- list_add(&tx->tx_list, &conn->ibc_active_txs);
-
- /* I'm still holding ibc_lock! */
- if (conn->ibc_state != IBLND_CONN_ESTABLISHED) {
- rc = -ECONNABORTED;
- } else if (tx->tx_pool->tpo_pool.po_failed ||
- conn->ibc_hdev != tx->tx_pool->tpo_hdev) {
- /* close_conn will launch failover */
- rc = -ENETDOWN;
- } else {
- struct kib_fast_reg_descriptor *frd = tx->fmr.fmr_frd;
- struct ib_send_wr *bad = &tx->tx_wrq[tx->tx_nwrq - 1].wr;
- struct ib_send_wr *wrq = &tx->tx_wrq[0].wr;
-
- if (frd) {
- if (!frd->frd_valid) {
- wrq = &frd->frd_inv_wr;
- wrq->next = &frd->frd_fastreg_wr.wr;
- } else {
- wrq = &frd->frd_fastreg_wr.wr;
- }
- frd->frd_fastreg_wr.wr.next = &tx->tx_wrq[0].wr;
- }
-
- LASSERTF(bad->wr_id == kiblnd_ptr2wreqid(tx, IBLND_WID_TX),
- "bad wr_id %llx, opc %d, flags %d, peer: %s\n",
- bad->wr_id, bad->opcode, bad->send_flags,
- libcfs_nid2str(conn->ibc_peer->ibp_nid));
- bad = NULL;
- rc = ib_post_send(conn->ibc_cmid->qp, wrq, &bad);
- }
-
- conn->ibc_last_send = jiffies;
-
- if (!rc)
- return 0;
-
- /*
- * NB credits are transferred in the actual
- * message, which can only be the last work item
- */
- conn->ibc_credits += credit;
- conn->ibc_outstanding_credits += msg->ibm_credits;
- conn->ibc_nsends_posted--;
- if (msg->ibm_type == IBLND_MSG_NOOP)
- conn->ibc_noops_posted--;
-
- tx->tx_status = rc;
- tx->tx_waiting = 0;
- tx->tx_sending--;
-
- done = !tx->tx_sending;
- if (done)
- list_del(&tx->tx_list);
-
- spin_unlock(&conn->ibc_lock);
-
- if (conn->ibc_state == IBLND_CONN_ESTABLISHED)
- CERROR("Error %d posting transmit to %s\n",
- rc, libcfs_nid2str(peer->ibp_nid));
- else
- CDEBUG(D_NET, "Error %d posting transmit to %s\n",
- rc, libcfs_nid2str(peer->ibp_nid));
-
- kiblnd_close_conn(conn, rc);
-
- if (done)
- kiblnd_tx_done(peer->ibp_ni, tx);
-
- spin_lock(&conn->ibc_lock);
-
- return -EIO;
-}
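
Three of the early returns above are the sender-side credit discipline: don't
exceed the QP's send capacity, don't send without a credit, and on pre-OOB
peers never spend the last credit on anything but a NOOP (it is reserved so a
credit-returning NOOP can always go out). As a standalone predicate (names
hypothetical):

	#include <stdbool.h>

	struct send_state {
		int posted;		/* sends in flight */
		int max_posted;		/* QP send-queue capacity */
		int credits;		/* credits granted by the peer */
		bool oob_capable;	/* peer accepts out-of-band NOOPs */
	};

	/* true if a credit-consuming, non-NOOP send must wait (-EAGAIN) */
	static bool must_wait(const struct send_state *s)
	{
		if (s->posted == s->max_posted)
			return true;	/* completions outstanding */
		if (!s->credits)
			return true;	/* no credits at all */
		if (!s->oob_capable && s->credits == 1)
			return true;	/* last credit reserved for a NOOP */
		return false;
	}
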
-
-static void
-kiblnd_check_sends_locked(struct kib_conn *conn)
-{
- int ver = conn->ibc_version;
- struct lnet_ni *ni = conn->ibc_peer->ibp_ni;
- struct kib_tx *tx;
-
- /* Don't send anything until after the connection is established */
- if (conn->ibc_state < IBLND_CONN_ESTABLISHED) {
- CDEBUG(D_NET, "%s too soon\n",
- libcfs_nid2str(conn->ibc_peer->ibp_nid));
- return;
- }
-
- LASSERT(conn->ibc_nsends_posted <= kiblnd_concurrent_sends(ver, ni));
- LASSERT(!IBLND_OOB_CAPABLE(ver) ||
- conn->ibc_noops_posted <= IBLND_OOB_MSGS(ver));
- LASSERT(conn->ibc_reserved_credits >= 0);
-
- while (conn->ibc_reserved_credits > 0 &&
- !list_empty(&conn->ibc_tx_queue_rsrvd)) {
- tx = list_entry(conn->ibc_tx_queue_rsrvd.next,
- struct kib_tx, tx_list);
- list_del(&tx->tx_list);
- list_add_tail(&tx->tx_list, &conn->ibc_tx_queue);
- conn->ibc_reserved_credits--;
- }
-
- if (kiblnd_need_noop(conn)) {
- spin_unlock(&conn->ibc_lock);
-
- tx = kiblnd_get_idle_tx(ni, conn->ibc_peer->ibp_nid);
- if (tx)
- kiblnd_init_tx_msg(ni, tx, IBLND_MSG_NOOP, 0);
-
- spin_lock(&conn->ibc_lock);
- if (tx)
- kiblnd_queue_tx_locked(tx, conn);
- }
-
- for (;;) {
- int credit;
-
- if (!list_empty(&conn->ibc_tx_queue_nocred)) {
- credit = 0;
- tx = list_entry(conn->ibc_tx_queue_nocred.next,
- struct kib_tx, tx_list);
- } else if (!list_empty(&conn->ibc_tx_noops)) {
- LASSERT(!IBLND_OOB_CAPABLE(ver));
- credit = 1;
- tx = list_entry(conn->ibc_tx_noops.next,
- struct kib_tx, tx_list);
- } else if (!list_empty(&conn->ibc_tx_queue)) {
- credit = 1;
- tx = list_entry(conn->ibc_tx_queue.next,
- struct kib_tx, tx_list);
- } else {
- break;
- }
-
- if (kiblnd_post_tx_locked(conn, tx, credit))
- break;
- }
-}
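
The drain loop services the queues in a fixed priority: no-credit messages
first (they can always be posted), then queued NOOPs, then ordinary
credit-consuming sends, stopping at the first tx that kiblnd_post_tx_locked()
refuses. That selection order reduces to (a sketch with invented names):

	struct queues {
		int nocred;	/* replies that consume no credit */
		int noops;	/* queued NOOPs (pre-OOB peers) */
		int normal;	/* ordinary credit-consuming sends */
	};

	/* 0 = nocred, 1 = noop, 2 = normal, -1 = nothing to send */
	static int pick_queue(const struct queues *q)
	{
		if (q->nocred)
			return 0;
		if (q->noops)
			return 1;
		if (q->normal)
			return 2;
		return -1;
	}
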
-
-static void
-kiblnd_tx_complete(struct kib_tx *tx, int status)
-{
- int failed = (status != IB_WC_SUCCESS);
- struct kib_conn *conn = tx->tx_conn;
- int idle;
-
- LASSERT(tx->tx_sending > 0);
-
- if (failed) {
- if (conn->ibc_state == IBLND_CONN_ESTABLISHED)
- CNETERR("Tx -> %s cookie %#llx sending %d waiting %d: failed %d\n",
- libcfs_nid2str(conn->ibc_peer->ibp_nid),
- tx->tx_cookie, tx->tx_sending, tx->tx_waiting,
- status);
-
- kiblnd_close_conn(conn, -EIO);
- } else {
- kiblnd_peer_alive(conn->ibc_peer);
- }
-
- spin_lock(&conn->ibc_lock);
-
- /*
- * I could be racing with rdma completion. Whoever makes 'tx' idle
- * gets to free it, which also drops its ref on 'conn'.
- */
- tx->tx_sending--;
- conn->ibc_nsends_posted--;
- if (tx->tx_msg->ibm_type == IBLND_MSG_NOOP)
- conn->ibc_noops_posted--;
-
- if (failed) {
- tx->tx_waiting = 0; /* don't wait for peer */
- tx->tx_status = -EIO;
- }
-
- idle = !tx->tx_sending && /* This is the final callback */
- !tx->tx_waiting && /* Not waiting for peer */
- !tx->tx_queued; /* Not re-queued (PUT_DONE) */
- if (idle)
- list_del(&tx->tx_list);
-
- kiblnd_check_sends_locked(conn);
- spin_unlock(&conn->ibc_lock);
-
- if (idle)
- kiblnd_tx_done(conn->ibc_peer->ibp_ni, tx);
-}
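
'Idle' above is the single ownership rule for a tx: whichever of the send
completion (tx_sending), the peer response (tx_waiting) or the PUT_DONE
re-queue (tx_queued) clears the last flag, under ibc_lock, gets to free it.
The predicate on its own (field names mirror the flags, nothing more):

	#include <stdbool.h>

	struct tx_flags {
		int sending;	/* send completions still outstanding */
		int waiting;	/* still expecting a peer response */
		int queued;	/* re-queued (e.g. as PUT_DONE) */
	};

	/* Whoever observes all three clear, under the lock, frees the tx */
	static bool tx_idle(const struct tx_flags *t)
	{
		return !t->sending && !t->waiting && !t->queued;
	}
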
-
-static void
-kiblnd_init_tx_msg(struct lnet_ni *ni, struct kib_tx *tx, int type,
- int body_nob)
-{
- struct kib_hca_dev *hdev = tx->tx_pool->tpo_hdev;
- struct ib_sge *sge = &tx->tx_sge[tx->tx_nwrq];
- struct ib_rdma_wr *wrq = &tx->tx_wrq[tx->tx_nwrq];
- int nob = offsetof(struct kib_msg, ibm_u) + body_nob;
-
- LASSERT(tx->tx_nwrq >= 0);
- LASSERT(tx->tx_nwrq < IBLND_MAX_RDMA_FRAGS + 1);
- LASSERT(nob <= IBLND_MSG_SIZE);
-
- kiblnd_init_msg(tx->tx_msg, type, body_nob);
-
- sge->lkey = hdev->ibh_pd->local_dma_lkey;
- sge->addr = tx->tx_msgaddr;
- sge->length = nob;
-
- memset(wrq, 0, sizeof(*wrq));
-
- wrq->wr.next = NULL;
- wrq->wr.wr_id = kiblnd_ptr2wreqid(tx, IBLND_WID_TX);
- wrq->wr.sg_list = sge;
- wrq->wr.num_sge = 1;
- wrq->wr.opcode = IB_WR_SEND;
- wrq->wr.send_flags = IB_SEND_SIGNALED;
-
- tx->tx_nwrq++;
-}
-
-static int
-kiblnd_init_rdma(struct kib_conn *conn, struct kib_tx *tx, int type,
- int resid, struct kib_rdma_desc *dstrd, __u64 dstcookie)
-{
- struct kib_msg *ibmsg = tx->tx_msg;
- struct kib_rdma_desc *srcrd = tx->tx_rd;
- struct ib_sge *sge = &tx->tx_sge[0];
- struct ib_rdma_wr *wrq, *next;
- int rc = resid;
- int srcidx = 0;
- int dstidx = 0;
- int wrknob;
-
- LASSERT(!in_interrupt());
- LASSERT(!tx->tx_nwrq);
- LASSERT(type == IBLND_MSG_GET_DONE ||
- type == IBLND_MSG_PUT_DONE);
-
- if (kiblnd_rd_size(srcrd) > conn->ibc_max_frags << PAGE_SHIFT) {
- CERROR("RDMA is too large for peer %s (%d), src size: %d dst size: %d\n",
- libcfs_nid2str(conn->ibc_peer->ibp_nid),
- conn->ibc_max_frags << PAGE_SHIFT,
- kiblnd_rd_size(srcrd), kiblnd_rd_size(dstrd));
- rc = -EMSGSIZE;
- goto too_big;
- }
-
- while (resid > 0) {
- if (srcidx >= srcrd->rd_nfrags) {
- CERROR("Src buffer exhausted: %d frags\n", srcidx);
- rc = -EPROTO;
- break;
- }
-
- if (dstidx == dstrd->rd_nfrags) {
- CERROR("Dst buffer exhausted: %d frags\n", dstidx);
- rc = -EPROTO;
- break;
- }
-
- if (tx->tx_nwrq >= IBLND_MAX_RDMA_FRAGS) {
- CERROR("RDMA has too many fragments for peer %s (%d), src idx/frags: %d/%d dst idx/frags: %d/%d\n",
- libcfs_nid2str(conn->ibc_peer->ibp_nid),
- IBLND_MAX_RDMA_FRAGS,
- srcidx, srcrd->rd_nfrags,
- dstidx, dstrd->rd_nfrags);
- rc = -EMSGSIZE;
- break;
- }
-
- wrknob = min3(kiblnd_rd_frag_size(srcrd, srcidx),
- kiblnd_rd_frag_size(dstrd, dstidx),
- (__u32)resid);
-
- sge = &tx->tx_sge[tx->tx_nwrq];
- sge->addr = kiblnd_rd_frag_addr(srcrd, srcidx);
- sge->lkey = kiblnd_rd_frag_key(srcrd, srcidx);
- sge->length = wrknob;
-
- wrq = &tx->tx_wrq[tx->tx_nwrq];
- next = wrq + 1;
-
- wrq->wr.next = &next->wr;
- wrq->wr.wr_id = kiblnd_ptr2wreqid(tx, IBLND_WID_RDMA);
- wrq->wr.sg_list = sge;
- wrq->wr.num_sge = 1;
- wrq->wr.opcode = IB_WR_RDMA_WRITE;
- wrq->wr.send_flags = 0;
-
- wrq->remote_addr = kiblnd_rd_frag_addr(dstrd, dstidx);
- wrq->rkey = kiblnd_rd_frag_key(dstrd, dstidx);
-
- srcidx = kiblnd_rd_consume_frag(srcrd, srcidx, wrknob);
- dstidx = kiblnd_rd_consume_frag(dstrd, dstidx, wrknob);
-
- resid -= wrknob;
-
- tx->tx_nwrq++;
- wrq++;
- sge++;
- }
-too_big:
- if (rc < 0) /* no RDMA if completing with failure */
- tx->tx_nwrq = 0;
-
- ibmsg->ibm_u.completion.ibcm_status = rc;
- ibmsg->ibm_u.completion.ibcm_cookie = dstcookie;
- kiblnd_init_tx_msg(conn->ibc_peer->ibp_ni, tx,
- type, sizeof(struct kib_completion_msg));
-
- return rc;
-}
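
Each work request above spans the largest run that fits the current source
fragment, the current destination fragment, and the bytes still to move, so the
two descriptors are consumed at different rates. The fragment 'zipper' can be
modeled on its own (the fragment sizes are made up for the example):

	#include <stdio.h>

	static unsigned int min3u(unsigned int a, unsigned int b,
				  unsigned int c)
	{
		unsigned int m = a < b ? a : b;

		return m < c ? m : c;
	}

	int main(void)
	{
		/* 4096-byte source frags vs 1024-byte destination frags:
		 * each source fragment spans four work requests.
		 */
		unsigned int src_left = 4096, dst_left = 1024;
		unsigned int resid = 6000, nwrq = 0;

		while (resid) {
			unsigned int nob = min3u(src_left, dst_left, resid);

			src_left -= nob;
			dst_left -= nob;
			resid -= nob;
			if (!src_left)
				src_left = 4096; /* next source fragment */
			if (!dst_left)
				dst_left = 1024; /* next destination fragment */
			nwrq++;
		}
		printf("%u work requests\n", nwrq); /* 6: 5 x 1024 + 880 */
		return 0;
	}
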
-
-static void
-kiblnd_queue_tx_locked(struct kib_tx *tx, struct kib_conn *conn)
-{
- struct list_head *q;
-
- LASSERT(tx->tx_nwrq > 0); /* work items set up */
- LASSERT(!tx->tx_queued); /* not queued for sending already */
- LASSERT(conn->ibc_state >= IBLND_CONN_ESTABLISHED);
-
- tx->tx_queued = 1;
- tx->tx_deadline = jiffies +
- msecs_to_jiffies(*kiblnd_tunables.kib_timeout *
- MSEC_PER_SEC);
-
- if (!tx->tx_conn) {
- kiblnd_conn_addref(conn);
- tx->tx_conn = conn;
- LASSERT(tx->tx_msg->ibm_type != IBLND_MSG_PUT_DONE);
- } else {
- /* PUT_DONE first attached to conn as a PUT_REQ */
- LASSERT(tx->tx_conn == conn);
- LASSERT(tx->tx_msg->ibm_type == IBLND_MSG_PUT_DONE);
- }
-
- switch (tx->tx_msg->ibm_type) {
- default:
- LBUG();
-
- case IBLND_MSG_PUT_REQ:
- case IBLND_MSG_GET_REQ:
- q = &conn->ibc_tx_queue_rsrvd;
- break;
-
- case IBLND_MSG_PUT_NAK:
- case IBLND_MSG_PUT_ACK:
- case IBLND_MSG_PUT_DONE:
- case IBLND_MSG_GET_DONE:
- q = &conn->ibc_tx_queue_nocred;
- break;
-
- case IBLND_MSG_NOOP:
- if (IBLND_OOB_CAPABLE(conn->ibc_version))
- q = &conn->ibc_tx_queue_nocred;
- else
- q = &conn->ibc_tx_noops;
- break;
-
- case IBLND_MSG_IMMEDIATE:
- q = &conn->ibc_tx_queue;
- break;
- }
-
- list_add_tail(&tx->tx_list, q);
-}
-
-static void
-kiblnd_queue_tx(struct kib_tx *tx, struct kib_conn *conn)
-{
- spin_lock(&conn->ibc_lock);
- kiblnd_queue_tx_locked(tx, conn);
- kiblnd_check_sends_locked(conn);
- spin_unlock(&conn->ibc_lock);
-}
-
-static int kiblnd_resolve_addr(struct rdma_cm_id *cmid,
- struct sockaddr_in *srcaddr,
- struct sockaddr_in *dstaddr,
- int timeout_ms)
-{
- unsigned short port;
- int rc;
-
- /* allow the port to be reused */
- rc = rdma_set_reuseaddr(cmid, 1);
- if (rc) {
- CERROR("Unable to set reuse on cmid: %d\n", rc);
- return rc;
- }
-
- /* look for a free privileged port */
- for (port = PROT_SOCK - 1; port > 0; port--) {
- srcaddr->sin_port = htons(port);
- rc = rdma_resolve_addr(cmid,
- (struct sockaddr *)srcaddr,
- (struct sockaddr *)dstaddr,
- timeout_ms);
- if (!rc) {
- CDEBUG(D_NET, "bound to port %hu\n", port);
- return 0;
- } else if (rc == -EADDRINUSE || rc == -EADDRNOTAVAIL) {
- CDEBUG(D_NET, "bind to port %hu failed: %d\n",
- port, rc);
- } else {
- return rc;
- }
- }
-
- CERROR("Failed to bind to a free privileged port\n");
- return rc;
-}
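
kiblnd_resolve_addr() walks the privileged range downward from port 1023 and
retries only on 'address in use' style failures; anything else aborts the
search. That retry policy in isolation (try_bind() is a hypothetical callback
standing in for rdma_resolve_addr()):

	#include <errno.h>

	#define PROT_SOCK 1024	/* first non-privileged port */

	/* try_bind(): 0 on success, -errno on failure (caller supplied) */
	static int bind_privileged(int (*try_bind)(unsigned short port))
	{
		unsigned short port;
		int rc = -EADDRINUSE;

		for (port = PROT_SOCK - 1; port > 0; port--) {
			rc = try_bind(port);
			if (!rc)
				return 0;	/* bound */
			if (rc != -EADDRINUSE && rc != -EADDRNOTAVAIL)
				return rc;	/* hard failure: stop */
			/* port taken: keep walking down */
		}
		return rc;			/* range exhausted */
	}
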
-
-static void
-kiblnd_connect_peer(struct kib_peer *peer)
-{
- struct rdma_cm_id *cmid;
- struct kib_dev *dev;
- struct kib_net *net = peer->ibp_ni->ni_data;
- struct sockaddr_in srcaddr;
- struct sockaddr_in dstaddr;
- int rc;
-
- LASSERT(net);
- LASSERT(peer->ibp_connecting > 0);
- LASSERT(!peer->ibp_reconnecting);
-
- cmid = kiblnd_rdma_create_id(kiblnd_cm_callback, peer, RDMA_PS_TCP,
- IB_QPT_RC);
-
- if (IS_ERR(cmid)) {
- CERROR("Can't create CMID for %s: %ld\n",
- libcfs_nid2str(peer->ibp_nid), PTR_ERR(cmid));
- rc = PTR_ERR(cmid);
- goto failed;
- }
-
- dev = net->ibn_dev;
- memset(&srcaddr, 0, sizeof(srcaddr));
- srcaddr.sin_family = AF_INET;
- srcaddr.sin_addr.s_addr = htonl(dev->ibd_ifip);
-
- memset(&dstaddr, 0, sizeof(dstaddr));
- dstaddr.sin_family = AF_INET;
- dstaddr.sin_port = htons(*kiblnd_tunables.kib_service);
- dstaddr.sin_addr.s_addr = htonl(LNET_NIDADDR(peer->ibp_nid));
-
- kiblnd_peer_addref(peer); /* cmid's ref */
-
- if (*kiblnd_tunables.kib_use_priv_port) {
- rc = kiblnd_resolve_addr(cmid, &srcaddr, &dstaddr,
- *kiblnd_tunables.kib_timeout * 1000);
- } else {
- rc = rdma_resolve_addr(cmid,
- (struct sockaddr *)&srcaddr,
- (struct sockaddr *)&dstaddr,
- *kiblnd_tunables.kib_timeout * 1000);
- }
- if (rc) {
- /* Can't initiate address resolution: */
- CERROR("Can't resolve addr for %s: %d\n",
- libcfs_nid2str(peer->ibp_nid), rc);
- goto failed2;
- }
-
- LASSERT(cmid->device);
- CDEBUG(D_NET, "%s: connection bound to %s:%pI4h:%s\n",
- libcfs_nid2str(peer->ibp_nid), dev->ibd_ifname,
- &dev->ibd_ifip, cmid->device->name);
-
- return;
-
- failed2:
- kiblnd_peer_connect_failed(peer, 1, rc);
- kiblnd_peer_decref(peer); /* cmid's ref */
- rdma_destroy_id(cmid);
- return;
- failed:
- kiblnd_peer_connect_failed(peer, 1, rc);
-}
-
-bool
-kiblnd_reconnect_peer(struct kib_peer *peer)
-{
- rwlock_t *glock = &kiblnd_data.kib_global_lock;
- char *reason = NULL;
- struct list_head txs;
- unsigned long flags;
-
- INIT_LIST_HEAD(&txs);
-
- write_lock_irqsave(glock, flags);
- if (!peer->ibp_reconnecting) {
- if (peer->ibp_accepting)
- reason = "accepting";
- else if (peer->ibp_connecting)
- reason = "connecting";
- else if (!list_empty(&peer->ibp_conns))
- reason = "connected";
- else /* connected then closed */
- reason = "closed";
-
- goto no_reconnect;
- }
-
- LASSERT(!peer->ibp_accepting && !peer->ibp_connecting &&
- list_empty(&peer->ibp_conns));
- peer->ibp_reconnecting = 0;
-
- if (!kiblnd_peer_active(peer)) {
- list_splice_init(&peer->ibp_tx_queue, &txs);
- reason = "unlinked";
- goto no_reconnect;
- }
-
- peer->ibp_connecting++;
- peer->ibp_reconnected++;
- write_unlock_irqrestore(glock, flags);
-
- kiblnd_connect_peer(peer);
- return true;
-
-no_reconnect:
- write_unlock_irqrestore(glock, flags);
-
- CWARN("Abort reconnection of %s: %s\n",
- libcfs_nid2str(peer->ibp_nid), reason);
- kiblnd_txlist_done(peer->ibp_ni, &txs, -ECONNABORTED);
- return false;
-}
-
-void
-kiblnd_launch_tx(struct lnet_ni *ni, struct kib_tx *tx, lnet_nid_t nid)
-{
- struct kib_peer *peer;
- struct kib_peer *peer2;
- struct kib_conn *conn;
- rwlock_t *g_lock = &kiblnd_data.kib_global_lock;
- unsigned long flags;
- int rc;
-
- /*
- * If I get here, I've committed to send, so I complete the tx with
- * failure on any problems
- */
- LASSERT(!tx || !tx->tx_conn); /* only set when assigned a conn */
- LASSERT(!tx || tx->tx_nwrq > 0); /* work items have been set up */
-
- /*
- * First time, just use a read lock since I expect to find my peer
- * connected
- */
- read_lock_irqsave(g_lock, flags);
-
- peer = kiblnd_find_peer_locked(nid);
- if (peer && !list_empty(&peer->ibp_conns)) {
- /* Found a peer with an established connection */
- conn = kiblnd_get_conn_locked(peer);
- kiblnd_conn_addref(conn); /* 1 ref for me... */
-
- read_unlock_irqrestore(g_lock, flags);
-
- if (tx)
- kiblnd_queue_tx(tx, conn);
- kiblnd_conn_decref(conn); /* ...to here */
- return;
- }
-
- read_unlock(g_lock);
- /* Re-try with a write lock */
- write_lock(g_lock);
-
- peer = kiblnd_find_peer_locked(nid);
- if (peer) {
- if (list_empty(&peer->ibp_conns)) {
- /* found a peer, but it's still connecting... */
- LASSERT(kiblnd_peer_connecting(peer));
- if (tx)
- list_add_tail(&tx->tx_list,
- &peer->ibp_tx_queue);
- write_unlock_irqrestore(g_lock, flags);
- } else {
- conn = kiblnd_get_conn_locked(peer);
- kiblnd_conn_addref(conn); /* 1 ref for me... */
-
- write_unlock_irqrestore(g_lock, flags);
-
- if (tx)
- kiblnd_queue_tx(tx, conn);
- kiblnd_conn_decref(conn); /* ...to here */
- }
- return;
- }
-
- write_unlock_irqrestore(g_lock, flags);
-
- /* Allocate a peer ready to add to the peer table and retry */
- rc = kiblnd_create_peer(ni, &peer, nid);
- if (rc) {
- CERROR("Can't create peer %s\n", libcfs_nid2str(nid));
- if (tx) {
- tx->tx_status = -EHOSTUNREACH;
- tx->tx_waiting = 0;
- kiblnd_tx_done(ni, tx);
- }
- return;
- }
-
- write_lock_irqsave(g_lock, flags);
-
- peer2 = kiblnd_find_peer_locked(nid);
- if (peer2) {
- if (list_empty(&peer2->ibp_conns)) {
- /* found a peer, but it's still connecting... */
- LASSERT(kiblnd_peer_connecting(peer2));
- if (tx)
- list_add_tail(&tx->tx_list,
- &peer2->ibp_tx_queue);
- write_unlock_irqrestore(g_lock, flags);
- } else {
- conn = kiblnd_get_conn_locked(peer2);
- kiblnd_conn_addref(conn); /* 1 ref for me... */
-
- write_unlock_irqrestore(g_lock, flags);
-
- if (tx)
- kiblnd_queue_tx(tx, conn);
- kiblnd_conn_decref(conn); /* ...to here */
- }
-
- kiblnd_peer_decref(peer);
- return;
- }
-
- /* Brand new peer */
- LASSERT(!peer->ibp_connecting);
- peer->ibp_connecting = 1;
-
- /* always called with a ref on ni, which prevents ni being shutdown */
- LASSERT(!((struct kib_net *)ni->ni_data)->ibn_shutdown);
-
- if (tx)
- list_add_tail(&tx->tx_list, &peer->ibp_tx_queue);
-
- kiblnd_peer_addref(peer);
- list_add_tail(&peer->ibp_list, kiblnd_nid2peerlist(nid));
-
- write_unlock_irqrestore(g_lock, flags);
-
- kiblnd_connect_peer(peer);
- kiblnd_peer_decref(peer);
-}
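
The locking dance above is a classic optimistic lookup: take the read lock for
the common 'peer already exists' case, allocate a new peer outside any lock,
then re-check under the write lock and discard the allocation if another thread
won the race. Stripped of LNet detail (toy list table, pthread locks; none of
this is the LND's API):

	#include <pthread.h>
	#include <stdlib.h>

	struct peer { int key; struct peer *next; };

	static pthread_rwlock_t table_lock = PTHREAD_RWLOCK_INITIALIZER;
	static struct peer *table;	/* toy singly-linked peer table */

	static struct peer *find_locked(int key)
	{
		struct peer *p;

		for (p = table; p; p = p->next)
			if (p->key == key)
				return p;
		return NULL;
	}

	static struct peer *find_or_create(int key)
	{
		struct peer *p, *p2;

		pthread_rwlock_rdlock(&table_lock); /* fast path: expect hit */
		p = find_locked(key);
		pthread_rwlock_unlock(&table_lock);
		if (p)
			return p;

		p = malloc(sizeof(*p));		/* allocate outside the lock */
		if (!p)
			return NULL;
		p->key = key;

		pthread_rwlock_wrlock(&table_lock);
		p2 = find_locked(key);		/* re-check: lost the race? */
		if (p2) {
			pthread_rwlock_unlock(&table_lock);
			free(p);		/* yes: discard our copy */
			return p2;
		}
		p->next = table;		/* no: publish the new entry */
		table = p;
		pthread_rwlock_unlock(&table_lock);
		return p;
	}
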
-
-int
-kiblnd_send(struct lnet_ni *ni, void *private, struct lnet_msg *lntmsg)
-{
- struct lnet_hdr *hdr = &lntmsg->msg_hdr;
- int type = lntmsg->msg_type;
- struct lnet_process_id target = lntmsg->msg_target;
- int target_is_router = lntmsg->msg_target_is_router;
- int routing = lntmsg->msg_routing;
- unsigned int payload_niov = lntmsg->msg_niov;
- struct kvec *payload_iov = lntmsg->msg_iov;
- struct bio_vec *payload_kiov = lntmsg->msg_kiov;
- unsigned int payload_offset = lntmsg->msg_offset;
- unsigned int payload_nob = lntmsg->msg_len;
- struct iov_iter from;
- struct kib_msg *ibmsg;
- struct kib_rdma_desc *rd;
- struct kib_tx *tx;
- int nob;
- int rc;
-
- /* NB 'private' is different depending on what we're sending... */
-
- CDEBUG(D_NET, "sending %d bytes in %d frags to %s\n",
- payload_nob, payload_niov, libcfs_id2str(target));
-
- LASSERT(!payload_nob || payload_niov > 0);
- LASSERT(payload_niov <= LNET_MAX_IOV);
-
- /* Thread context */
- LASSERT(!in_interrupt());
- /* payload is either all vaddrs or all pages */
- LASSERT(!(payload_kiov && payload_iov));
-
- if (payload_kiov)
- iov_iter_bvec(&from, ITER_BVEC | WRITE,
- payload_kiov, payload_niov,
- payload_nob + payload_offset);
- else
- iov_iter_kvec(&from, ITER_KVEC | WRITE,
- payload_iov, payload_niov,
- payload_nob + payload_offset);
-
- iov_iter_advance(&from, payload_offset);
-
- switch (type) {
- default:
- LBUG();
- return -EIO;
-
- case LNET_MSG_ACK:
- LASSERT(!payload_nob);
- break;
-
- case LNET_MSG_GET:
- if (routing || target_is_router)
- break; /* send IMMEDIATE */
-
- /* is the REPLY message too small for RDMA? */
- nob = offsetof(struct kib_msg, ibm_u.immediate.ibim_payload[lntmsg->msg_md->md_length]);
- if (nob <= IBLND_MSG_SIZE)
- break; /* send IMMEDIATE */
-
- tx = kiblnd_get_idle_tx(ni, target.nid);
- if (!tx) {
- CERROR("Can't allocate txd for GET to %s\n",
- libcfs_nid2str(target.nid));
- return -ENOMEM;
- }
-
- ibmsg = tx->tx_msg;
- rd = &ibmsg->ibm_u.get.ibgm_rd;
- if (!(lntmsg->msg_md->md_options & LNET_MD_KIOV))
- rc = kiblnd_setup_rd_iov(ni, tx, rd,
- lntmsg->msg_md->md_niov,
- lntmsg->msg_md->md_iov.iov,
- 0, lntmsg->msg_md->md_length);
- else
- rc = kiblnd_setup_rd_kiov(ni, tx, rd,
- lntmsg->msg_md->md_niov,
- lntmsg->msg_md->md_iov.kiov,
- 0, lntmsg->msg_md->md_length);
- if (rc) {
- CERROR("Can't setup GET sink for %s: %d\n",
- libcfs_nid2str(target.nid), rc);
- kiblnd_tx_done(ni, tx);
- return -EIO;
- }
-
- nob = offsetof(struct kib_get_msg, ibgm_rd.rd_frags[rd->rd_nfrags]);
- ibmsg->ibm_u.get.ibgm_cookie = tx->tx_cookie;
- ibmsg->ibm_u.get.ibgm_hdr = *hdr;
-
- kiblnd_init_tx_msg(ni, tx, IBLND_MSG_GET_REQ, nob);
-
- tx->tx_lntmsg[1] = lnet_create_reply_msg(ni, lntmsg);
- if (!tx->tx_lntmsg[1]) {
- CERROR("Can't create reply for GET -> %s\n",
- libcfs_nid2str(target.nid));
- kiblnd_tx_done(ni, tx);
- return -EIO;
- }
-
- tx->tx_lntmsg[0] = lntmsg; /* finalise lntmsg[0,1] on completion */
- tx->tx_waiting = 1; /* waiting for GET_DONE */
- kiblnd_launch_tx(ni, tx, target.nid);
- return 0;
-
- case LNET_MSG_REPLY:
- case LNET_MSG_PUT:
- /* Is the payload small enough not to need RDMA? */
- nob = offsetof(struct kib_msg, ibm_u.immediate.ibim_payload[payload_nob]);
- if (nob <= IBLND_MSG_SIZE)
- break; /* send IMMEDIATE */
-
- tx = kiblnd_get_idle_tx(ni, target.nid);
- if (!tx) {
- CERROR("Can't allocate %s txd for %s\n",
- type == LNET_MSG_PUT ? "PUT" : "REPLY",
- libcfs_nid2str(target.nid));
- return -ENOMEM;
- }
-
- if (!payload_kiov)
- rc = kiblnd_setup_rd_iov(ni, tx, tx->tx_rd,
- payload_niov, payload_iov,
- payload_offset, payload_nob);
- else
- rc = kiblnd_setup_rd_kiov(ni, tx, tx->tx_rd,
- payload_niov, payload_kiov,
- payload_offset, payload_nob);
- if (rc) {
- CERROR("Can't setup PUT src for %s: %d\n",
- libcfs_nid2str(target.nid), rc);
- kiblnd_tx_done(ni, tx);
- return -EIO;
- }
-
- ibmsg = tx->tx_msg;
- ibmsg->ibm_u.putreq.ibprm_hdr = *hdr;
- ibmsg->ibm_u.putreq.ibprm_cookie = tx->tx_cookie;
- kiblnd_init_tx_msg(ni, tx, IBLND_MSG_PUT_REQ, sizeof(struct kib_putreq_msg));
-
- tx->tx_lntmsg[0] = lntmsg; /* finalise lntmsg on completion */
- tx->tx_waiting = 1; /* waiting for PUT_{ACK,NAK} */
- kiblnd_launch_tx(ni, tx, target.nid);
- return 0;
- }
-
- /* send IMMEDIATE */
-
- LASSERT(offsetof(struct kib_msg, ibm_u.immediate.ibim_payload[payload_nob])
- <= IBLND_MSG_SIZE);
-
- tx = kiblnd_get_idle_tx(ni, target.nid);
- if (!tx) {
- CERROR("Can't send %d to %s: tx descs exhausted\n",
- type, libcfs_nid2str(target.nid));
- return -ENOMEM;
- }
-
- ibmsg = tx->tx_msg;
- ibmsg->ibm_u.immediate.ibim_hdr = *hdr;
-
- rc = copy_from_iter(&ibmsg->ibm_u.immediate.ibim_payload, payload_nob,
- &from);
- if (rc != payload_nob) {
- kiblnd_pool_free_node(&tx->tx_pool->tpo_pool, &tx->tx_list);
- return -EFAULT;
- }
-
- nob = offsetof(struct kib_immediate_msg, ibim_payload[payload_nob]);
- kiblnd_init_tx_msg(ni, tx, IBLND_MSG_IMMEDIATE, nob);
-
- tx->tx_lntmsg[0] = lntmsg; /* finalise lntmsg on completion */
- kiblnd_launch_tx(ni, tx, target.nid);
- return 0;
-}
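
Every branch of kiblnd_send() makes the same size decision: if the header plus
payload fits in one pre-posted IBLND_MSG_SIZE buffer, send IMMEDIATE; otherwise
set up an RDMA. The same offsetof() test, standalone (the 4 KiB buffer and
96-byte header are assumptions for the example, not the wire format):

	#include <stdbool.h>
	#include <stddef.h>
	#include <stdio.h>

	#define MSG_SIZE 4096		/* one pre-posted message buffer */

	struct imm_msg {
		char hdr[96];		/* stand-in for the wire header */
		char payload[];		/* flexible payload */
	};

	static bool fits_immediate(size_t payload_nob)
	{
		return offsetof(struct imm_msg, payload) + payload_nob
		       <= MSG_SIZE;
	}

	int main(void)
	{
		printf("%d\n", fits_immediate(4000)); /* 1: 96 + 4000 fits */
		printf("%d\n", fits_immediate(4001)); /* 0: spills; use RDMA */
		return 0;
	}
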
-
-static void
-kiblnd_reply(struct lnet_ni *ni, struct kib_rx *rx, struct lnet_msg *lntmsg)
-{
- struct lnet_process_id target = lntmsg->msg_target;
- unsigned int niov = lntmsg->msg_niov;
- struct kvec *iov = lntmsg->msg_iov;
- struct bio_vec *kiov = lntmsg->msg_kiov;
- unsigned int offset = lntmsg->msg_offset;
- unsigned int nob = lntmsg->msg_len;
- struct kib_tx *tx;
- int rc;
-
- tx = kiblnd_get_idle_tx(ni, rx->rx_conn->ibc_peer->ibp_nid);
- if (!tx) {
- CERROR("Can't get tx for REPLY to %s\n",
- libcfs_nid2str(target.nid));
- goto failed_0;
- }
-
- if (!nob)
- rc = 0;
- else if (!kiov)
- rc = kiblnd_setup_rd_iov(ni, tx, tx->tx_rd,
- niov, iov, offset, nob);
- else
- rc = kiblnd_setup_rd_kiov(ni, tx, tx->tx_rd,
- niov, kiov, offset, nob);
-
- if (rc) {
- CERROR("Can't setup GET src for %s: %d\n",
- libcfs_nid2str(target.nid), rc);
- goto failed_1;
- }
-
- rc = kiblnd_init_rdma(rx->rx_conn, tx,
- IBLND_MSG_GET_DONE, nob,
- &rx->rx_msg->ibm_u.get.ibgm_rd,
- rx->rx_msg->ibm_u.get.ibgm_cookie);
- if (rc < 0) {
- CERROR("Can't setup rdma for GET from %s: %d\n",
- libcfs_nid2str(target.nid), rc);
- goto failed_1;
- }
-
- if (!nob) {
- /* No RDMA: local completion may happen now! */
- lnet_finalize(ni, lntmsg, 0);
- } else {
- /* RDMA: lnet_finalize(lntmsg) when it completes */
- tx->tx_lntmsg[0] = lntmsg;
- }
-
- kiblnd_queue_tx(tx, rx->rx_conn);
- return;
-
- failed_1:
- kiblnd_tx_done(ni, tx);
- failed_0:
- lnet_finalize(ni, lntmsg, -EIO);
-}
-
-int
-kiblnd_recv(struct lnet_ni *ni, void *private, struct lnet_msg *lntmsg,
- int delayed, struct iov_iter *to, unsigned int rlen)
-{
- struct kib_rx *rx = private;
- struct kib_msg *rxmsg = rx->rx_msg;
- struct kib_conn *conn = rx->rx_conn;
- struct kib_tx *tx;
- int nob;
- int post_credit = IBLND_POSTRX_PEER_CREDIT;
- int rc = 0;
-
- LASSERT(iov_iter_count(to) <= rlen);
- LASSERT(!in_interrupt());
- /* Either all pages or all vaddrs */
-
- switch (rxmsg->ibm_type) {
- default:
- LBUG();
-
- case IBLND_MSG_IMMEDIATE:
- nob = offsetof(struct kib_msg, ibm_u.immediate.ibim_payload[rlen]);
- if (nob > rx->rx_nob) {
- CERROR("Immediate message from %s too big: %d(%d)\n",
- libcfs_nid2str(rxmsg->ibm_u.immediate.ibim_hdr.src_nid),
- nob, rx->rx_nob);
- rc = -EPROTO;
- break;
- }
-
- rc = copy_to_iter(&rxmsg->ibm_u.immediate.ibim_payload, rlen,
- to);
- if (rc != rlen) {
- rc = -EFAULT;
- break;
- }
-
- rc = 0;
- lnet_finalize(ni, lntmsg, 0);
- break;
-
- case IBLND_MSG_PUT_REQ: {
- struct kib_msg *txmsg;
- struct kib_rdma_desc *rd;
-
- if (!iov_iter_count(to)) {
- lnet_finalize(ni, lntmsg, 0);
- kiblnd_send_completion(rx->rx_conn, IBLND_MSG_PUT_NAK, 0,
- rxmsg->ibm_u.putreq.ibprm_cookie);
- break;
- }
-
- tx = kiblnd_get_idle_tx(ni, conn->ibc_peer->ibp_nid);
- if (!tx) {
- CERROR("Can't allocate tx for %s\n",
- libcfs_nid2str(conn->ibc_peer->ibp_nid));
- /* Not replying will break the connection */
- rc = -ENOMEM;
- break;
- }
-
- txmsg = tx->tx_msg;
- rd = &txmsg->ibm_u.putack.ibpam_rd;
- if (!(to->type & ITER_BVEC))
- rc = kiblnd_setup_rd_iov(ni, tx, rd,
- to->nr_segs, to->kvec,
- to->iov_offset,
- iov_iter_count(to));
- else
- rc = kiblnd_setup_rd_kiov(ni, tx, rd,
- to->nr_segs, to->bvec,
- to->iov_offset,
- iov_iter_count(to));
- if (rc) {
- CERROR("Can't setup PUT sink for %s: %d\n",
- libcfs_nid2str(conn->ibc_peer->ibp_nid), rc);
- kiblnd_tx_done(ni, tx);
- /* tell peer it's over */
- kiblnd_send_completion(rx->rx_conn, IBLND_MSG_PUT_NAK, rc,
- rxmsg->ibm_u.putreq.ibprm_cookie);
- break;
- }
-
- nob = offsetof(struct kib_putack_msg, ibpam_rd.rd_frags[rd->rd_nfrags]);
- txmsg->ibm_u.putack.ibpam_src_cookie = rxmsg->ibm_u.putreq.ibprm_cookie;
- txmsg->ibm_u.putack.ibpam_dst_cookie = tx->tx_cookie;
-
- kiblnd_init_tx_msg(ni, tx, IBLND_MSG_PUT_ACK, nob);
-
- tx->tx_lntmsg[0] = lntmsg; /* finalise lntmsg on completion */
- tx->tx_waiting = 1; /* waiting for PUT_DONE */
- kiblnd_queue_tx(tx, conn);
-
- /* reposted buffer reserved for PUT_DONE */
- post_credit = IBLND_POSTRX_NO_CREDIT;
- break;
- }
-
- case IBLND_MSG_GET_REQ:
- if (lntmsg) {
- /* Optimized GET; RDMA lntmsg's payload */
- kiblnd_reply(ni, rx, lntmsg);
- } else {
- /* GET didn't match anything */
- kiblnd_send_completion(rx->rx_conn, IBLND_MSG_GET_DONE,
- -ENODATA,
- rxmsg->ibm_u.get.ibgm_cookie);
- }
- break;
- }
-
- kiblnd_post_rx(rx, post_credit);
- return rc;
-}
-
-int
-kiblnd_thread_start(int (*fn)(void *arg), void *arg, char *name)
-{
- struct task_struct *task = kthread_run(fn, arg, "%s", name);
-
- if (IS_ERR(task))
- return PTR_ERR(task);
-
- atomic_inc(&kiblnd_data.kib_nthreads);
- return 0;
-}
-
-static void
-kiblnd_thread_fini(void)
-{
- atomic_dec(&kiblnd_data.kib_nthreads);
-}
-
-static void
-kiblnd_peer_alive(struct kib_peer *peer)
-{
- /* This is racy, but everyone's only writing cfs_time_current() */
- peer->ibp_last_alive = cfs_time_current();
- mb();
-}
-
-static void
-kiblnd_peer_notify(struct kib_peer *peer)
-{
- int error = 0;
- unsigned long last_alive = 0;
- unsigned long flags;
-
- read_lock_irqsave(&kiblnd_data.kib_global_lock, flags);
-
- if (kiblnd_peer_idle(peer) && peer->ibp_error) {
- error = peer->ibp_error;
- peer->ibp_error = 0;
-
- last_alive = peer->ibp_last_alive;
- }
-
- read_unlock_irqrestore(&kiblnd_data.kib_global_lock, flags);
-
- if (error)
- lnet_notify(peer->ibp_ni,
- peer->ibp_nid, 0, last_alive);
-}
-
-void
-kiblnd_close_conn_locked(struct kib_conn *conn, int error)
-{
- /*
- * This just does the immediate housekeeping. 'error' is zero for a
- * normal shutdown which can happen only after the connection has been
- * established. If the connection is established, schedule the
- * connection to be finished off by the connd. Otherwise the connd is
- * already dealing with it (either to set it up or tear it down).
- * Caller holds kib_global_lock exclusively in irq context
- */
- struct kib_peer *peer = conn->ibc_peer;
- struct kib_dev *dev;
- unsigned long flags;
-
- LASSERT(error || conn->ibc_state >= IBLND_CONN_ESTABLISHED);
-
- if (error && !conn->ibc_comms_error)
- conn->ibc_comms_error = error;
-
- if (conn->ibc_state != IBLND_CONN_ESTABLISHED)
- return; /* already being handled */
-
- if (!error &&
- list_empty(&conn->ibc_tx_noops) &&
- list_empty(&conn->ibc_tx_queue) &&
- list_empty(&conn->ibc_tx_queue_rsrvd) &&
- list_empty(&conn->ibc_tx_queue_nocred) &&
- list_empty(&conn->ibc_active_txs)) {
- CDEBUG(D_NET, "closing conn to %s\n",
- libcfs_nid2str(peer->ibp_nid));
- } else {
- CNETERR("Closing conn to %s: error %d%s%s%s%s%s\n",
- libcfs_nid2str(peer->ibp_nid), error,
- list_empty(&conn->ibc_tx_queue) ? "" : "(sending)",
- list_empty(&conn->ibc_tx_noops) ? "" : "(sending_noops)",
- list_empty(&conn->ibc_tx_queue_rsrvd) ? "" : "(sending_rsrvd)",
- list_empty(&conn->ibc_tx_queue_nocred) ? "" : "(sending_nocred)",
- list_empty(&conn->ibc_active_txs) ? "" : "(waiting)");
- }
-
- dev = ((struct kib_net *)peer->ibp_ni->ni_data)->ibn_dev;
- list_del(&conn->ibc_list);
- /* connd (see below) takes over ibc_list's ref */
-
- if (list_empty(&peer->ibp_conns) && /* no more conns */
- kiblnd_peer_active(peer)) { /* still in peer table */
- kiblnd_unlink_peer_locked(peer);
-
- /* set/clear error on last conn */
- peer->ibp_error = conn->ibc_comms_error;
- }
-
- kiblnd_set_conn_state(conn, IBLND_CONN_CLOSING);
-
- if (error &&
- kiblnd_dev_can_failover(dev)) {
- list_add_tail(&dev->ibd_fail_list,
- &kiblnd_data.kib_failed_devs);
- wake_up(&kiblnd_data.kib_failover_waitq);
- }
-
- spin_lock_irqsave(&kiblnd_data.kib_connd_lock, flags);
-
- list_add_tail(&conn->ibc_list, &kiblnd_data.kib_connd_conns);
- wake_up(&kiblnd_data.kib_connd_waitq);
-
- spin_unlock_irqrestore(&kiblnd_data.kib_connd_lock, flags);
-}
-
-void
-kiblnd_close_conn(struct kib_conn *conn, int error)
-{
- unsigned long flags;
-
- write_lock_irqsave(&kiblnd_data.kib_global_lock, flags);
-
- kiblnd_close_conn_locked(conn, error);
-
- write_unlock_irqrestore(&kiblnd_data.kib_global_lock, flags);
-}
-
-static void
-kiblnd_handle_early_rxs(struct kib_conn *conn)
-{
- unsigned long flags;
- struct kib_rx *rx;
- struct kib_rx *tmp;
-
- LASSERT(!in_interrupt());
- LASSERT(conn->ibc_state >= IBLND_CONN_ESTABLISHED);
-
- write_lock_irqsave(&kiblnd_data.kib_global_lock, flags);
- list_for_each_entry_safe(rx, tmp, &conn->ibc_early_rxs, rx_list) {
- list_del(&rx->rx_list);
- write_unlock_irqrestore(&kiblnd_data.kib_global_lock, flags);
-
- kiblnd_handle_rx(rx);
-
- write_lock_irqsave(&kiblnd_data.kib_global_lock, flags);
- }
- write_unlock_irqrestore(&kiblnd_data.kib_global_lock, flags);
-}
-
-static void
-kiblnd_abort_txs(struct kib_conn *conn, struct list_head *txs)
-{
- LIST_HEAD(zombies);
- struct list_head *tmp;
- struct list_head *nxt;
- struct kib_tx *tx;
-
- spin_lock(&conn->ibc_lock);
-
- list_for_each_safe(tmp, nxt, txs) {
- tx = list_entry(tmp, struct kib_tx, tx_list);
-
- if (txs == &conn->ibc_active_txs) {
- LASSERT(!tx->tx_queued);
- LASSERT(tx->tx_waiting || tx->tx_sending);
- } else {
- LASSERT(tx->tx_queued);
- }
-
- tx->tx_status = -ECONNABORTED;
- tx->tx_waiting = 0;
-
- if (!tx->tx_sending) {
- tx->tx_queued = 0;
- list_del(&tx->tx_list);
- list_add(&tx->tx_list, &zombies);
- }
- }
-
- spin_unlock(&conn->ibc_lock);
-
- kiblnd_txlist_done(conn->ibc_peer->ibp_ni, &zombies, -ECONNABORTED);
-}
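
kiblnd_abort_txs() uses a recurring idiom: unlink the victims onto a private
'zombies' list while holding the lock, then run the potentially heavyweight
completions with no lock held. Schematically (the pthread mutex and node type
are illustrative):

	#include <pthread.h>
	#include <stddef.h>

	struct node { struct node *next; };

	/* Steal every node from 'src' (guarded by 'lock'), then complete
	 * them lock-free; complete() may sleep or take other locks.
	 */
	static void abort_all(pthread_mutex_t *lock, struct node **src,
			      void (*complete)(struct node *))
	{
		struct node *zombies, *n;

		pthread_mutex_lock(lock);
		zombies = *src;		/* unlink the whole list at once */
		*src = NULL;
		pthread_mutex_unlock(lock);

		while ((n = zombies) != NULL) {	/* no lock held here */
			zombies = n->next;
			complete(n);
		}
	}
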
-
-static void
-kiblnd_finalise_conn(struct kib_conn *conn)
-{
- LASSERT(!in_interrupt());
- LASSERT(conn->ibc_state > IBLND_CONN_INIT);
-
- kiblnd_set_conn_state(conn, IBLND_CONN_DISCONNECTED);
-
- /*
- * abort_receives moves QP state to IB_QPS_ERR. This is only required
- * for connections that didn't get as far as being connected, because
- * rdma_disconnect() does this for free.
- */
- kiblnd_abort_receives(conn);
-
- /*
- * Complete all tx descs not waiting for sends to complete.
- * NB we should be safe from RDMA now that the QP has changed state
- */
- kiblnd_abort_txs(conn, &conn->ibc_tx_noops);
- kiblnd_abort_txs(conn, &conn->ibc_tx_queue);
- kiblnd_abort_txs(conn, &conn->ibc_tx_queue_rsrvd);
- kiblnd_abort_txs(conn, &conn->ibc_tx_queue_nocred);
- kiblnd_abort_txs(conn, &conn->ibc_active_txs);
-
- kiblnd_handle_early_rxs(conn);
-}
-
-static void
-kiblnd_peer_connect_failed(struct kib_peer *peer, int active, int error)
-{
- LIST_HEAD(zombies);
- unsigned long flags;
-
- LASSERT(error);
- LASSERT(!in_interrupt());
-
- write_lock_irqsave(&kiblnd_data.kib_global_lock, flags);
-
- if (active) {
- LASSERT(peer->ibp_connecting > 0);
- peer->ibp_connecting--;
- } else {
- LASSERT(peer->ibp_accepting > 0);
- peer->ibp_accepting--;
- }
-
- if (kiblnd_peer_connecting(peer)) {
- /* another connection attempt under way... */
- write_unlock_irqrestore(&kiblnd_data.kib_global_lock,
- flags);
- return;
- }
-
- peer->ibp_reconnected = 0;
- if (list_empty(&peer->ibp_conns)) {
- /* Take peer's blocked transmits to complete with error */
- list_add(&zombies, &peer->ibp_tx_queue);
- list_del_init(&peer->ibp_tx_queue);
-
- if (kiblnd_peer_active(peer))
- kiblnd_unlink_peer_locked(peer);
-
- peer->ibp_error = error;
- } else {
- /* Can't have blocked transmits if there are connections */
- LASSERT(list_empty(&peer->ibp_tx_queue));
- }
-
- write_unlock_irqrestore(&kiblnd_data.kib_global_lock, flags);
-
- kiblnd_peer_notify(peer);
-
- if (list_empty(&zombies))
- return;
-
- CNETERR("Deleting messages for %s: connection failed\n",
- libcfs_nid2str(peer->ibp_nid));
-
- kiblnd_txlist_done(peer->ibp_ni, &zombies, -EHOSTUNREACH);
-}
-
-static void
-kiblnd_connreq_done(struct kib_conn *conn, int status)
-{
- struct kib_peer *peer = conn->ibc_peer;
- struct kib_tx *tx;
- struct kib_tx *tmp;
- struct list_head txs;
- unsigned long flags;
- int active;
-
- active = (conn->ibc_state == IBLND_CONN_ACTIVE_CONNECT);
-
- CDEBUG(D_NET, "%s: active(%d), version(%x), status(%d)\n",
- libcfs_nid2str(peer->ibp_nid), active,
- conn->ibc_version, status);
-
- LASSERT(!in_interrupt());
- LASSERT((conn->ibc_state == IBLND_CONN_ACTIVE_CONNECT &&
- peer->ibp_connecting > 0) ||
- (conn->ibc_state == IBLND_CONN_PASSIVE_WAIT &&
- peer->ibp_accepting > 0));
-
- kfree(conn->ibc_connvars);
- conn->ibc_connvars = NULL;
-
- if (status) {
- /* failed to establish connection */
- kiblnd_peer_connect_failed(peer, active, status);
- kiblnd_finalise_conn(conn);
- return;
- }
-
- /* connection established */
- write_lock_irqsave(&kiblnd_data.kib_global_lock, flags);
-
- conn->ibc_last_send = jiffies;
- kiblnd_set_conn_state(conn, IBLND_CONN_ESTABLISHED);
- kiblnd_peer_alive(peer);
-
- /*
- * Add conn to peer's list and nuke any dangling conns from a different
- * peer instance...
- */
- kiblnd_conn_addref(conn); /* +1 ref for ibc_list */
- list_add(&conn->ibc_list, &peer->ibp_conns);
- peer->ibp_reconnected = 0;
- if (active)
- peer->ibp_connecting--;
- else
- peer->ibp_accepting--;
-
- if (!peer->ibp_version) {
- peer->ibp_version = conn->ibc_version;
- peer->ibp_incarnation = conn->ibc_incarnation;
- }
-
- if (peer->ibp_version != conn->ibc_version ||
- peer->ibp_incarnation != conn->ibc_incarnation) {
- kiblnd_close_stale_conns_locked(peer, conn->ibc_version,
- conn->ibc_incarnation);
- peer->ibp_version = conn->ibc_version;
- peer->ibp_incarnation = conn->ibc_incarnation;
- }
-
- /* grab pending txs while I have the lock */
- list_add(&txs, &peer->ibp_tx_queue);
- list_del_init(&peer->ibp_tx_queue);
-
- if (!kiblnd_peer_active(peer) || /* peer has been deleted */
- conn->ibc_comms_error) { /* error has happened already */
- struct lnet_ni *ni = peer->ibp_ni;
-
- /* start to shut down connection */
- kiblnd_close_conn_locked(conn, -ECONNABORTED);
- write_unlock_irqrestore(&kiblnd_data.kib_global_lock, flags);
-
- kiblnd_txlist_done(ni, &txs, -ECONNABORTED);
-
- return;
- }
-
- /*
- * +1 ref for myself, this connection is visible to other threads
- * now, refcount of peer:ibp_conns can be released by connection
- * close from either a different thread, or the calling of
- * kiblnd_check_sends_locked() below. See bz21911 for details.
- */
- kiblnd_conn_addref(conn);
- write_unlock_irqrestore(&kiblnd_data.kib_global_lock, flags);
-
- /* Schedule blocked txs */
- spin_lock(&conn->ibc_lock);
- list_for_each_entry_safe(tx, tmp, &txs, tx_list) {
- list_del(&tx->tx_list);
-
- kiblnd_queue_tx_locked(tx, conn);
- }
- kiblnd_check_sends_locked(conn);
- spin_unlock(&conn->ibc_lock);
-
- /* schedule blocked rxs */
- kiblnd_handle_early_rxs(conn);
-
- kiblnd_conn_decref(conn);
-}
-
-static void
-kiblnd_reject(struct rdma_cm_id *cmid, struct kib_rej *rej)
-{
- int rc;
-
- rc = rdma_reject(cmid, rej, sizeof(*rej));
-
- if (rc)
- CWARN("Error %d sending reject\n", rc);
-}
-
-static int
-kiblnd_passive_connect(struct rdma_cm_id *cmid, void *priv, int priv_nob)
-{
- rwlock_t *g_lock = &kiblnd_data.kib_global_lock;
- struct kib_msg *reqmsg = priv;
- struct kib_msg *ackmsg;
- struct kib_dev *ibdev;
- struct kib_peer *peer;
- struct kib_peer *peer2;
- struct kib_conn *conn;
- struct lnet_ni *ni = NULL;
- struct kib_net *net = NULL;
- lnet_nid_t nid;
- struct rdma_conn_param cp;
- struct kib_rej rej;
- int version = IBLND_MSG_VERSION;
- unsigned long flags;
- int max_frags;
- int rc;
- struct sockaddr_in *peer_addr;
-
- LASSERT(!in_interrupt());
-
- /* cmid inherits 'context' from the corresponding listener id */
- ibdev = (struct kib_dev *)cmid->context;
- LASSERT(ibdev);
-
- memset(&rej, 0, sizeof(rej));
- rej.ibr_magic = IBLND_MSG_MAGIC;
- rej.ibr_why = IBLND_REJECT_FATAL;
- rej.ibr_cp.ibcp_max_msg_size = IBLND_MSG_SIZE;
-
- peer_addr = (struct sockaddr_in *)&cmid->route.addr.dst_addr;
- if (*kiblnd_tunables.kib_require_priv_port &&
- ntohs(peer_addr->sin_port) >= PROT_SOCK) {
- __u32 ip = ntohl(peer_addr->sin_addr.s_addr);
-
- CERROR("Peer's port (%pI4h:%hu) is not privileged\n",
- &ip, ntohs(peer_addr->sin_port));
- goto failed;
- }
-
- if (priv_nob < offsetof(struct kib_msg, ibm_type)) {
- CERROR("Short connection request\n");
- goto failed;
- }
-
- /*
- * Future protocol version compatibility support! If the
- * o2iblnd-specific protocol changes, or when LNET unifies
- * protocols over all LNDs, the initial connection will
- * negotiate a protocol version. I trap this here to avoid
- * console errors; the reject tells the peer which protocol I
- * speak.
- */
- if (reqmsg->ibm_magic == LNET_PROTO_MAGIC ||
- reqmsg->ibm_magic == __swab32(LNET_PROTO_MAGIC))
- goto failed;
- if (reqmsg->ibm_magic == IBLND_MSG_MAGIC &&
- reqmsg->ibm_version != IBLND_MSG_VERSION &&
- reqmsg->ibm_version != IBLND_MSG_VERSION_1)
- goto failed;
- if (reqmsg->ibm_magic == __swab32(IBLND_MSG_MAGIC) &&
- reqmsg->ibm_version != __swab16(IBLND_MSG_VERSION) &&
- reqmsg->ibm_version != __swab16(IBLND_MSG_VERSION_1))
- goto failed;
-
- rc = kiblnd_unpack_msg(reqmsg, priv_nob);
- if (rc) {
- CERROR("Can't parse connection request: %d\n", rc);
- goto failed;
- }
-
- nid = reqmsg->ibm_srcnid;
- ni = lnet_net2ni(LNET_NIDNET(reqmsg->ibm_dstnid));
-
- if (ni) {
- net = (struct kib_net *)ni->ni_data;
- rej.ibr_incarnation = net->ibn_incarnation;
- }
-
- if (!ni || /* no matching net */
- ni->ni_nid != reqmsg->ibm_dstnid || /* right NET, wrong NID! */
- net->ibn_dev != ibdev) { /* wrong device */
- CERROR("Can't accept conn from %s on %s (%s:%d:%pI4h): bad dst nid %s\n",
- libcfs_nid2str(nid),
- !ni ? "NA" : libcfs_nid2str(ni->ni_nid),
- ibdev->ibd_ifname, ibdev->ibd_nnets,
- &ibdev->ibd_ifip,
- libcfs_nid2str(reqmsg->ibm_dstnid));
-
- goto failed;
- }
-
- /* check time stamp as soon as possible */
- if (reqmsg->ibm_dststamp &&
- reqmsg->ibm_dststamp != net->ibn_incarnation) {
- CWARN("Stale connection request\n");
- rej.ibr_why = IBLND_REJECT_CONN_STALE;
- goto failed;
- }
-
- /* I can accept peer's version */
- version = reqmsg->ibm_version;
-
- if (reqmsg->ibm_type != IBLND_MSG_CONNREQ) {
- CERROR("Unexpected connreq msg type: %x from %s\n",
- reqmsg->ibm_type, libcfs_nid2str(nid));
- goto failed;
- }
-
- if (reqmsg->ibm_u.connparams.ibcp_queue_depth >
- kiblnd_msg_queue_size(version, ni)) {
- CERROR("Can't accept conn from %s, queue depth too large: %d (<=%d wanted)\n",
- libcfs_nid2str(nid),
- reqmsg->ibm_u.connparams.ibcp_queue_depth,
- kiblnd_msg_queue_size(version, ni));
-
- if (version == IBLND_MSG_VERSION)
- rej.ibr_why = IBLND_REJECT_MSG_QUEUE_SIZE;
-
- goto failed;
- }
-
- max_frags = reqmsg->ibm_u.connparams.ibcp_max_frags >> IBLND_FRAG_SHIFT;
- if (max_frags > kiblnd_rdma_frags(version, ni)) {
- CWARN("Can't accept conn from %s (version %x): max message size %d is too large (%d wanted)\n",
- libcfs_nid2str(nid), version, max_frags,
- kiblnd_rdma_frags(version, ni));
-
- if (version >= IBLND_MSG_VERSION)
- rej.ibr_why = IBLND_REJECT_RDMA_FRAGS;
-
- goto failed;
- } else if (max_frags < kiblnd_rdma_frags(version, ni) &&
- !net->ibn_fmr_ps) {
- CWARN("Can't accept conn from %s (version %x): max message size %d incompatible without FMR pool (%d wanted)\n",
- libcfs_nid2str(nid), version, max_frags,
- kiblnd_rdma_frags(version, ni));
-
- if (version == IBLND_MSG_VERSION)
- rej.ibr_why = IBLND_REJECT_RDMA_FRAGS;
-
- goto failed;
- }
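- /*
-  * In short, the max_frags negotiation above has three outcomes: a
-  * peer asking for more fragments than we advertise is rejected with
-  * IBLND_REJECT_RDMA_FRAGS so that it can renegotiate downwards (see
-  * kiblnd_check_reconnect()); a peer asking for fewer is also rejected
-  * when we have no FMR pool to remap RDMAs; otherwise the peer's value
-  * is accepted verbatim below.
-  */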
-
- if (reqmsg->ibm_u.connparams.ibcp_max_msg_size > IBLND_MSG_SIZE) {
- CERROR("Can't accept %s: message size %d too big (%d max)\n",
- libcfs_nid2str(nid),
- reqmsg->ibm_u.connparams.ibcp_max_msg_size,
- IBLND_MSG_SIZE);
- goto failed;
- }
-
- /* assume 'nid' is a new peer; create */
- rc = kiblnd_create_peer(ni, &peer, nid);
- if (rc) {
- CERROR("Can't create peer for %s\n", libcfs_nid2str(nid));
- rej.ibr_why = IBLND_REJECT_NO_RESOURCES;
- goto failed;
- }
-
- /* We have validated the peer's parameters so use those */
- peer->ibp_max_frags = max_frags;
- peer->ibp_queue_depth = reqmsg->ibm_u.connparams.ibcp_queue_depth;
-
- write_lock_irqsave(g_lock, flags);
-
- peer2 = kiblnd_find_peer_locked(nid);
- if (peer2) {
- if (!peer2->ibp_version) {
- peer2->ibp_version = version;
- peer2->ibp_incarnation = reqmsg->ibm_srcstamp;
- }
-
- /* not the guy I've talked with */
- if (peer2->ibp_incarnation != reqmsg->ibm_srcstamp ||
- peer2->ibp_version != version) {
- kiblnd_close_peer_conns_locked(peer2, -ESTALE);
-
- if (kiblnd_peer_active(peer2)) {
- peer2->ibp_incarnation = reqmsg->ibm_srcstamp;
- peer2->ibp_version = version;
- }
- write_unlock_irqrestore(g_lock, flags);
-
- CWARN("Conn stale %s version %x/%x incarnation %llu/%llu\n",
- libcfs_nid2str(nid), peer2->ibp_version, version,
- peer2->ibp_incarnation, reqmsg->ibm_srcstamp);
-
- kiblnd_peer_decref(peer);
- rej.ibr_why = IBLND_REJECT_CONN_STALE;
- goto failed;
- }
-
- /*
- * Tie-break connection race in favour of the higher NID.
- * If we keep running into a race condition multiple times,
- * we have to assume that the connection attempt with the
- * higher NID is stuck in a connecting state and will never
- * recover. As such, we pass through this if-block and let
- * the lower NID connection win so we can move forward.
- */
- if (peer2->ibp_connecting &&
- nid < ni->ni_nid && peer2->ibp_races <
- MAX_CONN_RACES_BEFORE_ABORT) {
- peer2->ibp_races++;
- write_unlock_irqrestore(g_lock, flags);
-
- CDEBUG(D_NET, "Conn race %s\n",
- libcfs_nid2str(peer2->ibp_nid));
-
- kiblnd_peer_decref(peer);
- rej.ibr_why = IBLND_REJECT_CONN_RACE;
- goto failed;
- }
- if (peer2->ibp_races >= MAX_CONN_RACES_BEFORE_ABORT)
- CNETERR("Conn race %s: unresolved after %d attempts, letting lower NID win\n",
- libcfs_nid2str(peer2->ibp_nid),
- MAX_CONN_RACES_BEFORE_ABORT);
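- /*
-  * Worked example (illustrative): suppose my NID is higher than the
-  * peer's and we connect to each other simultaneously.  While my
-  * active connect is pending (peer2->ibp_connecting), the peer's
-  * passive request arrives with nid < ni->ni_nid, so it is rejected
-  * with IBLND_REJECT_CONN_RACE and my active connect wins.  Only
-  * after MAX_CONN_RACES_BEFORE_ABORT consecutive races do we give up
-  * and let the lower NID's connection through.
-  */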
- /**
- * A passive connection is allowed even while this peer is waiting
- * for reconnection.
- */
- peer2->ibp_reconnecting = 0;
- peer2->ibp_races = 0;
- peer2->ibp_accepting++;
- kiblnd_peer_addref(peer2);
-
- /**
- * We raced with kiblnd_launch_tx (active connect) to create this
- * peer, so copy the validated parameters now that we know what the
- * peer's limits are.
- */
- peer2->ibp_max_frags = peer->ibp_max_frags;
- peer2->ibp_queue_depth = peer->ibp_queue_depth;
-
- write_unlock_irqrestore(g_lock, flags);
- kiblnd_peer_decref(peer);
- peer = peer2;
- } else {
- /* Brand new peer */
- LASSERT(!peer->ibp_accepting);
- LASSERT(!peer->ibp_version &&
- !peer->ibp_incarnation);
-
- peer->ibp_accepting = 1;
- peer->ibp_version = version;
- peer->ibp_incarnation = reqmsg->ibm_srcstamp;
-
- /* I have a ref on ni that prevents it being shutdown */
- LASSERT(!net->ibn_shutdown);
-
- kiblnd_peer_addref(peer);
- list_add_tail(&peer->ibp_list, kiblnd_nid2peerlist(nid));
-
- write_unlock_irqrestore(g_lock, flags);
- }
-
- conn = kiblnd_create_conn(peer, cmid, IBLND_CONN_PASSIVE_WAIT,
- version);
- if (!conn) {
- kiblnd_peer_connect_failed(peer, 0, -ENOMEM);
- kiblnd_peer_decref(peer);
- rej.ibr_why = IBLND_REJECT_NO_RESOURCES;
- goto failed;
- }
-
- /*
- * conn now "owns" cmid, so I return success from here on to ensure the
- * CM callback doesn't destroy cmid.
- */
- conn->ibc_incarnation = reqmsg->ibm_srcstamp;
- conn->ibc_credits = conn->ibc_queue_depth;
- conn->ibc_reserved_credits = conn->ibc_queue_depth;
- LASSERT(conn->ibc_credits + conn->ibc_reserved_credits +
- IBLND_OOB_MSGS(version) <= IBLND_RX_MSGS(conn));
-
- ackmsg = &conn->ibc_connvars->cv_msg;
- memset(ackmsg, 0, sizeof(*ackmsg));
-
- kiblnd_init_msg(ackmsg, IBLND_MSG_CONNACK,
- sizeof(ackmsg->ibm_u.connparams));
- ackmsg->ibm_u.connparams.ibcp_queue_depth = conn->ibc_queue_depth;
- ackmsg->ibm_u.connparams.ibcp_max_frags = conn->ibc_max_frags << IBLND_FRAG_SHIFT;
- ackmsg->ibm_u.connparams.ibcp_max_msg_size = IBLND_MSG_SIZE;
-
- kiblnd_pack_msg(ni, ackmsg, version, 0, nid, reqmsg->ibm_srcstamp);
-
- memset(&cp, 0, sizeof(cp));
- cp.private_data = ackmsg;
- cp.private_data_len = ackmsg->ibm_nob;
- cp.responder_resources = 0; /* No atomic ops or RDMA reads */
- cp.initiator_depth = 0;
- cp.flow_control = 1;
- cp.retry_count = *kiblnd_tunables.kib_retry_count;
- cp.rnr_retry_count = *kiblnd_tunables.kib_rnr_retry_count;
-
- CDEBUG(D_NET, "Accept %s\n", libcfs_nid2str(nid));
-
- rc = rdma_accept(cmid, &cp);
- if (rc) {
- CERROR("Can't accept %s: %d\n", libcfs_nid2str(nid), rc);
- rej.ibr_version = version;
- rej.ibr_why = IBLND_REJECT_FATAL;
-
- kiblnd_reject(cmid, &rej);
- kiblnd_connreq_done(conn, rc);
- kiblnd_conn_decref(conn);
- }
-
- lnet_ni_decref(ni);
- return 0;
-
- failed:
- if (ni) {
- rej.ibr_cp.ibcp_queue_depth = kiblnd_msg_queue_size(version, ni);
- rej.ibr_cp.ibcp_max_frags = kiblnd_rdma_frags(version, ni);
- lnet_ni_decref(ni);
- }
-
- rej.ibr_version = version;
- kiblnd_reject(cmid, &rej);
-
- return -ECONNREFUSED;
-}
-
-static void
-kiblnd_check_reconnect(struct kib_conn *conn, int version,
- __u64 incarnation, int why, struct kib_connparams *cp)
-{
- rwlock_t *glock = &kiblnd_data.kib_global_lock;
- struct kib_peer *peer = conn->ibc_peer;
- char *reason;
- int msg_size = IBLND_MSG_SIZE;
- int frag_num = -1;
- int queue_dep = -1;
- bool reconnect;
- unsigned long flags;
-
- LASSERT(conn->ibc_state == IBLND_CONN_ACTIVE_CONNECT);
- LASSERT(peer->ibp_connecting > 0); /* 'conn' at least */
- LASSERT(!peer->ibp_reconnecting);
-
- if (cp) {
- msg_size = cp->ibcp_max_msg_size;
- frag_num = cp->ibcp_max_frags << IBLND_FRAG_SHIFT;
- queue_dep = cp->ibcp_queue_depth;
- }
-
- write_lock_irqsave(glock, flags);
- /**
- * Retry the connection if it's still needed and no other connection
- * attempts (active or passive) are in progress.
- * NB: reconnect is still needed even when ibp_tx_queue is empty if
- * ibp_version != version, because the reconnect may have been
- * initiated by kiblnd_query().
- */
- reconnect = (!list_empty(&peer->ibp_tx_queue) ||
- peer->ibp_version != version) &&
- peer->ibp_connecting == 1 &&
- !peer->ibp_accepting;
- if (!reconnect) {
- reason = "no need";
- goto out;
- }
-
- switch (why) {
- default:
- reason = "Unknown";
- break;
-
- case IBLND_REJECT_RDMA_FRAGS: {
- struct lnet_ioctl_config_lnd_tunables *tunables;
-
- if (!cp) {
- reason = "can't negotiate max frags";
- goto out;
- }
- tunables = peer->ibp_ni->ni_lnd_tunables;
- if (!tunables->lt_tun_u.lt_o2ib.lnd_map_on_demand) {
- reason = "map_on_demand must be enabled";
- goto out;
- }
- if (conn->ibc_max_frags <= frag_num) {
- reason = "unsupported max frags";
- goto out;
- }
-
- peer->ibp_max_frags = frag_num;
- reason = "rdma fragments";
- break;
- }
- case IBLND_REJECT_MSG_QUEUE_SIZE:
- if (!cp) {
- reason = "can't negotiate queue depth";
- goto out;
- }
- if (conn->ibc_queue_depth <= queue_dep) {
- reason = "unsupported queue depth";
- goto out;
- }
-
- peer->ibp_queue_depth = queue_dep;
- reason = "queue depth";
- break;
-
- case IBLND_REJECT_CONN_STALE:
- reason = "stale";
- break;
-
- case IBLND_REJECT_CONN_RACE:
- reason = "conn race";
- break;
-
- case IBLND_REJECT_CONN_UNCOMPAT:
- reason = "version negotiation";
- break;
- }
-
- conn->ibc_reconnect = 1;
- peer->ibp_reconnecting = 1;
- peer->ibp_version = version;
- if (incarnation)
- peer->ibp_incarnation = incarnation;
-out:
- write_unlock_irqrestore(glock, flags);
-
- CNETERR("%s: %s (%s), %x, %x, msg_size: %d, queue_depth: %d/%d, max_frags: %d/%d\n",
- libcfs_nid2str(peer->ibp_nid),
- reconnect ? "reconnect" : "don't reconnect",
- reason, IBLND_MSG_VERSION, version, msg_size,
- conn->ibc_queue_depth, queue_dep,
- conn->ibc_max_frags, frag_num);
- /**
- * if conn::ibc_reconnect is TRUE, connd will reconnect to the peer
- * when it destroys this zombie connection
- */
-}
-
-static void
-kiblnd_rejected(struct kib_conn *conn, int reason, void *priv, int priv_nob)
-{
- struct kib_peer *peer = conn->ibc_peer;
-
- LASSERT(!in_interrupt());
- LASSERT(conn->ibc_state == IBLND_CONN_ACTIVE_CONNECT);
-
- switch (reason) {
- case IB_CM_REJ_STALE_CONN:
- kiblnd_check_reconnect(conn, IBLND_MSG_VERSION, 0,
- IBLND_REJECT_CONN_STALE, NULL);
- break;
-
- case IB_CM_REJ_INVALID_SERVICE_ID:
- CNETERR("%s rejected: no listener at %d\n",
- libcfs_nid2str(peer->ibp_nid),
- *kiblnd_tunables.kib_service);
- break;
-
- case IB_CM_REJ_CONSUMER_DEFINED:
- if (priv_nob >= offsetof(struct kib_rej, ibr_padding)) {
- struct kib_rej *rej = priv;
- struct kib_connparams *cp = NULL;
- int flip = 0;
- __u64 incarnation = -1;
-
- /* NB. the default incarnation is -1 because:
- * a) V1 ignores the dst incarnation in the connreq.
- * b) V2 provides an incarnation when rejecting me, so
- *    the -1 will be overwritten.
- *
- * If I connect to a V1 peer using the V2 protocol, it
- * may reject me and then upgrade to V2. Unaware of the
- * upgrade, I retry with V1; the upgraded V2 peer can
- * then tell I'm talking to the old version and reject
- * me (incarnation is -1).
- */
-
- if (rej->ibr_magic == __swab32(IBLND_MSG_MAGIC) ||
- rej->ibr_magic == __swab32(LNET_PROTO_MAGIC)) {
- __swab32s(&rej->ibr_magic);
- __swab16s(&rej->ibr_version);
- flip = 1;
- }
-
- if (priv_nob >= sizeof(struct kib_rej) &&
- rej->ibr_version > IBLND_MSG_VERSION_1) {
- /*
- * priv_nob is always 148 (the define of
- * IB_CM_REJ_PRIVATE_DATA_SIZE) in current versions of
- * OFED, so we still need to check the version.
- */
- cp = &rej->ibr_cp;
-
- if (flip) {
- __swab64s(&rej->ibr_incarnation);
- __swab16s(&cp->ibcp_queue_depth);
- __swab16s(&cp->ibcp_max_frags);
- __swab32s(&cp->ibcp_max_msg_size);
- }
-
- incarnation = rej->ibr_incarnation;
- }
-
- if (rej->ibr_magic != IBLND_MSG_MAGIC &&
- rej->ibr_magic != LNET_PROTO_MAGIC) {
- CERROR("%s rejected: consumer defined fatal error\n",
- libcfs_nid2str(peer->ibp_nid));
- break;
- }
-
- if (rej->ibr_version != IBLND_MSG_VERSION &&
- rej->ibr_version != IBLND_MSG_VERSION_1) {
- CERROR("%s rejected: o2iblnd version %x error\n",
- libcfs_nid2str(peer->ibp_nid),
- rej->ibr_version);
- break;
- }
-
- if (rej->ibr_why == IBLND_REJECT_FATAL &&
- rej->ibr_version == IBLND_MSG_VERSION_1) {
- CDEBUG(D_NET, "rejected by old version peer %s: %x\n",
- libcfs_nid2str(peer->ibp_nid), rej->ibr_version);
-
- if (conn->ibc_version != IBLND_MSG_VERSION_1)
- rej->ibr_why = IBLND_REJECT_CONN_UNCOMPAT;
- }
-
- switch (rej->ibr_why) {
- case IBLND_REJECT_CONN_RACE:
- case IBLND_REJECT_CONN_STALE:
- case IBLND_REJECT_CONN_UNCOMPAT:
- case IBLND_REJECT_MSG_QUEUE_SIZE:
- case IBLND_REJECT_RDMA_FRAGS:
- kiblnd_check_reconnect(conn, rej->ibr_version,
- incarnation,
- rej->ibr_why, cp);
- break;
-
- case IBLND_REJECT_NO_RESOURCES:
- CERROR("%s rejected: o2iblnd no resources\n",
- libcfs_nid2str(peer->ibp_nid));
- break;
-
- case IBLND_REJECT_FATAL:
- CERROR("%s rejected: o2iblnd fatal error\n",
- libcfs_nid2str(peer->ibp_nid));
- break;
-
- default:
- CERROR("%s rejected: o2iblnd reason %d\n",
- libcfs_nid2str(peer->ibp_nid),
- rej->ibr_why);
- break;
- }
- break;
- }
- /* fall through */
- default:
- CNETERR("%s rejected: reason %d, size %d\n",
- libcfs_nid2str(peer->ibp_nid), reason, priv_nob);
- break;
- }
-
- kiblnd_connreq_done(conn, -ECONNREFUSED);
-}
-
-static void
-kiblnd_check_connreply(struct kib_conn *conn, void *priv, int priv_nob)
-{
- struct kib_peer *peer = conn->ibc_peer;
- struct lnet_ni *ni = peer->ibp_ni;
- struct kib_net *net = ni->ni_data;
- struct kib_msg *msg = priv;
- int ver = conn->ibc_version;
- int rc = kiblnd_unpack_msg(msg, priv_nob);
- unsigned long flags;
-
- LASSERT(net);
-
- if (rc) {
- CERROR("Can't unpack connack from %s: %d\n",
- libcfs_nid2str(peer->ibp_nid), rc);
- goto failed;
- }
-
- if (msg->ibm_type != IBLND_MSG_CONNACK) {
- CERROR("Unexpected message %d from %s\n",
- msg->ibm_type, libcfs_nid2str(peer->ibp_nid));
- rc = -EPROTO;
- goto failed;
- }
-
- if (ver != msg->ibm_version) {
- CERROR("%s replied version %x is different with requested version %x\n",
- libcfs_nid2str(peer->ibp_nid), msg->ibm_version, ver);
- rc = -EPROTO;
- goto failed;
- }
-
- if (msg->ibm_u.connparams.ibcp_queue_depth >
- conn->ibc_queue_depth) {
- CERROR("%s has incompatible queue depth %d (<=%d wanted)\n",
- libcfs_nid2str(peer->ibp_nid),
- msg->ibm_u.connparams.ibcp_queue_depth,
- conn->ibc_queue_depth);
- rc = -EPROTO;
- goto failed;
- }
-
- if ((msg->ibm_u.connparams.ibcp_max_frags >> IBLND_FRAG_SHIFT) >
- conn->ibc_max_frags) {
- CERROR("%s has incompatible max_frags %d (<=%d wanted)\n",
- libcfs_nid2str(peer->ibp_nid),
- msg->ibm_u.connparams.ibcp_max_frags >> IBLND_FRAG_SHIFT,
- conn->ibc_max_frags);
- rc = -EPROTO;
- goto failed;
- }
-
- if (msg->ibm_u.connparams.ibcp_max_msg_size > IBLND_MSG_SIZE) {
- CERROR("%s max message size %d too big (%d max)\n",
- libcfs_nid2str(peer->ibp_nid),
- msg->ibm_u.connparams.ibcp_max_msg_size,
- IBLND_MSG_SIZE);
- rc = -EPROTO;
- goto failed;
- }
-
- read_lock_irqsave(&kiblnd_data.kib_global_lock, flags);
- if (msg->ibm_dstnid == ni->ni_nid &&
- msg->ibm_dststamp == net->ibn_incarnation)
- rc = 0;
- else
- rc = -ESTALE;
- read_unlock_irqrestore(&kiblnd_data.kib_global_lock, flags);
-
- if (rc) {
- CERROR("Bad connection reply from %s, rc = %d, version: %x max_frags: %d\n",
- libcfs_nid2str(peer->ibp_nid), rc,
- msg->ibm_version, msg->ibm_u.connparams.ibcp_max_frags);
- goto failed;
- }
-
- conn->ibc_incarnation = msg->ibm_srcstamp;
- conn->ibc_credits = msg->ibm_u.connparams.ibcp_queue_depth;
- conn->ibc_reserved_credits = msg->ibm_u.connparams.ibcp_queue_depth;
- conn->ibc_queue_depth = msg->ibm_u.connparams.ibcp_queue_depth;
- conn->ibc_max_frags = msg->ibm_u.connparams.ibcp_max_frags >> IBLND_FRAG_SHIFT;
- LASSERT(conn->ibc_credits + conn->ibc_reserved_credits +
- IBLND_OOB_MSGS(ver) <= IBLND_RX_MSGS(conn));
-
- kiblnd_connreq_done(conn, 0);
- return;
-
- failed:
- /*
- * NB My QP has already established itself, so I handle anything going
- * wrong here by setting ibc_comms_error.
- * kiblnd_connreq_done(0) moves the conn state to ESTABLISHED, but the
- * comms error then tears it down immediately.
- */
- LASSERT(rc);
- conn->ibc_comms_error = rc;
- kiblnd_connreq_done(conn, 0);
-}
-
-static int
-kiblnd_active_connect(struct rdma_cm_id *cmid)
-{
- struct kib_peer *peer = (struct kib_peer *)cmid->context;
- struct kib_conn *conn;
- struct kib_msg *msg;
- struct rdma_conn_param cp;
- int version;
- __u64 incarnation;
- unsigned long flags;
- int rc;
-
- read_lock_irqsave(&kiblnd_data.kib_global_lock, flags);
-
- incarnation = peer->ibp_incarnation;
- version = !peer->ibp_version ? IBLND_MSG_VERSION :
- peer->ibp_version;
-
- read_unlock_irqrestore(&kiblnd_data.kib_global_lock, flags);
-
- conn = kiblnd_create_conn(peer, cmid, IBLND_CONN_ACTIVE_CONNECT,
- version);
- if (!conn) {
- kiblnd_peer_connect_failed(peer, 1, -ENOMEM);
- kiblnd_peer_decref(peer); /* lose cmid's ref */
- return -ENOMEM;
- }
-
- /*
- * conn "owns" cmid now, so I return success from here on to ensure the
- * CM callback doesn't destroy cmid. conn also takes over cmid's ref
- * on peer
- */
- msg = &conn->ibc_connvars->cv_msg;
-
- memset(msg, 0, sizeof(*msg));
- kiblnd_init_msg(msg, IBLND_MSG_CONNREQ, sizeof(msg->ibm_u.connparams));
- msg->ibm_u.connparams.ibcp_queue_depth = conn->ibc_queue_depth;
- msg->ibm_u.connparams.ibcp_max_frags = conn->ibc_max_frags << IBLND_FRAG_SHIFT;
- msg->ibm_u.connparams.ibcp_max_msg_size = IBLND_MSG_SIZE;
-
- kiblnd_pack_msg(peer->ibp_ni, msg, version,
- 0, peer->ibp_nid, incarnation);
-
- memset(&cp, 0, sizeof(cp));
- cp.private_data = msg;
- cp.private_data_len = msg->ibm_nob;
- cp.responder_resources = 0; /* No atomic ops or RDMA reads */
- cp.initiator_depth = 0;
- cp.flow_control = 1;
- cp.retry_count = *kiblnd_tunables.kib_retry_count;
- cp.rnr_retry_count = *kiblnd_tunables.kib_rnr_retry_count;
-
- LASSERT(cmid->context == (void *)conn);
- LASSERT(conn->ibc_cmid == cmid);
-
- rc = rdma_connect(cmid, &cp);
- if (rc) {
- CERROR("Can't connect to %s: %d\n",
- libcfs_nid2str(peer->ibp_nid), rc);
- kiblnd_connreq_done(conn, rc);
- kiblnd_conn_decref(conn);
- }
-
- return 0;
-}
-
-int
-kiblnd_cm_callback(struct rdma_cm_id *cmid, struct rdma_cm_event *event)
-{
- struct kib_peer *peer;
- struct kib_conn *conn;
- int rc;
-
- switch (event->event) {
- default:
- CERROR("Unexpected event: %d, status: %d\n",
- event->event, event->status);
- LBUG();
-
- case RDMA_CM_EVENT_CONNECT_REQUEST:
- /* destroy cmid on failure */
- rc = kiblnd_passive_connect(cmid,
- (void *)KIBLND_CONN_PARAM(event),
- KIBLND_CONN_PARAM_LEN(event));
- CDEBUG(D_NET, "connreq: %d\n", rc);
- return rc;
-
- case RDMA_CM_EVENT_ADDR_ERROR:
- peer = (struct kib_peer *)cmid->context;
- CNETERR("%s: ADDR ERROR %d\n",
- libcfs_nid2str(peer->ibp_nid), event->status);
- kiblnd_peer_connect_failed(peer, 1, -EHOSTUNREACH);
- kiblnd_peer_decref(peer);
- return -EHOSTUNREACH; /* rc destroys cmid */
-
- case RDMA_CM_EVENT_ADDR_RESOLVED:
- peer = (struct kib_peer *)cmid->context;
-
- CDEBUG(D_NET, "%s Addr resolved: %d\n",
- libcfs_nid2str(peer->ibp_nid), event->status);
-
- if (event->status) {
- CNETERR("Can't resolve address for %s: %d\n",
- libcfs_nid2str(peer->ibp_nid), event->status);
- rc = event->status;
- } else {
- rc = rdma_resolve_route(
- cmid, *kiblnd_tunables.kib_timeout * 1000);
- if (!rc)
- return 0;
- /* Can't initiate route resolution */
- CERROR("Can't resolve route for %s: %d\n",
- libcfs_nid2str(peer->ibp_nid), rc);
- }
- kiblnd_peer_connect_failed(peer, 1, rc);
- kiblnd_peer_decref(peer);
- return rc; /* rc destroys cmid */
-
- case RDMA_CM_EVENT_ROUTE_ERROR:
- peer = (struct kib_peer *)cmid->context;
- CNETERR("%s: ROUTE ERROR %d\n",
- libcfs_nid2str(peer->ibp_nid), event->status);
- kiblnd_peer_connect_failed(peer, 1, -EHOSTUNREACH);
- kiblnd_peer_decref(peer);
- return -EHOSTUNREACH; /* rc destroys cmid */
-
- case RDMA_CM_EVENT_ROUTE_RESOLVED:
- peer = (struct kib_peer *)cmid->context;
- CDEBUG(D_NET, "%s Route resolved: %d\n",
- libcfs_nid2str(peer->ibp_nid), event->status);
-
- if (!event->status)
- return kiblnd_active_connect(cmid);
-
- CNETERR("Can't resolve route for %s: %d\n",
- libcfs_nid2str(peer->ibp_nid), event->status);
- kiblnd_peer_connect_failed(peer, 1, event->status);
- kiblnd_peer_decref(peer);
- return event->status; /* rc destroys cmid */
-
- case RDMA_CM_EVENT_UNREACHABLE:
- conn = (struct kib_conn *)cmid->context;
- LASSERT(conn->ibc_state == IBLND_CONN_ACTIVE_CONNECT ||
- conn->ibc_state == IBLND_CONN_PASSIVE_WAIT);
- CNETERR("%s: UNREACHABLE %d\n",
- libcfs_nid2str(conn->ibc_peer->ibp_nid), event->status);
- kiblnd_connreq_done(conn, -ENETDOWN);
- kiblnd_conn_decref(conn);
- return 0;
-
- case RDMA_CM_EVENT_CONNECT_ERROR:
- conn = (struct kib_conn *)cmid->context;
- LASSERT(conn->ibc_state == IBLND_CONN_ACTIVE_CONNECT ||
- conn->ibc_state == IBLND_CONN_PASSIVE_WAIT);
- CNETERR("%s: CONNECT ERROR %d\n",
- libcfs_nid2str(conn->ibc_peer->ibp_nid), event->status);
- kiblnd_connreq_done(conn, -ENOTCONN);
- kiblnd_conn_decref(conn);
- return 0;
-
- case RDMA_CM_EVENT_REJECTED:
- conn = (struct kib_conn *)cmid->context;
- switch (conn->ibc_state) {
- default:
- LBUG();
-
- case IBLND_CONN_PASSIVE_WAIT:
- CERROR("%s: REJECTED %d\n",
- libcfs_nid2str(conn->ibc_peer->ibp_nid),
- event->status);
- kiblnd_connreq_done(conn, -ECONNRESET);
- break;
-
- case IBLND_CONN_ACTIVE_CONNECT:
- kiblnd_rejected(conn, event->status,
- (void *)KIBLND_CONN_PARAM(event),
- KIBLND_CONN_PARAM_LEN(event));
- break;
- }
- kiblnd_conn_decref(conn);
- return 0;
-
- case RDMA_CM_EVENT_ESTABLISHED:
- conn = (struct kib_conn *)cmid->context;
- switch (conn->ibc_state) {
- default:
- LBUG();
-
- case IBLND_CONN_PASSIVE_WAIT:
- CDEBUG(D_NET, "ESTABLISHED (passive): %s\n",
- libcfs_nid2str(conn->ibc_peer->ibp_nid));
- kiblnd_connreq_done(conn, 0);
- break;
-
- case IBLND_CONN_ACTIVE_CONNECT:
- CDEBUG(D_NET, "ESTABLISHED(active): %s\n",
- libcfs_nid2str(conn->ibc_peer->ibp_nid));
- kiblnd_check_connreply(conn,
- (void *)KIBLND_CONN_PARAM(event),
- KIBLND_CONN_PARAM_LEN(event));
- break;
- }
- /* net keeps its ref on conn! */
- return 0;
-
- case RDMA_CM_EVENT_TIMEWAIT_EXIT:
- CDEBUG(D_NET, "Ignore TIMEWAIT_EXIT event\n");
- return 0;
- case RDMA_CM_EVENT_DISCONNECTED:
- conn = (struct kib_conn *)cmid->context;
- if (conn->ibc_state < IBLND_CONN_ESTABLISHED) {
- CERROR("%s DISCONNECTED\n",
- libcfs_nid2str(conn->ibc_peer->ibp_nid));
- kiblnd_connreq_done(conn, -ECONNRESET);
- } else {
- kiblnd_close_conn(conn, 0);
- }
- kiblnd_conn_decref(conn);
- cmid->context = NULL;
- return 0;
-
- case RDMA_CM_EVENT_DEVICE_REMOVAL:
- LCONSOLE_ERROR_MSG(0x131,
- "Received notification of device removal\n"
- "Please shutdown LNET to allow this to proceed\n");
- /*
- * Can't remove network from underneath LNET for now, so I have
- * to ignore this
- */
- return 0;
-
- case RDMA_CM_EVENT_ADDR_CHANGE:
- LCONSOLE_INFO("Physical link changed (eg hca/port)\n");
- return 0;
- }
-}
-
-static int
-kiblnd_check_txs_locked(struct kib_conn *conn, struct list_head *txs)
-{
- struct kib_tx *tx;
- struct list_head *ttmp;
-
- list_for_each(ttmp, txs) {
- tx = list_entry(ttmp, struct kib_tx, tx_list);
-
- if (txs != &conn->ibc_active_txs) {
- LASSERT(tx->tx_queued);
- } else {
- LASSERT(!tx->tx_queued);
- LASSERT(tx->tx_waiting || tx->tx_sending);
- }
-
- if (cfs_time_aftereq(jiffies, tx->tx_deadline)) {
- CERROR("Timed out tx: %s, %lu seconds\n",
- kiblnd_queue2str(conn, txs),
- cfs_duration_sec(jiffies - tx->tx_deadline));
- return 1;
- }
- }
-
- return 0;
-}
-
-static int
-kiblnd_conn_timed_out_locked(struct kib_conn *conn)
-{
- return kiblnd_check_txs_locked(conn, &conn->ibc_tx_queue) ||
- kiblnd_check_txs_locked(conn, &conn->ibc_tx_noops) ||
- kiblnd_check_txs_locked(conn, &conn->ibc_tx_queue_rsrvd) ||
- kiblnd_check_txs_locked(conn, &conn->ibc_tx_queue_nocred) ||
- kiblnd_check_txs_locked(conn, &conn->ibc_active_txs);
-}
-
-static void
-kiblnd_check_conns(int idx)
-{
- LIST_HEAD(closes);
- LIST_HEAD(checksends);
- struct list_head *peers = &kiblnd_data.kib_peers[idx];
- struct list_head *ptmp;
- struct kib_peer *peer;
- struct kib_conn *conn;
- struct kib_conn *temp;
- struct kib_conn *tmp;
- struct list_head *ctmp;
- unsigned long flags;
-
- /*
- * NB. We expect to have a look at all the peers and not find any
- * RDMAs to time out, so we just use a shared lock while we
- * take a look...
- */
- read_lock_irqsave(&kiblnd_data.kib_global_lock, flags);
-
- list_for_each(ptmp, peers) {
- peer = list_entry(ptmp, struct kib_peer, ibp_list);
-
- list_for_each(ctmp, &peer->ibp_conns) {
- int timedout;
- int sendnoop;
-
- conn = list_entry(ctmp, struct kib_conn, ibc_list);
-
- LASSERT(conn->ibc_state == IBLND_CONN_ESTABLISHED);
-
- spin_lock(&conn->ibc_lock);
-
- sendnoop = kiblnd_need_noop(conn);
- timedout = kiblnd_conn_timed_out_locked(conn);
- if (!sendnoop && !timedout) {
- spin_unlock(&conn->ibc_lock);
- continue;
- }
-
- if (timedout) {
- CERROR("Timed out RDMA with %s (%lu): c: %u, oc: %u, rc: %u\n",
- libcfs_nid2str(peer->ibp_nid),
- cfs_duration_sec(cfs_time_current() -
- peer->ibp_last_alive),
- conn->ibc_credits,
- conn->ibc_outstanding_credits,
- conn->ibc_reserved_credits);
- list_add(&conn->ibc_connd_list, &closes);
- } else {
- list_add(&conn->ibc_connd_list, &checksends);
- }
- /* +ref for 'closes' or 'checksends' */
- kiblnd_conn_addref(conn);
-
- spin_unlock(&conn->ibc_lock);
- }
- }
-
- read_unlock_irqrestore(&kiblnd_data.kib_global_lock, flags);
-
- /*
- * Handle timeout by closing the whole
- * connection. We can only be sure RDMA activity
- * has ceased once the QP has been modified.
- */
- list_for_each_entry_safe(conn, tmp, &closes, ibc_connd_list) {
- list_del(&conn->ibc_connd_list);
- kiblnd_close_conn(conn, -ETIMEDOUT);
- kiblnd_conn_decref(conn);
- }
-
- /*
- * In case we have enough credits to return via a
- * NOOP, but there were no non-blocking tx descs
- * free to do it last time...
- */
- list_for_each_entry_safe(conn, temp, &checksends, ibc_connd_list) {
- list_del(&conn->ibc_connd_list);
-
- spin_lock(&conn->ibc_lock);
- kiblnd_check_sends_locked(conn);
- spin_unlock(&conn->ibc_lock);
-
- kiblnd_conn_decref(conn);
- }
-}
-
-static void
-kiblnd_disconnect_conn(struct kib_conn *conn)
-{
- LASSERT(!in_interrupt());
- LASSERT(current == kiblnd_data.kib_connd);
- LASSERT(conn->ibc_state == IBLND_CONN_CLOSING);
-
- rdma_disconnect(conn->ibc_cmid);
- kiblnd_finalise_conn(conn);
-
- kiblnd_peer_notify(conn->ibc_peer);
-}
-
-/**
- * High-water mark for reconnection to the same peer; reconnection
- * attempts should be delayed after more than KIB_RECONN_HIGH_RACE tries.
- */
-#define KIB_RECONN_HIGH_RACE 10
-/**
- * Allow connd to take a break and handle other things after consecutive
- * reconnection attempts.
- */
-#define KIB_RECONN_BREAK 100
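-/*
- * In kiblnd_connd() below, zombie conns whose peer has raced fewer than
- * KIB_RECONN_HIGH_RACE times go straight onto kib_reconn_list; the rest
- * wait on kib_reconn_wait, which is spliced back once per second.  At
- * most KIB_RECONN_BREAK reconnects are issued per pass so that connd
- * can service other work in between.
- */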
-
-int
-kiblnd_connd(void *arg)
-{
- spinlock_t *lock = &kiblnd_data.kib_connd_lock;
- wait_queue_entry_t wait;
- unsigned long flags;
- struct kib_conn *conn;
- int timeout;
- int i;
- int dropped_lock;
- int peer_index = 0;
- unsigned long deadline = jiffies;
-
- init_waitqueue_entry(&wait, current);
- kiblnd_data.kib_connd = current;
-
- spin_lock_irqsave(lock, flags);
-
- while (!kiblnd_data.kib_shutdown) {
- int reconn = 0;
-
- dropped_lock = 0;
-
- if (!list_empty(&kiblnd_data.kib_connd_zombies)) {
- struct kib_peer *peer = NULL;
-
- conn = list_entry(kiblnd_data.kib_connd_zombies.next,
- struct kib_conn, ibc_list);
- list_del(&conn->ibc_list);
- if (conn->ibc_reconnect) {
- peer = conn->ibc_peer;
- kiblnd_peer_addref(peer);
- }
-
- spin_unlock_irqrestore(lock, flags);
- dropped_lock = 1;
-
- kiblnd_destroy_conn(conn);
-
- spin_lock_irqsave(lock, flags);
- if (!peer) {
- kfree(conn);
- continue;
- }
-
- conn->ibc_peer = peer;
- if (peer->ibp_reconnected < KIB_RECONN_HIGH_RACE)
- list_add_tail(&conn->ibc_list,
- &kiblnd_data.kib_reconn_list);
- else
- list_add_tail(&conn->ibc_list,
- &kiblnd_data.kib_reconn_wait);
- }
-
- if (!list_empty(&kiblnd_data.kib_connd_conns)) {
- conn = list_entry(kiblnd_data.kib_connd_conns.next,
- struct kib_conn, ibc_list);
- list_del(&conn->ibc_list);
-
- spin_unlock_irqrestore(lock, flags);
- dropped_lock = 1;
-
- kiblnd_disconnect_conn(conn);
- kiblnd_conn_decref(conn);
-
- spin_lock_irqsave(lock, flags);
- }
-
- while (reconn < KIB_RECONN_BREAK) {
- if (kiblnd_data.kib_reconn_sec !=
- ktime_get_real_seconds()) {
- kiblnd_data.kib_reconn_sec = ktime_get_real_seconds();
- list_splice_init(&kiblnd_data.kib_reconn_wait,
- &kiblnd_data.kib_reconn_list);
- }
-
- if (list_empty(&kiblnd_data.kib_reconn_list))
- break;
-
- conn = list_entry(kiblnd_data.kib_reconn_list.next,
- struct kib_conn, ibc_list);
- list_del(&conn->ibc_list);
-
- spin_unlock_irqrestore(lock, flags);
- dropped_lock = 1;
-
- reconn += kiblnd_reconnect_peer(conn->ibc_peer);
- kiblnd_peer_decref(conn->ibc_peer);
- kfree(conn);
-
- spin_lock_irqsave(lock, flags);
- }
-
- /* careful with the jiffy wrap... */
- timeout = (int)(deadline - jiffies);
- if (timeout <= 0) {
- const int n = 4;
- const int p = 1;
- int chunk = kiblnd_data.kib_peer_hash_size;
-
- spin_unlock_irqrestore(lock, flags);
- dropped_lock = 1;
-
- /*
- * Time to check for RDMA timeouts on a few more
- * peers: I do checks every 'p' seconds on a
- * proportion of the peer table and I need to check
- * every connection 'n' times within a timeout
- * interval, to ensure I detect a timeout on any
- * connection within (n+1)/n times the timeout
- * interval.
- */
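- /*
-  * Worked example (assuming the default timeout of 50s from
-  * o2iblnd_modparams.c): with n = 4 and p = 1,
-  *   chunk = hash_size * 4 * 1 / 50
-  * i.e. 8% of the peer table is scanned each second, so every
-  * peer is visited roughly n = 4 times per timeout interval.
-  */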
- if (*kiblnd_tunables.kib_timeout > n * p)
- chunk = (chunk * n * p) /
- *kiblnd_tunables.kib_timeout;
- if (!chunk)
- chunk = 1;
-
- for (i = 0; i < chunk; i++) {
- kiblnd_check_conns(peer_index);
- peer_index = (peer_index + 1) %
- kiblnd_data.kib_peer_hash_size;
- }
-
- deadline += msecs_to_jiffies(p * MSEC_PER_SEC);
- spin_lock_irqsave(lock, flags);
- }
-
- if (dropped_lock)
- continue;
-
- /* Nothing to do for 'timeout' */
- set_current_state(TASK_INTERRUPTIBLE);
- add_wait_queue(&kiblnd_data.kib_connd_waitq, &wait);
- spin_unlock_irqrestore(lock, flags);
-
- schedule_timeout(timeout);
-
- remove_wait_queue(&kiblnd_data.kib_connd_waitq, &wait);
- spin_lock_irqsave(lock, flags);
- }
-
- spin_unlock_irqrestore(lock, flags);
-
- kiblnd_thread_fini();
- return 0;
-}
-
-void
-kiblnd_qp_event(struct ib_event *event, void *arg)
-{
- struct kib_conn *conn = arg;
-
- switch (event->event) {
- case IB_EVENT_COMM_EST:
- CDEBUG(D_NET, "%s established\n",
- libcfs_nid2str(conn->ibc_peer->ibp_nid));
- /*
- * We received a packet but the connection isn't established yet;
- * the handshake packet was probably lost, so we are free to force
- * the connection into the established state.
- */
- rdma_notify(conn->ibc_cmid, IB_EVENT_COMM_EST);
- return;
-
- default:
- CERROR("%s: Async QP event type %d\n",
- libcfs_nid2str(conn->ibc_peer->ibp_nid), event->event);
- return;
- }
-}
-
-static void
-kiblnd_complete(struct ib_wc *wc)
-{
- switch (kiblnd_wreqid2type(wc->wr_id)) {
- default:
- LBUG();
-
- case IBLND_WID_MR:
- if (wc->status != IB_WC_SUCCESS &&
- wc->status != IB_WC_WR_FLUSH_ERR)
- CNETERR("FastReg failed: %d\n", wc->status);
- break;
-
- case IBLND_WID_RDMA:
- /*
- * We only get RDMA completion notification if it fails. All
- * subsequent work items, including the final SEND will fail
- * too. However we can't print out any more info about the
- * failing RDMA because 'tx' might be back on the idle list or
- * even reused already if we didn't manage to post all our work
- * items
- */
- CNETERR("RDMA (tx: %p) failed: %d\n",
- kiblnd_wreqid2ptr(wc->wr_id), wc->status);
- return;
-
- case IBLND_WID_TX:
- kiblnd_tx_complete(kiblnd_wreqid2ptr(wc->wr_id), wc->status);
- return;
-
- case IBLND_WID_RX:
- kiblnd_rx_complete(kiblnd_wreqid2ptr(wc->wr_id), wc->status,
- wc->byte_len);
- return;
- }
-}
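-/*
- * A sketch of the wr_id convention dispatched on above (the real
- * helpers live in o2iblnd.h; this is an illustration, not a verbatim
- * copy): the completion context pointer is word-aligned, so its low
- * bits are free to carry an IBLND_WID_* type tag, roughly
- *
- *	wreqid = (__u64)(unsigned long)ptr | type;
- *	type   = wreqid & IBLND_WID_MASK;
- *	ptr    = (void *)((unsigned long)wreqid & ~IBLND_WID_MASK);
- *
- * which lets one CQ dispatch TX, RX, RDMA and FastReg completions.
- */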
-
-void
-kiblnd_cq_completion(struct ib_cq *cq, void *arg)
-{
- /*
- * NB I'm not allowed to schedule this conn once its refcount has
- * reached 0. Since fundamentally I'm racing with scheduler threads
- * consuming my CQ, I could be called after all completions have
- * occurred. But in this case, !ibc_nrx && !ibc_nsends_posted
- * and this CQ is about to be destroyed so I NOOP.
- */
- struct kib_conn *conn = arg;
- struct kib_sched_info *sched = conn->ibc_sched;
- unsigned long flags;
-
- LASSERT(cq == conn->ibc_cq);
-
- spin_lock_irqsave(&sched->ibs_lock, flags);
-
- conn->ibc_ready = 1;
-
- if (!conn->ibc_scheduled &&
- (conn->ibc_nrx > 0 ||
- conn->ibc_nsends_posted > 0)) {
- kiblnd_conn_addref(conn); /* +1 ref for sched_conns */
- conn->ibc_scheduled = 1;
- list_add_tail(&conn->ibc_sched_list, &sched->ibs_conns);
-
- if (waitqueue_active(&sched->ibs_waitq))
- wake_up(&sched->ibs_waitq);
- }
-
- spin_unlock_irqrestore(&sched->ibs_lock, flags);
-}
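-/*
- * Note: ibc_ready and ibc_scheduled form a simple handshake with
- * kiblnd_scheduler().  The CQ callback above sets ibc_ready and
- * enqueues the conn only if it isn't already scheduled; the scheduler
- * clears ibc_ready before polling and re-enqueues the conn if it
- * drained a completion or ibc_ready was set again while it polled.
- */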
-
-void
-kiblnd_cq_event(struct ib_event *event, void *arg)
-{
- struct kib_conn *conn = arg;
-
- CERROR("%s: async CQ event type %d\n",
- libcfs_nid2str(conn->ibc_peer->ibp_nid), event->event);
-}
-
-int
-kiblnd_scheduler(void *arg)
-{
- long id = (long)arg;
- struct kib_sched_info *sched;
- struct kib_conn *conn;
- wait_queue_entry_t wait;
- unsigned long flags;
- struct ib_wc wc;
- int did_something;
- int busy_loops = 0;
- int rc;
-
- init_waitqueue_entry(&wait, current);
-
- sched = kiblnd_data.kib_scheds[KIB_THREAD_CPT(id)];
-
- rc = cfs_cpt_bind(lnet_cpt_table(), sched->ibs_cpt);
- if (rc) {
- CWARN("Unable to bind on CPU partition %d, please verify whether all CPUs are healthy and reload modules if necessary, otherwise your system might under risk of low performance\n",
- sched->ibs_cpt);
- }
-
- spin_lock_irqsave(&sched->ibs_lock, flags);
-
- while (!kiblnd_data.kib_shutdown) {
- if (busy_loops++ >= IBLND_RESCHED) {
- spin_unlock_irqrestore(&sched->ibs_lock, flags);
-
- cond_resched();
- busy_loops = 0;
-
- spin_lock_irqsave(&sched->ibs_lock, flags);
- }
-
- did_something = 0;
-
- if (!list_empty(&sched->ibs_conns)) {
- conn = list_entry(sched->ibs_conns.next, struct kib_conn,
- ibc_sched_list);
- /* take over kib_sched_conns' ref on conn... */
- LASSERT(conn->ibc_scheduled);
- list_del(&conn->ibc_sched_list);
- conn->ibc_ready = 0;
-
- spin_unlock_irqrestore(&sched->ibs_lock, flags);
-
- wc.wr_id = IBLND_WID_INVAL;
-
- rc = ib_poll_cq(conn->ibc_cq, 1, &wc);
- if (!rc) {
- rc = ib_req_notify_cq(conn->ibc_cq,
- IB_CQ_NEXT_COMP);
- if (rc < 0) {
- CWARN("%s: ib_req_notify_cq failed: %d, closing connection\n",
- libcfs_nid2str(conn->ibc_peer->ibp_nid), rc);
- kiblnd_close_conn(conn, -EIO);
- kiblnd_conn_decref(conn);
- spin_lock_irqsave(&sched->ibs_lock,
- flags);
- continue;
- }
-
- rc = ib_poll_cq(conn->ibc_cq, 1, &wc);
- }
-
- if (unlikely(rc > 0 && wc.wr_id == IBLND_WID_INVAL)) {
- LCONSOLE_ERROR("ib_poll_cq (rc: %d) returned invalid wr_id, opcode %d, status: %d, vendor_err: %d, conn: %s status: %d\nplease upgrade firmware and OFED or contact vendor.\n",
- rc, wc.opcode, wc.status,
- wc.vendor_err,
- libcfs_nid2str(conn->ibc_peer->ibp_nid),
- conn->ibc_state);
- rc = -EINVAL;
- }
-
- if (rc < 0) {
- CWARN("%s: ib_poll_cq failed: %d, closing connection\n",
- libcfs_nid2str(conn->ibc_peer->ibp_nid),
- rc);
- kiblnd_close_conn(conn, -EIO);
- kiblnd_conn_decref(conn);
- spin_lock_irqsave(&sched->ibs_lock, flags);
- continue;
- }
-
- spin_lock_irqsave(&sched->ibs_lock, flags);
-
- if (rc || conn->ibc_ready) {
- /*
- * There may be another completion waiting; get
- * another scheduler to check while I handle
- * this one...
- */
- /* +1 ref for sched_conns */
- kiblnd_conn_addref(conn);
- list_add_tail(&conn->ibc_sched_list,
- &sched->ibs_conns);
- if (waitqueue_active(&sched->ibs_waitq))
- wake_up(&sched->ibs_waitq);
- } else {
- conn->ibc_scheduled = 0;
- }
-
- if (rc) {
- spin_unlock_irqrestore(&sched->ibs_lock, flags);
- kiblnd_complete(&wc);
-
- spin_lock_irqsave(&sched->ibs_lock, flags);
- }
-
- kiblnd_conn_decref(conn); /* ...drop my ref from above */
- did_something = 1;
- }
-
- if (did_something)
- continue;
-
- set_current_state(TASK_INTERRUPTIBLE);
- add_wait_queue_exclusive(&sched->ibs_waitq, &wait);
- spin_unlock_irqrestore(&sched->ibs_lock, flags);
-
- schedule();
- busy_loops = 0;
-
- remove_wait_queue(&sched->ibs_waitq, &wait);
- spin_lock_irqsave(&sched->ibs_lock, flags);
- }
-
- spin_unlock_irqrestore(&sched->ibs_lock, flags);
-
- kiblnd_thread_fini();
- return 0;
-}
-
-int
-kiblnd_failover_thread(void *arg)
-{
- rwlock_t *glock = &kiblnd_data.kib_global_lock;
- struct kib_dev *dev;
- wait_queue_entry_t wait;
- unsigned long flags;
- int rc;
-
- LASSERT(*kiblnd_tunables.kib_dev_failover);
-
- init_waitqueue_entry(&wait, current);
- write_lock_irqsave(glock, flags);
-
- while (!kiblnd_data.kib_shutdown) {
- int do_failover = 0;
- int long_sleep;
-
- list_for_each_entry(dev, &kiblnd_data.kib_failed_devs,
- ibd_fail_list) {
- if (time_before(cfs_time_current(),
- dev->ibd_next_failover))
- continue;
- do_failover = 1;
- break;
- }
-
- if (do_failover) {
- list_del_init(&dev->ibd_fail_list);
- dev->ibd_failover = 1;
- write_unlock_irqrestore(glock, flags);
-
- rc = kiblnd_dev_failover(dev);
-
- write_lock_irqsave(glock, flags);
-
- LASSERT(dev->ibd_failover);
- dev->ibd_failover = 0;
- if (rc >= 0) { /* Device is OK or failover succeeded */
- dev->ibd_next_failover = cfs_time_shift(3);
- continue;
- }
-
- /* failed to failover, retry later */
- dev->ibd_next_failover =
- cfs_time_shift(min(dev->ibd_failed_failover, 10));
- if (kiblnd_dev_can_failover(dev)) {
- list_add_tail(&dev->ibd_fail_list,
- &kiblnd_data.kib_failed_devs);
- }
-
- continue;
- }
-
- /* long sleep if no more pending failover */
- long_sleep = list_empty(&kiblnd_data.kib_failed_devs);
-
- set_current_state(TASK_INTERRUPTIBLE);
- add_wait_queue(&kiblnd_data.kib_failover_waitq, &wait);
- write_unlock_irqrestore(glock, flags);
-
- rc = schedule_timeout(long_sleep ? 10 * HZ :
- HZ);
- remove_wait_queue(&kiblnd_data.kib_failover_waitq, &wait);
- write_lock_irqsave(glock, flags);
-
- if (!long_sleep || rc)
- continue;
-
- /*
- * After a long sleep, routinely check all active devices; we need
- * this because if there is no active connection on a dev and no
- * local SEND, we may listen on the wrong HCA forever after a
- * bonding failover.
- */
- list_for_each_entry(dev, &kiblnd_data.kib_devs, ibd_list) {
- if (kiblnd_dev_can_failover(dev)) {
- list_add_tail(&dev->ibd_fail_list,
- &kiblnd_data.kib_failed_devs);
- }
- }
- }
-
- write_unlock_irqrestore(glock, flags);
-
- kiblnd_thread_fini();
- return 0;
-}
diff --git a/drivers/staging/lustre/lnet/klnds/o2iblnd/o2iblnd_modparams.c b/drivers/staging/lustre/lnet/klnds/o2iblnd/o2iblnd_modparams.c
deleted file mode 100644
index b9235400bf1d..000000000000
--- a/drivers/staging/lustre/lnet/klnds/o2iblnd/o2iblnd_modparams.c
+++ /dev/null
@@ -1,287 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/*
- * GPL HEADER START
- *
- * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 only,
- * as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * General Public License version 2 for more details (a copy is included
- * in the LICENSE file that accompanied this code).
- *
- * You should have received a copy of the GNU General Public License
- * version 2 along with this program; If not, see
- * http://www.gnu.org/licenses/gpl-2.0.html
- *
- * GPL HEADER END
- */
-/*
- * Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved.
- * Use is subject to license terms.
- *
- * Copyright (c) 2012, Intel Corporation.
- */
-/*
- * This file is part of Lustre, http://www.lustre.org/
- * Lustre is a trademark of Sun Microsystems, Inc.
- *
- * lnet/klnds/o2iblnd/o2iblnd_modparams.c
- *
- * Author: Eric Barton <eric@bartonsoftware.com>
- */
-
-#include "o2iblnd.h"
-
-static int service = 987;
-module_param(service, int, 0444);
-MODULE_PARM_DESC(service, "service number (within RDMA_PS_TCP)");
-
-static int cksum;
-module_param(cksum, int, 0644);
-MODULE_PARM_DESC(cksum, "set non-zero to enable message (not RDMA) checksums");
-
-static int timeout = 50;
-module_param(timeout, int, 0644);
-MODULE_PARM_DESC(timeout, "timeout (seconds)");
-
-/*
- * Number of threads in each scheduler pool, which is per-CPT;
- * we estimate a reasonable value based on the CPUs if it's set to zero.
- */
-static int nscheds;
-module_param(nscheds, int, 0444);
-MODULE_PARM_DESC(nscheds, "number of threads in each scheduler pool");
-
-/* NB: this value is shared by all CPTs, it can grow at runtime */
-static int ntx = 512;
-module_param(ntx, int, 0444);
-MODULE_PARM_DESC(ntx, "# of message descriptors allocated for each pool");
-
-/* NB: this value is shared by all CPTs */
-static int credits = 256;
-module_param(credits, int, 0444);
-MODULE_PARM_DESC(credits, "# concurrent sends");
-
-static int peer_credits = 8;
-module_param(peer_credits, int, 0444);
-MODULE_PARM_DESC(peer_credits, "# concurrent sends to 1 peer");
-
-static int peer_credits_hiw;
-module_param(peer_credits_hiw, int, 0444);
-MODULE_PARM_DESC(peer_credits_hiw, "when eagerly to return credits");
-
-static int peer_buffer_credits;
-module_param(peer_buffer_credits, int, 0444);
-MODULE_PARM_DESC(peer_buffer_credits, "# per-peer router buffer credits");
-
-static int peer_timeout = 180;
-module_param(peer_timeout, int, 0444);
-MODULE_PARM_DESC(peer_timeout, "Seconds without aliveness news to declare peer dead (<=0 to disable)");
-
-static char *ipif_name = "ib0";
-module_param(ipif_name, charp, 0444);
-MODULE_PARM_DESC(ipif_name, "IPoIB interface name");
-
-static int retry_count = 5;
-module_param(retry_count, int, 0644);
-MODULE_PARM_DESC(retry_count, "Retransmissions when no ACK received");
-
-static int rnr_retry_count = 6;
-module_param(rnr_retry_count, int, 0644);
-MODULE_PARM_DESC(rnr_retry_count, "RNR retransmissions");
-
-static int keepalive = 100;
-module_param(keepalive, int, 0644);
-MODULE_PARM_DESC(keepalive, "Idle time in seconds before sending a keepalive");
-
-static int ib_mtu;
-module_param(ib_mtu, int, 0444);
-MODULE_PARM_DESC(ib_mtu, "IB MTU 256/512/1024/2048/4096");
-
-static int concurrent_sends;
-module_param(concurrent_sends, int, 0444);
-MODULE_PARM_DESC(concurrent_sends, "send work-queue sizing");
-
-#define IBLND_DEFAULT_MAP_ON_DEMAND IBLND_MAX_RDMA_FRAGS
-static int map_on_demand = IBLND_DEFAULT_MAP_ON_DEMAND;
-module_param(map_on_demand, int, 0444);
-MODULE_PARM_DESC(map_on_demand, "map on demand");
-
-/* NB: this value is shared by all CPTs, it can grow at runtime */
-static int fmr_pool_size = 512;
-module_param(fmr_pool_size, int, 0444);
-MODULE_PARM_DESC(fmr_pool_size, "size of fmr pool on each CPT (>= ntx / 4)");
-
-/* NB: this value is shared by all CPTs, it can grow at runtime */
-static int fmr_flush_trigger = 384;
-module_param(fmr_flush_trigger, int, 0444);
-MODULE_PARM_DESC(fmr_flush_trigger, "# dirty FMRs that triggers pool flush");
-
-static int fmr_cache = 1;
-module_param(fmr_cache, int, 0444);
-MODULE_PARM_DESC(fmr_cache, "non-zero to enable FMR caching");
-
-/*
- * 0: disable failover
- * 1: enable failover if necessary
- * 2: force to failover (for debug)
- */
-static int dev_failover;
-module_param(dev_failover, int, 0444);
-MODULE_PARM_DESC(dev_failover, "HCA failover for bonding (0 off, 1 on, other values reserved)");
-
-static int require_privileged_port;
-module_param(require_privileged_port, int, 0644);
-MODULE_PARM_DESC(require_privileged_port, "require privileged port when accepting connection");
-
-static int use_privileged_port = 1;
-module_param(use_privileged_port, int, 0644);
-MODULE_PARM_DESC(use_privileged_port, "use privileged port when initiating connection");
-
-struct kib_tunables kiblnd_tunables = {
- .kib_dev_failover = &dev_failover,
- .kib_service = &service,
- .kib_cksum = &cksum,
- .kib_timeout = &timeout,
- .kib_keepalive = &keepalive,
- .kib_ntx = &ntx,
- .kib_default_ipif = &ipif_name,
- .kib_retry_count = &retry_count,
- .kib_rnr_retry_count = &rnr_retry_count,
- .kib_ib_mtu = &ib_mtu,
- .kib_require_priv_port = &require_privileged_port,
- .kib_use_priv_port = &use_privileged_port,
- .kib_nscheds = &nscheds
-};
-
-static struct lnet_ioctl_config_o2iblnd_tunables default_tunables;
-
-/* # messages/RDMAs in-flight */
-int kiblnd_msg_queue_size(int version, struct lnet_ni *ni)
-{
- if (version == IBLND_MSG_VERSION_1)
- return IBLND_MSG_QUEUE_SIZE_V1;
- else if (ni)
- return ni->ni_peertxcredits;
- else
- return peer_credits;
-}
-
-int kiblnd_tunables_setup(struct lnet_ni *ni)
-{
- struct lnet_ioctl_config_o2iblnd_tunables *tunables;
-
- /*
- * if no tunables were specified, set up the tunables with the
- * defaults
- */
- if (!ni->ni_lnd_tunables) {
- ni->ni_lnd_tunables = kzalloc(sizeof(*ni->ni_lnd_tunables),
- GFP_NOFS);
- if (!ni->ni_lnd_tunables)
- return -ENOMEM;
-
- memcpy(&ni->ni_lnd_tunables->lt_tun_u.lt_o2ib,
- &default_tunables, sizeof(*tunables));
- }
- tunables = &ni->ni_lnd_tunables->lt_tun_u.lt_o2ib;
-
- /* Current API version */
- tunables->lnd_version = 0;
-
- if (kiblnd_translate_mtu(*kiblnd_tunables.kib_ib_mtu) < 0) {
- CERROR("Invalid ib_mtu %d, expected 256/512/1024/2048/4096\n",
- *kiblnd_tunables.kib_ib_mtu);
- return -EINVAL;
- }
-
- if (!ni->ni_peertimeout)
- ni->ni_peertimeout = peer_timeout;
-
- if (!ni->ni_maxtxcredits)
- ni->ni_maxtxcredits = credits;
-
- if (!ni->ni_peertxcredits)
- ni->ni_peertxcredits = peer_credits;
-
- if (!ni->ni_peerrtrcredits)
- ni->ni_peerrtrcredits = peer_buffer_credits;
-
- if (ni->ni_peertxcredits < IBLND_CREDITS_DEFAULT)
- ni->ni_peertxcredits = IBLND_CREDITS_DEFAULT;
-
- if (ni->ni_peertxcredits > IBLND_CREDITS_MAX)
- ni->ni_peertxcredits = IBLND_CREDITS_MAX;
-
- if (ni->ni_peertxcredits > credits)
- ni->ni_peertxcredits = credits;
-
- if (!tunables->lnd_peercredits_hiw)
- tunables->lnd_peercredits_hiw = peer_credits_hiw;
-
- if (tunables->lnd_peercredits_hiw < ni->ni_peertxcredits / 2)
- tunables->lnd_peercredits_hiw = ni->ni_peertxcredits / 2;
-
- if (tunables->lnd_peercredits_hiw >= ni->ni_peertxcredits)
- tunables->lnd_peercredits_hiw = ni->ni_peertxcredits - 1;
-
- if (tunables->lnd_map_on_demand <= 0 ||
- tunables->lnd_map_on_demand > IBLND_MAX_RDMA_FRAGS) {
- /* Use the default */
- CWARN("Invalid map_on_demand (%d), expects 1 - %d. Using default of %d\n",
- tunables->lnd_map_on_demand,
- IBLND_MAX_RDMA_FRAGS, IBLND_DEFAULT_MAP_ON_DEMAND);
- tunables->lnd_map_on_demand = IBLND_DEFAULT_MAP_ON_DEMAND;
- }
-
- if (tunables->lnd_map_on_demand == 1) {
- /* it doesn't make sense to create a map for only one fragment */
- tunables->lnd_map_on_demand = 2;
- }
-
- if (!tunables->lnd_concurrent_sends) {
- if (tunables->lnd_map_on_demand > 0 &&
- tunables->lnd_map_on_demand <= IBLND_MAX_RDMA_FRAGS / 8) {
- tunables->lnd_concurrent_sends =
- ni->ni_peertxcredits * 2;
- } else {
- tunables->lnd_concurrent_sends = ni->ni_peertxcredits;
- }
- }
-
- if (tunables->lnd_concurrent_sends > ni->ni_peertxcredits * 2)
- tunables->lnd_concurrent_sends = ni->ni_peertxcredits * 2;
-
- if (tunables->lnd_concurrent_sends < ni->ni_peertxcredits / 2)
- tunables->lnd_concurrent_sends = ni->ni_peertxcredits / 2;
-
- if (tunables->lnd_concurrent_sends < ni->ni_peertxcredits) {
- CWARN("Concurrent sends %d is lower than message queue size: %d, performance may drop slightly.\n",
- tunables->lnd_concurrent_sends, ni->ni_peertxcredits);
- }
-
- if (!tunables->lnd_fmr_pool_size)
- tunables->lnd_fmr_pool_size = fmr_pool_size;
- if (!tunables->lnd_fmr_flush_trigger)
- tunables->lnd_fmr_flush_trigger = fmr_flush_trigger;
- if (!tunables->lnd_fmr_cache)
- tunables->lnd_fmr_cache = fmr_cache;
-
- return 0;
-}
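-/*
- * Worked example (with the module defaults above): map_on_demand
- * defaults to IBLND_MAX_RDMA_FRAGS, which exceeds
- * IBLND_MAX_RDMA_FRAGS / 8, so lnd_concurrent_sends starts out equal
- * to ni_peertxcredits; the clamps above then keep any explicit value
- * within [ni_peertxcredits / 2, ni_peertxcredits * 2].
- */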
-
-void kiblnd_tunables_init(void)
-{
- default_tunables.lnd_version = 0;
- default_tunables.lnd_peercredits_hiw = peer_credits_hiw;
- default_tunables.lnd_map_on_demand = map_on_demand;
- default_tunables.lnd_concurrent_sends = concurrent_sends;
- default_tunables.lnd_fmr_pool_size = fmr_pool_size;
- default_tunables.lnd_fmr_flush_trigger = fmr_flush_trigger;
- default_tunables.lnd_fmr_cache = fmr_cache;
-}
diff --git a/drivers/staging/lustre/lnet/klnds/socklnd/Makefile b/drivers/staging/lustre/lnet/klnds/socklnd/Makefile
deleted file mode 100644
index a7da1abfc804..000000000000
--- a/drivers/staging/lustre/lnet/klnds/socklnd/Makefile
+++ /dev/null
@@ -1,6 +0,0 @@
-subdir-ccflags-y += -I$(srctree)/drivers/staging/lustre/include
-subdir-ccflags-y += -I$(srctree)/drivers/staging/lustre/lustre/include
-
-obj-$(CONFIG_LNET) += ksocklnd.o
-
-ksocklnd-y := socklnd.o socklnd_cb.o socklnd_proto.o socklnd_modparams.o socklnd_lib.o
diff --git a/drivers/staging/lustre/lnet/klnds/socklnd/socklnd.c b/drivers/staging/lustre/lnet/klnds/socklnd/socklnd.c
deleted file mode 100644
index 7086678e1c3e..000000000000
--- a/drivers/staging/lustre/lnet/klnds/socklnd/socklnd.c
+++ /dev/null
@@ -1,2918 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/*
- * GPL HEADER START
- *
- * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 only,
- * as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * General Public License version 2 for more details (a copy is included
- * in the LICENSE file that accompanied this code).
- *
- * You should have received a copy of the GNU General Public License
- * version 2 along with this program; If not, see
- * http://www.gnu.org/licenses/gpl-2.0.html
- *
- * GPL HEADER END
- */
-/*
- * Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved.
- * Use is subject to license terms.
- *
- * Copyright (c) 2011, 2015, Intel Corporation.
- */
-/*
- * This file is part of Lustre, http://www.lustre.org/
- * Lustre is a trademark of Sun Microsystems, Inc.
- *
- * lnet/klnds/socklnd/socklnd.c
- *
- * Author: Zach Brown <zab@zabbo.net>
- * Author: Peter J. Braam <braam@clusterfs.com>
- * Author: Phil Schwan <phil@clusterfs.com>
- * Author: Eric Barton <eric@bartonsoftware.com>
- */
-
-#include "socklnd.h"
-
-static struct lnet_lnd the_ksocklnd;
-struct ksock_nal_data ksocknal_data;
-
-static struct ksock_interface *
-ksocknal_ip2iface(struct lnet_ni *ni, __u32 ip)
-{
- struct ksock_net *net = ni->ni_data;
- int i;
- struct ksock_interface *iface;
-
- for (i = 0; i < net->ksnn_ninterfaces; i++) {
- LASSERT(i < LNET_MAX_INTERFACES);
- iface = &net->ksnn_interfaces[i];
-
- if (iface->ksni_ipaddr == ip)
- return iface;
- }
-
- return NULL;
-}
-
-static struct ksock_route *
-ksocknal_create_route(__u32 ipaddr, int port)
-{
- struct ksock_route *route;
-
- route = kzalloc(sizeof(*route), GFP_NOFS);
- if (!route)
- return NULL;
-
- atomic_set(&route->ksnr_refcount, 1);
- route->ksnr_peer = NULL;
- route->ksnr_retry_interval = 0; /* OK to connect at any time */
- route->ksnr_ipaddr = ipaddr;
- route->ksnr_port = port;
- route->ksnr_scheduled = 0;
- route->ksnr_connecting = 0;
- route->ksnr_connected = 0;
- route->ksnr_deleted = 0;
- route->ksnr_conn_count = 0;
- route->ksnr_share_count = 0;
-
- return route;
-}
-
-void
-ksocknal_destroy_route(struct ksock_route *route)
-{
- LASSERT(!atomic_read(&route->ksnr_refcount));
-
- if (route->ksnr_peer)
- ksocknal_peer_decref(route->ksnr_peer);
-
- kfree(route);
-}
-
-static int
-ksocknal_create_peer(struct ksock_peer **peerp, struct lnet_ni *ni,
- struct lnet_process_id id)
-{
- int cpt = lnet_cpt_of_nid(id.nid);
- struct ksock_net *net = ni->ni_data;
- struct ksock_peer *peer;
-
- LASSERT(id.nid != LNET_NID_ANY);
- LASSERT(id.pid != LNET_PID_ANY);
- LASSERT(!in_interrupt());
-
- peer = kzalloc_cpt(sizeof(*peer), GFP_NOFS, cpt);
- if (!peer)
- return -ENOMEM;
-
- peer->ksnp_ni = ni;
- peer->ksnp_id = id;
- atomic_set(&peer->ksnp_refcount, 1); /* 1 ref for caller */
- peer->ksnp_closing = 0;
- peer->ksnp_accepting = 0;
- peer->ksnp_proto = NULL;
- peer->ksnp_last_alive = 0;
- peer->ksnp_zc_next_cookie = SOCKNAL_KEEPALIVE_PING + 1;
-
- INIT_LIST_HEAD(&peer->ksnp_conns);
- INIT_LIST_HEAD(&peer->ksnp_routes);
- INIT_LIST_HEAD(&peer->ksnp_tx_queue);
- INIT_LIST_HEAD(&peer->ksnp_zc_req_list);
- spin_lock_init(&peer->ksnp_lock);
-
- spin_lock_bh(&net->ksnn_lock);
-
- if (net->ksnn_shutdown) {
- spin_unlock_bh(&net->ksnn_lock);
-
- kfree(peer);
- CERROR("Can't create peer: network shutdown\n");
- return -ESHUTDOWN;
- }
-
- net->ksnn_npeers++;
-
- spin_unlock_bh(&net->ksnn_lock);
-
- *peerp = peer;
- return 0;
-}
-
-void
-ksocknal_destroy_peer(struct ksock_peer *peer)
-{
- struct ksock_net *net = peer->ksnp_ni->ni_data;
-
- CDEBUG(D_NET, "peer %s %p deleted\n",
- libcfs_id2str(peer->ksnp_id), peer);
-
- LASSERT(!atomic_read(&peer->ksnp_refcount));
- LASSERT(!peer->ksnp_accepting);
- LASSERT(list_empty(&peer->ksnp_conns));
- LASSERT(list_empty(&peer->ksnp_routes));
- LASSERT(list_empty(&peer->ksnp_tx_queue));
- LASSERT(list_empty(&peer->ksnp_zc_req_list));
-
- kfree(peer);
-
- /*
- * NB a peer's connections and routes keep a reference on their peer
- * until they are destroyed, so we can be assured that _all_ state to
- * do with this peer has been cleaned up when its refcount drops to
- * zero.
- */
- spin_lock_bh(&net->ksnn_lock);
- net->ksnn_npeers--;
- spin_unlock_bh(&net->ksnn_lock);
-}
-
-struct ksock_peer *
-ksocknal_find_peer_locked(struct lnet_ni *ni, struct lnet_process_id id)
-{
- struct list_head *peer_list = ksocknal_nid2peerlist(id.nid);
- struct ksock_peer *peer;
-
- list_for_each_entry(peer, peer_list, ksnp_list) {
- LASSERT(!peer->ksnp_closing);
-
- if (peer->ksnp_ni != ni)
- continue;
-
- if (peer->ksnp_id.nid != id.nid ||
- peer->ksnp_id.pid != id.pid)
- continue;
-
- CDEBUG(D_NET, "got peer [%p] -> %s (%d)\n",
- peer, libcfs_id2str(id),
- atomic_read(&peer->ksnp_refcount));
- return peer;
- }
- return NULL;
-}
-
-struct ksock_peer *
-ksocknal_find_peer(struct lnet_ni *ni, struct lnet_process_id id)
-{
- struct ksock_peer *peer;
-
- read_lock(&ksocknal_data.ksnd_global_lock);
- peer = ksocknal_find_peer_locked(ni, id);
- if (peer) /* +1 ref for caller? */
- ksocknal_peer_addref(peer);
- read_unlock(&ksocknal_data.ksnd_global_lock);
-
- return peer;
-}
-
-static void
-ksocknal_unlink_peer_locked(struct ksock_peer *peer)
-{
- int i;
- __u32 ip;
- struct ksock_interface *iface;
-
- for (i = 0; i < peer->ksnp_n_passive_ips; i++) {
- LASSERT(i < LNET_MAX_INTERFACES);
- ip = peer->ksnp_passive_ips[i];
-
- iface = ksocknal_ip2iface(peer->ksnp_ni, ip);
- /*
- * All IPs in peer->ksnp_passive_ips[] come from the
- * interface list, therefore the call must succeed.
- */
- LASSERT(iface);
-
- CDEBUG(D_NET, "peer=%p iface=%p ksni_nroutes=%d\n",
- peer, iface, iface->ksni_nroutes);
- iface->ksni_npeers--;
- }
-
- LASSERT(list_empty(&peer->ksnp_conns));
- LASSERT(list_empty(&peer->ksnp_routes));
- LASSERT(!peer->ksnp_closing);
- peer->ksnp_closing = 1;
- list_del(&peer->ksnp_list);
- /* lose peerlist's ref */
- ksocknal_peer_decref(peer);
-}
-
-static int
-ksocknal_get_peer_info(struct lnet_ni *ni, int index,
- struct lnet_process_id *id, __u32 *myip, __u32 *peer_ip,
- int *port, int *conn_count, int *share_count)
-{
- struct ksock_peer *peer;
- struct list_head *ptmp;
- struct ksock_route *route;
- struct list_head *rtmp;
- int i;
- int j;
- int rc = -ENOENT;
-
- read_lock(&ksocknal_data.ksnd_global_lock);
-
- for (i = 0; i < ksocknal_data.ksnd_peer_hash_size; i++) {
- list_for_each(ptmp, &ksocknal_data.ksnd_peers[i]) {
- peer = list_entry(ptmp, struct ksock_peer, ksnp_list);
-
- if (peer->ksnp_ni != ni)
- continue;
-
- if (!peer->ksnp_n_passive_ips &&
- list_empty(&peer->ksnp_routes)) {
- if (index-- > 0)
- continue;
-
- *id = peer->ksnp_id;
- *myip = 0;
- *peer_ip = 0;
- *port = 0;
- *conn_count = 0;
- *share_count = 0;
- rc = 0;
- goto out;
- }
-
- for (j = 0; j < peer->ksnp_n_passive_ips; j++) {
- if (index-- > 0)
- continue;
-
- *id = peer->ksnp_id;
- *myip = peer->ksnp_passive_ips[j];
- *peer_ip = 0;
- *port = 0;
- *conn_count = 0;
- *share_count = 0;
- rc = 0;
- goto out;
- }
-
- list_for_each(rtmp, &peer->ksnp_routes) {
- if (index-- > 0)
- continue;
-
- route = list_entry(rtmp, struct ksock_route,
- ksnr_list);
-
- *id = peer->ksnp_id;
- *myip = route->ksnr_myipaddr;
- *peer_ip = route->ksnr_ipaddr;
- *port = route->ksnr_port;
- *conn_count = route->ksnr_conn_count;
- *share_count = route->ksnr_share_count;
- rc = 0;
- goto out;
- }
- }
- }
- out:
- read_unlock(&ksocknal_data.ksnd_global_lock);
- return rc;
-}
-
-static void
-ksocknal_associate_route_conn_locked(struct ksock_route *route,
- struct ksock_conn *conn)
-{
- struct ksock_peer *peer = route->ksnr_peer;
- int type = conn->ksnc_type;
- struct ksock_interface *iface;
-
- conn->ksnc_route = route;
- ksocknal_route_addref(route);
-
- if (route->ksnr_myipaddr != conn->ksnc_myipaddr) {
- if (!route->ksnr_myipaddr) {
- /* route wasn't bound locally yet (the initial route) */
- CDEBUG(D_NET, "Binding %s %pI4h to %pI4h\n",
- libcfs_id2str(peer->ksnp_id),
- &route->ksnr_ipaddr,
- &conn->ksnc_myipaddr);
- } else {
- CDEBUG(D_NET, "Rebinding %s %pI4h from %pI4h to %pI4h\n",
- libcfs_id2str(peer->ksnp_id),
- &route->ksnr_ipaddr,
- &route->ksnr_myipaddr,
- &conn->ksnc_myipaddr);
-
- iface = ksocknal_ip2iface(route->ksnr_peer->ksnp_ni,
- route->ksnr_myipaddr);
- if (iface)
- iface->ksni_nroutes--;
- }
- route->ksnr_myipaddr = conn->ksnc_myipaddr;
- iface = ksocknal_ip2iface(route->ksnr_peer->ksnp_ni,
- route->ksnr_myipaddr);
- if (iface)
- iface->ksni_nroutes++;
- }
-
- route->ksnr_connected |= (1 << type);
- route->ksnr_conn_count++;
-
- /*
- * Successful connection => further attempts can
- * proceed immediately
- */
- route->ksnr_retry_interval = 0;
-}
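-
-/*
- * NB ksnr_connected is a bitmask of the connection types established on
- * this route. Assuming the socklnd.h values (SOCKLND_CONN_CONTROL = 1,
- * SOCKLND_CONN_BULK_IN = 2, SOCKLND_CONN_BULK_OUT = 3), a route with
- * control and bulk-out conns up would have
- *
- *	ksnr_connected == (1 << SOCKLND_CONN_CONTROL) |
- *			  (1 << SOCKLND_CONN_BULK_OUT)
- *
- * ksocknal_close_conn_locked() clears a type's bit only when the last
- * conn of that type on the route goes away.
- */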
-
-static void
-ksocknal_add_route_locked(struct ksock_peer *peer, struct ksock_route *route)
-{
- struct list_head *tmp;
- struct ksock_conn *conn;
- struct ksock_route *route2;
-
- LASSERT(!peer->ksnp_closing);
- LASSERT(!route->ksnr_peer);
- LASSERT(!route->ksnr_scheduled);
- LASSERT(!route->ksnr_connecting);
- LASSERT(!route->ksnr_connected);
-
- /* LASSERT(unique) */
- list_for_each(tmp, &peer->ksnp_routes) {
- route2 = list_entry(tmp, struct ksock_route, ksnr_list);
-
- if (route2->ksnr_ipaddr == route->ksnr_ipaddr) {
- CERROR("Duplicate route %s %pI4h\n",
- libcfs_id2str(peer->ksnp_id),
- &route->ksnr_ipaddr);
- LBUG();
- }
- }
-
- route->ksnr_peer = peer;
- ksocknal_peer_addref(peer);
- /* peer's routelist takes over my ref on 'route' */
- list_add_tail(&route->ksnr_list, &peer->ksnp_routes);
-
- list_for_each(tmp, &peer->ksnp_conns) {
- conn = list_entry(tmp, struct ksock_conn, ksnc_list);
-
- if (conn->ksnc_ipaddr != route->ksnr_ipaddr)
- continue;
-
- ksocknal_associate_route_conn_locked(route, conn);
- /* keep going (typed routes) */
- }
-}
-
-static void
-ksocknal_del_route_locked(struct ksock_route *route)
-{
- struct ksock_peer *peer = route->ksnr_peer;
- struct ksock_interface *iface;
- struct ksock_conn *conn;
- struct list_head *ctmp;
- struct list_head *cnxt;
-
- LASSERT(!route->ksnr_deleted);
-
- /* Close associated conns */
- list_for_each_safe(ctmp, cnxt, &peer->ksnp_conns) {
- conn = list_entry(ctmp, struct ksock_conn, ksnc_list);
-
- if (conn->ksnc_route != route)
- continue;
-
- ksocknal_close_conn_locked(conn, 0);
- }
-
- if (route->ksnr_myipaddr) {
- iface = ksocknal_ip2iface(route->ksnr_peer->ksnp_ni,
- route->ksnr_myipaddr);
- if (iface)
- iface->ksni_nroutes--;
- }
-
- route->ksnr_deleted = 1;
- list_del(&route->ksnr_list);
- ksocknal_route_decref(route); /* drop peer's ref */
-
- if (list_empty(&peer->ksnp_routes) &&
- list_empty(&peer->ksnp_conns)) {
- /*
- * I've just removed the last route to a peer with no active
- * connections
- */
- ksocknal_unlink_peer_locked(peer);
- }
-}
-
-int
-ksocknal_add_peer(struct lnet_ni *ni, struct lnet_process_id id, __u32 ipaddr,
- int port)
-{
- struct ksock_peer *peer;
- struct ksock_peer *peer2;
- struct ksock_route *route;
- struct ksock_route *route2;
- int rc;
-
- if (id.nid == LNET_NID_ANY ||
- id.pid == LNET_PID_ANY)
- return -EINVAL;
-
- /* Have a brand new peer ready... */
- rc = ksocknal_create_peer(&peer, ni, id);
- if (rc)
- return rc;
-
- route = ksocknal_create_route(ipaddr, port);
- if (!route) {
- ksocknal_peer_decref(peer);
- return -ENOMEM;
- }
-
- write_lock_bh(&ksocknal_data.ksnd_global_lock);
-
- /* always called with a ref on ni, so shutdown can't have started */
- LASSERT(!((struct ksock_net *)ni->ni_data)->ksnn_shutdown);
-
- peer2 = ksocknal_find_peer_locked(ni, id);
- if (peer2) {
- ksocknal_peer_decref(peer);
- peer = peer2;
- } else {
- /* peer table takes my ref on peer */
- list_add_tail(&peer->ksnp_list,
- ksocknal_nid2peerlist(id.nid));
- }
-
- list_for_each_entry(route2, &peer->ksnp_routes, ksnr_list) {
- if (route2->ksnr_ipaddr == ipaddr) {
- /* Route already exists, use the old one */
- ksocknal_route_decref(route);
- route2->ksnr_share_count++;
- goto out;
- }
- }
- /* Route doesn't already exist, add the new one */
- ksocknal_add_route_locked(peer, route);
- route->ksnr_share_count++;
-out:
- write_unlock_bh(&ksocknal_data.ksnd_global_lock);
-
- return 0;
-}
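-
-/*
- * Usage note: adding the same NID/IP pair again just bumps
- * ksnr_share_count on the existing route (no duplicate is created),
- * whereas ksocknal_del_peer_locked() below zeroes the count and deletes
- * the route outright. A non-zero share count also marks the route as
- * manually created, which ksocknal_peer_del_interface_locked() uses to
- * keep (but unbind) such routes when an interface is removed.
- */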
-
-static void
-ksocknal_del_peer_locked(struct ksock_peer *peer, __u32 ip)
-{
- struct ksock_conn *conn;
- struct ksock_route *route;
- struct list_head *tmp;
- struct list_head *nxt;
- int nshared;
-
- LASSERT(!peer->ksnp_closing);
-
- /* Extra ref prevents peer disappearing until I'm done with it */
- ksocknal_peer_addref(peer);
-
- list_for_each_safe(tmp, nxt, &peer->ksnp_routes) {
- route = list_entry(tmp, struct ksock_route, ksnr_list);
-
- /* skip routes that don't match (ip == 0 matches them all) */
- if (!(!ip || route->ksnr_ipaddr == ip))
- continue;
-
- route->ksnr_share_count = 0;
- /* This deletes associated conns too */
- ksocknal_del_route_locked(route);
- }
-
- nshared = 0;
- list_for_each_safe(tmp, nxt, &peer->ksnp_routes) {
- route = list_entry(tmp, struct ksock_route, ksnr_list);
- nshared += route->ksnr_share_count;
- }
-
- if (!nshared) {
- /*
- * remove everything else if there are no explicit entries
- * left
- */
- list_for_each_safe(tmp, nxt, &peer->ksnp_routes) {
- route = list_entry(tmp, struct ksock_route, ksnr_list);
-
- /* we should only be removing auto-entries */
- LASSERT(!route->ksnr_share_count);
- ksocknal_del_route_locked(route);
- }
-
- list_for_each_safe(tmp, nxt, &peer->ksnp_conns) {
- conn = list_entry(tmp, struct ksock_conn, ksnc_list);
-
- ksocknal_close_conn_locked(conn, 0);
- }
- }
-
- ksocknal_peer_decref(peer);
- /* NB peer unlinks itself when last conn/route is removed */
-}
-
-static int
-ksocknal_del_peer(struct lnet_ni *ni, struct lnet_process_id id, __u32 ip)
-{
- LIST_HEAD(zombies);
- struct list_head *ptmp;
- struct list_head *pnxt;
- struct ksock_peer *peer;
- int lo;
- int hi;
- int i;
- int rc = -ENOENT;
-
- write_lock_bh(&ksocknal_data.ksnd_global_lock);
-
- if (id.nid != LNET_NID_ANY) {
- lo = (int)(ksocknal_nid2peerlist(id.nid) - ksocknal_data.ksnd_peers);
- hi = (int)(ksocknal_nid2peerlist(id.nid) - ksocknal_data.ksnd_peers);
- } else {
- lo = 0;
- hi = ksocknal_data.ksnd_peer_hash_size - 1;
- }
-
- for (i = lo; i <= hi; i++) {
- list_for_each_safe(ptmp, pnxt, &ksocknal_data.ksnd_peers[i]) {
- peer = list_entry(ptmp, struct ksock_peer, ksnp_list);
-
- if (peer->ksnp_ni != ni)
- continue;
-
- if (!((id.nid == LNET_NID_ANY || peer->ksnp_id.nid == id.nid) &&
- (id.pid == LNET_PID_ANY || peer->ksnp_id.pid == id.pid)))
- continue;
-
- ksocknal_peer_addref(peer); /* a ref for me... */
-
- ksocknal_del_peer_locked(peer, ip);
-
- if (peer->ksnp_closing &&
- !list_empty(&peer->ksnp_tx_queue)) {
- LASSERT(list_empty(&peer->ksnp_conns));
- LASSERT(list_empty(&peer->ksnp_routes));
-
- list_splice_init(&peer->ksnp_tx_queue,
- &zombies);
- }
-
- ksocknal_peer_decref(peer); /* ...till here */
-
- rc = 0; /* matched! */
- }
- }
-
- write_unlock_bh(&ksocknal_data.ksnd_global_lock);
-
- ksocknal_txlist_done(ni, &zombies, 1);
-
- return rc;
-}
-
-static struct ksock_conn *
-ksocknal_get_conn_by_idx(struct lnet_ni *ni, int index)
-{
- struct ksock_peer *peer;
- struct list_head *ptmp;
- struct ksock_conn *conn;
- struct list_head *ctmp;
- int i;
-
- read_lock(&ksocknal_data.ksnd_global_lock);
-
- for (i = 0; i < ksocknal_data.ksnd_peer_hash_size; i++) {
- list_for_each(ptmp, &ksocknal_data.ksnd_peers[i]) {
- peer = list_entry(ptmp, struct ksock_peer, ksnp_list);
-
- LASSERT(!peer->ksnp_closing);
-
- if (peer->ksnp_ni != ni)
- continue;
-
- list_for_each(ctmp, &peer->ksnp_conns) {
- if (index-- > 0)
- continue;
-
- conn = list_entry(ctmp, struct ksock_conn,
- ksnc_list);
- ksocknal_conn_addref(conn);
- read_unlock(&ksocknal_data.ksnd_global_lock);
- return conn;
- }
- }
- }
-
- read_unlock(&ksocknal_data.ksnd_global_lock);
- return NULL;
-}
-
-static struct ksock_sched *
-ksocknal_choose_scheduler_locked(unsigned int cpt)
-{
- struct ksock_sched_info *info = ksocknal_data.ksnd_sched_info[cpt];
- struct ksock_sched *sched;
- int i;
-
- LASSERT(info->ksi_nthreads > 0);
-
- sched = &info->ksi_scheds[0];
- /*
- * NB: it's safe so far, but info->ksi_nthreads could be changed
- * at runtime when we have dynamic LNet configuration, then we
- * need to take care of this.
- */
- for (i = 1; i < info->ksi_nthreads; i++) {
- if (sched->kss_nconns > info->ksi_scheds[i].kss_nconns)
- sched = &info->ksi_scheds[i];
- }
-
- return sched;
-}
-
-static int
-ksocknal_local_ipvec(struct lnet_ni *ni, __u32 *ipaddrs)
-{
- struct ksock_net *net = ni->ni_data;
- int i;
- int nip;
-
- read_lock(&ksocknal_data.ksnd_global_lock);
-
- nip = net->ksnn_ninterfaces;
- LASSERT(nip <= LNET_MAX_INTERFACES);
-
- /*
- * Only offer interfaces for additional connections if I have
- * more than one.
- */
- if (nip < 2) {
- read_unlock(&ksocknal_data.ksnd_global_lock);
- return 0;
- }
-
- for (i = 0; i < nip; i++) {
- ipaddrs[i] = net->ksnn_interfaces[i].ksni_ipaddr;
- LASSERT(ipaddrs[i]);
- }
-
- read_unlock(&ksocknal_data.ksnd_global_lock);
- return nip;
-}
-
-static int
-ksocknal_match_peerip(struct ksock_interface *iface, __u32 *ips, int nips)
-{
- int best_netmatch = 0;
- int best_xor = 0;
- int best = -1;
- int this_xor;
- int this_netmatch;
- int i;
-
- for (i = 0; i < nips; i++) {
- if (!ips[i])
- continue;
-
- this_xor = ips[i] ^ iface->ksni_ipaddr;
- this_netmatch = !(this_xor & iface->ksni_netmask) ? 1 : 0;
-
- if (!(best < 0 ||
- best_netmatch < this_netmatch ||
- (best_netmatch == this_netmatch &&
- best_xor > this_xor)))
- continue;
-
- best = i;
- best_netmatch = this_netmatch;
- best_xor = this_xor;
- }
-
- LASSERT(best >= 0);
- return best;
-}
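-
-/*
- * Worked example with made-up addresses: matching peer IPs
- * { 10.0.0.7, 192.168.1.9 } against interface 10.0.0.5/255.255.255.0:
- *
- *	10.0.0.7:     xor = 0x00000002, netmatch = 1 (same /24)
- *	192.168.1.9:  xor = 0xcaa8010c, netmatch = 0
- *
- * so 10.0.0.7 wins on netmatch; among several same-subnet candidates
- * the smallest XOR (numerically closest address) breaks the tie.
- */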
-
-static int
-ksocknal_select_ips(struct ksock_peer *peer, __u32 *peerips, int n_peerips)
-{
- rwlock_t *global_lock = &ksocknal_data.ksnd_global_lock;
- struct ksock_net *net = peer->ksnp_ni->ni_data;
- struct ksock_interface *iface;
- struct ksock_interface *best_iface;
- int n_ips;
- int i;
- int j;
- int k;
- __u32 ip;
- __u32 xor;
- int this_netmatch;
- int best_netmatch;
- int best_npeers;
-
- /*
- * CAVEAT EMPTOR: We do all our interface matching with an
- * exclusive hold of global lock at IRQ priority. We're only
- * expecting to be dealing with small numbers of interfaces, so the
- * O(n**3)-ness shouldn't matter
- */
- /*
- * Also note that I'm not going to return more than n_peerips
- * interfaces, even if I have more myself
- */
- write_lock_bh(global_lock);
-
- LASSERT(n_peerips <= LNET_MAX_INTERFACES);
- LASSERT(net->ksnn_ninterfaces <= LNET_MAX_INTERFACES);
-
- /*
- * Only match interfaces for additional connections
- * if I have > 1 interface
- */
- n_ips = (net->ksnn_ninterfaces < 2) ? 0 :
- min(n_peerips, net->ksnn_ninterfaces);
-
- for (i = 0; peer->ksnp_n_passive_ips < n_ips; i++) {
- /* ^ yes really... */
-
- /*
- * If we have any new interfaces, first tick off all the
- * peer IPs that match old interfaces, then choose new
- * interfaces to match the remaining peer IPs.
- * We don't forget interfaces we've stopped using; we might
- * start using them again...
- */
- if (i < peer->ksnp_n_passive_ips) {
- /* Old interface. */
- ip = peer->ksnp_passive_ips[i];
- best_iface = ksocknal_ip2iface(peer->ksnp_ni, ip);
-
- /* peer passive ips are kept up to date */
- LASSERT(best_iface);
- } else {
- /* choose a new interface */
- LASSERT(i == peer->ksnp_n_passive_ips);
-
- best_iface = NULL;
- best_netmatch = 0;
- best_npeers = 0;
-
- for (j = 0; j < net->ksnn_ninterfaces; j++) {
- iface = &net->ksnn_interfaces[j];
- ip = iface->ksni_ipaddr;
-
- for (k = 0; k < peer->ksnp_n_passive_ips; k++)
- if (peer->ksnp_passive_ips[k] == ip)
- break;
-
- if (k < peer->ksnp_n_passive_ips) /* using it already */
- continue;
-
- k = ksocknal_match_peerip(iface, peerips,
- n_peerips);
- xor = ip ^ peerips[k];
- this_netmatch = !(xor & iface->ksni_netmask) ? 1 : 0;
-
- if (!(!best_iface ||
- best_netmatch < this_netmatch ||
- (best_netmatch == this_netmatch &&
- best_npeers > iface->ksni_npeers)))
- continue;
-
- best_iface = iface;
- best_netmatch = this_netmatch;
- best_npeers = iface->ksni_npeers;
- }
-
- LASSERT(best_iface);
-
- best_iface->ksni_npeers++;
- ip = best_iface->ksni_ipaddr;
- peer->ksnp_passive_ips[i] = ip;
- peer->ksnp_n_passive_ips = i + 1;
- }
-
- /* mark the best matching peer IP used */
- j = ksocknal_match_peerip(best_iface, peerips, n_peerips);
- peerips[j] = 0;
- }
-
- /* Overwrite input peer IP addresses */
- memcpy(peerips, peer->ksnp_passive_ips, n_ips * sizeof(*peerips));
-
- write_unlock_bh(global_lock);
-
- return n_ips;
-}
-
-static void
-ksocknal_create_routes(struct ksock_peer *peer, int port,
- __u32 *peer_ipaddrs, int npeer_ipaddrs)
-{
- struct ksock_route *newroute = NULL;
- rwlock_t *global_lock = &ksocknal_data.ksnd_global_lock;
- struct lnet_ni *ni = peer->ksnp_ni;
- struct ksock_net *net = ni->ni_data;
- struct list_head *rtmp;
- struct ksock_route *route;
- struct ksock_interface *iface;
- struct ksock_interface *best_iface;
- int best_netmatch;
- int this_netmatch;
- int best_nroutes;
- int i;
- int j;
-
- /*
- * CAVEAT EMPTOR: We do all our interface matching with an
- * exclusive hold of global lock at IRQ priority. We're only
- * expecting to be dealing with small numbers of interfaces, so the
- * O(n**3)-ness here shouldn't matter
- */
- write_lock_bh(global_lock);
-
- if (net->ksnn_ninterfaces < 2) {
- /*
- * Only create additional connections
- * if I have > 1 interface
- */
- write_unlock_bh(global_lock);
- return;
- }
-
- LASSERT(npeer_ipaddrs <= LNET_MAX_INTERFACES);
-
- for (i = 0; i < npeer_ipaddrs; i++) {
- if (newroute) {
- newroute->ksnr_ipaddr = peer_ipaddrs[i];
- } else {
- write_unlock_bh(global_lock);
-
- newroute = ksocknal_create_route(peer_ipaddrs[i], port);
- if (!newroute)
- return;
-
- write_lock_bh(global_lock);
- }
-
- if (peer->ksnp_closing) {
- /* peer got closed under me */
- break;
- }
-
- /* Already got a route? */
- route = NULL;
- list_for_each(rtmp, &peer->ksnp_routes) {
- route = list_entry(rtmp, struct ksock_route, ksnr_list);
-
- if (route->ksnr_ipaddr == newroute->ksnr_ipaddr)
- break;
-
- route = NULL;
- }
- if (route)
- continue;
-
- best_iface = NULL;
- best_nroutes = 0;
- best_netmatch = 0;
-
- LASSERT(net->ksnn_ninterfaces <= LNET_MAX_INTERFACES);
-
- /* Select interface to connect from */
- for (j = 0; j < net->ksnn_ninterfaces; j++) {
- iface = &net->ksnn_interfaces[j];
-
- /* Using this interface already? */
- list_for_each(rtmp, &peer->ksnp_routes) {
- route = list_entry(rtmp, struct ksock_route,
- ksnr_list);
-
- if (route->ksnr_myipaddr == iface->ksni_ipaddr)
- break;
-
- route = NULL;
- }
- if (route)
- continue;
-
- this_netmatch = (!((iface->ksni_ipaddr ^
- newroute->ksnr_ipaddr) &
- iface->ksni_netmask)) ? 1 : 0;
-
- if (!(!best_iface ||
- best_netmatch < this_netmatch ||
- (best_netmatch == this_netmatch &&
- best_nroutes > iface->ksni_nroutes)))
- continue;
-
- best_iface = iface;
- best_netmatch = this_netmatch;
- best_nroutes = iface->ksni_nroutes;
- }
-
- if (!best_iface)
- continue;
-
- newroute->ksnr_myipaddr = best_iface->ksni_ipaddr;
- best_iface->ksni_nroutes++;
-
- ksocknal_add_route_locked(peer, newroute);
- newroute = NULL;
- }
-
- write_unlock_bh(global_lock);
- if (newroute)
- ksocknal_route_decref(newroute);
-}
-
-int
-ksocknal_accept(struct lnet_ni *ni, struct socket *sock)
-{
- struct ksock_connreq *cr;
- int rc;
- __u32 peer_ip;
- int peer_port;
-
- rc = lnet_sock_getaddr(sock, 1, &peer_ip, &peer_port);
- LASSERT(!rc); /* we succeeded before */
-
- cr = kzalloc(sizeof(*cr), GFP_NOFS);
- if (!cr) {
- LCONSOLE_ERROR_MSG(0x12f, "Dropping connection request from %pI4h: memory exhausted\n",
- &peer_ip);
- return -ENOMEM;
- }
-
- lnet_ni_addref(ni);
- cr->ksncr_ni = ni;
- cr->ksncr_sock = sock;
-
- spin_lock_bh(&ksocknal_data.ksnd_connd_lock);
-
- list_add_tail(&cr->ksncr_list, &ksocknal_data.ksnd_connd_connreqs);
- wake_up(&ksocknal_data.ksnd_connd_waitq);
-
- spin_unlock_bh(&ksocknal_data.ksnd_connd_lock);
- return 0;
-}
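-
-/*
- * The connection request queued above is consumed asynchronously: a
- * connd thread (ksocknal_connd() in socklnd_cb.c) dequeues it from
- * ksnd_connd_connreqs and completes the passive handshake by calling
- * ksocknal_create_conn() with a NULL route, which is why accept itself
- * never blocks on the HELLO exchange.
- */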
-
-static int
-ksocknal_connecting(struct ksock_peer *peer, __u32 ipaddr)
-{
- struct ksock_route *route;
-
- list_for_each_entry(route, &peer->ksnp_routes, ksnr_list) {
- if (route->ksnr_ipaddr == ipaddr)
- return route->ksnr_connecting;
- }
- return 0;
-}
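-
-/*
- * Example of the race this check resolves (hypothetical NIDs): if
- * 12345@tcp and 67890@tcp try to connect to each other simultaneously,
- * each sees the other's incoming attempt while its own route is still
- * connecting. In ksocknal_create_conn() below, the higher-NID node
- * rejects the incoming conn with EALREADY, so only the connection
- * initiated by the higher NID survives.
- */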
-
-int
-ksocknal_create_conn(struct lnet_ni *ni, struct ksock_route *route,
- struct socket *sock, int type)
-{
- rwlock_t *global_lock = &ksocknal_data.ksnd_global_lock;
- LIST_HEAD(zombies);
- struct lnet_process_id peerid;
- struct list_head *tmp;
- __u64 incarnation;
- struct ksock_conn *conn;
- struct ksock_conn *conn2;
- struct ksock_peer *peer = NULL;
- struct ksock_peer *peer2;
- struct ksock_sched *sched;
- struct ksock_hello_msg *hello;
- int cpt;
- struct ksock_tx *tx;
- struct ksock_tx *txtmp;
- int rc;
- int active;
- char *warn = NULL;
-
- active = !!route;
-
- LASSERT(active == (type != SOCKLND_CONN_NONE));
-
- conn = kzalloc(sizeof(*conn), GFP_NOFS);
- if (!conn) {
- rc = -ENOMEM;
- goto failed_0;
- }
-
- conn->ksnc_peer = NULL;
- conn->ksnc_route = NULL;
- conn->ksnc_sock = sock;
- /*
- * 2 refs: 1 for the conn itself, plus an extra ref that prevents
- * the socket being closed before the connection is established
- */
- atomic_set(&conn->ksnc_sock_refcount, 2);
- conn->ksnc_type = type;
- ksocknal_lib_save_callback(sock, conn);
- atomic_set(&conn->ksnc_conn_refcount, 1); /* 1 ref for me */
-
- conn->ksnc_rx_ready = 0;
- conn->ksnc_rx_scheduled = 0;
-
- INIT_LIST_HEAD(&conn->ksnc_tx_queue);
- conn->ksnc_tx_ready = 0;
- conn->ksnc_tx_scheduled = 0;
- conn->ksnc_tx_carrier = NULL;
- atomic_set(&conn->ksnc_tx_nob, 0);
-
- hello = kvzalloc(offsetof(struct ksock_hello_msg,
- kshm_ips[LNET_MAX_INTERFACES]),
- GFP_KERNEL);
- if (!hello) {
- rc = -ENOMEM;
- goto failed_1;
- }
-
- /* stash conn's local and remote addrs */
- rc = ksocknal_lib_get_conn_addrs(conn);
- if (rc)
- goto failed_1;
-
- /*
- * Find out/confirm peer's NID and connection type and get the
- * vector of interfaces she's willing to let me connect to.
- * Passive connections use the listener timeout since the peer sends
- * eagerly
- */
- if (active) {
- peer = route->ksnr_peer;
- LASSERT(ni == peer->ksnp_ni);
-
- /* Active connection sends HELLO eagerly */
- hello->kshm_nips = ksocknal_local_ipvec(ni, hello->kshm_ips);
- peerid = peer->ksnp_id;
-
- write_lock_bh(global_lock);
- conn->ksnc_proto = peer->ksnp_proto;
- write_unlock_bh(global_lock);
-
- if (!conn->ksnc_proto) {
- conn->ksnc_proto = &ksocknal_protocol_v3x;
-#if SOCKNAL_VERSION_DEBUG
- if (*ksocknal_tunables.ksnd_protocol == 2)
- conn->ksnc_proto = &ksocknal_protocol_v2x;
- else if (*ksocknal_tunables.ksnd_protocol == 1)
- conn->ksnc_proto = &ksocknal_protocol_v1x;
-#endif
- }
-
- rc = ksocknal_send_hello(ni, conn, peerid.nid, hello);
- if (rc)
- goto failed_1;
- } else {
- peerid.nid = LNET_NID_ANY;
- peerid.pid = LNET_PID_ANY;
-
- /* Passive, get protocol from peer */
- conn->ksnc_proto = NULL;
- }
-
- rc = ksocknal_recv_hello(ni, conn, hello, &peerid, &incarnation);
- if (rc < 0)
- goto failed_1;
-
- LASSERT(!rc || active);
- LASSERT(conn->ksnc_proto);
- LASSERT(peerid.nid != LNET_NID_ANY);
-
- cpt = lnet_cpt_of_nid(peerid.nid);
-
- if (active) {
- ksocknal_peer_addref(peer);
- write_lock_bh(global_lock);
- } else {
- rc = ksocknal_create_peer(&peer, ni, peerid);
- if (rc)
- goto failed_1;
-
- write_lock_bh(global_lock);
-
- /* called with a ref on ni, so shutdown can't have started */
- LASSERT(!((struct ksock_net *)ni->ni_data)->ksnn_shutdown);
-
- peer2 = ksocknal_find_peer_locked(ni, peerid);
- if (!peer2) {
- /*
- * NB this puts an "empty" peer in the peer
- * table (which takes my ref)
- */
- list_add_tail(&peer->ksnp_list,
- ksocknal_nid2peerlist(peerid.nid));
- } else {
- ksocknal_peer_decref(peer);
- peer = peer2;
- }
-
- /* +1 ref for me */
- ksocknal_peer_addref(peer);
- peer->ksnp_accepting++;
-
- /*
- * Am I already connecting to this guy? Resolve in
- * favour of higher NID...
- */
- if (peerid.nid < ni->ni_nid &&
- ksocknal_connecting(peer, conn->ksnc_ipaddr)) {
- rc = EALREADY;
- warn = "connection race resolution";
- goto failed_2;
- }
- }
-
- if (peer->ksnp_closing ||
- (active && route->ksnr_deleted)) {
- /* peer/route got closed under me */
- rc = -ESTALE;
- warn = "peer/route removed";
- goto failed_2;
- }
-
- if (!peer->ksnp_proto) {
- /*
- * Never connected before.
- * NB recv_hello may have returned EPROTO to signal my peer
- * wants a different protocol than the one I asked for.
- */
- LASSERT(list_empty(&peer->ksnp_conns));
-
- peer->ksnp_proto = conn->ksnc_proto;
- peer->ksnp_incarnation = incarnation;
- }
-
- if (peer->ksnp_proto != conn->ksnc_proto ||
- peer->ksnp_incarnation != incarnation) {
- /* Peer rebooted or I've got the wrong protocol version */
- ksocknal_close_peer_conns_locked(peer, 0, 0);
-
- peer->ksnp_proto = NULL;
- rc = ESTALE;
- warn = peer->ksnp_incarnation != incarnation ?
- "peer rebooted" :
- "wrong proto version";
- goto failed_2;
- }
-
- switch (rc) {
- default:
- LBUG();
- case 0:
- break;
- case EALREADY:
- warn = "lost conn race";
- goto failed_2;
- case EPROTO:
- warn = "retry with different protocol version";
- goto failed_2;
- }
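-
- /*
-  * NB on the sign convention above: positive values (EALREADY, EPROTO,
-  * ESTALE) are benign race/retry outcomes -- failed_2 logs them at
-  * D_NET and, on passive conns, replies with SOCKLND_CONN_NONE to ask
-  * the peer to retry -- whereas negative errnos are real failures and
-  * are reported with CERROR.
-  */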
-
- /*
- * Refuse to duplicate an existing connection, unless this is a
- * loopback connection
- */
- if (conn->ksnc_ipaddr != conn->ksnc_myipaddr) {
- list_for_each(tmp, &peer->ksnp_conns) {
- conn2 = list_entry(tmp, struct ksock_conn, ksnc_list);
-
- if (conn2->ksnc_ipaddr != conn->ksnc_ipaddr ||
- conn2->ksnc_myipaddr != conn->ksnc_myipaddr ||
- conn2->ksnc_type != conn->ksnc_type)
- continue;
-
- /*
- * Reply on a passive connection attempt so the peer
- * realises we're connected.
- */
- LASSERT(!rc);
- if (!active)
- rc = EALREADY;
-
- warn = "duplicate";
- goto failed_2;
- }
- }
-
- /*
- * If the connection created by this route didn't bind to the IP
- * address the route connected to, the connection/route matching
- * code below probably isn't going to work.
- */
- if (active &&
- route->ksnr_ipaddr != conn->ksnc_ipaddr) {
- CERROR("Route %s %pI4h connected to %pI4h\n",
- libcfs_id2str(peer->ksnp_id),
- &route->ksnr_ipaddr,
- &conn->ksnc_ipaddr);
- }
-
- /*
- * Search for a route corresponding to the new connection and
- * create an association. This allows incoming connections created
- * by routes in my peer to match my own route entries so I don't
- * continually create duplicate routes.
- */
- list_for_each(tmp, &peer->ksnp_routes) {
- route = list_entry(tmp, struct ksock_route, ksnr_list);
-
- if (route->ksnr_ipaddr != conn->ksnc_ipaddr)
- continue;
-
- ksocknal_associate_route_conn_locked(route, conn);
- break;
- }
-
- conn->ksnc_peer = peer; /* conn takes my ref on peer */
- peer->ksnp_last_alive = cfs_time_current();
- peer->ksnp_send_keepalive = 0;
- peer->ksnp_error = 0;
-
- sched = ksocknal_choose_scheduler_locked(cpt);
- sched->kss_nconns++;
- conn->ksnc_scheduler = sched;
-
- conn->ksnc_tx_last_post = cfs_time_current();
- /* Set the deadline for the outgoing HELLO to drain */
- conn->ksnc_tx_bufnob = sock->sk->sk_wmem_queued;
- conn->ksnc_tx_deadline = cfs_time_shift(*ksocknal_tunables.ksnd_timeout);
- mb(); /* order with adding to peer's conn list */
-
- list_add(&conn->ksnc_list, &peer->ksnp_conns);
- ksocknal_conn_addref(conn);
-
- ksocknal_new_packet(conn, 0);
-
- conn->ksnc_zc_capable = ksocknal_lib_zc_capable(conn);
-
- /* Take packets blocking for this connection. */
- list_for_each_entry_safe(tx, txtmp, &peer->ksnp_tx_queue, tx_list) {
- int match = conn->ksnc_proto->pro_match_tx(conn, tx,
- tx->tx_nonblk);
-
- if (match == SOCKNAL_MATCH_NO)
- continue;
-
- list_del(&tx->tx_list);
- ksocknal_queue_tx_locked(tx, conn);
- }
-
- write_unlock_bh(global_lock);
-
- /*
- * We've now got a new connection. Any errors from here on are just
- * like "normal" comms errors and we close the connection normally.
- * NB (a) we still have to send the reply HELLO for passive
- * connections,
- * (b) normal I/O on the conn is blocked until I setup and call the
- * socket callbacks.
- */
- CDEBUG(D_NET, "New conn %s p %d.x %pI4h -> %pI4h/%d incarnation:%lld sched[%d:%d]\n",
- libcfs_id2str(peerid), conn->ksnc_proto->pro_version,
- &conn->ksnc_myipaddr, &conn->ksnc_ipaddr,
- conn->ksnc_port, incarnation, cpt,
- (int)(sched - &sched->kss_info->ksi_scheds[0]));
-
- if (active) {
- /* additional routes after interface exchange? */
- ksocknal_create_routes(peer, conn->ksnc_port,
- hello->kshm_ips, hello->kshm_nips);
- } else {
- hello->kshm_nips = ksocknal_select_ips(peer, hello->kshm_ips,
- hello->kshm_nips);
- rc = ksocknal_send_hello(ni, conn, peerid.nid, hello);
- }
-
- kvfree(hello);
-
- /*
- * setup the socket AFTER I've received hello (it disables
- * SO_LINGER). I might call back to the acceptor who may want
- * to send a protocol version response and then close the
- * socket; this ensures the socket only tears down after the
- * response has been sent.
- */
- if (!rc)
- rc = ksocknal_lib_setup_sock(sock);
-
- write_lock_bh(global_lock);
-
- /* NB my callbacks block while I hold ksnd_global_lock */
- ksocknal_lib_set_callback(sock, conn);
-
- if (!active)
- peer->ksnp_accepting--;
-
- write_unlock_bh(global_lock);
-
- if (rc) {
- write_lock_bh(global_lock);
- if (!conn->ksnc_closing) {
- /* could be closed by another thread */
- ksocknal_close_conn_locked(conn, rc);
- }
- write_unlock_bh(global_lock);
- } else if (!ksocknal_connsock_addref(conn)) {
- /* Allow I/O to proceed. */
- ksocknal_read_callback(conn);
- ksocknal_write_callback(conn);
- ksocknal_connsock_decref(conn);
- }
-
- ksocknal_connsock_decref(conn);
- ksocknal_conn_decref(conn);
- return rc;
-
- failed_2:
- if (!peer->ksnp_closing &&
- list_empty(&peer->ksnp_conns) &&
- list_empty(&peer->ksnp_routes)) {
- list_add(&zombies, &peer->ksnp_tx_queue);
- list_del_init(&peer->ksnp_tx_queue);
- ksocknal_unlink_peer_locked(peer);
- }
-
- write_unlock_bh(global_lock);
-
- if (warn) {
- if (rc < 0)
- CERROR("Not creating conn %s type %d: %s\n",
- libcfs_id2str(peerid), conn->ksnc_type, warn);
- else
- CDEBUG(D_NET, "Not creating conn %s type %d: %s\n",
- libcfs_id2str(peerid), conn->ksnc_type, warn);
- }
-
- if (!active) {
- if (rc > 0) {
- /*
- * Request retry by replying with CONN_NONE
- * ksnc_proto has been set already
- */
- conn->ksnc_type = SOCKLND_CONN_NONE;
- hello->kshm_nips = 0;
- ksocknal_send_hello(ni, conn, peerid.nid, hello);
- }
-
- write_lock_bh(global_lock);
- peer->ksnp_accepting--;
- write_unlock_bh(global_lock);
- }
-
- ksocknal_txlist_done(ni, &zombies, 1);
- ksocknal_peer_decref(peer);
-
-failed_1:
- kvfree(hello);
-
- kfree(conn);
-
-failed_0:
- sock_release(sock);
- return rc;
-}
-
-void
-ksocknal_close_conn_locked(struct ksock_conn *conn, int error)
-{
- /*
- * This just does the immediate housekeeping and queues the
- * connection for the reaper to terminate.
- * Caller holds ksnd_global_lock exclusively in irq context
- */
- struct ksock_peer *peer = conn->ksnc_peer;
- struct ksock_route *route;
- struct ksock_conn *conn2;
- struct list_head *tmp;
-
- LASSERT(!peer->ksnp_error);
- LASSERT(!conn->ksnc_closing);
- conn->ksnc_closing = 1;
-
- /* ksnd_deathrow_conns takes over peer's ref */
- list_del(&conn->ksnc_list);
-
- route = conn->ksnc_route;
- if (route) {
- /* dissociate conn from route... */
- LASSERT(!route->ksnr_deleted);
- LASSERT(route->ksnr_connected & (1 << conn->ksnc_type));
-
- conn2 = NULL;
- list_for_each(tmp, &peer->ksnp_conns) {
- conn2 = list_entry(tmp, struct ksock_conn, ksnc_list);
-
- if (conn2->ksnc_route == route &&
- conn2->ksnc_type == conn->ksnc_type)
- break;
-
- conn2 = NULL;
- }
- if (!conn2)
- route->ksnr_connected &= ~(1 << conn->ksnc_type);
-
- conn->ksnc_route = NULL;
-
- ksocknal_route_decref(route); /* drop conn's ref on route */
- }
-
- if (list_empty(&peer->ksnp_conns)) {
- /* No more connections to this peer */
-
- if (!list_empty(&peer->ksnp_tx_queue)) {
- struct ksock_tx *tx;
-
- LASSERT(conn->ksnc_proto == &ksocknal_protocol_v3x);
-
- /*
- * throw them to the last connection;
- * these TXs will be sent to /dev/null by the scheduler
- */
- list_for_each_entry(tx, &peer->ksnp_tx_queue,
- tx_list)
- ksocknal_tx_prep(conn, tx);
-
- spin_lock_bh(&conn->ksnc_scheduler->kss_lock);
- list_splice_init(&peer->ksnp_tx_queue,
- &conn->ksnc_tx_queue);
- spin_unlock_bh(&conn->ksnc_scheduler->kss_lock);
- }
-
- peer->ksnp_proto = NULL; /* renegotiate protocol version */
- peer->ksnp_error = error; /* stash last conn close reason */
-
- if (list_empty(&peer->ksnp_routes)) {
- /*
- * I've just closed last conn belonging to a
- * peer with no routes to it
- */
- ksocknal_unlink_peer_locked(peer);
- }
- }
-
- spin_lock_bh(&ksocknal_data.ksnd_reaper_lock);
-
- list_add_tail(&conn->ksnc_list,
- &ksocknal_data.ksnd_deathrow_conns);
- wake_up(&ksocknal_data.ksnd_reaper_waitq);
-
- spin_unlock_bh(&ksocknal_data.ksnd_reaper_lock);
-}
-
-void
-ksocknal_peer_failed(struct ksock_peer *peer)
-{
- int notify = 0;
- unsigned long last_alive = 0;
-
- /*
- * There has been a connection failure or comms error; but I'll only
- * tell LNET I think the peer is dead if it's to another kernel and
- * there are no connections or connection attempts in existence.
- */
- read_lock(&ksocknal_data.ksnd_global_lock);
-
- if (!(peer->ksnp_id.pid & LNET_PID_USERFLAG) &&
- list_empty(&peer->ksnp_conns) &&
- !peer->ksnp_accepting &&
- !ksocknal_find_connecting_route_locked(peer)) {
- notify = 1;
- last_alive = peer->ksnp_last_alive;
- }
-
- read_unlock(&ksocknal_data.ksnd_global_lock);
-
- if (notify)
- lnet_notify(peer->ksnp_ni, peer->ksnp_id.nid, 0,
- last_alive);
-}
-
-void
-ksocknal_finalize_zcreq(struct ksock_conn *conn)
-{
- struct ksock_peer *peer = conn->ksnc_peer;
- struct ksock_tx *tx;
- struct ksock_tx *temp;
- struct ksock_tx *tmp;
- LIST_HEAD(zlist);
-
- /*
- * NB safe to finalize TXs because closing of socket will
- * abort all buffered data
- */
- LASSERT(!conn->ksnc_sock);
-
- spin_lock(&peer->ksnp_lock);
-
- list_for_each_entry_safe(tx, tmp, &peer->ksnp_zc_req_list, tx_zc_list) {
- if (tx->tx_conn != conn)
- continue;
-
- LASSERT(tx->tx_msg.ksm_zc_cookies[0]);
-
- tx->tx_msg.ksm_zc_cookies[0] = 0;
- tx->tx_zc_aborted = 1; /* mark it as not-acked */
- list_del(&tx->tx_zc_list);
- list_add(&tx->tx_zc_list, &zlist);
- }
-
- spin_unlock(&peer->ksnp_lock);
-
- list_for_each_entry_safe(tx, temp, &zlist, tx_zc_list) {
- list_del(&tx->tx_zc_list);
- ksocknal_tx_decref(tx);
- }
-}
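-
-/*
- * Zero-copy bookkeeping sketch: each ZC tx carries a non-zero cookie in
- * ksm_zc_cookies[0] which the peer is expected to echo back in an ACK.
- * Clearing the cookie and setting tx_zc_aborted above marks the tx as
- * one that will never be ACKed, so the ksocknal_tx_decref() here can
- * finalize it with an error instead of waiting forever.
- */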
-
-void
-ksocknal_terminate_conn(struct ksock_conn *conn)
-{
- /*
- * This gets called by the reaper (guaranteed thread context) to
- * disengage the socket from its callbacks and close it.
- * ksnc_refcount will eventually hit zero, and then the reaper will
- * destroy it.
- */
- struct ksock_peer *peer = conn->ksnc_peer;
- struct ksock_sched *sched = conn->ksnc_scheduler;
- int failed = 0;
-
- LASSERT(conn->ksnc_closing);
-
- /* wake up the scheduler to "send" all remaining packets to /dev/null */
- spin_lock_bh(&sched->kss_lock);
-
- /* a closing conn is always ready to tx */
- conn->ksnc_tx_ready = 1;
-
- if (!conn->ksnc_tx_scheduled &&
- !list_empty(&conn->ksnc_tx_queue)) {
- list_add_tail(&conn->ksnc_tx_list,
- &sched->kss_tx_conns);
- conn->ksnc_tx_scheduled = 1;
- /* extra ref for scheduler */
- ksocknal_conn_addref(conn);
-
- wake_up(&sched->kss_waitq);
- }
-
- spin_unlock_bh(&sched->kss_lock);
-
- /* serialise with callbacks */
- write_lock_bh(&ksocknal_data.ksnd_global_lock);
-
- ksocknal_lib_reset_callback(conn->ksnc_sock, conn);
-
- /*
- * OK, so this conn may not be completely disengaged from its
- * scheduler yet, but it _has_ committed to terminate...
- */
- conn->ksnc_scheduler->kss_nconns--;
-
- if (peer->ksnp_error) {
- /* peer's last conn closed in error */
- LASSERT(list_empty(&peer->ksnp_conns));
- failed = 1;
- peer->ksnp_error = 0; /* avoid multiple notifications */
- }
-
- write_unlock_bh(&ksocknal_data.ksnd_global_lock);
-
- if (failed)
- ksocknal_peer_failed(peer);
-
- /*
- * The socket is closed on the final put; either here, or in
- * ksocknal_{send,recv}msg(). Since we set up the linger2 option
- * when the connection was established, this will close the socket
- * immediately, aborting anything buffered in it. Any hung
- * zero-copy transmits will therefore complete in finite time.
- */
- ksocknal_connsock_decref(conn);
-}
-
-void
-ksocknal_queue_zombie_conn(struct ksock_conn *conn)
-{
- /* Queue the conn for the reaper to destroy */
-
- LASSERT(!atomic_read(&conn->ksnc_conn_refcount));
- spin_lock_bh(&ksocknal_data.ksnd_reaper_lock);
-
- list_add_tail(&conn->ksnc_list, &ksocknal_data.ksnd_zombie_conns);
- wake_up(&ksocknal_data.ksnd_reaper_waitq);
-
- spin_unlock_bh(&ksocknal_data.ksnd_reaper_lock);
-}
-
-void
-ksocknal_destroy_conn(struct ksock_conn *conn)
-{
- unsigned long last_rcv;
-
- /* Final coup-de-grace of the reaper */
- CDEBUG(D_NET, "connection %p\n", conn);
-
- LASSERT(!atomic_read(&conn->ksnc_conn_refcount));
- LASSERT(!atomic_read(&conn->ksnc_sock_refcount));
- LASSERT(!conn->ksnc_sock);
- LASSERT(!conn->ksnc_route);
- LASSERT(!conn->ksnc_tx_scheduled);
- LASSERT(!conn->ksnc_rx_scheduled);
- LASSERT(list_empty(&conn->ksnc_tx_queue));
-
- /* complete current receive if any */
- switch (conn->ksnc_rx_state) {
- case SOCKNAL_RX_LNET_PAYLOAD:
- last_rcv = conn->ksnc_rx_deadline -
- *ksocknal_tunables.ksnd_timeout * HZ;
- CERROR("Completing partial receive from %s[%d], ip %pI4h:%d, with error, wanted: %zd, left: %d, last alive is %ld secs ago\n",
- libcfs_id2str(conn->ksnc_peer->ksnp_id), conn->ksnc_type,
- &conn->ksnc_ipaddr, conn->ksnc_port,
- iov_iter_count(&conn->ksnc_rx_to), conn->ksnc_rx_nob_left,
- cfs_duration_sec(cfs_time_sub(cfs_time_current(),
- last_rcv)));
- lnet_finalize(conn->ksnc_peer->ksnp_ni,
- conn->ksnc_cookie, -EIO);
- break;
- case SOCKNAL_RX_LNET_HEADER:
- if (conn->ksnc_rx_started)
- CERROR("Incomplete receive of lnet header from %s, ip %pI4h:%d, with error, protocol: %d.x.\n",
- libcfs_id2str(conn->ksnc_peer->ksnp_id),
- &conn->ksnc_ipaddr, conn->ksnc_port,
- conn->ksnc_proto->pro_version);
- break;
- case SOCKNAL_RX_KSM_HEADER:
- if (conn->ksnc_rx_started)
- CERROR("Incomplete receive of ksock message from %s, ip %pI4h:%d, with error, protocol: %d.x.\n",
- libcfs_id2str(conn->ksnc_peer->ksnp_id),
- &conn->ksnc_ipaddr, conn->ksnc_port,
- conn->ksnc_proto->pro_version);
- break;
- case SOCKNAL_RX_SLOP:
- if (conn->ksnc_rx_started)
- CERROR("Incomplete receive of slops from %s, ip %pI4h:%d, with error\n",
- libcfs_id2str(conn->ksnc_peer->ksnp_id),
- &conn->ksnc_ipaddr, conn->ksnc_port);
- break;
- default:
- LBUG();
- break;
- }
-
- ksocknal_peer_decref(conn->ksnc_peer);
-
- kfree(conn);
-}
-
-int
-ksocknal_close_peer_conns_locked(struct ksock_peer *peer, __u32 ipaddr, int why)
-{
- struct ksock_conn *conn;
- struct list_head *ctmp;
- struct list_head *cnxt;
- int count = 0;
-
- list_for_each_safe(ctmp, cnxt, &peer->ksnp_conns) {
- conn = list_entry(ctmp, struct ksock_conn, ksnc_list);
-
- if (!ipaddr || conn->ksnc_ipaddr == ipaddr) {
- count++;
- ksocknal_close_conn_locked(conn, why);
- }
- }
-
- return count;
-}
-
-int
-ksocknal_close_conn_and_siblings(struct ksock_conn *conn, int why)
-{
- struct ksock_peer *peer = conn->ksnc_peer;
- __u32 ipaddr = conn->ksnc_ipaddr;
- int count;
-
- write_lock_bh(&ksocknal_data.ksnd_global_lock);
-
- count = ksocknal_close_peer_conns_locked(peer, ipaddr, why);
-
- write_unlock_bh(&ksocknal_data.ksnd_global_lock);
-
- return count;
-}
-
-int
-ksocknal_close_matching_conns(struct lnet_process_id id, __u32 ipaddr)
-{
- struct ksock_peer *peer;
- struct list_head *ptmp;
- struct list_head *pnxt;
- int lo;
- int hi;
- int i;
- int count = 0;
-
- write_lock_bh(&ksocknal_data.ksnd_global_lock);
-
- if (id.nid != LNET_NID_ANY) {
- lo = (int)(ksocknal_nid2peerlist(id.nid) - ksocknal_data.ksnd_peers);
- hi = (int)(ksocknal_nid2peerlist(id.nid) - ksocknal_data.ksnd_peers);
- } else {
- lo = 0;
- hi = ksocknal_data.ksnd_peer_hash_size - 1;
- }
-
- for (i = lo; i <= hi; i++) {
- list_for_each_safe(ptmp, pnxt,
- &ksocknal_data.ksnd_peers[i]) {
- peer = list_entry(ptmp, struct ksock_peer, ksnp_list);
-
- if (!((id.nid == LNET_NID_ANY || id.nid == peer->ksnp_id.nid) &&
- (id.pid == LNET_PID_ANY || id.pid == peer->ksnp_id.pid)))
- continue;
-
- count += ksocknal_close_peer_conns_locked(peer, ipaddr,
- 0);
- }
- }
-
- write_unlock_bh(&ksocknal_data.ksnd_global_lock);
-
- /* wildcards always succeed */
- if (id.nid == LNET_NID_ANY || id.pid == LNET_PID_ANY || !ipaddr)
- return 0;
-
- return count ? 0 : -ENOENT;
-}
-
-void
-ksocknal_notify(struct lnet_ni *ni, lnet_nid_t gw_nid, int alive)
-{
- /*
- * The router is telling me she's been notified of a change in
- * gateway state...
- */
- struct lnet_process_id id = {0};
-
- id.nid = gw_nid;
- id.pid = LNET_PID_ANY;
-
- CDEBUG(D_NET, "gw %s %s\n", libcfs_nid2str(gw_nid),
- alive ? "up" : "down");
-
- if (!alive) {
- /* If the gateway crashed, close all open connections... */
- ksocknal_close_matching_conns(id, 0);
- return;
- }
-
- /*
- * ...otherwise do nothing. We can only establish new connections
- * if we have autoroutes, and these connect on demand.
- */
-}
-
-void
-ksocknal_query(struct lnet_ni *ni, lnet_nid_t nid, unsigned long *when)
-{
- int connect = 1;
- unsigned long last_alive = 0;
- unsigned long now = cfs_time_current();
- struct ksock_peer *peer = NULL;
- rwlock_t *glock = &ksocknal_data.ksnd_global_lock;
- struct lnet_process_id id = {
- .nid = nid,
- .pid = LNET_PID_LUSTRE,
- };
-
- read_lock(glock);
-
- peer = ksocknal_find_peer_locked(ni, id);
- if (peer) {
- struct ksock_conn *conn;
- int bufnob;
-
- list_for_each_entry(conn, &peer->ksnp_conns, ksnc_list) {
- bufnob = conn->ksnc_sock->sk->sk_wmem_queued;
-
- if (bufnob < conn->ksnc_tx_bufnob) {
- /* something got ACKed */
- conn->ksnc_tx_deadline =
- cfs_time_shift(*ksocknal_tunables.ksnd_timeout);
- peer->ksnp_last_alive = now;
- conn->ksnc_tx_bufnob = bufnob;
- }
- }
-
- last_alive = peer->ksnp_last_alive;
- if (!ksocknal_find_connectable_route_locked(peer))
- connect = 0;
- }
-
- read_unlock(glock);
-
- if (last_alive)
- *when = last_alive;
-
- CDEBUG(D_NET, "Peer %s %p, alive %ld secs ago, connect %d\n",
- libcfs_nid2str(nid), peer,
- last_alive ? cfs_duration_sec(now - last_alive) : -1,
- connect);
-
- if (!connect)
- return;
-
- ksocknal_add_peer(ni, id, LNET_NIDADDR(nid), lnet_acceptor_port());
-
- write_lock_bh(glock);
-
- peer = ksocknal_find_peer_locked(ni, id);
- if (peer)
- ksocknal_launch_all_connections_locked(peer);
-
- write_unlock_bh(glock);
-}
-
-static void
-ksocknal_push_peer(struct ksock_peer *peer)
-{
- int index;
- int i;
- struct list_head *tmp;
- struct ksock_conn *conn;
-
- for (index = 0; ; index++) {
- read_lock(&ksocknal_data.ksnd_global_lock);
-
- i = 0;
- conn = NULL;
-
- list_for_each(tmp, &peer->ksnp_conns) {
- if (i++ == index) {
- conn = list_entry(tmp, struct ksock_conn,
- ksnc_list);
- ksocknal_conn_addref(conn);
- break;
- }
- }
-
- read_unlock(&ksocknal_data.ksnd_global_lock);
-
- if (!conn)
- break;
-
- ksocknal_lib_push_conn(conn);
- ksocknal_conn_decref(conn);
- }
-}
-
-static int ksocknal_push(struct lnet_ni *ni, struct lnet_process_id id)
-{
- struct list_head *start;
- struct list_head *end;
- struct list_head *tmp;
- int rc = -ENOENT;
- unsigned int hsize = ksocknal_data.ksnd_peer_hash_size;
-
- if (id.nid == LNET_NID_ANY) {
- start = &ksocknal_data.ksnd_peers[0];
- end = &ksocknal_data.ksnd_peers[hsize - 1];
- } else {
- start = ksocknal_nid2peerlist(id.nid);
- end = ksocknal_nid2peerlist(id.nid);
- }
-
- for (tmp = start; tmp <= end; tmp++) {
- int peer_off; /* searching offset in peer hash table */
-
- for (peer_off = 0; ; peer_off++) {
- struct ksock_peer *peer;
- int i = 0;
-
- read_lock(&ksocknal_data.ksnd_global_lock);
- list_for_each_entry(peer, tmp, ksnp_list) {
- if (!((id.nid == LNET_NID_ANY ||
- id.nid == peer->ksnp_id.nid) &&
- (id.pid == LNET_PID_ANY ||
- id.pid == peer->ksnp_id.pid)))
- continue;
-
- if (i++ == peer_off) {
- ksocknal_peer_addref(peer);
- break;
- }
- }
- read_unlock(&ksocknal_data.ksnd_global_lock);
-
- if (!i) /* no match */
- break;
-
- rc = 0;
- ksocknal_push_peer(peer);
- ksocknal_peer_decref(peer);
- }
- }
- return rc;
-}
-
-static int
-ksocknal_add_interface(struct lnet_ni *ni, __u32 ipaddress, __u32 netmask)
-{
- struct ksock_net *net = ni->ni_data;
- struct ksock_interface *iface;
- int rc;
- int i;
- int j;
- struct list_head *ptmp;
- struct ksock_peer *peer;
- struct list_head *rtmp;
- struct ksock_route *route;
-
- if (!ipaddress || !netmask)
- return -EINVAL;
-
- write_lock_bh(&ksocknal_data.ksnd_global_lock);
-
- iface = ksocknal_ip2iface(ni, ipaddress);
- if (iface) {
- /* silently ignore dups */
- rc = 0;
- } else if (net->ksnn_ninterfaces == LNET_MAX_INTERFACES) {
- rc = -ENOSPC;
- } else {
- iface = &net->ksnn_interfaces[net->ksnn_ninterfaces++];
-
- iface->ksni_ipaddr = ipaddress;
- iface->ksni_netmask = netmask;
- iface->ksni_nroutes = 0;
- iface->ksni_npeers = 0;
-
- for (i = 0; i < ksocknal_data.ksnd_peer_hash_size; i++) {
- list_for_each(ptmp, &ksocknal_data.ksnd_peers[i]) {
- peer = list_entry(ptmp, struct ksock_peer,
- ksnp_list);
-
- for (j = 0; j < peer->ksnp_n_passive_ips; j++)
- if (peer->ksnp_passive_ips[j] == ipaddress)
- iface->ksni_npeers++;
-
- list_for_each(rtmp, &peer->ksnp_routes) {
- route = list_entry(rtmp, struct ksock_route,
- ksnr_list);
-
- if (route->ksnr_myipaddr == ipaddress)
- iface->ksni_nroutes++;
- }
- }
- }
-
- rc = 0;
- /*
- * NB only new connections will pay attention to the
- * new interface!
- */
- }
-
- write_unlock_bh(&ksocknal_data.ksnd_global_lock);
-
- return rc;
-}
-
-static void
-ksocknal_peer_del_interface_locked(struct ksock_peer *peer, __u32 ipaddr)
-{
- struct list_head *tmp;
- struct list_head *nxt;
- struct ksock_route *route;
- struct ksock_conn *conn;
- int i;
- int j;
-
- for (i = 0; i < peer->ksnp_n_passive_ips; i++)
- if (peer->ksnp_passive_ips[i] == ipaddr) {
- for (j = i + 1; j < peer->ksnp_n_passive_ips; j++)
- peer->ksnp_passive_ips[j - 1] =
- peer->ksnp_passive_ips[j];
- peer->ksnp_n_passive_ips--;
- break;
- }
-
- list_for_each_safe(tmp, nxt, &peer->ksnp_routes) {
- route = list_entry(tmp, struct ksock_route, ksnr_list);
-
- if (route->ksnr_myipaddr != ipaddr)
- continue;
-
- if (route->ksnr_share_count) {
- /* Manually created; keep, but unbind */
- route->ksnr_myipaddr = 0;
- } else {
- ksocknal_del_route_locked(route);
- }
- }
-
- list_for_each_safe(tmp, nxt, &peer->ksnp_conns) {
- conn = list_entry(tmp, struct ksock_conn, ksnc_list);
-
- if (conn->ksnc_myipaddr == ipaddr)
- ksocknal_close_conn_locked(conn, 0);
- }
-}
-
-static int
-ksocknal_del_interface(struct lnet_ni *ni, __u32 ipaddress)
-{
- struct ksock_net *net = ni->ni_data;
- int rc = -ENOENT;
- struct list_head *tmp;
- struct list_head *nxt;
- struct ksock_peer *peer;
- __u32 this_ip;
- int i;
- int j;
-
- write_lock_bh(&ksocknal_data.ksnd_global_lock);
-
- for (i = 0; i < net->ksnn_ninterfaces; i++) {
- this_ip = net->ksnn_interfaces[i].ksni_ipaddr;
-
- if (!(!ipaddress || ipaddress == this_ip))
- continue;
-
- rc = 0;
-
- for (j = i + 1; j < net->ksnn_ninterfaces; j++)
- net->ksnn_interfaces[j - 1] =
- net->ksnn_interfaces[j];
-
- net->ksnn_ninterfaces--;
-
- for (j = 0; j < ksocknal_data.ksnd_peer_hash_size; j++) {
- list_for_each_safe(tmp, nxt,
- &ksocknal_data.ksnd_peers[j]) {
- peer = list_entry(tmp, struct ksock_peer, ksnp_list);
-
- if (peer->ksnp_ni != ni)
- continue;
-
- ksocknal_peer_del_interface_locked(peer, this_ip);
- }
- }
- }
-
- write_unlock_bh(&ksocknal_data.ksnd_global_lock);
-
- return rc;
-}
-
-int
-ksocknal_ctl(struct lnet_ni *ni, unsigned int cmd, void *arg)
-{
- struct lnet_process_id id = {0};
- struct libcfs_ioctl_data *data = arg;
- int rc;
-
- switch (cmd) {
- case IOC_LIBCFS_GET_INTERFACE: {
- struct ksock_net *net = ni->ni_data;
- struct ksock_interface *iface;
-
- read_lock(&ksocknal_data.ksnd_global_lock);
-
- if (data->ioc_count >= (__u32)net->ksnn_ninterfaces) {
- rc = -ENOENT;
- } else {
- rc = 0;
- iface = &net->ksnn_interfaces[data->ioc_count];
-
- data->ioc_u32[0] = iface->ksni_ipaddr;
- data->ioc_u32[1] = iface->ksni_netmask;
- data->ioc_u32[2] = iface->ksni_npeers;
- data->ioc_u32[3] = iface->ksni_nroutes;
- }
-
- read_unlock(&ksocknal_data.ksnd_global_lock);
- return rc;
- }
-
- case IOC_LIBCFS_ADD_INTERFACE:
- return ksocknal_add_interface(ni,
- data->ioc_u32[0], /* IP address */
- data->ioc_u32[1]); /* net mask */
-
- case IOC_LIBCFS_DEL_INTERFACE:
- return ksocknal_del_interface(ni,
- data->ioc_u32[0]); /* IP address */
-
- case IOC_LIBCFS_GET_PEER: {
- __u32 myip = 0;
- __u32 ip = 0;
- int port = 0;
- int conn_count = 0;
- int share_count = 0;
-
- rc = ksocknal_get_peer_info(ni, data->ioc_count,
- &id, &myip, &ip, &port,
- &conn_count, &share_count);
- if (rc)
- return rc;
-
- data->ioc_nid = id.nid;
- data->ioc_count = share_count;
- data->ioc_u32[0] = ip;
- data->ioc_u32[1] = port;
- data->ioc_u32[2] = myip;
- data->ioc_u32[3] = conn_count;
- data->ioc_u32[4] = id.pid;
- return 0;
- }
-
- case IOC_LIBCFS_ADD_PEER:
- id.nid = data->ioc_nid;
- id.pid = LNET_PID_LUSTRE;
- return ksocknal_add_peer(ni, id,
- data->ioc_u32[0], /* IP */
- data->ioc_u32[1]); /* port */
-
- case IOC_LIBCFS_DEL_PEER:
- id.nid = data->ioc_nid;
- id.pid = LNET_PID_ANY;
- return ksocknal_del_peer(ni, id,
- data->ioc_u32[0]); /* IP */
-
- case IOC_LIBCFS_GET_CONN: {
- int txmem;
- int rxmem;
- int nagle;
- struct ksock_conn *conn;
-
- conn = ksocknal_get_conn_by_idx(ni, data->ioc_count);
- if (!conn)
- return -ENOENT;
-
- ksocknal_lib_get_conn_tunables(conn, &txmem, &rxmem, &nagle);
-
- data->ioc_count = txmem;
- data->ioc_nid = conn->ksnc_peer->ksnp_id.nid;
- data->ioc_flags = nagle;
- data->ioc_u32[0] = conn->ksnc_ipaddr;
- data->ioc_u32[1] = conn->ksnc_port;
- data->ioc_u32[2] = conn->ksnc_myipaddr;
- data->ioc_u32[3] = conn->ksnc_type;
- data->ioc_u32[4] = conn->ksnc_scheduler->kss_info->ksi_cpt;
- data->ioc_u32[5] = rxmem;
- data->ioc_u32[6] = conn->ksnc_peer->ksnp_id.pid;
- ksocknal_conn_decref(conn);
- return 0;
- }
-
- case IOC_LIBCFS_CLOSE_CONNECTION:
- id.nid = data->ioc_nid;
- id.pid = LNET_PID_ANY;
- return ksocknal_close_matching_conns(id,
- data->ioc_u32[0]);
-
- case IOC_LIBCFS_REGISTER_MYNID:
- /* Ignore if this is a noop */
- if (data->ioc_nid == ni->ni_nid)
- return 0;
-
- CERROR("obsolete IOC_LIBCFS_REGISTER_MYNID: %s(%s)\n",
- libcfs_nid2str(data->ioc_nid),
- libcfs_nid2str(ni->ni_nid));
- return -EINVAL;
-
- case IOC_LIBCFS_PUSH_CONNECTION:
- id.nid = data->ioc_nid;
- id.pid = LNET_PID_ANY;
- return ksocknal_push(ni, id);
-
- default:
- return -EINVAL;
- }
- /* not reached */
-}
-
-static void
-ksocknal_free_buffers(void)
-{
- LASSERT(!atomic_read(&ksocknal_data.ksnd_nactive_txs));
-
- if (ksocknal_data.ksnd_sched_info) {
- struct ksock_sched_info *info;
- int i;
-
- cfs_percpt_for_each(info, i, ksocknal_data.ksnd_sched_info)
- kfree(info->ksi_scheds);
- cfs_percpt_free(ksocknal_data.ksnd_sched_info);
- }
-
- kvfree(ksocknal_data.ksnd_peers);
-
- spin_lock(&ksocknal_data.ksnd_tx_lock);
-
- if (!list_empty(&ksocknal_data.ksnd_idle_noop_txs)) {
- struct list_head zlist;
- struct ksock_tx *tx;
- struct ksock_tx *temp;
-
- list_add(&zlist, &ksocknal_data.ksnd_idle_noop_txs);
- list_del_init(&ksocknal_data.ksnd_idle_noop_txs);
- spin_unlock(&ksocknal_data.ksnd_tx_lock);
-
- list_for_each_entry_safe(tx, temp, &zlist, tx_list) {
- list_del(&tx->tx_list);
- kfree(tx);
- }
- } else {
- spin_unlock(&ksocknal_data.ksnd_tx_lock);
- }
-}
-
-static void
-ksocknal_base_shutdown(void)
-{
- struct ksock_sched_info *info;
- struct ksock_sched *sched;
- int i;
- int j;
-
- LASSERT(!ksocknal_data.ksnd_nnets);
-
- switch (ksocknal_data.ksnd_init) {
- default:
- LASSERT(0);
- /* fall through */
- case SOCKNAL_INIT_ALL:
- case SOCKNAL_INIT_DATA:
- LASSERT(ksocknal_data.ksnd_peers);
- for (i = 0; i < ksocknal_data.ksnd_peer_hash_size; i++)
- LASSERT(list_empty(&ksocknal_data.ksnd_peers[i]));
-
- LASSERT(list_empty(&ksocknal_data.ksnd_nets));
- LASSERT(list_empty(&ksocknal_data.ksnd_enomem_conns));
- LASSERT(list_empty(&ksocknal_data.ksnd_zombie_conns));
- LASSERT(list_empty(&ksocknal_data.ksnd_connd_connreqs));
- LASSERT(list_empty(&ksocknal_data.ksnd_connd_routes));
-
- if (ksocknal_data.ksnd_sched_info) {
- cfs_percpt_for_each(info, i,
- ksocknal_data.ksnd_sched_info) {
- if (!info->ksi_scheds)
- continue;
-
- for (j = 0; j < info->ksi_nthreads_max; j++) {
- sched = &info->ksi_scheds[j];
- LASSERT(list_empty(
- &sched->kss_tx_conns));
- LASSERT(list_empty(
- &sched->kss_rx_conns));
- LASSERT(list_empty(
- &sched->kss_zombie_noop_txs));
- LASSERT(!sched->kss_nconns);
- }
- }
- }
-
- /* flag threads to terminate; wake and wait for them to die */
- ksocknal_data.ksnd_shuttingdown = 1;
- wake_up_all(&ksocknal_data.ksnd_connd_waitq);
- wake_up_all(&ksocknal_data.ksnd_reaper_waitq);
-
- if (ksocknal_data.ksnd_sched_info) {
- cfs_percpt_for_each(info, i,
- ksocknal_data.ksnd_sched_info) {
- if (!info->ksi_scheds)
- continue;
-
- for (j = 0; j < info->ksi_nthreads_max; j++) {
- sched = &info->ksi_scheds[j];
- wake_up_all(&sched->kss_waitq);
- }
- }
- }
-
- i = 4;
- read_lock(&ksocknal_data.ksnd_global_lock);
- while (ksocknal_data.ksnd_nthreads) {
- i++;
- CDEBUG(((i & (-i)) == i) ? D_WARNING : D_NET, /* power of 2? */
- "waiting for %d threads to terminate\n",
- ksocknal_data.ksnd_nthreads);
- read_unlock(&ksocknal_data.ksnd_global_lock);
- set_current_state(TASK_UNINTERRUPTIBLE);
- schedule_timeout(HZ);
- read_lock(&ksocknal_data.ksnd_global_lock);
- }
- read_unlock(&ksocknal_data.ksnd_global_lock);
-
- ksocknal_free_buffers();
-
- ksocknal_data.ksnd_init = SOCKNAL_INIT_NOTHING;
- break;
- }
-
- module_put(THIS_MODULE);
-}
-
-static __u64
-ksocknal_new_incarnation(void)
-{
- /* The incarnation number is the time this module loaded and it
- * identifies this particular instance of the socknal.
- */
- return ktime_get_ns();
-}
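-
-/*
- * The incarnation travels in the HELLO exchange; ksocknal_create_conn()
- * compares it against peer->ksnp_incarnation and treats any mismatch as
- * "peer rebooted", closing all stale conns (see the ESTALE path above).
- */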
-
-static int
-ksocknal_base_startup(void)
-{
- struct ksock_sched_info *info;
- int rc;
- int i;
-
- LASSERT(ksocknal_data.ksnd_init == SOCKNAL_INIT_NOTHING);
- LASSERT(!ksocknal_data.ksnd_nnets);
-
- memset(&ksocknal_data, 0, sizeof(ksocknal_data)); /* zero pointers */
-
- ksocknal_data.ksnd_peer_hash_size = SOCKNAL_PEER_HASH_SIZE;
- ksocknal_data.ksnd_peers = kvmalloc_array(ksocknal_data.ksnd_peer_hash_size,
- sizeof(struct list_head),
- GFP_KERNEL);
- if (!ksocknal_data.ksnd_peers)
- return -ENOMEM;
-
- for (i = 0; i < ksocknal_data.ksnd_peer_hash_size; i++)
- INIT_LIST_HEAD(&ksocknal_data.ksnd_peers[i]);
-
- rwlock_init(&ksocknal_data.ksnd_global_lock);
- INIT_LIST_HEAD(&ksocknal_data.ksnd_nets);
-
- spin_lock_init(&ksocknal_data.ksnd_reaper_lock);
- INIT_LIST_HEAD(&ksocknal_data.ksnd_enomem_conns);
- INIT_LIST_HEAD(&ksocknal_data.ksnd_zombie_conns);
- INIT_LIST_HEAD(&ksocknal_data.ksnd_deathrow_conns);
- init_waitqueue_head(&ksocknal_data.ksnd_reaper_waitq);
-
- spin_lock_init(&ksocknal_data.ksnd_connd_lock);
- INIT_LIST_HEAD(&ksocknal_data.ksnd_connd_connreqs);
- INIT_LIST_HEAD(&ksocknal_data.ksnd_connd_routes);
- init_waitqueue_head(&ksocknal_data.ksnd_connd_waitq);
-
- spin_lock_init(&ksocknal_data.ksnd_tx_lock);
- INIT_LIST_HEAD(&ksocknal_data.ksnd_idle_noop_txs);
-
- /* NB memset above zeros whole of ksocknal_data */
-
- /* flag lists/ptrs/locks initialised */
- ksocknal_data.ksnd_init = SOCKNAL_INIT_DATA;
- try_module_get(THIS_MODULE);
-
- ksocknal_data.ksnd_sched_info = cfs_percpt_alloc(lnet_cpt_table(),
- sizeof(*info));
- if (!ksocknal_data.ksnd_sched_info)
- goto failed;
-
- cfs_percpt_for_each(info, i, ksocknal_data.ksnd_sched_info) {
- struct ksock_sched *sched;
- int nthrs;
-
- nthrs = cfs_cpt_weight(lnet_cpt_table(), i);
- if (*ksocknal_tunables.ksnd_nscheds > 0) {
- nthrs = min(nthrs, *ksocknal_tunables.ksnd_nscheds);
- } else {
- /*
- * max to half of CPUs, assume another half should be
- * reserved for upper layer modules
- */
- nthrs = min(max(SOCKNAL_NSCHEDS, nthrs >> 1), nthrs);
- }
-
- info->ksi_nthreads_max = nthrs;
- info->ksi_cpt = i;
-
- info->ksi_scheds = kzalloc_cpt(info->ksi_nthreads_max * sizeof(*sched),
- GFP_NOFS, i);
- if (!info->ksi_scheds)
- goto failed;
-
- for (; nthrs > 0; nthrs--) {
- sched = &info->ksi_scheds[nthrs - 1];
-
- sched->kss_info = info;
- spin_lock_init(&sched->kss_lock);
- INIT_LIST_HEAD(&sched->kss_rx_conns);
- INIT_LIST_HEAD(&sched->kss_tx_conns);
- INIT_LIST_HEAD(&sched->kss_zombie_noop_txs);
- init_waitqueue_head(&sched->kss_waitq);
- }
- }
-
- ksocknal_data.ksnd_connd_starting = 0;
- ksocknal_data.ksnd_connd_failed_stamp = 0;
- ksocknal_data.ksnd_connd_starting_stamp = ktime_get_real_seconds();
- /*
- * must have at least 2 connds to remain responsive to accepts while
- * connecting
- */
- if (*ksocknal_tunables.ksnd_nconnds < SOCKNAL_CONND_RESV + 1)
- *ksocknal_tunables.ksnd_nconnds = SOCKNAL_CONND_RESV + 1;
-
- if (*ksocknal_tunables.ksnd_nconnds_max <
- *ksocknal_tunables.ksnd_nconnds) {
- ksocknal_tunables.ksnd_nconnds_max =
- ksocknal_tunables.ksnd_nconnds;
- }
-
- for (i = 0; i < *ksocknal_tunables.ksnd_nconnds; i++) {
- char name[16];
-
- spin_lock_bh(&ksocknal_data.ksnd_connd_lock);
- ksocknal_data.ksnd_connd_starting++;
- spin_unlock_bh(&ksocknal_data.ksnd_connd_lock);
-
- snprintf(name, sizeof(name), "socknal_cd%02d", i);
- rc = ksocknal_thread_start(ksocknal_connd,
- (void *)((uintptr_t)i), name);
- if (rc) {
- spin_lock_bh(&ksocknal_data.ksnd_connd_lock);
- ksocknal_data.ksnd_connd_starting--;
- spin_unlock_bh(&ksocknal_data.ksnd_connd_lock);
- CERROR("Can't spawn socknal connd: %d\n", rc);
- goto failed;
- }
- }
-
- rc = ksocknal_thread_start(ksocknal_reaper, NULL, "socknal_reaper");
- if (rc) {
- CERROR("Can't spawn socknal reaper: %d\n", rc);
- goto failed;
- }
-
- /* flag everything initialised */
- ksocknal_data.ksnd_init = SOCKNAL_INIT_ALL;
-
- return 0;
-
- failed:
- ksocknal_base_shutdown();
- return -ENETDOWN;
-}
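-
-/*
- * Worked example of the scheduler sizing in ksocknal_base_startup()
- * (assuming SOCKNAL_NSCHEDS is the small floor defined in socklnd.h,
- * presumably 3): on a CPT spanning 8 cores with ksnd_nscheds unset,
- * nthrs = min(max(SOCKNAL_NSCHEDS, 8 >> 1), 8) = 4 threads, i.e. half
- * the cores, leaving the other half for upper-layer modules; setting
- * the ksnd_nscheds module parameter caps the count explicitly.
- */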
-
-static void
-ksocknal_debug_peerhash(struct lnet_ni *ni)
-{
- struct ksock_peer *peer = NULL;
- struct list_head *tmp;
- int i;
-
- read_lock(&ksocknal_data.ksnd_global_lock);
-
- for (i = 0; i < ksocknal_data.ksnd_peer_hash_size; i++) {
- list_for_each(tmp, &ksocknal_data.ksnd_peers[i]) {
- peer = list_entry(tmp, struct ksock_peer, ksnp_list);
-
- if (peer->ksnp_ni == ni)
- break;
-
- peer = NULL;
- }
- }
-
- if (peer) {
- struct ksock_route *route;
- struct ksock_conn *conn;
-
- CWARN("Active peer on shutdown: %s, ref %d, scnt %d, closing %d, accepting %d, err %d, zcookie %llu, txq %d, zc_req %d\n",
- libcfs_id2str(peer->ksnp_id),
- atomic_read(&peer->ksnp_refcount),
- peer->ksnp_sharecount, peer->ksnp_closing,
- peer->ksnp_accepting, peer->ksnp_error,
- peer->ksnp_zc_next_cookie,
- !list_empty(&peer->ksnp_tx_queue),
- !list_empty(&peer->ksnp_zc_req_list));
-
- list_for_each(tmp, &peer->ksnp_routes) {
- route = list_entry(tmp, struct ksock_route, ksnr_list);
- CWARN("Route: ref %d, schd %d, conn %d, cnted %d, del %d\n",
- atomic_read(&route->ksnr_refcount),
- route->ksnr_scheduled, route->ksnr_connecting,
- route->ksnr_connected, route->ksnr_deleted);
- }
-
- list_for_each(tmp, &peer->ksnp_conns) {
- conn = list_entry(tmp, struct ksock_conn, ksnc_list);
- CWARN("Conn: ref %d, sref %d, t %d, c %d\n",
- atomic_read(&conn->ksnc_conn_refcount),
- atomic_read(&conn->ksnc_sock_refcount),
- conn->ksnc_type, conn->ksnc_closing);
- }
- }
-
- read_unlock(&ksocknal_data.ksnd_global_lock);
-}
-
-void
-ksocknal_shutdown(struct lnet_ni *ni)
-{
- struct ksock_net *net = ni->ni_data;
- int i;
- struct lnet_process_id anyid = {0};
-
- anyid.nid = LNET_NID_ANY;
- anyid.pid = LNET_PID_ANY;
-
- LASSERT(ksocknal_data.ksnd_init == SOCKNAL_INIT_ALL);
- LASSERT(ksocknal_data.ksnd_nnets > 0);
-
- spin_lock_bh(&net->ksnn_lock);
- net->ksnn_shutdown = 1; /* prevent new peers */
- spin_unlock_bh(&net->ksnn_lock);
-
- /* Delete all peers */
- ksocknal_del_peer(ni, anyid, 0);
-
- /* Wait for all peer state to clean up */
- i = 2;
- spin_lock_bh(&net->ksnn_lock);
- while (net->ksnn_npeers) {
- spin_unlock_bh(&net->ksnn_lock);
-
- i++;
- CDEBUG(((i & (-i)) == i) ? D_WARNING : D_NET, /* power of 2? */
- "waiting for %d peers to disconnect\n",
- net->ksnn_npeers);
- set_current_state(TASK_UNINTERRUPTIBLE);
- schedule_timeout(HZ);
-
- ksocknal_debug_peerhash(ni);
-
- spin_lock_bh(&net->ksnn_lock);
- }
- spin_unlock_bh(&net->ksnn_lock);
-
- for (i = 0; i < net->ksnn_ninterfaces; i++) {
- LASSERT(!net->ksnn_interfaces[i].ksni_npeers);
- LASSERT(!net->ksnn_interfaces[i].ksni_nroutes);
- }
-
- list_del(&net->ksnn_list);
- kfree(net);
-
- ksocknal_data.ksnd_nnets--;
- if (!ksocknal_data.ksnd_nnets)
- ksocknal_base_shutdown();
-}
-
-static int
-ksocknal_enumerate_interfaces(struct ksock_net *net)
-{
- char **names;
- int i;
- int j;
- int rc;
- int n;
-
- n = lnet_ipif_enumerate(&names);
- if (n <= 0) {
- CERROR("Can't enumerate interfaces: %d\n", n);
- return n;
- }
-
- for (i = j = 0; i < n; i++) {
- int up;
- __u32 ip;
- __u32 mask;
-
- if (!strcmp(names[i], "lo")) /* skip the loopback IF */
- continue;
-
- rc = lnet_ipif_query(names[i], &up, &ip, &mask);
- if (rc) {
- CWARN("Can't get interface %s info: %d\n",
- names[i], rc);
- continue;
- }
-
- if (!up) {
- CWARN("Ignoring interface %s (down)\n",
- names[i]);
- continue;
- }
-
- if (j == LNET_MAX_INTERFACES) {
- CWARN("Ignoring interface %s (too many interfaces)\n",
- names[i]);
- continue;
- }
-
- net->ksnn_interfaces[j].ksni_ipaddr = ip;
- net->ksnn_interfaces[j].ksni_netmask = mask;
- strlcpy(net->ksnn_interfaces[j].ksni_name,
- names[i], sizeof(net->ksnn_interfaces[j].ksni_name));
- j++;
- }
-
- lnet_ipif_free_enumeration(names, n);
-
- if (!j)
- CERROR("Can't find any usable interfaces\n");
-
- return j;
-}
-
-static int
-ksocknal_search_new_ipif(struct ksock_net *net)
-{
- int new_ipif = 0;
- int i;
-
- for (i = 0; i < net->ksnn_ninterfaces; i++) {
- char *ifnam = &net->ksnn_interfaces[i].ksni_name[0];
- char *colon = strchr(ifnam, ':');
- int found = 0;
- struct ksock_net *tmp;
- int j;
-
- if (colon) /* ignore alias device */
- *colon = 0;
-
- list_for_each_entry(tmp, &ksocknal_data.ksnd_nets, ksnn_list) {
- for (j = 0; !found && j < tmp->ksnn_ninterfaces; j++) {
- char *ifnam2 =
- &tmp->ksnn_interfaces[j].ksni_name[0];
- char *colon2 = strchr(ifnam2, ':');
-
- if (colon2)
- *colon2 = 0;
-
- found = !strcmp(ifnam, ifnam2);
- if (colon2)
- *colon2 = ':';
- }
- if (found)
- break;
- }
-
- new_ipif += !found;
- if (colon)
- *colon = ':';
- }
-
- return new_ipif;
-}
-
-static int
-ksocknal_start_schedulers(struct ksock_sched_info *info)
-{
- int nthrs;
- int rc = 0;
- int i;
-
- if (!info->ksi_nthreads) {
- if (*ksocknal_tunables.ksnd_nscheds > 0) {
- nthrs = info->ksi_nthreads_max;
- } else {
- nthrs = cfs_cpt_weight(lnet_cpt_table(),
- info->ksi_cpt);
- nthrs = min(max(SOCKNAL_NSCHEDS, nthrs >> 1), nthrs);
- nthrs = min(SOCKNAL_NSCHEDS_HIGH, nthrs);
- }
- nthrs = min(nthrs, info->ksi_nthreads_max);
- } else {
- LASSERT(info->ksi_nthreads <= info->ksi_nthreads_max);
-		/* add up to two threads if there is a new interface */
- nthrs = min(2, info->ksi_nthreads_max - info->ksi_nthreads);
- }
-
- for (i = 0; i < nthrs; i++) {
- long id;
- char name[20];
- struct ksock_sched *sched;
-
- id = KSOCK_THREAD_ID(info->ksi_cpt, info->ksi_nthreads + i);
- sched = &info->ksi_scheds[KSOCK_THREAD_SID(id)];
- snprintf(name, sizeof(name), "socknal_sd%02d_%02d",
- info->ksi_cpt, (int)(sched - &info->ksi_scheds[0]));
-
- rc = ksocknal_thread_start(ksocknal_scheduler,
- (void *)id, name);
- if (!rc)
- continue;
-
- CERROR("Can't spawn thread %d for scheduler[%d]: %d\n",
- info->ksi_cpt, info->ksi_nthreads + i, rc);
- break;
- }
-
- info->ksi_nthreads += i;
- return rc;
-}
-
-static int
-ksocknal_net_start_threads(struct ksock_net *net, __u32 *cpts, int ncpts)
-{
- int newif = ksocknal_search_new_ipif(net);
- int rc;
- int i;
-
- LASSERT(ncpts > 0 && ncpts <= cfs_cpt_number(lnet_cpt_table()));
-
- for (i = 0; i < ncpts; i++) {
- struct ksock_sched_info *info;
- int cpt = !cpts ? i : cpts[i];
-
- LASSERT(cpt < cfs_cpt_number(lnet_cpt_table()));
- info = ksocknal_data.ksnd_sched_info[cpt];
-
- if (!newif && info->ksi_nthreads > 0)
- continue;
-
- rc = ksocknal_start_schedulers(info);
- if (rc)
- return rc;
- }
- return 0;
-}
-
-int
-ksocknal_startup(struct lnet_ni *ni)
-{
- struct ksock_net *net;
- int rc;
- int i;
-
- LASSERT(ni->ni_lnd == &the_ksocklnd);
-
- if (ksocknal_data.ksnd_init == SOCKNAL_INIT_NOTHING) {
- rc = ksocknal_base_startup();
- if (rc)
- return rc;
- }
-
- net = kzalloc(sizeof(*net), GFP_NOFS);
- if (!net)
- goto fail_0;
-
- spin_lock_init(&net->ksnn_lock);
- net->ksnn_incarnation = ksocknal_new_incarnation();
- ni->ni_data = net;
- ni->ni_peertimeout = *ksocknal_tunables.ksnd_peertimeout;
- ni->ni_maxtxcredits = *ksocknal_tunables.ksnd_credits;
- ni->ni_peertxcredits = *ksocknal_tunables.ksnd_peertxcredits;
- ni->ni_peerrtrcredits = *ksocknal_tunables.ksnd_peerrtrcredits;
-
- if (!ni->ni_interfaces[0]) {
- rc = ksocknal_enumerate_interfaces(net);
- if (rc <= 0)
- goto fail_1;
-
- net->ksnn_ninterfaces = 1;
- } else {
- for (i = 0; i < LNET_MAX_INTERFACES; i++) {
- int up;
-
- if (!ni->ni_interfaces[i])
- break;
-
- rc = lnet_ipif_query(ni->ni_interfaces[i], &up,
- &net->ksnn_interfaces[i].ksni_ipaddr,
- &net->ksnn_interfaces[i].ksni_netmask);
-
- if (rc) {
- CERROR("Can't get interface %s info: %d\n",
- ni->ni_interfaces[i], rc);
- goto fail_1;
- }
-
- if (!up) {
- CERROR("Interface %s is down\n",
- ni->ni_interfaces[i]);
- goto fail_1;
- }
-
- strlcpy(net->ksnn_interfaces[i].ksni_name,
- ni->ni_interfaces[i],
- sizeof(net->ksnn_interfaces[i].ksni_name));
- }
- net->ksnn_ninterfaces = i;
- }
-
-	/* start threads before adding the net to ksocknal_data.ksnd_nets */
- rc = ksocknal_net_start_threads(net, ni->ni_cpts, ni->ni_ncpts);
- if (rc)
- goto fail_1;
-
- ni->ni_nid = LNET_MKNID(LNET_NIDNET(ni->ni_nid),
- net->ksnn_interfaces[0].ksni_ipaddr);
- list_add(&net->ksnn_list, &ksocknal_data.ksnd_nets);
-
- ksocknal_data.ksnd_nnets++;
-
- return 0;
-
- fail_1:
- kfree(net);
- fail_0:
- if (!ksocknal_data.ksnd_nnets)
- ksocknal_base_shutdown();
-
- return -ENETDOWN;
-}
-
-static void __exit ksocklnd_exit(void)
-{
- lnet_unregister_lnd(&the_ksocklnd);
-}
-
-static int __init ksocklnd_init(void)
-{
- int rc;
-
-	/* check that the ksnr_connected/connecting bitfields are large enough */
- BUILD_BUG_ON(SOCKLND_CONN_NTYPES > 4);
- BUILD_BUG_ON(SOCKLND_CONN_ACK != SOCKLND_CONN_BULK_IN);
-
- /* initialize the_ksocklnd */
- the_ksocklnd.lnd_type = SOCKLND;
- the_ksocklnd.lnd_startup = ksocknal_startup;
- the_ksocklnd.lnd_shutdown = ksocknal_shutdown;
- the_ksocklnd.lnd_ctl = ksocknal_ctl;
- the_ksocklnd.lnd_send = ksocknal_send;
- the_ksocklnd.lnd_recv = ksocknal_recv;
- the_ksocklnd.lnd_notify = ksocknal_notify;
- the_ksocklnd.lnd_query = ksocknal_query;
- the_ksocklnd.lnd_accept = ksocknal_accept;
-
- rc = ksocknal_tunables_init();
- if (rc)
- return rc;
-
- lnet_register_lnd(&the_ksocklnd);
-
- return 0;
-}
-
-MODULE_AUTHOR("OpenSFS, Inc. <http://www.lustre.org/>");
-MODULE_DESCRIPTION("TCP Socket LNet Network Driver");
-MODULE_VERSION("2.7.0");
-MODULE_LICENSE("GPL");
-
-module_init(ksocklnd_init);
-module_exit(ksocklnd_exit);
diff --git a/drivers/staging/lustre/lnet/klnds/socklnd/socklnd.h b/drivers/staging/lustre/lnet/klnds/socklnd/socklnd.h
deleted file mode 100644
index 570f54ed57b1..000000000000
--- a/drivers/staging/lustre/lnet/klnds/socklnd/socklnd.h
+++ /dev/null
@@ -1,705 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/*
- * Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved.
- *
- * Copyright (c) 2011, 2012, Intel Corporation.
- *
- * Author: Zach Brown <zab@zabbo.net>
- * Author: Peter J. Braam <braam@clusterfs.com>
- * Author: Phil Schwan <phil@clusterfs.com>
- * Author: Eric Barton <eric@bartonsoftware.com>
- *
- * This file is part of Lustre, http://www.lustre.org
- *
- * Portals is free software; you can redistribute it and/or
- * modify it under the terms of version 2 of the GNU General Public
- * License as published by the Free Software Foundation.
- *
- * Portals is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- */
-
-#ifndef _SOCKLND_SOCKLND_H_
-#define _SOCKLND_SOCKLND_H_
-
-#define DEBUG_PORTAL_ALLOC
-#define DEBUG_SUBSYSTEM S_LND
-
-#include <linux/crc32.h>
-#include <linux/errno.h>
-#include <linux/if.h>
-#include <linux/init.h>
-#include <linux/kernel.h>
-#include <linux/kmod.h>
-#include <linux/list.h>
-#include <linux/mm.h>
-#include <linux/module.h>
-#include <linux/stat.h>
-#include <linux/string.h>
-#include <linux/syscalls.h>
-#include <linux/sysctl.h>
-#include <linux/uio.h>
-#include <linux/unistd.h>
-#include <asm/irq.h>
-#include <net/sock.h>
-#include <net/tcp.h>
-
-#include <linux/libcfs/libcfs.h>
-#include <linux/lnet/lib-lnet.h>
-#include <linux/lnet/socklnd.h>
-
-/* assume one thread for each connection type */
-#define SOCKNAL_NSCHEDS 3
-#define SOCKNAL_NSCHEDS_HIGH (SOCKNAL_NSCHEDS << 1)
-
-#define SOCKNAL_PEER_HASH_SIZE 101 /* # peer lists */
-#define SOCKNAL_RESCHED 100 /* # scheduler loops before reschedule */
-#define SOCKNAL_INSANITY_RECONN	5000	/* # reconnects after which connd is assumed stuck retrying forever */
-#define SOCKNAL_ENOMEM_RETRY CFS_TICK /* jiffies between retries */
-
-#define SOCKNAL_SINGLE_FRAG_TX 0 /* disable multi-fragment sends */
-#define SOCKNAL_SINGLE_FRAG_RX 0 /* disable multi-fragment receives */
-
-#define SOCKNAL_VERSION_DEBUG 0 /* enable protocol version debugging */
-
-/*
- * risk kmap deadlock on multi-frag I/O (backs off to single-frag if disabled);
- * there is no risk on platforms without CONFIG_HIGHMEM.
- */
-#ifdef CONFIG_HIGHMEM
-# define SOCKNAL_RISK_KMAP_DEADLOCK 0
-#else
-# define SOCKNAL_RISK_KMAP_DEADLOCK 1
-#endif
-
-struct ksock_sched_info;
-
-struct ksock_sched { /* per scheduler state */
- spinlock_t kss_lock; /* serialise */
- struct list_head kss_rx_conns; /* conn waiting to be read */
- struct list_head kss_tx_conns; /* conn waiting to be written */
- struct list_head kss_zombie_noop_txs; /* zombie noop tx list */
- wait_queue_head_t kss_waitq; /* where scheduler sleeps */
- int kss_nconns; /* # connections assigned to
- * this scheduler
- */
- struct ksock_sched_info *kss_info; /* owner of it */
-};
-
-struct ksock_sched_info {
- int ksi_nthreads_max; /* max allowed threads */
- int ksi_nthreads; /* number of threads */
- int ksi_cpt; /* CPT id */
- struct ksock_sched *ksi_scheds; /* array of schedulers */
-};
-
-#define KSOCK_CPT_SHIFT 16
-#define KSOCK_THREAD_ID(cpt, sid) (((cpt) << KSOCK_CPT_SHIFT) | (sid))
-#define KSOCK_THREAD_CPT(id) ((id) >> KSOCK_CPT_SHIFT)
-#define KSOCK_THREAD_SID(id) ((id) & ((1UL << KSOCK_CPT_SHIFT) - 1))
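-/*
- * e.g. KSOCK_THREAD_ID(2, 5) == 0x20005; KSOCK_THREAD_CPT() and
- * KSOCK_THREAD_SID() recover the CPT (2) and scheduler index (5)
- */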
-
-struct ksock_interface { /* in-use interface */
- __u32 ksni_ipaddr; /* interface's IP address */
- __u32 ksni_netmask; /* interface's network mask */
- int ksni_nroutes; /* # routes using (active) */
- int ksni_npeers; /* # peers using (passive) */
- char ksni_name[IFNAMSIZ]; /* interface name */
-};
-
-struct ksock_tunables {
- int *ksnd_timeout; /* "stuck" socket timeout
- * (seconds)
- */
- int *ksnd_nscheds; /* # scheduler threads in each
- * pool while starting
- */
- int *ksnd_nconnds; /* # connection daemons */
- int *ksnd_nconnds_max; /* max # connection daemons */
- int *ksnd_min_reconnectms; /* first connection retry after
- * (ms)...
- */
- int *ksnd_max_reconnectms; /* ...exponentially increasing to
- * this
- */
- int *ksnd_eager_ack; /* make TCP ack eagerly? */
- int *ksnd_typed_conns; /* drive sockets by type? */
- int *ksnd_min_bulk; /* smallest "large" message */
- int *ksnd_tx_buffer_size; /* socket tx buffer size */
- int *ksnd_rx_buffer_size; /* socket rx buffer size */
-	int		*ksnd_nagle;		/* enable Nagle? */
- int *ksnd_round_robin; /* round robin for multiple
- * interfaces
- */
- int *ksnd_keepalive; /* # secs for sending keepalive
- * NOOP
- */
- int *ksnd_keepalive_idle; /* # idle secs before 1st probe
- */
- int *ksnd_keepalive_count; /* # probes */
- int *ksnd_keepalive_intvl; /* time between probes */
- int *ksnd_credits; /* # concurrent sends */
- int *ksnd_peertxcredits; /* # concurrent sends to 1 peer
- */
- int *ksnd_peerrtrcredits; /* # per-peer router buffer
- * credits
- */
- int *ksnd_peertimeout; /* seconds to consider peer dead
- */
-	int		*ksnd_enable_csum;	/* enable checksum */
- int *ksnd_inject_csum_error; /* set non-zero to inject
- * checksum error
- */
- int *ksnd_nonblk_zcack; /* always send zc-ack on
- * non-blocking connection
- */
- unsigned int *ksnd_zc_min_payload; /* minimum zero copy payload
- * size
- */
- int *ksnd_zc_recv; /* enable ZC receive (for
- * Chelsio TOE)
- */
- int *ksnd_zc_recv_min_nfrags; /* minimum # of fragments to
- * enable ZC receive
- */
-};
-
-struct ksock_net {
- __u64 ksnn_incarnation; /* my epoch */
- spinlock_t ksnn_lock; /* serialise */
- struct list_head ksnn_list; /* chain on global list */
- int ksnn_npeers; /* # peers */
- int ksnn_shutdown; /* shutting down? */
- int ksnn_ninterfaces; /* IP interfaces */
- struct ksock_interface ksnn_interfaces[LNET_MAX_INTERFACES];
-};
-
-/** connd timeout */
-#define SOCKNAL_CONND_TIMEOUT 120
-/** reserved thread for accepting & creating new connd */
-#define SOCKNAL_CONND_RESV 1
-
-struct ksock_nal_data {
- int ksnd_init; /* initialisation state
- */
- int ksnd_nnets; /* # networks set up */
- struct list_head ksnd_nets; /* list of nets */
- rwlock_t ksnd_global_lock; /* stabilize peer/conn
- * ops
- */
- struct list_head *ksnd_peers; /* hash table of all my
- * known peers
- */
- int ksnd_peer_hash_size; /* size of ksnd_peers */
-
- int ksnd_nthreads; /* # live threads */
- int ksnd_shuttingdown; /* tell threads to exit
- */
- struct ksock_sched_info **ksnd_sched_info; /* schedulers info */
-
- atomic_t ksnd_nactive_txs; /* #active txs */
-
- struct list_head ksnd_deathrow_conns; /* conns to close:
- * reaper_lock
- */
- struct list_head ksnd_zombie_conns; /* conns to free:
- * reaper_lock
- */
- struct list_head ksnd_enomem_conns; /* conns to retry:
- * reaper_lock
- */
- wait_queue_head_t ksnd_reaper_waitq; /* reaper sleeps here */
- unsigned long ksnd_reaper_waketime; /* when reaper will wake
- */
- spinlock_t ksnd_reaper_lock; /* serialise */
-
- int ksnd_enomem_tx; /* test ENOMEM sender */
- int ksnd_stall_tx; /* test sluggish sender
- */
- int ksnd_stall_rx; /* test sluggish
- * receiver
- */
- struct list_head ksnd_connd_connreqs; /* incoming connection
- * requests
- */
- struct list_head ksnd_connd_routes; /* routes waiting to be
- * connected
- */
- wait_queue_head_t ksnd_connd_waitq; /* connds sleep here */
- int ksnd_connd_connecting; /* # connds connecting
- */
- time64_t ksnd_connd_failed_stamp;/* time stamp of the
- * last failed
- * connecting attempt
- */
- time64_t ksnd_connd_starting_stamp;/* time stamp of the
- * last starting connd
- */
- unsigned int ksnd_connd_starting; /* # starting connd */
- unsigned int ksnd_connd_running; /* # running connd */
- spinlock_t ksnd_connd_lock; /* serialise */
-
- struct list_head ksnd_idle_noop_txs; /* list head for freed
- * noop tx
- */
- spinlock_t ksnd_tx_lock; /* serialise, g_lock
- * unsafe
- */
-};
-
-#define SOCKNAL_INIT_NOTHING 0
-#define SOCKNAL_INIT_DATA 1
-#define SOCKNAL_INIT_ALL 2
-
-/*
- * A packet just assembled for transmission is represented by 1 or more
- * struct iovec fragments (the first frag contains the portals header),
- * followed by 0 or more struct bio_vec fragments.
- *
- * On the receive side, initially 1 struct iovec fragment is posted for
- * receive (the header). Once the header has been received, the payload is
- * received into either struct iovec or struct bio_vec fragments, depending on
- * what the header matched or whether the message needs forwarding.
- */
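-/*
- * e.g. an outgoing V2 message is one kvec frag holding the socklnd/LNet
- * header (filled in by pro_pack()) followed by the payload as further
- * kvec frags or as bio_vec page frags; see tx_frags in struct ksock_tx
- * below
- */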
-struct ksock_conn; /* forward ref */
-struct ksock_peer; /* forward ref */
-struct ksock_route; /* forward ref */
-struct ksock_proto; /* forward ref */
-
-struct ksock_tx { /* transmit packet */
- struct list_head tx_list; /* queue on conn for transmission etc
- */
- struct list_head tx_zc_list; /* queue on peer for ZC request */
- atomic_t tx_refcount; /* tx reference count */
- int tx_nob; /* # packet bytes */
- int tx_resid; /* residual bytes */
- int tx_niov; /* # packet iovec frags */
- struct kvec *tx_iov; /* packet iovec frags */
- int tx_nkiov; /* # packet page frags */
- unsigned short tx_zc_aborted; /* aborted ZC request */
- unsigned short tx_zc_capable:1; /* payload is large enough for ZC */
- unsigned short tx_zc_checked:1; /* Have I checked if I should ZC? */
- unsigned short tx_nonblk:1; /* it's a non-blocking ACK */
- struct bio_vec *tx_kiov; /* packet page frags */
- struct ksock_conn *tx_conn; /* owning conn */
- struct lnet_msg *tx_lnetmsg; /* lnet message for lnet_finalize()
- */
- unsigned long tx_deadline; /* when (in jiffies) tx times out */
- struct ksock_msg tx_msg; /* socklnd message buffer */
- int tx_desc_size; /* size of this descriptor */
- union {
- struct {
- struct kvec iov; /* virt hdr */
- struct bio_vec kiov[0]; /* paged payload */
- } paged;
- struct {
- struct kvec iov[1]; /* virt hdr + payload */
- } virt;
- } tx_frags;
-};
-
-#define KSOCK_NOOP_TX_SIZE (offsetof(struct ksock_tx, tx_frags.paged.kiov[0]))
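-/*
- * a noop tx carries only the message header, so its descriptor ends
- * where the paged payload frags would begin
- */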
-
-#define SOCKNAL_RX_KSM_HEADER 1 /* reading ksock message header */
-#define SOCKNAL_RX_LNET_HEADER 2 /* reading lnet message header */
-#define SOCKNAL_RX_PARSE 3 /* Calling lnet_parse() */
-#define SOCKNAL_RX_PARSE_WAIT 4 /* waiting to be told to read the body */
-#define SOCKNAL_RX_LNET_PAYLOAD 5 /* reading lnet payload (to deliver here) */
-#define SOCKNAL_RX_SLOP 6 /* skipping body */
-
-struct ksock_conn {
- struct ksock_peer *ksnc_peer; /* owning peer */
- struct ksock_route *ksnc_route; /* owning route */
- struct list_head ksnc_list; /* stash on peer's conn list */
- struct socket *ksnc_sock; /* actual socket */
- void *ksnc_saved_data_ready; /* socket's original
- * data_ready() callback
- */
- void *ksnc_saved_write_space; /* socket's original
- * write_space() callback
- */
- atomic_t ksnc_conn_refcount;/* conn refcount */
- atomic_t ksnc_sock_refcount;/* sock refcount */
- struct ksock_sched *ksnc_scheduler; /* who schedules this connection
- */
- __u32 ksnc_myipaddr; /* my IP */
- __u32 ksnc_ipaddr; /* peer's IP */
- int ksnc_port; /* peer's port */
- signed int ksnc_type:3; /* type of connection, should be
- * signed value
- */
- unsigned int ksnc_closing:1; /* being shut down */
- unsigned int ksnc_flip:1; /* flip or not, only for V2.x */
- unsigned int ksnc_zc_capable:1; /* enable to ZC */
- struct ksock_proto *ksnc_proto; /* protocol for the connection */
-
- /* reader */
- struct list_head ksnc_rx_list; /* where I enq waiting input or a
- * forwarding descriptor
- */
- unsigned long ksnc_rx_deadline; /* when (in jiffies) receive times
- * out
- */
- __u8 ksnc_rx_started; /* started receiving a message */
- __u8 ksnc_rx_ready; /* data ready to read */
- __u8 ksnc_rx_scheduled; /* being progressed */
- __u8 ksnc_rx_state; /* what is being read */
- int ksnc_rx_nob_left; /* # bytes to next hdr/body */
- struct iov_iter ksnc_rx_to; /* copy destination */
- struct kvec ksnc_rx_iov_space[LNET_MAX_IOV]; /* space for frag descriptors */
- __u32 ksnc_rx_csum; /* partial checksum for incoming
- * data
- */
- void *ksnc_cookie; /* rx lnet_finalize passthru arg
- */
- struct ksock_msg ksnc_msg; /* incoming message buffer:
- * V2.x message takes the
- * whole struct
- * V1.x message is a bare
- * struct lnet_hdr, it's stored in
- * ksnc_msg.ksm_u.lnetmsg
- */
- /* WRITER */
- struct list_head ksnc_tx_list; /* where I enq waiting for output
- * space
- */
- struct list_head ksnc_tx_queue; /* packets waiting to be sent */
- struct ksock_tx *ksnc_tx_carrier; /* next TX that can carry a LNet
- * message or ZC-ACK
- */
- unsigned long ksnc_tx_deadline; /* when (in jiffies) tx times out
- */
- int ksnc_tx_bufnob; /* send buffer marker */
- atomic_t ksnc_tx_nob; /* # bytes queued */
- int ksnc_tx_ready; /* write space */
- int ksnc_tx_scheduled; /* being progressed */
- unsigned long ksnc_tx_last_post; /* time stamp of the last posted
- * TX
- */
-};
-
-struct ksock_route {
- struct list_head ksnr_list; /* chain on peer route list */
- struct list_head ksnr_connd_list; /* chain on ksnr_connd_routes */
- struct ksock_peer *ksnr_peer; /* owning peer */
- atomic_t ksnr_refcount; /* # users */
- unsigned long ksnr_timeout; /* when (in jiffies) reconnection
- * can happen next
- */
- long ksnr_retry_interval; /* how long between retries */
- __u32 ksnr_myipaddr; /* my IP */
- __u32 ksnr_ipaddr; /* IP address to connect to */
- int ksnr_port; /* port to connect to */
- unsigned int ksnr_scheduled:1; /* scheduled for attention */
- unsigned int ksnr_connecting:1; /* connection establishment in
- * progress
- */
- unsigned int ksnr_connected:4; /* connections established by
- * type
- */
- unsigned int ksnr_deleted:1; /* been removed from peer? */
- unsigned int ksnr_share_count; /* created explicitly? */
- int ksnr_conn_count; /* # conns established by this
- * route
- */
-};
-
-#define SOCKNAL_KEEPALIVE_PING 1 /* cookie for keepalive ping */
-
-struct ksock_peer {
- struct list_head ksnp_list; /* stash on global peer list */
- unsigned long ksnp_last_alive; /* when (in jiffies) I was last
- * alive
- */
- struct lnet_process_id ksnp_id; /* who's on the other end(s) */
- atomic_t ksnp_refcount; /* # users */
- int ksnp_sharecount; /* lconf usage counter */
- int ksnp_closing; /* being closed */
- int ksnp_accepting; /* # passive connections pending
- */
- int ksnp_error; /* errno on closing last conn */
- __u64 ksnp_zc_next_cookie; /* ZC completion cookie */
- __u64 ksnp_incarnation; /* latest known peer incarnation
- */
- struct ksock_proto *ksnp_proto; /* latest known peer protocol */
- struct list_head ksnp_conns; /* all active connections */
- struct list_head ksnp_routes; /* routes */
- struct list_head ksnp_tx_queue; /* waiting packets */
- spinlock_t ksnp_lock; /* serialize, g_lock unsafe */
- struct list_head ksnp_zc_req_list; /* zero copy requests wait for
- * ACK
- */
- unsigned long ksnp_send_keepalive; /* time to send keepalive */
- struct lnet_ni *ksnp_ni; /* which network */
-	int			ksnp_n_passive_ips; /* # entries in ksnp_passive_ips[] */
-
- /* preferred local interfaces */
- __u32 ksnp_passive_ips[LNET_MAX_INTERFACES];
-};
-
-struct ksock_connreq {
- struct list_head ksncr_list; /* stash on ksnd_connd_connreqs */
- struct lnet_ni *ksncr_ni; /* chosen NI */
- struct socket *ksncr_sock; /* accepted socket */
-};
-
-extern struct ksock_nal_data ksocknal_data;
-extern struct ksock_tunables ksocknal_tunables;
-
-#define SOCKNAL_MATCH_NO 0 /* TX can't match type of connection */
-#define SOCKNAL_MATCH_YES 1 /* TX matches type of connection */
-#define SOCKNAL_MATCH_MAY 2 /* TX can be sent on the connection, but not
- * preferred
- */
-
-struct ksock_proto {
- /* version number of protocol */
- int pro_version;
-
- /* handshake function */
- int (*pro_send_hello)(struct ksock_conn *, struct ksock_hello_msg *);
-
- /* handshake function */
- int (*pro_recv_hello)(struct ksock_conn *, struct ksock_hello_msg *, int);
-
- /* message pack */
- void (*pro_pack)(struct ksock_tx *);
-
- /* message unpack */
- void (*pro_unpack)(struct ksock_msg *);
-
- /* queue tx on the connection */
- struct ksock_tx *(*pro_queue_tx_msg)(struct ksock_conn *, struct ksock_tx *);
-
- /* queue ZC ack on the connection */
- int (*pro_queue_tx_zcack)(struct ksock_conn *, struct ksock_tx *, __u64);
-
- /* handle ZC request */
- int (*pro_handle_zcreq)(struct ksock_conn *, __u64, int);
-
- /* handle ZC ACK */
- int (*pro_handle_zcack)(struct ksock_conn *, __u64, __u64);
-
- /*
- * msg type matches the connection type:
- * return value:
- * return MATCH_NO : no
- * return MATCH_YES : matching type
- * return MATCH_MAY : can be backup
- */
- int (*pro_match_tx)(struct ksock_conn *, struct ksock_tx *, int);
-};
-
-extern struct ksock_proto ksocknal_protocol_v1x;
-extern struct ksock_proto ksocknal_protocol_v2x;
-extern struct ksock_proto ksocknal_protocol_v3x;
-
-#define KSOCK_PROTO_V1_MAJOR LNET_PROTO_TCP_VERSION_MAJOR
-#define KSOCK_PROTO_V1_MINOR LNET_PROTO_TCP_VERSION_MINOR
-#define KSOCK_PROTO_V1 KSOCK_PROTO_V1_MAJOR
-
-#ifndef CPU_MASK_NONE
-#define CPU_MASK_NONE 0UL
-#endif
-
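-/*
- * bitmask of connection types a route must establish: just
- * SOCKLND_CONN_ANY when typed connections are disabled, otherwise one
- * bit each for control, bulk-in and bulk-out
- */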
-static inline int
-ksocknal_route_mask(void)
-{
- if (!*ksocknal_tunables.ksnd_typed_conns)
- return (1 << SOCKLND_CONN_ANY);
-
- return ((1 << SOCKLND_CONN_CONTROL) |
- (1 << SOCKLND_CONN_BULK_IN) |
- (1 << SOCKLND_CONN_BULK_OUT));
-}
-
-static inline struct list_head *
-ksocknal_nid2peerlist(lnet_nid_t nid)
-{
- unsigned int hash = ((unsigned int)nid) % ksocknal_data.ksnd_peer_hash_size;
-
- return &ksocknal_data.ksnd_peers[hash];
-}
-
-static inline void
-ksocknal_conn_addref(struct ksock_conn *conn)
-{
- LASSERT(atomic_read(&conn->ksnc_conn_refcount) > 0);
- atomic_inc(&conn->ksnc_conn_refcount);
-}
-
-void ksocknal_queue_zombie_conn(struct ksock_conn *conn);
-void ksocknal_finalize_zcreq(struct ksock_conn *conn);
-
-static inline void
-ksocknal_conn_decref(struct ksock_conn *conn)
-{
- LASSERT(atomic_read(&conn->ksnc_conn_refcount) > 0);
- if (atomic_dec_and_test(&conn->ksnc_conn_refcount))
- ksocknal_queue_zombie_conn(conn);
-}
-
-static inline int
-ksocknal_connsock_addref(struct ksock_conn *conn)
-{
- int rc = -ESHUTDOWN;
-
- read_lock(&ksocknal_data.ksnd_global_lock);
- if (!conn->ksnc_closing) {
- LASSERT(atomic_read(&conn->ksnc_sock_refcount) > 0);
- atomic_inc(&conn->ksnc_sock_refcount);
- rc = 0;
- }
- read_unlock(&ksocknal_data.ksnd_global_lock);
-
- return rc;
-}
-
-static inline void
-ksocknal_connsock_decref(struct ksock_conn *conn)
-{
- LASSERT(atomic_read(&conn->ksnc_sock_refcount) > 0);
- if (atomic_dec_and_test(&conn->ksnc_sock_refcount)) {
- LASSERT(conn->ksnc_closing);
- sock_release(conn->ksnc_sock);
- conn->ksnc_sock = NULL;
- ksocknal_finalize_zcreq(conn);
- }
-}
-
-static inline void
-ksocknal_tx_addref(struct ksock_tx *tx)
-{
- LASSERT(atomic_read(&tx->tx_refcount) > 0);
- atomic_inc(&tx->tx_refcount);
-}
-
-void ksocknal_tx_prep(struct ksock_conn *, struct ksock_tx *tx);
-void ksocknal_tx_done(struct lnet_ni *ni, struct ksock_tx *tx);
-
-static inline void
-ksocknal_tx_decref(struct ksock_tx *tx)
-{
- LASSERT(atomic_read(&tx->tx_refcount) > 0);
- if (atomic_dec_and_test(&tx->tx_refcount))
- ksocknal_tx_done(NULL, tx);
-}
-
-static inline void
-ksocknal_route_addref(struct ksock_route *route)
-{
- LASSERT(atomic_read(&route->ksnr_refcount) > 0);
- atomic_inc(&route->ksnr_refcount);
-}
-
-void ksocknal_destroy_route(struct ksock_route *route);
-
-static inline void
-ksocknal_route_decref(struct ksock_route *route)
-{
- LASSERT(atomic_read(&route->ksnr_refcount) > 0);
- if (atomic_dec_and_test(&route->ksnr_refcount))
- ksocknal_destroy_route(route);
-}
-
-static inline void
-ksocknal_peer_addref(struct ksock_peer *peer)
-{
- LASSERT(atomic_read(&peer->ksnp_refcount) > 0);
- atomic_inc(&peer->ksnp_refcount);
-}
-
-void ksocknal_destroy_peer(struct ksock_peer *peer);
-
-static inline void
-ksocknal_peer_decref(struct ksock_peer *peer)
-{
- LASSERT(atomic_read(&peer->ksnp_refcount) > 0);
- if (atomic_dec_and_test(&peer->ksnp_refcount))
- ksocknal_destroy_peer(peer);
-}
-
-int ksocknal_startup(struct lnet_ni *ni);
-void ksocknal_shutdown(struct lnet_ni *ni);
-int ksocknal_ctl(struct lnet_ni *ni, unsigned int cmd, void *arg);
-int ksocknal_send(struct lnet_ni *ni, void *private, struct lnet_msg *lntmsg);
-int ksocknal_recv(struct lnet_ni *ni, void *private, struct lnet_msg *lntmsg,
- int delayed, struct iov_iter *to, unsigned int rlen);
-int ksocknal_accept(struct lnet_ni *ni, struct socket *sock);
-
-int ksocknal_add_peer(struct lnet_ni *ni, struct lnet_process_id id, __u32 ip,
- int port);
-struct ksock_peer *ksocknal_find_peer_locked(struct lnet_ni *ni,
- struct lnet_process_id id);
-struct ksock_peer *ksocknal_find_peer(struct lnet_ni *ni,
- struct lnet_process_id id);
-void ksocknal_peer_failed(struct ksock_peer *peer);
-int ksocknal_create_conn(struct lnet_ni *ni, struct ksock_route *route,
- struct socket *sock, int type);
-void ksocknal_close_conn_locked(struct ksock_conn *conn, int why);
-void ksocknal_terminate_conn(struct ksock_conn *conn);
-void ksocknal_destroy_conn(struct ksock_conn *conn);
-int ksocknal_close_peer_conns_locked(struct ksock_peer *peer,
- __u32 ipaddr, int why);
-int ksocknal_close_conn_and_siblings(struct ksock_conn *conn, int why);
-int ksocknal_close_matching_conns(struct lnet_process_id id, __u32 ipaddr);
-struct ksock_conn *ksocknal_find_conn_locked(struct ksock_peer *peer,
- struct ksock_tx *tx, int nonblk);
-
-int ksocknal_launch_packet(struct lnet_ni *ni, struct ksock_tx *tx,
- struct lnet_process_id id);
-struct ksock_tx *ksocknal_alloc_tx(int type, int size);
-void ksocknal_free_tx(struct ksock_tx *tx);
-struct ksock_tx *ksocknal_alloc_tx_noop(__u64 cookie, int nonblk);
-void ksocknal_next_tx_carrier(struct ksock_conn *conn);
-void ksocknal_queue_tx_locked(struct ksock_tx *tx, struct ksock_conn *conn);
-void ksocknal_txlist_done(struct lnet_ni *ni, struct list_head *txlist, int error);
-void ksocknal_notify(struct lnet_ni *ni, lnet_nid_t gw_nid, int alive);
-void ksocknal_query(struct lnet_ni *ni, lnet_nid_t nid, unsigned long *when);
-int ksocknal_thread_start(int (*fn)(void *arg), void *arg, char *name);
-void ksocknal_thread_fini(void);
-void ksocknal_launch_all_connections_locked(struct ksock_peer *peer);
-struct ksock_route *ksocknal_find_connectable_route_locked(struct ksock_peer *peer);
-struct ksock_route *ksocknal_find_connecting_route_locked(struct ksock_peer *peer);
-int ksocknal_new_packet(struct ksock_conn *conn, int skip);
-int ksocknal_scheduler(void *arg);
-int ksocknal_connd(void *arg);
-int ksocknal_reaper(void *arg);
-int ksocknal_send_hello(struct lnet_ni *ni, struct ksock_conn *conn,
- lnet_nid_t peer_nid, struct ksock_hello_msg *hello);
-int ksocknal_recv_hello(struct lnet_ni *ni, struct ksock_conn *conn,
- struct ksock_hello_msg *hello,
- struct lnet_process_id *id,
- __u64 *incarnation);
-void ksocknal_read_callback(struct ksock_conn *conn);
-void ksocknal_write_callback(struct ksock_conn *conn);
-
-int ksocknal_lib_zc_capable(struct ksock_conn *conn);
-void ksocknal_lib_save_callback(struct socket *sock, struct ksock_conn *conn);
-void ksocknal_lib_set_callback(struct socket *sock, struct ksock_conn *conn);
-void ksocknal_lib_reset_callback(struct socket *sock, struct ksock_conn *conn);
-void ksocknal_lib_push_conn(struct ksock_conn *conn);
-int ksocknal_lib_get_conn_addrs(struct ksock_conn *conn);
-int ksocknal_lib_setup_sock(struct socket *so);
-int ksocknal_lib_send_iov(struct ksock_conn *conn, struct ksock_tx *tx);
-int ksocknal_lib_send_kiov(struct ksock_conn *conn, struct ksock_tx *tx);
-void ksocknal_lib_eager_ack(struct ksock_conn *conn);
-int ksocknal_lib_recv(struct ksock_conn *conn);
-int ksocknal_lib_get_conn_tunables(struct ksock_conn *conn, int *txmem,
- int *rxmem, int *nagle);
-
-int ksocknal_tunables_init(void);
-
-void ksocknal_lib_csum_tx(struct ksock_tx *tx);
-
-int ksocknal_lib_memory_pressure(struct ksock_conn *conn);
-int ksocknal_lib_bind_thread_to_cpu(int id);
-
-#endif /* _SOCKLND_SOCKLND_H_ */
diff --git a/drivers/staging/lustre/lnet/klnds/socklnd/socklnd_cb.c b/drivers/staging/lustre/lnet/klnds/socklnd/socklnd_cb.c
deleted file mode 100644
index 036fecbcede8..000000000000
--- a/drivers/staging/lustre/lnet/klnds/socklnd/socklnd_cb.c
+++ /dev/null
@@ -1,2592 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/*
- * Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved.
- *
- * Copyright (c) 2011, 2012, Intel Corporation.
- *
- * Author: Zach Brown <zab@zabbo.net>
- * Author: Peter J. Braam <braam@clusterfs.com>
- * Author: Phil Schwan <phil@clusterfs.com>
- * Author: Eric Barton <eric@bartonsoftware.com>
- *
- * This file is part of Portals, http://www.sf.net/projects/sandiaportals/
- *
- * Portals is free software; you can redistribute it and/or
- * modify it under the terms of version 2 of the GNU General Public
- * License as published by the Free Software Foundation.
- *
- * Portals is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- */
-
-#include "socklnd.h"
-
-struct ksock_tx *
-ksocknal_alloc_tx(int type, int size)
-{
- struct ksock_tx *tx = NULL;
-
- if (type == KSOCK_MSG_NOOP) {
- LASSERT(size == KSOCK_NOOP_TX_SIZE);
-
-		/* search for a noop tx in the free list */
- spin_lock(&ksocknal_data.ksnd_tx_lock);
-
- if (!list_empty(&ksocknal_data.ksnd_idle_noop_txs)) {
- tx = list_entry(ksocknal_data.ksnd_idle_noop_txs.next,
- struct ksock_tx, tx_list);
- LASSERT(tx->tx_desc_size == size);
- list_del(&tx->tx_list);
- }
-
- spin_unlock(&ksocknal_data.ksnd_tx_lock);
- }
-
- if (!tx)
- tx = kzalloc(size, GFP_NOFS);
-
- if (!tx)
- return NULL;
-
- atomic_set(&tx->tx_refcount, 1);
- tx->tx_zc_aborted = 0;
- tx->tx_zc_capable = 0;
- tx->tx_zc_checked = 0;
- tx->tx_desc_size = size;
-
- atomic_inc(&ksocknal_data.ksnd_nactive_txs);
-
- return tx;
-}
-
-struct ksock_tx *
-ksocknal_alloc_tx_noop(__u64 cookie, int nonblk)
-{
- struct ksock_tx *tx;
-
- tx = ksocknal_alloc_tx(KSOCK_MSG_NOOP, KSOCK_NOOP_TX_SIZE);
- if (!tx) {
- CERROR("Can't allocate noop tx desc\n");
- return NULL;
- }
-
- tx->tx_conn = NULL;
- tx->tx_lnetmsg = NULL;
- tx->tx_kiov = NULL;
- tx->tx_nkiov = 0;
- tx->tx_iov = tx->tx_frags.virt.iov;
- tx->tx_niov = 1;
- tx->tx_nonblk = nonblk;
-
- tx->tx_msg.ksm_csum = 0;
- tx->tx_msg.ksm_type = KSOCK_MSG_NOOP;
- tx->tx_msg.ksm_zc_cookies[0] = 0;
- tx->tx_msg.ksm_zc_cookies[1] = cookie;
-
- return tx;
-}
-
-void
-ksocknal_free_tx(struct ksock_tx *tx)
-{
- atomic_dec(&ksocknal_data.ksnd_nactive_txs);
-
- if (!tx->tx_lnetmsg && tx->tx_desc_size == KSOCK_NOOP_TX_SIZE) {
- /* it's a noop tx */
- spin_lock(&ksocknal_data.ksnd_tx_lock);
-
- list_add(&tx->tx_list, &ksocknal_data.ksnd_idle_noop_txs);
-
- spin_unlock(&ksocknal_data.ksnd_tx_lock);
- } else {
- kfree(tx);
- }
-}
-
-static int
-ksocknal_send_iov(struct ksock_conn *conn, struct ksock_tx *tx)
-{
- struct kvec *iov = tx->tx_iov;
- int nob;
- int rc;
-
- LASSERT(tx->tx_niov > 0);
-
- /* Never touch tx->tx_iov inside ksocknal_lib_send_iov() */
- rc = ksocknal_lib_send_iov(conn, tx);
-
- if (rc <= 0) /* sent nothing? */
- return rc;
-
- nob = rc;
- LASSERT(nob <= tx->tx_resid);
- tx->tx_resid -= nob;
-
- /* "consume" iov */
- do {
- LASSERT(tx->tx_niov > 0);
-
- if (nob < (int)iov->iov_len) {
- iov->iov_base = (void *)((char *)iov->iov_base + nob);
- iov->iov_len -= nob;
- return rc;
- }
-
- nob -= iov->iov_len;
- tx->tx_iov = ++iov;
- tx->tx_niov--;
- } while (nob);
-
- return rc;
-}
-
-static int
-ksocknal_send_kiov(struct ksock_conn *conn, struct ksock_tx *tx)
-{
- struct bio_vec *kiov = tx->tx_kiov;
- int nob;
- int rc;
-
- LASSERT(!tx->tx_niov);
- LASSERT(tx->tx_nkiov > 0);
-
- /* Never touch tx->tx_kiov inside ksocknal_lib_send_kiov() */
- rc = ksocknal_lib_send_kiov(conn, tx);
-
- if (rc <= 0) /* sent nothing? */
- return rc;
-
- nob = rc;
- LASSERT(nob <= tx->tx_resid);
- tx->tx_resid -= nob;
-
- /* "consume" kiov */
- do {
- LASSERT(tx->tx_nkiov > 0);
-
- if (nob < (int)kiov->bv_len) {
- kiov->bv_offset += nob;
- kiov->bv_len -= nob;
- return rc;
- }
-
- nob -= (int)kiov->bv_len;
- tx->tx_kiov = ++kiov;
- tx->tx_nkiov--;
- } while (nob);
-
- return rc;
-}
-
-static int
-ksocknal_transmit(struct ksock_conn *conn, struct ksock_tx *tx)
-{
- int rc;
- int bufnob;
-
- if (ksocknal_data.ksnd_stall_tx) {
- set_current_state(TASK_UNINTERRUPTIBLE);
- schedule_timeout(ksocknal_data.ksnd_stall_tx * HZ);
- }
-
- LASSERT(tx->tx_resid);
-
- rc = ksocknal_connsock_addref(conn);
- if (rc) {
- LASSERT(conn->ksnc_closing);
- return -ESHUTDOWN;
- }
-
- do {
- if (ksocknal_data.ksnd_enomem_tx > 0) {
- /* testing... */
- ksocknal_data.ksnd_enomem_tx--;
- rc = -EAGAIN;
- } else if (tx->tx_niov) {
- rc = ksocknal_send_iov(conn, tx);
- } else {
- rc = ksocknal_send_kiov(conn, tx);
- }
-
- bufnob = conn->ksnc_sock->sk->sk_wmem_queued;
- if (rc > 0) /* sent something? */
- conn->ksnc_tx_bufnob += rc; /* account it */
-
- if (bufnob < conn->ksnc_tx_bufnob) {
- /*
- * allocated send buffer bytes < computed; infer
- * something got ACKed
- */
- conn->ksnc_tx_deadline =
- cfs_time_shift(*ksocknal_tunables.ksnd_timeout);
- conn->ksnc_peer->ksnp_last_alive = cfs_time_current();
- conn->ksnc_tx_bufnob = bufnob;
- mb();
- }
-
- if (rc <= 0) { /* Didn't write anything? */
-
- if (!rc) /* some stacks return 0 instead of -EAGAIN */
- rc = -EAGAIN;
-
- /* Check if EAGAIN is due to memory pressure */
- if (rc == -EAGAIN && ksocknal_lib_memory_pressure(conn))
- rc = -ENOMEM;
-
- break;
- }
-
- /* socket's wmem_queued now includes 'rc' bytes */
- atomic_sub(rc, &conn->ksnc_tx_nob);
- rc = 0;
-
- } while (tx->tx_resid);
-
- ksocknal_connsock_decref(conn);
- return rc;
-}
-
-static int
-ksocknal_recv_iter(struct ksock_conn *conn)
-{
- int nob;
- int rc;
-
- /*
- * Never touch conn->ksnc_rx_to or change connection
- * status inside ksocknal_lib_recv
- */
- rc = ksocknal_lib_recv(conn);
-
- if (rc <= 0)
- return rc;
-
- /* received something... */
- nob = rc;
-
- conn->ksnc_peer->ksnp_last_alive = cfs_time_current();
- conn->ksnc_rx_deadline =
- cfs_time_shift(*ksocknal_tunables.ksnd_timeout);
- mb(); /* order with setting rx_started */
- conn->ksnc_rx_started = 1;
-
- conn->ksnc_rx_nob_left -= nob;
-
- iov_iter_advance(&conn->ksnc_rx_to, nob);
- if (iov_iter_count(&conn->ksnc_rx_to))
- return -EAGAIN;
-
- return 1;
-}
-
-static int
-ksocknal_receive(struct ksock_conn *conn)
-{
- /*
- * Return 1 on success, 0 on EOF, < 0 on error.
- * Caller checks ksnc_rx_to to determine
- * progress/completion.
- */
- int rc;
-
- if (ksocknal_data.ksnd_stall_rx) {
- set_current_state(TASK_UNINTERRUPTIBLE);
- schedule_timeout(ksocknal_data.ksnd_stall_rx * HZ);
- }
-
- rc = ksocknal_connsock_addref(conn);
- if (rc) {
- LASSERT(conn->ksnc_closing);
- return -ESHUTDOWN;
- }
-
- for (;;) {
- rc = ksocknal_recv_iter(conn);
- if (rc <= 0) {
- /* error/EOF or partial receive */
- if (rc == -EAGAIN) {
- rc = 1;
- } else if (!rc && conn->ksnc_rx_started) {
- /* EOF in the middle of a message */
- rc = -EPROTO;
- }
- break;
- }
-
- /* Completed a fragment */
-
- if (!iov_iter_count(&conn->ksnc_rx_to)) {
- rc = 1;
- break;
- }
- }
-
- ksocknal_connsock_decref(conn);
- return rc;
-}
-
-void
-ksocknal_tx_done(struct lnet_ni *ni, struct ksock_tx *tx)
-{
- struct lnet_msg *lnetmsg = tx->tx_lnetmsg;
- int rc = (!tx->tx_resid && !tx->tx_zc_aborted) ? 0 : -EIO;
-
- LASSERT(ni || tx->tx_conn);
-
- if (tx->tx_conn)
- ksocknal_conn_decref(tx->tx_conn);
-
- if (!ni && tx->tx_conn)
- ni = tx->tx_conn->ksnc_peer->ksnp_ni;
-
- ksocknal_free_tx(tx);
-	if (lnetmsg) /* KSOCK_MSG_NOOPs have no lnetmsg */
- lnet_finalize(ni, lnetmsg, rc);
-}
-
-void
-ksocknal_txlist_done(struct lnet_ni *ni, struct list_head *txlist, int error)
-{
- struct ksock_tx *tx;
-
- while (!list_empty(txlist)) {
- tx = list_entry(txlist->next, struct ksock_tx, tx_list);
-
- if (error && tx->tx_lnetmsg) {
- CNETERR("Deleting packet type %d len %d %s->%s\n",
- le32_to_cpu(tx->tx_lnetmsg->msg_hdr.type),
- le32_to_cpu(tx->tx_lnetmsg->msg_hdr.payload_length),
- libcfs_nid2str(le64_to_cpu(tx->tx_lnetmsg->msg_hdr.src_nid)),
- libcfs_nid2str(le64_to_cpu(tx->tx_lnetmsg->msg_hdr.dest_nid)));
- } else if (error) {
- CNETERR("Deleting noop packet\n");
- }
-
- list_del(&tx->tx_list);
-
- LASSERT(atomic_read(&tx->tx_refcount) == 1);
- ksocknal_tx_done(ni, tx);
- }
-}
-
-static void
-ksocknal_check_zc_req(struct ksock_tx *tx)
-{
- struct ksock_conn *conn = tx->tx_conn;
- struct ksock_peer *peer = conn->ksnc_peer;
-
- /*
- * Set tx_msg.ksm_zc_cookies[0] to a unique non-zero cookie and add tx
- * to ksnp_zc_req_list if some fragment of this message should be sent
- * zero-copy. Our peer will send an ACK containing this cookie when
- * it has received this message to tell us we can signal completion.
- * tx_msg.ksm_zc_cookies[0] remains non-zero while tx is on
- * ksnp_zc_req_list.
- */
- LASSERT(tx->tx_msg.ksm_type != KSOCK_MSG_NOOP);
- LASSERT(tx->tx_zc_capable);
-
- tx->tx_zc_checked = 1;
-
- if (conn->ksnc_proto == &ksocknal_protocol_v1x ||
- !conn->ksnc_zc_capable)
- return;
-
- /*
- * assign cookie and queue tx to pending list, it will be released when
- * a matching ack is received. See ksocknal_handle_zcack()
- */
- ksocknal_tx_addref(tx);
-
- spin_lock(&peer->ksnp_lock);
-
- /* ZC_REQ is going to be pinned to the peer */
- tx->tx_deadline =
- cfs_time_shift(*ksocknal_tunables.ksnd_timeout);
-
- LASSERT(!tx->tx_msg.ksm_zc_cookies[0]);
-
- tx->tx_msg.ksm_zc_cookies[0] = peer->ksnp_zc_next_cookie++;
-
- if (!peer->ksnp_zc_next_cookie)
- peer->ksnp_zc_next_cookie = SOCKNAL_KEEPALIVE_PING + 1;
-
- list_add_tail(&tx->tx_zc_list, &peer->ksnp_zc_req_list);
-
- spin_unlock(&peer->ksnp_lock);
-}
-
-static void
-ksocknal_uncheck_zc_req(struct ksock_tx *tx)
-{
- struct ksock_peer *peer = tx->tx_conn->ksnc_peer;
-
- LASSERT(tx->tx_msg.ksm_type != KSOCK_MSG_NOOP);
- LASSERT(tx->tx_zc_capable);
-
- tx->tx_zc_checked = 0;
-
- spin_lock(&peer->ksnp_lock);
-
- if (!tx->tx_msg.ksm_zc_cookies[0]) {
- /* Not waiting for an ACK */
- spin_unlock(&peer->ksnp_lock);
- return;
- }
-
- tx->tx_msg.ksm_zc_cookies[0] = 0;
- list_del(&tx->tx_zc_list);
-
- spin_unlock(&peer->ksnp_lock);
-
- ksocknal_tx_decref(tx);
-}
-
-static int
-ksocknal_process_transmit(struct ksock_conn *conn, struct ksock_tx *tx)
-{
- int rc;
-
- if (tx->tx_zc_capable && !tx->tx_zc_checked)
- ksocknal_check_zc_req(tx);
-
- rc = ksocknal_transmit(conn, tx);
-
- CDEBUG(D_NET, "send(%d) %d\n", tx->tx_resid, rc);
-
- if (!tx->tx_resid) {
- /* Sent everything OK */
- LASSERT(!rc);
-
- return 0;
- }
-
- if (rc == -EAGAIN)
- return rc;
-
- if (rc == -ENOMEM) {
- static int counter;
-
-		counter++;
-		/* warn only when counter is a power of two: 1, 2, 4, 8, ... */
-		if ((counter & (-counter)) == counter)
- CWARN("%u ENOMEM tx %p\n", counter, conn);
-
- /* Queue on ksnd_enomem_conns for retry after a timeout */
- spin_lock_bh(&ksocknal_data.ksnd_reaper_lock);
-
- /* enomem list takes over scheduler's ref... */
- LASSERT(conn->ksnc_tx_scheduled);
- list_add_tail(&conn->ksnc_tx_list,
- &ksocknal_data.ksnd_enomem_conns);
- if (!cfs_time_aftereq(cfs_time_add(cfs_time_current(),
- SOCKNAL_ENOMEM_RETRY),
- ksocknal_data.ksnd_reaper_waketime))
- wake_up(&ksocknal_data.ksnd_reaper_waitq);
-
- spin_unlock_bh(&ksocknal_data.ksnd_reaper_lock);
- return rc;
- }
-
- /* Actual error */
- LASSERT(rc < 0);
-
- if (!conn->ksnc_closing) {
- switch (rc) {
- case -ECONNRESET:
- LCONSOLE_WARN("Host %pI4h reset our connection while we were sending data; it may have rebooted.\n",
- &conn->ksnc_ipaddr);
- break;
- default:
- LCONSOLE_WARN("There was an unexpected network error while writing to %pI4h: %d.\n",
- &conn->ksnc_ipaddr, rc);
- break;
- }
- CDEBUG(D_NET, "[%p] Error %d on write to %s ip %pI4h:%d\n",
- conn, rc,
- libcfs_id2str(conn->ksnc_peer->ksnp_id),
- &conn->ksnc_ipaddr,
- conn->ksnc_port);
- }
-
- if (tx->tx_zc_checked)
- ksocknal_uncheck_zc_req(tx);
-
- /* it's not an error if conn is being closed */
- ksocknal_close_conn_and_siblings(conn, (conn->ksnc_closing) ? 0 : rc);
-
- return rc;
-}
-
-static void
-ksocknal_launch_connection_locked(struct ksock_route *route)
-{
- /* called holding write lock on ksnd_global_lock */
-
- LASSERT(!route->ksnr_scheduled);
- LASSERT(!route->ksnr_connecting);
- LASSERT(ksocknal_route_mask() & ~route->ksnr_connected);
-
- route->ksnr_scheduled = 1; /* scheduling conn for connd */
- ksocknal_route_addref(route); /* extra ref for connd */
-
- spin_lock_bh(&ksocknal_data.ksnd_connd_lock);
-
- list_add_tail(&route->ksnr_connd_list,
- &ksocknal_data.ksnd_connd_routes);
- wake_up(&ksocknal_data.ksnd_connd_waitq);
-
- spin_unlock_bh(&ksocknal_data.ksnd_connd_lock);
-}
-
-void
-ksocknal_launch_all_connections_locked(struct ksock_peer *peer)
-{
- struct ksock_route *route;
-
- /* called holding write lock on ksnd_global_lock */
- for (;;) {
- /* launch any/all connections that need it */
- route = ksocknal_find_connectable_route_locked(peer);
- if (!route)
- return;
-
- ksocknal_launch_connection_locked(route);
- }
-}
-
-struct ksock_conn *
-ksocknal_find_conn_locked(struct ksock_peer *peer, struct ksock_tx *tx,
- int nonblk)
-{
- struct list_head *tmp;
- struct ksock_conn *conn;
- struct ksock_conn *typed = NULL;
- struct ksock_conn *fallback = NULL;
- int tnob = 0;
- int fnob = 0;
-
- list_for_each(tmp, &peer->ksnp_conns) {
- struct ksock_conn *c;
- int nob, rc;
-
- c = list_entry(tmp, struct ksock_conn, ksnc_list);
- nob = atomic_read(&c->ksnc_tx_nob) +
- c->ksnc_sock->sk->sk_wmem_queued;
-
- LASSERT(!c->ksnc_closing);
- LASSERT(c->ksnc_proto &&
- c->ksnc_proto->pro_match_tx);
-
- rc = c->ksnc_proto->pro_match_tx(c, tx, nonblk);
-
- switch (rc) {
- default:
- LBUG();
- case SOCKNAL_MATCH_NO: /* protocol rejected the tx */
- continue;
-
- case SOCKNAL_MATCH_YES: /* typed connection */
- if (!typed || tnob > nob ||
- (tnob == nob && *ksocknal_tunables.ksnd_round_robin &&
- cfs_time_after(typed->ksnc_tx_last_post, c->ksnc_tx_last_post))) {
- typed = c;
- tnob = nob;
- }
- break;
-
- case SOCKNAL_MATCH_MAY: /* fallback connection */
- if (!fallback || fnob > nob ||
- (fnob == nob && *ksocknal_tunables.ksnd_round_robin &&
- cfs_time_after(fallback->ksnc_tx_last_post, c->ksnc_tx_last_post))) {
- fallback = c;
- fnob = nob;
- }
- break;
- }
- }
-
- /* prefer the typed selection */
- conn = (typed) ? typed : fallback;
-
- if (conn)
- conn->ksnc_tx_last_post = cfs_time_current();
-
- return conn;
-}
-
-void
-ksocknal_tx_prep(struct ksock_conn *conn, struct ksock_tx *tx)
-{
- conn->ksnc_proto->pro_pack(tx);
-
- atomic_add(tx->tx_nob, &conn->ksnc_tx_nob);
- ksocknal_conn_addref(conn); /* +1 ref for tx */
- tx->tx_conn = conn;
-}
-
-void
-ksocknal_queue_tx_locked(struct ksock_tx *tx, struct ksock_conn *conn)
-{
- struct ksock_sched *sched = conn->ksnc_scheduler;
- struct ksock_msg *msg = &tx->tx_msg;
- struct ksock_tx *ztx = NULL;
- int bufnob = 0;
-
- /*
- * called holding global lock (read or irq-write) and caller may
- * not have dropped this lock between finding conn and calling me,
- * so we don't need the {get,put}connsock dance to deref
- * ksnc_sock...
- */
- LASSERT(!conn->ksnc_closing);
-
- CDEBUG(D_NET, "Sending to %s ip %pI4h:%d\n",
- libcfs_id2str(conn->ksnc_peer->ksnp_id),
- &conn->ksnc_ipaddr, conn->ksnc_port);
-
- ksocknal_tx_prep(conn, tx);
-
- /*
- * Ensure the frags we've been given EXACTLY match the number of
- * bytes we want to send. Many TCP/IP stacks disregard any total
- * size parameters passed to them and just look at the frags.
- *
- * We always expect at least 1 mapped fragment containing the
- * complete ksocknal message header.
- */
- LASSERT(lnet_iov_nob(tx->tx_niov, tx->tx_iov) +
- lnet_kiov_nob(tx->tx_nkiov, tx->tx_kiov) ==
- (unsigned int)tx->tx_nob);
- LASSERT(tx->tx_niov >= 1);
- LASSERT(tx->tx_resid == tx->tx_nob);
-
- CDEBUG(D_NET, "Packet %p type %d, nob %d niov %d nkiov %d\n",
- tx, (tx->tx_lnetmsg) ? tx->tx_lnetmsg->msg_hdr.type :
- KSOCK_MSG_NOOP,
- tx->tx_nob, tx->tx_niov, tx->tx_nkiov);
-
- /*
- * FIXME: SOCK_WMEM_QUEUED and SOCK_ERROR could block in __DARWIN8__
- * but they're used inside spinlocks a lot.
- */
- bufnob = conn->ksnc_sock->sk->sk_wmem_queued;
- spin_lock_bh(&sched->kss_lock);
-
- if (list_empty(&conn->ksnc_tx_queue) && !bufnob) {
- /* First packet starts the timeout */
- conn->ksnc_tx_deadline =
- cfs_time_shift(*ksocknal_tunables.ksnd_timeout);
- if (conn->ksnc_tx_bufnob > 0) /* something got ACKed */
- conn->ksnc_peer->ksnp_last_alive = cfs_time_current();
- conn->ksnc_tx_bufnob = 0;
- mb(); /* order with adding to tx_queue */
- }
-
- if (msg->ksm_type == KSOCK_MSG_NOOP) {
-		/*
-		 * The packet is a noop ZC-ACK; try to piggyback its ack
-		 * cookie on a normal packet so it needn't be sent on its own
-		 */
- LASSERT(msg->ksm_zc_cookies[1]);
- LASSERT(conn->ksnc_proto->pro_queue_tx_zcack);
-
-		/* ZC-ACK was piggybacked; release this tx as a zombie later */
- if (conn->ksnc_proto->pro_queue_tx_zcack(conn, tx, 0))
- ztx = tx;
- } else {
-		/*
-		 * It's a normal packet - can it piggyback a noop zc-ack that
-		 * has been queued already?
-		 */
- LASSERT(!msg->ksm_zc_cookies[1]);
- LASSERT(conn->ksnc_proto->pro_queue_tx_msg);
-
- ztx = conn->ksnc_proto->pro_queue_tx_msg(conn, tx);
- /* ztx will be released later */
- }
-
- if (ztx) {
- atomic_sub(ztx->tx_nob, &conn->ksnc_tx_nob);
- list_add_tail(&ztx->tx_list, &sched->kss_zombie_noop_txs);
- }
-
- if (conn->ksnc_tx_ready && /* able to send */
- !conn->ksnc_tx_scheduled) { /* not scheduled to send */
- /* +1 ref for scheduler */
- ksocknal_conn_addref(conn);
- list_add_tail(&conn->ksnc_tx_list, &sched->kss_tx_conns);
- conn->ksnc_tx_scheduled = 1;
- wake_up(&sched->kss_waitq);
- }
-
- spin_unlock_bh(&sched->kss_lock);
-}
-
-struct ksock_route *
-ksocknal_find_connectable_route_locked(struct ksock_peer *peer)
-{
- unsigned long now = cfs_time_current();
- struct list_head *tmp;
- struct ksock_route *route;
-
- list_for_each(tmp, &peer->ksnp_routes) {
- route = list_entry(tmp, struct ksock_route, ksnr_list);
-
- LASSERT(!route->ksnr_connecting || route->ksnr_scheduled);
-
- /* connections being established */
- if (route->ksnr_scheduled)
- continue;
-
- /* all route types connected ? */
- if (!(ksocknal_route_mask() & ~route->ksnr_connected))
- continue;
-
- if (!(!route->ksnr_retry_interval || /* first attempt */
- cfs_time_aftereq(now, route->ksnr_timeout))) {
- CDEBUG(D_NET,
- "Too soon to retry route %pI4h (cnted %d, interval %ld, %ld secs later)\n",
- &route->ksnr_ipaddr,
- route->ksnr_connected,
- route->ksnr_retry_interval,
- cfs_duration_sec(route->ksnr_timeout - now));
- continue;
- }
-
- return route;
- }
-
- return NULL;
-}
-
-struct ksock_route *
-ksocknal_find_connecting_route_locked(struct ksock_peer *peer)
-{
- struct list_head *tmp;
- struct ksock_route *route;
-
- list_for_each(tmp, &peer->ksnp_routes) {
- route = list_entry(tmp, struct ksock_route, ksnr_list);
-
- LASSERT(!route->ksnr_connecting || route->ksnr_scheduled);
-
- if (route->ksnr_scheduled)
- return route;
- }
-
- return NULL;
-}
-
-int
-ksocknal_launch_packet(struct lnet_ni *ni, struct ksock_tx *tx,
- struct lnet_process_id id)
-{
- struct ksock_peer *peer;
- struct ksock_conn *conn;
- rwlock_t *g_lock;
- int retry;
- int rc;
-
- LASSERT(!tx->tx_conn);
-
- g_lock = &ksocknal_data.ksnd_global_lock;
-
- for (retry = 0;; retry = 1) {
- read_lock(g_lock);
- peer = ksocknal_find_peer_locked(ni, id);
- if (peer) {
- if (!ksocknal_find_connectable_route_locked(peer)) {
- conn = ksocknal_find_conn_locked(peer, tx, tx->tx_nonblk);
- if (conn) {
- /*
- * I've got no routes that need to be
- * connecting and I do have an actual
- * connection...
- */
- ksocknal_queue_tx_locked(tx, conn);
- read_unlock(g_lock);
- return 0;
- }
- }
- }
-
- /* I'll need a write lock... */
- read_unlock(g_lock);
-
- write_lock_bh(g_lock);
-
- peer = ksocknal_find_peer_locked(ni, id);
- if (peer)
- break;
-
- write_unlock_bh(g_lock);
-
- if (id.pid & LNET_PID_USERFLAG) {
- CERROR("Refusing to create a connection to userspace process %s\n",
- libcfs_id2str(id));
- return -EHOSTUNREACH;
- }
-
- if (retry) {
- CERROR("Can't find peer %s\n", libcfs_id2str(id));
- return -EHOSTUNREACH;
- }
-
- rc = ksocknal_add_peer(ni, id,
- LNET_NIDADDR(id.nid),
- lnet_acceptor_port());
- if (rc) {
- CERROR("Can't add peer %s: %d\n",
- libcfs_id2str(id), rc);
- return rc;
- }
- }
-
- ksocknal_launch_all_connections_locked(peer);
-
- conn = ksocknal_find_conn_locked(peer, tx, tx->tx_nonblk);
- if (conn) {
- /* Connection exists; queue message on it */
- ksocknal_queue_tx_locked(tx, conn);
- write_unlock_bh(g_lock);
- return 0;
- }
-
- if (peer->ksnp_accepting > 0 ||
- ksocknal_find_connecting_route_locked(peer)) {
- /* the message is going to be pinned to the peer */
- tx->tx_deadline =
- cfs_time_shift(*ksocknal_tunables.ksnd_timeout);
-
- /* Queue the message until a connection is established */
- list_add_tail(&tx->tx_list, &peer->ksnp_tx_queue);
- write_unlock_bh(g_lock);
- return 0;
- }
-
- write_unlock_bh(g_lock);
-
- /* NB Routes may be ignored if connections to them failed recently */
- CNETERR("No usable routes to %s\n", libcfs_id2str(id));
- return -EHOSTUNREACH;
-}
-
-int
-ksocknal_send(struct lnet_ni *ni, void *private, struct lnet_msg *lntmsg)
-{
- int mpflag = 1;
- int type = lntmsg->msg_type;
- struct lnet_process_id target = lntmsg->msg_target;
- unsigned int payload_niov = lntmsg->msg_niov;
- struct kvec *payload_iov = lntmsg->msg_iov;
- struct bio_vec *payload_kiov = lntmsg->msg_kiov;
- unsigned int payload_offset = lntmsg->msg_offset;
- unsigned int payload_nob = lntmsg->msg_len;
- struct ksock_tx *tx;
- int desc_size;
- int rc;
-
- /*
- * NB 'private' is different depending on what we're sending.
- * Just ignore it...
- */
- CDEBUG(D_NET, "sending %u bytes in %d frags to %s\n",
- payload_nob, payload_niov, libcfs_id2str(target));
-
- LASSERT(!payload_nob || payload_niov > 0);
- LASSERT(payload_niov <= LNET_MAX_IOV);
- /* payload is either all vaddrs or all pages */
- LASSERT(!(payload_kiov && payload_iov));
- LASSERT(!in_interrupt());
-
- if (payload_iov)
- desc_size = offsetof(struct ksock_tx,
- tx_frags.virt.iov[1 + payload_niov]);
- else
- desc_size = offsetof(struct ksock_tx,
- tx_frags.paged.kiov[payload_niov]);
-
- if (lntmsg->msg_vmflush)
- mpflag = cfs_memory_pressure_get_and_set();
- tx = ksocknal_alloc_tx(KSOCK_MSG_LNET, desc_size);
- if (!tx) {
- CERROR("Can't allocate tx desc type %d size %d\n",
- type, desc_size);
- if (lntmsg->msg_vmflush)
- cfs_memory_pressure_restore(mpflag);
- return -ENOMEM;
- }
-
- tx->tx_conn = NULL; /* set when assigned a conn */
- tx->tx_lnetmsg = lntmsg;
-
- if (payload_iov) {
- tx->tx_kiov = NULL;
- tx->tx_nkiov = 0;
- tx->tx_iov = tx->tx_frags.virt.iov;
- tx->tx_niov = 1 +
- lnet_extract_iov(payload_niov, &tx->tx_iov[1],
- payload_niov, payload_iov,
- payload_offset, payload_nob);
- } else {
- tx->tx_niov = 1;
- tx->tx_iov = &tx->tx_frags.paged.iov;
- tx->tx_kiov = tx->tx_frags.paged.kiov;
- tx->tx_nkiov = lnet_extract_kiov(payload_niov, tx->tx_kiov,
- payload_niov, payload_kiov,
- payload_offset, payload_nob);
-
- if (payload_nob >= *ksocknal_tunables.ksnd_zc_min_payload)
- tx->tx_zc_capable = 1;
- }
-
- tx->tx_msg.ksm_csum = 0;
- tx->tx_msg.ksm_type = KSOCK_MSG_LNET;
- tx->tx_msg.ksm_zc_cookies[0] = 0;
- tx->tx_msg.ksm_zc_cookies[1] = 0;
-
- /* The first fragment will be set later in pro_pack */
- rc = ksocknal_launch_packet(ni, tx, target);
- if (!mpflag)
- cfs_memory_pressure_restore(mpflag);
-
- if (!rc)
- return 0;
-
- ksocknal_free_tx(tx);
- return -EIO;
-}
-
-int
-ksocknal_thread_start(int (*fn)(void *arg), void *arg, char *name)
-{
- struct task_struct *task = kthread_run(fn, arg, "%s", name);
-
- if (IS_ERR(task))
- return PTR_ERR(task);
-
- write_lock_bh(&ksocknal_data.ksnd_global_lock);
- ksocknal_data.ksnd_nthreads++;
- write_unlock_bh(&ksocknal_data.ksnd_global_lock);
- return 0;
-}
-
-void
-ksocknal_thread_fini(void)
-{
- write_lock_bh(&ksocknal_data.ksnd_global_lock);
- ksocknal_data.ksnd_nthreads--;
- write_unlock_bh(&ksocknal_data.ksnd_global_lock);
-}
-
-int
-ksocknal_new_packet(struct ksock_conn *conn, int nob_to_skip)
-{
- static char ksocknal_slop_buffer[4096];
- struct kvec *kvec = conn->ksnc_rx_iov_space;
-
- int nob;
- unsigned int niov;
- int skipped;
-
- LASSERT(conn->ksnc_proto);
-
- if (*ksocknal_tunables.ksnd_eager_ack & conn->ksnc_type) {
- /* Remind the socket to ack eagerly... */
- ksocknal_lib_eager_ack(conn);
- }
-
- if (!nob_to_skip) { /* right at next packet boundary now */
- conn->ksnc_rx_started = 0;
- mb(); /* racing with timeout thread */
-
- switch (conn->ksnc_proto->pro_version) {
- case KSOCK_PROTO_V2:
- case KSOCK_PROTO_V3:
- conn->ksnc_rx_state = SOCKNAL_RX_KSM_HEADER;
- kvec->iov_base = &conn->ksnc_msg;
- kvec->iov_len = offsetof(struct ksock_msg, ksm_u);
- conn->ksnc_rx_nob_left = offsetof(struct ksock_msg, ksm_u);
- iov_iter_kvec(&conn->ksnc_rx_to, READ|ITER_KVEC, kvec,
- 1, offsetof(struct ksock_msg, ksm_u));
- break;
-
- case KSOCK_PROTO_V1:
- /* Receiving bare struct lnet_hdr */
- conn->ksnc_rx_state = SOCKNAL_RX_LNET_HEADER;
- kvec->iov_base = &conn->ksnc_msg.ksm_u.lnetmsg;
- kvec->iov_len = sizeof(struct lnet_hdr);
- conn->ksnc_rx_nob_left = sizeof(struct lnet_hdr);
- iov_iter_kvec(&conn->ksnc_rx_to, READ|ITER_KVEC, kvec,
- 1, sizeof(struct lnet_hdr));
- break;
-
- default:
- LBUG();
- }
- conn->ksnc_rx_csum = ~0;
- return 1;
- }
-
- /*
- * Set up to skip as much as possible now. If there's more left
- * (ran out of iov entries) we'll get called again
- */
- conn->ksnc_rx_state = SOCKNAL_RX_SLOP;
- conn->ksnc_rx_nob_left = nob_to_skip;
- skipped = 0;
- niov = 0;
-
- do {
- nob = min_t(int, nob_to_skip, sizeof(ksocknal_slop_buffer));
-
- kvec[niov].iov_base = ksocknal_slop_buffer;
- kvec[niov].iov_len = nob;
- niov++;
- skipped += nob;
- nob_to_skip -= nob;
-
- } while (nob_to_skip && /* mustn't overflow conn's rx iov */
- niov < sizeof(conn->ksnc_rx_iov_space) / sizeof(struct iovec));
-
- iov_iter_kvec(&conn->ksnc_rx_to, READ|ITER_KVEC, kvec, niov, skipped);
- return 0;
-}
-
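ksocknal_new_packet()'s skip path above points every iovec entry at one
static slop buffer, bounded by the space in the rx iovec, and reports
whether a further round is needed. The same bounded-chunking pattern as a
standalone sketch (sizes and names here are illustrative):

#include <stdio.h>

#define SLOP_SIZE 4096
#define MAX_IOV   8

/* Set up one round of skipping; returns the bytes covered this round. */
static int setup_skip(int nob_to_skip)
{
	static char slop[SLOP_SIZE];
	struct { char *base; int len; } iov[MAX_IOV];
	int niov = 0, skipped = 0;

	while (nob_to_skip > 0 && niov < MAX_IOV) {
		int nob = nob_to_skip < SLOP_SIZE ? nob_to_skip : SLOP_SIZE;

		iov[niov].base = slop;	/* every chunk reuses the same buffer */
		iov[niov].len = nob;
		niov++;
		skipped += nob;
		nob_to_skip -= nob;
	}
	printf("%d iov entries, %d bytes this round, %d left\n",
	       niov, skipped, nob_to_skip);
	return skipped;
}

int main(void)
{
	setup_skip(40000);	/* needs more than one round: 8 * 4096 < 40000 */
	return 0;
}
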
-static int
-ksocknal_process_receive(struct ksock_conn *conn)
-{
- struct kvec *kvec = conn->ksnc_rx_iov_space;
- struct lnet_hdr *lhdr;
- struct lnet_process_id *id;
- int rc;
-
- LASSERT(atomic_read(&conn->ksnc_conn_refcount) > 0);
-
- /* NB: sched lock NOT held */
- /* SOCKNAL_RX_LNET_HEADER is here for backward compatibility */
- LASSERT(conn->ksnc_rx_state == SOCKNAL_RX_KSM_HEADER ||
- conn->ksnc_rx_state == SOCKNAL_RX_LNET_PAYLOAD ||
- conn->ksnc_rx_state == SOCKNAL_RX_LNET_HEADER ||
- conn->ksnc_rx_state == SOCKNAL_RX_SLOP);
- again:
- if (iov_iter_count(&conn->ksnc_rx_to)) {
- rc = ksocknal_receive(conn);
-
- if (rc <= 0) {
- LASSERT(rc != -EAGAIN);
-
- if (!rc)
- CDEBUG(D_NET, "[%p] EOF from %s ip %pI4h:%d\n",
- conn,
- libcfs_id2str(conn->ksnc_peer->ksnp_id),
- &conn->ksnc_ipaddr,
- conn->ksnc_port);
- else if (!conn->ksnc_closing)
- CERROR("[%p] Error %d on read from %s ip %pI4h:%d\n",
- conn, rc,
- libcfs_id2str(conn->ksnc_peer->ksnp_id),
- &conn->ksnc_ipaddr,
- conn->ksnc_port);
-
- /* it's not an error if conn is being closed */
- ksocknal_close_conn_and_siblings(conn,
- (conn->ksnc_closing) ? 0 : rc);
- return (!rc ? -ESHUTDOWN : rc);
- }
-
- if (iov_iter_count(&conn->ksnc_rx_to)) {
- /* short read */
- return -EAGAIN;
- }
- }
- switch (conn->ksnc_rx_state) {
- case SOCKNAL_RX_KSM_HEADER:
- if (conn->ksnc_flip) {
- __swab32s(&conn->ksnc_msg.ksm_type);
- __swab32s(&conn->ksnc_msg.ksm_csum);
- __swab64s(&conn->ksnc_msg.ksm_zc_cookies[0]);
- __swab64s(&conn->ksnc_msg.ksm_zc_cookies[1]);
- }
-
- if (conn->ksnc_msg.ksm_type != KSOCK_MSG_NOOP &&
- conn->ksnc_msg.ksm_type != KSOCK_MSG_LNET) {
- CERROR("%s: Unknown message type: %x\n",
- libcfs_id2str(conn->ksnc_peer->ksnp_id),
- conn->ksnc_msg.ksm_type);
- ksocknal_new_packet(conn, 0);
- ksocknal_close_conn_and_siblings(conn, -EPROTO);
- return -EPROTO;
- }
-
- if (conn->ksnc_msg.ksm_type == KSOCK_MSG_NOOP &&
- conn->ksnc_msg.ksm_csum && /* has checksum */
- conn->ksnc_msg.ksm_csum != conn->ksnc_rx_csum) {
- /* NOOP Checksum error */
- CERROR("%s: Checksum error, wire:0x%08X data:0x%08X\n",
- libcfs_id2str(conn->ksnc_peer->ksnp_id),
- conn->ksnc_msg.ksm_csum, conn->ksnc_rx_csum);
- ksocknal_new_packet(conn, 0);
- ksocknal_close_conn_and_siblings(conn, -EPROTO);
- return -EIO;
- }
-
- if (conn->ksnc_msg.ksm_zc_cookies[1]) {
- __u64 cookie = 0;
-
- LASSERT(conn->ksnc_proto != &ksocknal_protocol_v1x);
-
- if (conn->ksnc_msg.ksm_type == KSOCK_MSG_NOOP)
- cookie = conn->ksnc_msg.ksm_zc_cookies[0];
-
- rc = conn->ksnc_proto->pro_handle_zcack(conn, cookie,
- conn->ksnc_msg.ksm_zc_cookies[1]);
-
- if (rc) {
- CERROR("%s: Unknown ZC-ACK cookie: %llu, %llu\n",
- libcfs_id2str(conn->ksnc_peer->ksnp_id),
- cookie, conn->ksnc_msg.ksm_zc_cookies[1]);
- ksocknal_new_packet(conn, 0);
- ksocknal_close_conn_and_siblings(conn, -EPROTO);
- return rc;
- }
- }
-
- if (conn->ksnc_msg.ksm_type == KSOCK_MSG_NOOP) {
- ksocknal_new_packet(conn, 0);
-			return 0; /* NOOP is done; just return */
- }
-
- conn->ksnc_rx_state = SOCKNAL_RX_LNET_HEADER;
- conn->ksnc_rx_nob_left = sizeof(struct ksock_lnet_msg);
-
- kvec->iov_base = &conn->ksnc_msg.ksm_u.lnetmsg;
- kvec->iov_len = sizeof(struct ksock_lnet_msg);
-
- iov_iter_kvec(&conn->ksnc_rx_to, READ|ITER_KVEC, kvec,
- 1, sizeof(struct ksock_lnet_msg));
-
- goto again; /* read lnet header now */
-
- case SOCKNAL_RX_LNET_HEADER:
- /* unpack message header */
- conn->ksnc_proto->pro_unpack(&conn->ksnc_msg);
-
- if (conn->ksnc_peer->ksnp_id.pid & LNET_PID_USERFLAG) {
- /* Userspace peer */
- lhdr = &conn->ksnc_msg.ksm_u.lnetmsg.ksnm_hdr;
- id = &conn->ksnc_peer->ksnp_id;
-
- /* Substitute process ID assigned at connection time */
- lhdr->src_pid = cpu_to_le32(id->pid);
- lhdr->src_nid = cpu_to_le64(id->nid);
- }
-
- conn->ksnc_rx_state = SOCKNAL_RX_PARSE;
- ksocknal_conn_addref(conn); /* ++ref while parsing */
-
- rc = lnet_parse(conn->ksnc_peer->ksnp_ni,
- &conn->ksnc_msg.ksm_u.lnetmsg.ksnm_hdr,
- conn->ksnc_peer->ksnp_id.nid, conn, 0);
- if (rc < 0) {
- /* I just received garbage: give up on this conn */
- ksocknal_new_packet(conn, 0);
- ksocknal_close_conn_and_siblings(conn, rc);
- ksocknal_conn_decref(conn);
- return -EPROTO;
- }
-
- /* I'm racing with ksocknal_recv() */
- LASSERT(conn->ksnc_rx_state == SOCKNAL_RX_PARSE ||
- conn->ksnc_rx_state == SOCKNAL_RX_LNET_PAYLOAD);
-
- if (conn->ksnc_rx_state != SOCKNAL_RX_LNET_PAYLOAD)
- return 0;
-
- /* ksocknal_recv() got called */
- goto again;
-
- case SOCKNAL_RX_LNET_PAYLOAD:
- /* payload all received */
- rc = 0;
-
- if (!conn->ksnc_rx_nob_left && /* not truncating */
- conn->ksnc_msg.ksm_csum && /* has checksum */
- conn->ksnc_msg.ksm_csum != conn->ksnc_rx_csum) {
- CERROR("%s: Checksum error, wire:0x%08X data:0x%08X\n",
- libcfs_id2str(conn->ksnc_peer->ksnp_id),
- conn->ksnc_msg.ksm_csum, conn->ksnc_rx_csum);
- rc = -EIO;
- }
-
- if (!rc && conn->ksnc_msg.ksm_zc_cookies[0]) {
- LASSERT(conn->ksnc_proto != &ksocknal_protocol_v1x);
-
- lhdr = &conn->ksnc_msg.ksm_u.lnetmsg.ksnm_hdr;
- id = &conn->ksnc_peer->ksnp_id;
-
- rc = conn->ksnc_proto->pro_handle_zcreq(conn,
- conn->ksnc_msg.ksm_zc_cookies[0],
- *ksocknal_tunables.ksnd_nonblk_zcack ||
- le64_to_cpu(lhdr->src_nid) != id->nid);
- }
-
- lnet_finalize(conn->ksnc_peer->ksnp_ni, conn->ksnc_cookie, rc);
-
- if (rc) {
- ksocknal_new_packet(conn, 0);
- ksocknal_close_conn_and_siblings(conn, rc);
- return -EPROTO;
- }
- /* Fall through */
-
- case SOCKNAL_RX_SLOP:
- /* starting new packet? */
- if (ksocknal_new_packet(conn, conn->ksnc_rx_nob_left))
- return 0; /* come back later */
- goto again; /* try to finish reading slop now */
-
- default:
- break;
- }
-
- /* Not Reached */
- LBUG();
- return -EINVAL; /* keep gcc happy */
-}
-
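ksocknal_process_receive() above is a resumable state machine: each state
names what the next socket read must fill, and "goto again" re-enters the
machine once the iov drains. The overall shape, condensed into a userspace
sketch (states simplified; the real transitions also depend on message
type and on lnet_parse()):

#include <stdio.h>

enum rx_state { RX_KSM_HEADER, RX_LNET_HEADER, RX_LNET_PAYLOAD, RX_SLOP, RX_DONE };

/* Walk one packet through the states, assuming every read completes. */
static void run_rx(void)
{
	enum rx_state state = RX_KSM_HEADER;

	while (state != RX_DONE) {
		switch (state) {
		case RX_KSM_HEADER:	/* fixed-size socklnd message header */
			state = RX_LNET_HEADER;	/* a NOOP would finish here */
			break;
		case RX_LNET_HEADER:	/* LNet header; real code calls lnet_parse() */
			state = RX_LNET_PAYLOAD;
			break;
		case RX_LNET_PAYLOAD:	/* payload complete; fall into slop */
			state = RX_SLOP;
			break;
		case RX_SLOP:		/* discard bytes up to the packet boundary */
			state = RX_DONE;
			break;
		default:
			return;
		}
		printf("now in state %d\n", state);
	}
}

int main(void)
{
	run_rx();
	return 0;
}
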
-int
-ksocknal_recv(struct lnet_ni *ni, void *private, struct lnet_msg *msg,
- int delayed, struct iov_iter *to, unsigned int rlen)
-{
- struct ksock_conn *conn = private;
- struct ksock_sched *sched = conn->ksnc_scheduler;
-
- LASSERT(iov_iter_count(to) <= rlen);
- LASSERT(to->nr_segs <= LNET_MAX_IOV);
-
- conn->ksnc_cookie = msg;
- conn->ksnc_rx_nob_left = rlen;
-
- conn->ksnc_rx_to = *to;
-
- LASSERT(conn->ksnc_rx_scheduled);
-
- spin_lock_bh(&sched->kss_lock);
-
- switch (conn->ksnc_rx_state) {
- case SOCKNAL_RX_PARSE_WAIT:
- list_add_tail(&conn->ksnc_rx_list, &sched->kss_rx_conns);
- wake_up(&sched->kss_waitq);
- LASSERT(conn->ksnc_rx_ready);
- break;
-
- case SOCKNAL_RX_PARSE:
- /* scheduler hasn't noticed I'm parsing yet */
- break;
- }
-
- conn->ksnc_rx_state = SOCKNAL_RX_LNET_PAYLOAD;
-
- spin_unlock_bh(&sched->kss_lock);
- ksocknal_conn_decref(conn);
- return 0;
-}
-
-static inline int
-ksocknal_sched_cansleep(struct ksock_sched *sched)
-{
- int rc;
-
- spin_lock_bh(&sched->kss_lock);
-
- rc = !ksocknal_data.ksnd_shuttingdown &&
- list_empty(&sched->kss_rx_conns) &&
- list_empty(&sched->kss_tx_conns);
-
- spin_unlock_bh(&sched->kss_lock);
- return rc;
-}
-
-int ksocknal_scheduler(void *arg)
-{
- struct ksock_sched_info *info;
- struct ksock_sched *sched;
- struct ksock_conn *conn;
- struct ksock_tx *tx;
- int rc;
- int nloops = 0;
- long id = (long)arg;
-
- info = ksocknal_data.ksnd_sched_info[KSOCK_THREAD_CPT(id)];
- sched = &info->ksi_scheds[KSOCK_THREAD_SID(id)];
-
- rc = cfs_cpt_bind(lnet_cpt_table(), info->ksi_cpt);
- if (rc) {
- CWARN("Can't set CPU partition affinity to %d: %d\n",
- info->ksi_cpt, rc);
- }
-
- spin_lock_bh(&sched->kss_lock);
-
- while (!ksocknal_data.ksnd_shuttingdown) {
- int did_something = 0;
-
- /* Ensure I progress everything semi-fairly */
-
- if (!list_empty(&sched->kss_rx_conns)) {
- conn = list_entry(sched->kss_rx_conns.next,
- struct ksock_conn, ksnc_rx_list);
- list_del(&conn->ksnc_rx_list);
-
- LASSERT(conn->ksnc_rx_scheduled);
- LASSERT(conn->ksnc_rx_ready);
-
- /*
- * clear rx_ready in case receive isn't complete.
- * Do it BEFORE we call process_recv, since
- * data_ready can set it any time after we release
- * kss_lock.
- */
- conn->ksnc_rx_ready = 0;
- spin_unlock_bh(&sched->kss_lock);
-
- rc = ksocknal_process_receive(conn);
-
- spin_lock_bh(&sched->kss_lock);
-
- /* I'm the only one that can clear this flag */
- LASSERT(conn->ksnc_rx_scheduled);
-
- /* Did process_receive get everything it wanted? */
- if (!rc)
- conn->ksnc_rx_ready = 1;
-
- if (conn->ksnc_rx_state == SOCKNAL_RX_PARSE) {
- /*
- * Conn blocked waiting for ksocknal_recv()
- * I change its state (under lock) to signal
- * it can be rescheduled
- */
- conn->ksnc_rx_state = SOCKNAL_RX_PARSE_WAIT;
- } else if (conn->ksnc_rx_ready) {
- /* reschedule for rx */
- list_add_tail(&conn->ksnc_rx_list,
- &sched->kss_rx_conns);
- } else {
- conn->ksnc_rx_scheduled = 0;
- /* drop my ref */
- ksocknal_conn_decref(conn);
- }
-
- did_something = 1;
- }
-
- if (!list_empty(&sched->kss_tx_conns)) {
- LIST_HEAD(zlist);
-
- if (!list_empty(&sched->kss_zombie_noop_txs)) {
- list_add(&zlist, &sched->kss_zombie_noop_txs);
- list_del_init(&sched->kss_zombie_noop_txs);
- }
-
- conn = list_entry(sched->kss_tx_conns.next,
- struct ksock_conn, ksnc_tx_list);
- list_del(&conn->ksnc_tx_list);
-
- LASSERT(conn->ksnc_tx_scheduled);
- LASSERT(conn->ksnc_tx_ready);
- LASSERT(!list_empty(&conn->ksnc_tx_queue));
-
- tx = list_entry(conn->ksnc_tx_queue.next,
- struct ksock_tx, tx_list);
-
- if (conn->ksnc_tx_carrier == tx)
- ksocknal_next_tx_carrier(conn);
-
- /* dequeue now so empty list => more to send */
- list_del(&tx->tx_list);
-
- /*
- * Clear tx_ready in case send isn't complete. Do
- * it BEFORE we call process_transmit, since
- * write_space can set it any time after we release
- * kss_lock.
- */
- conn->ksnc_tx_ready = 0;
- spin_unlock_bh(&sched->kss_lock);
-
- if (!list_empty(&zlist)) {
- /*
-				 * free zombie noop txs; it's fast because
-				 * noop txs are just put back on the freelist
- */
- ksocknal_txlist_done(NULL, &zlist, 0);
- }
-
- rc = ksocknal_process_transmit(conn, tx);
-
- if (rc == -ENOMEM || rc == -EAGAIN) {
- /*
- * Incomplete send: replace tx on HEAD of
- * tx_queue
- */
- spin_lock_bh(&sched->kss_lock);
- list_add(&tx->tx_list, &conn->ksnc_tx_queue);
- } else {
- /* Complete send; tx -ref */
- ksocknal_tx_decref(tx);
-
- spin_lock_bh(&sched->kss_lock);
- /* assume space for more */
- conn->ksnc_tx_ready = 1;
- }
-
- if (rc == -ENOMEM) {
- /*
- * Do nothing; after a short timeout, this
- * conn will be reposted on kss_tx_conns.
- */
- } else if (conn->ksnc_tx_ready &&
- !list_empty(&conn->ksnc_tx_queue)) {
- /* reschedule for tx */
- list_add_tail(&conn->ksnc_tx_list,
- &sched->kss_tx_conns);
- } else {
- conn->ksnc_tx_scheduled = 0;
- /* drop my ref */
- ksocknal_conn_decref(conn);
- }
-
- did_something = 1;
- }
- if (!did_something || /* nothing to do */
- ++nloops == SOCKNAL_RESCHED) { /* hogging CPU? */
- spin_unlock_bh(&sched->kss_lock);
-
- nloops = 0;
-
- if (!did_something) { /* wait for something to do */
- rc = wait_event_interruptible_exclusive(
- sched->kss_waitq,
- !ksocknal_sched_cansleep(sched));
- LASSERT(!rc);
- } else {
- cond_resched();
- }
-
- spin_lock_bh(&sched->kss_lock);
- }
- }
-
- spin_unlock_bh(&sched->kss_lock);
- ksocknal_thread_fini();
- return 0;
-}
-
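The scheduler above services at most one rx conn and one tx conn per pass,
sleeps when a pass does nothing, and yields the CPU after SOCKNAL_RESCHED
consecutive busy passes. That fairness skeleton in miniature (the RESCHED
value below is a stand-in, not socklnd's actual constant):

#include <stdio.h>
#include <stdbool.h>

#define RESCHED 100	/* stand-in for SOCKNAL_RESCHED */

static bool service_rx(void) { static int n = 3; return n-- > 0; }
static bool service_tx(void) { static int n = 5; return n-- > 0; }

int main(void)
{
	int nloops = 0;

	for (;;) {
		bool did_something = false;

		did_something |= service_rx();	/* one unit of rx work at most */
		did_something |= service_tx();	/* one unit of tx work at most */

		if (!did_something)
			break;		/* the real loop sleeps on kss_waitq here */
		if (++nloops == RESCHED) {
			nloops = 0;
			/* the real loop drops kss_lock and calls cond_resched() */
		}
	}
	printf("all queues drained\n");
	return 0;
}
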
-/*
- * Add connection to kss_rx_conns of scheduler
- * and wakeup the scheduler.
- */
-void ksocknal_read_callback(struct ksock_conn *conn)
-{
- struct ksock_sched *sched;
-
- sched = conn->ksnc_scheduler;
-
- spin_lock_bh(&sched->kss_lock);
-
- conn->ksnc_rx_ready = 1;
-
- if (!conn->ksnc_rx_scheduled) { /* not being progressed */
- list_add_tail(&conn->ksnc_rx_list, &sched->kss_rx_conns);
- conn->ksnc_rx_scheduled = 1;
- /* extra ref for scheduler */
- ksocknal_conn_addref(conn);
-
- wake_up(&sched->kss_waitq);
- }
- spin_unlock_bh(&sched->kss_lock);
-}
-
-/*
- * Add connection to kss_tx_conns of scheduler
- * and wakeup the scheduler.
- */
-void ksocknal_write_callback(struct ksock_conn *conn)
-{
- struct ksock_sched *sched;
-
- sched = conn->ksnc_scheduler;
-
- spin_lock_bh(&sched->kss_lock);
-
- conn->ksnc_tx_ready = 1;
-
- if (!conn->ksnc_tx_scheduled && /* not being progressed */
- !list_empty(&conn->ksnc_tx_queue)) { /* packets to send */
- list_add_tail(&conn->ksnc_tx_list, &sched->kss_tx_conns);
- conn->ksnc_tx_scheduled = 1;
- /* extra ref for scheduler */
- ksocknal_conn_addref(conn);
-
- wake_up(&sched->kss_waitq);
- }
-
- spin_unlock_bh(&sched->kss_lock);
-}
-
-static struct ksock_proto *
-ksocknal_parse_proto_version(struct ksock_hello_msg *hello)
-{
- __u32 version = 0;
-
- if (hello->kshm_magic == LNET_PROTO_MAGIC)
- version = hello->kshm_version;
- else if (hello->kshm_magic == __swab32(LNET_PROTO_MAGIC))
- version = __swab32(hello->kshm_version);
-
- if (version) {
-#if SOCKNAL_VERSION_DEBUG
- if (*ksocknal_tunables.ksnd_protocol == 1)
- return NULL;
-
- if (*ksocknal_tunables.ksnd_protocol == 2 &&
- version == KSOCK_PROTO_V3)
- return NULL;
-#endif
- if (version == KSOCK_PROTO_V2)
- return &ksocknal_protocol_v2x;
-
- if (version == KSOCK_PROTO_V3)
- return &ksocknal_protocol_v3x;
-
- return NULL;
- }
-
- if (hello->kshm_magic == le32_to_cpu(LNET_PROTO_TCP_MAGIC)) {
- struct lnet_magicversion *hmv = (struct lnet_magicversion *)hello;
-
- BUILD_BUG_ON(sizeof(struct lnet_magicversion) !=
- offsetof(struct ksock_hello_msg, kshm_src_nid));
-
- if (hmv->version_major == cpu_to_le16(KSOCK_PROTO_V1_MAJOR) &&
- hmv->version_minor == cpu_to_le16(KSOCK_PROTO_V1_MINOR))
- return &ksocknal_protocol_v1x;
- }
-
- return NULL;
-}
-
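ksocknal_parse_proto_version() above accepts a hello whether the peer
shares our byte order or is byte-flipped, by also matching the swabbed
magic. The detection idiom as a standalone sketch (the magic value below
is made up; LNET's actual constants are defined elsewhere):

#include <stdio.h>
#include <stdint.h>

#define PROTO_MAGIC 0x45726963u		/* illustrative, not LNET_PROTO_MAGIC */

static uint32_t swab32(uint32_t v)
{
	return ((v & 0x000000ffu) << 24) | ((v & 0x0000ff00u) << 8) |
	       ((v & 0x00ff0000u) >> 8)  | ((v & 0xff000000u) >> 24);
}

/* Return the version in host order, or 0 if the magic is unrecognized. */
static uint32_t parse_version(uint32_t magic, uint32_t version)
{
	if (magic == PROTO_MAGIC)
		return version;			/* peer has our endianness */
	if (magic == swab32(PROTO_MAGIC))
		return swab32(version);		/* peer is byte-flipped */
	return 0;
}

int main(void)
{
	printf("native peer:  v%u\n", parse_version(PROTO_MAGIC, 3));
	printf("flipped peer: v%u\n", parse_version(swab32(PROTO_MAGIC), swab32(3)));
	return 0;
}
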
-int
-ksocknal_send_hello(struct lnet_ni *ni, struct ksock_conn *conn,
- lnet_nid_t peer_nid, struct ksock_hello_msg *hello)
-{
-	/* CAVEAT EMPTOR: this byte-flips 'ipaddrs' */
- struct ksock_net *net = (struct ksock_net *)ni->ni_data;
-
- LASSERT(hello->kshm_nips <= LNET_MAX_INTERFACES);
-
-	/* rely on the caller to hold a ref on the socket so it won't disappear */
- LASSERT(conn->ksnc_proto);
-
- hello->kshm_src_nid = ni->ni_nid;
- hello->kshm_dst_nid = peer_nid;
- hello->kshm_src_pid = the_lnet.ln_pid;
-
- hello->kshm_src_incarnation = net->ksnn_incarnation;
- hello->kshm_ctype = conn->ksnc_type;
-
- return conn->ksnc_proto->pro_send_hello(conn, hello);
-}
-
-static int
-ksocknal_invert_type(int type)
-{
- switch (type) {
- case SOCKLND_CONN_ANY:
- case SOCKLND_CONN_CONTROL:
- return type;
- case SOCKLND_CONN_BULK_IN:
- return SOCKLND_CONN_BULK_OUT;
- case SOCKLND_CONN_BULK_OUT:
- return SOCKLND_CONN_BULK_IN;
- default:
- return SOCKLND_CONN_NONE;
- }
-}
-
-int
-ksocknal_recv_hello(struct lnet_ni *ni, struct ksock_conn *conn,
- struct ksock_hello_msg *hello,
- struct lnet_process_id *peerid,
- __u64 *incarnation)
-{
- /* Return < 0 fatal error
- * 0 success
- * EALREADY lost connection race
- * EPROTO protocol version mismatch
- */
- struct socket *sock = conn->ksnc_sock;
- int active = !!conn->ksnc_proto;
- int timeout;
- int proto_match;
- int rc;
- struct ksock_proto *proto;
- struct lnet_process_id recv_id;
-
- /* socket type set on active connections - not set on passive */
- LASSERT(!active == !(conn->ksnc_type != SOCKLND_CONN_NONE));
-
- timeout = active ? *ksocknal_tunables.ksnd_timeout :
- lnet_acceptor_timeout();
-
- rc = lnet_sock_read(sock, &hello->kshm_magic,
- sizeof(hello->kshm_magic), timeout);
- if (rc) {
- CERROR("Error %d reading HELLO from %pI4h\n",
- rc, &conn->ksnc_ipaddr);
- LASSERT(rc < 0);
- return rc;
- }
-
- if (hello->kshm_magic != LNET_PROTO_MAGIC &&
- hello->kshm_magic != __swab32(LNET_PROTO_MAGIC) &&
- hello->kshm_magic != le32_to_cpu(LNET_PROTO_TCP_MAGIC)) {
- /* Unexpected magic! */
- CERROR("Bad magic(1) %#08x (%#08x expected) from %pI4h\n",
- __cpu_to_le32(hello->kshm_magic),
- LNET_PROTO_TCP_MAGIC,
- &conn->ksnc_ipaddr);
- return -EPROTO;
- }
-
- rc = lnet_sock_read(sock, &hello->kshm_version,
- sizeof(hello->kshm_version), timeout);
- if (rc) {
- CERROR("Error %d reading HELLO from %pI4h\n",
- rc, &conn->ksnc_ipaddr);
- LASSERT(rc < 0);
- return rc;
- }
-
- proto = ksocknal_parse_proto_version(hello);
- if (!proto) {
- if (!active) {
- /* unknown protocol from peer, tell peer my protocol */
- conn->ksnc_proto = &ksocknal_protocol_v3x;
-#if SOCKNAL_VERSION_DEBUG
- if (*ksocknal_tunables.ksnd_protocol == 2)
- conn->ksnc_proto = &ksocknal_protocol_v2x;
- else if (*ksocknal_tunables.ksnd_protocol == 1)
- conn->ksnc_proto = &ksocknal_protocol_v1x;
-#endif
- hello->kshm_nips = 0;
- ksocknal_send_hello(ni, conn, ni->ni_nid, hello);
- }
-
- CERROR("Unknown protocol version (%d.x expected) from %pI4h\n",
- conn->ksnc_proto->pro_version,
- &conn->ksnc_ipaddr);
-
- return -EPROTO;
- }
-
- proto_match = (conn->ksnc_proto == proto);
- conn->ksnc_proto = proto;
-
- /* receive the rest of hello message anyway */
- rc = conn->ksnc_proto->pro_recv_hello(conn, hello, timeout);
- if (rc) {
- CERROR("Error %d reading or checking hello from from %pI4h\n",
- rc, &conn->ksnc_ipaddr);
- LASSERT(rc < 0);
- return rc;
- }
-
- *incarnation = hello->kshm_src_incarnation;
-
- if (hello->kshm_src_nid == LNET_NID_ANY) {
- CERROR("Expecting a HELLO hdr with a NID, but got LNET_NID_ANY from %pI4h\n",
- &conn->ksnc_ipaddr);
- return -EPROTO;
- }
-
- if (!active &&
- conn->ksnc_port > LNET_ACCEPTOR_MAX_RESERVED_PORT) {
- /* Userspace NAL assigns peer process ID from socket */
- recv_id.pid = conn->ksnc_port | LNET_PID_USERFLAG;
- recv_id.nid = LNET_MKNID(LNET_NIDNET(ni->ni_nid),
- conn->ksnc_ipaddr);
- } else {
- recv_id.nid = hello->kshm_src_nid;
- recv_id.pid = hello->kshm_src_pid;
- }
-
- if (!active) {
- *peerid = recv_id;
-
- /* peer determines type */
- conn->ksnc_type = ksocknal_invert_type(hello->kshm_ctype);
- if (conn->ksnc_type == SOCKLND_CONN_NONE) {
- CERROR("Unexpected type %d from %s ip %pI4h\n",
- hello->kshm_ctype, libcfs_id2str(*peerid),
- &conn->ksnc_ipaddr);
- return -EPROTO;
- }
-
- return 0;
- }
-
- if (peerid->pid != recv_id.pid ||
- peerid->nid != recv_id.nid) {
- LCONSOLE_ERROR_MSG(0x130, "Connected successfully to %s on host %pI4h, but they claimed they were %s; please check your Lustre configuration.\n",
- libcfs_id2str(*peerid),
- &conn->ksnc_ipaddr,
- libcfs_id2str(recv_id));
- return -EPROTO;
- }
-
- if (hello->kshm_ctype == SOCKLND_CONN_NONE) {
- /* Possible protocol mismatch or I lost the connection race */
- return proto_match ? EALREADY : EPROTO;
- }
-
- if (ksocknal_invert_type(hello->kshm_ctype) != conn->ksnc_type) {
- CERROR("Mismatched types: me %d, %s ip %pI4h %d\n",
- conn->ksnc_type, libcfs_id2str(*peerid),
- &conn->ksnc_ipaddr, hello->kshm_ctype);
- return -EPROTO;
- }
-
- return 0;
-}
-
-static int
-ksocknal_connect(struct ksock_route *route)
-{
- LIST_HEAD(zombies);
- struct ksock_peer *peer = route->ksnr_peer;
- int type;
- int wanted;
- struct socket *sock;
- unsigned long deadline;
- int retry_later = 0;
- int rc = 0;
-
- deadline = cfs_time_add(cfs_time_current(),
- *ksocknal_tunables.ksnd_timeout * HZ);
-
- write_lock_bh(&ksocknal_data.ksnd_global_lock);
-
- LASSERT(route->ksnr_scheduled);
- LASSERT(!route->ksnr_connecting);
-
- route->ksnr_connecting = 1;
-
- for (;;) {
- wanted = ksocknal_route_mask() & ~route->ksnr_connected;
-
- /*
- * stop connecting if peer/route got closed under me, or
- * route got connected while queued
- */
- if (peer->ksnp_closing || route->ksnr_deleted ||
- !wanted) {
- retry_later = 0;
- break;
- }
-
- /* reschedule if peer is connecting to me */
- if (peer->ksnp_accepting > 0) {
- CDEBUG(D_NET,
- "peer %s(%d) already connecting to me, retry later.\n",
- libcfs_nid2str(peer->ksnp_id.nid),
- peer->ksnp_accepting);
- retry_later = 1;
- }
-
- if (retry_later) /* needs reschedule */
- break;
-
- if (wanted & BIT(SOCKLND_CONN_ANY)) {
- type = SOCKLND_CONN_ANY;
- } else if (wanted & BIT(SOCKLND_CONN_CONTROL)) {
- type = SOCKLND_CONN_CONTROL;
- } else if (wanted & BIT(SOCKLND_CONN_BULK_IN)) {
- type = SOCKLND_CONN_BULK_IN;
- } else {
- LASSERT(wanted & BIT(SOCKLND_CONN_BULK_OUT));
- type = SOCKLND_CONN_BULK_OUT;
- }
-
- write_unlock_bh(&ksocknal_data.ksnd_global_lock);
-
- if (cfs_time_aftereq(cfs_time_current(), deadline)) {
- rc = -ETIMEDOUT;
- lnet_connect_console_error(rc, peer->ksnp_id.nid,
- route->ksnr_ipaddr,
- route->ksnr_port);
- goto failed;
- }
-
- rc = lnet_connect(&sock, peer->ksnp_id.nid,
- route->ksnr_myipaddr,
- route->ksnr_ipaddr, route->ksnr_port);
- if (rc)
- goto failed;
-
- rc = ksocknal_create_conn(peer->ksnp_ni, route, sock, type);
- if (rc < 0) {
- lnet_connect_console_error(rc, peer->ksnp_id.nid,
- route->ksnr_ipaddr,
- route->ksnr_port);
- goto failed;
- }
-
- /*
- * A +ve RC means I have to retry because I lost the connection
- * race or I have to renegotiate protocol version
- */
-		retry_later = (rc != 0);
- if (retry_later)
- CDEBUG(D_NET, "peer %s: conn race, retry later.\n",
- libcfs_nid2str(peer->ksnp_id.nid));
-
- write_lock_bh(&ksocknal_data.ksnd_global_lock);
- }
-
- route->ksnr_scheduled = 0;
- route->ksnr_connecting = 0;
-
- if (retry_later) {
- /*
- * re-queue for attention; this frees me up to handle
- * the peer's incoming connection request
- */
- if (rc == EALREADY ||
- (!rc && peer->ksnp_accepting > 0)) {
- /*
-			 * We want to introduce a delay before the next
-			 * attempt to connect if we lost the connection race,
-			 * but the race is usually resolved quickly, so
-			 * min_reconnectms should be a good heuristic
- */
- route->ksnr_retry_interval =
- *ksocknal_tunables.ksnd_min_reconnectms * HZ / 1000;
- route->ksnr_timeout = cfs_time_add(cfs_time_current(),
- route->ksnr_retry_interval);
- }
-
- ksocknal_launch_connection_locked(route);
- }
-
- write_unlock_bh(&ksocknal_data.ksnd_global_lock);
- return retry_later;
-
- failed:
- write_lock_bh(&ksocknal_data.ksnd_global_lock);
-
- route->ksnr_scheduled = 0;
- route->ksnr_connecting = 0;
-
- /* This is a retry rather than a new connection */
- route->ksnr_retry_interval *= 2;
- route->ksnr_retry_interval =
- max(route->ksnr_retry_interval,
- (long)*ksocknal_tunables.ksnd_min_reconnectms * HZ / 1000);
- route->ksnr_retry_interval =
- min(route->ksnr_retry_interval,
- (long)*ksocknal_tunables.ksnd_max_reconnectms * HZ / 1000);
-
- LASSERT(route->ksnr_retry_interval);
- route->ksnr_timeout = cfs_time_add(cfs_time_current(),
- route->ksnr_retry_interval);
-
- if (!list_empty(&peer->ksnp_tx_queue) &&
- !peer->ksnp_accepting &&
- !ksocknal_find_connecting_route_locked(peer)) {
- struct ksock_conn *conn;
-
- /*
- * ksnp_tx_queue is queued on a conn on successful
- * connection for V1.x and V2.x
- */
- if (!list_empty(&peer->ksnp_conns)) {
- conn = list_entry(peer->ksnp_conns.next,
- struct ksock_conn, ksnc_list);
- LASSERT(conn->ksnc_proto == &ksocknal_protocol_v3x);
- }
-
- /*
- * take all the blocked packets while I've got the lock and
- * complete below...
- */
- list_splice_init(&peer->ksnp_tx_queue, &zombies);
- }
-
- write_unlock_bh(&ksocknal_data.ksnd_global_lock);
-
- ksocknal_peer_failed(peer);
- ksocknal_txlist_done(peer->ksnp_ni, &zombies, 1);
- return 0;
-}
-
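The failed: path above doubles the retry interval and clamps it between
the min/max reconnect tunables, giving classic bounded exponential
backoff. The resulting schedule, computed standalone with the module
defaults from socklnd_modparams.c (min_reconnectms=1000,
max_reconnectms=60000):

#include <stdio.h>

int main(void)
{
	long min_ms = 1000, max_ms = 60000;
	long interval = 0;

	for (int attempt = 1; attempt <= 10; attempt++) {
		interval *= 2;		/* "a retry rather than a new connection" */
		if (interval < min_ms)
			interval = min_ms;
		if (interval > max_ms)
			interval = max_ms;
		printf("attempt %2d: wait %5ld ms\n", attempt, interval);
	}
	return 0;
}
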
-/*
- * Check whether we need to create more connds.
- * Try to start a new thread if necessary; @timeout may be updated
- * if thread creation fails, so the caller won't keep retrying while
- * we are out of resources.
- */
-static int
-ksocknal_connd_check_start(time64_t sec, long *timeout)
-{
- char name[16];
- int rc;
- int total = ksocknal_data.ksnd_connd_starting +
- ksocknal_data.ksnd_connd_running;
-
- if (unlikely(ksocknal_data.ksnd_init < SOCKNAL_INIT_ALL)) {
- /* still in initializing */
- return 0;
- }
-
- if (total >= *ksocknal_tunables.ksnd_nconnds_max ||
- total > ksocknal_data.ksnd_connd_connecting + SOCKNAL_CONND_RESV) {
- /*
-		 * can't create more connds, or there are still enough
-		 * threads to handle the pending connects
- */
- return 0;
- }
-
- if (list_empty(&ksocknal_data.ksnd_connd_routes)) {
- /* no pending connecting request */
- return 0;
- }
-
- if (sec - ksocknal_data.ksnd_connd_failed_stamp <= 1) {
- /* may run out of resource, retry later */
- *timeout = HZ;
- return 0;
- }
-
- if (ksocknal_data.ksnd_connd_starting > 0) {
- /* serialize starting to avoid flood */
- return 0;
- }
-
- ksocknal_data.ksnd_connd_starting_stamp = sec;
- ksocknal_data.ksnd_connd_starting++;
- spin_unlock_bh(&ksocknal_data.ksnd_connd_lock);
-
- /* NB: total is the next id */
- snprintf(name, sizeof(name), "socknal_cd%02d", total);
- rc = ksocknal_thread_start(ksocknal_connd, NULL, name);
-
- spin_lock_bh(&ksocknal_data.ksnd_connd_lock);
- if (!rc)
- return 1;
-
- /* we tried ... */
- LASSERT(ksocknal_data.ksnd_connd_starting > 0);
- ksocknal_data.ksnd_connd_starting--;
- ksocknal_data.ksnd_connd_failed_stamp = ktime_get_real_seconds();
-
- return 1;
-}
-
-/*
- * Check whether the current thread can exit; return 1 if there are too
- * many threads and none has been created in the past 120 seconds.
- * This function may also update @timeout to make the caller come back
- * and recheck these conditions.
- */
-static int
-ksocknal_connd_check_stop(time64_t sec, long *timeout)
-{
- int val;
-
- if (unlikely(ksocknal_data.ksnd_init < SOCKNAL_INIT_ALL)) {
- /* still in initializing */
- return 0;
- }
-
- if (ksocknal_data.ksnd_connd_starting > 0) {
- /* in progress of starting new thread */
- return 0;
- }
-
- if (ksocknal_data.ksnd_connd_running <=
- *ksocknal_tunables.ksnd_nconnds) { /* can't shrink */
- return 0;
- }
-
- /* created thread in past 120 seconds? */
- val = (int)(ksocknal_data.ksnd_connd_starting_stamp +
- SOCKNAL_CONND_TIMEOUT - sec);
-
- *timeout = (val > 0) ? val * HZ :
- SOCKNAL_CONND_TIMEOUT * HZ;
- if (val > 0)
- return 0;
-
-	/* nothing was created in the past 120 seconds */
-
- return ksocknal_data.ksnd_connd_running >
- ksocknal_data.ksnd_connd_connecting + SOCKNAL_CONND_RESV;
-}
-
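Taken together, connd_check_start() and connd_check_stop() above form an
elastic thread pool: grow while queued routes outnumber spare threads,
shrink only when the pool is above its floor and nothing was started for
120 seconds. The decision logic condensed into a sketch (the reserve
value below is illustrative; socklnd's is SOCKNAL_CONND_RESV):

#include <stdio.h>
#include <stdbool.h>

#define CONND_RESV 2	/* stand-in for SOCKNAL_CONND_RESV */

static bool should_start(int starting, int running, int connecting,
			 int max, bool have_queued)
{
	int total = starting + running;

	if (total >= max)
		return false;		/* hard cap: nconnds_max */
	if (total > connecting + CONND_RESV)
		return false;		/* enough spare threads already */
	if (!have_queued)
		return false;		/* no pending connect request */
	return starting == 0;		/* serialize starting to avoid a flood */
}

static bool should_stop(int running, int connecting, int floor,
			long secs_since_start)
{
	if (running <= floor)
		return false;		/* never shrink below nconnds */
	if (secs_since_start < 120)	/* SOCKNAL_CONND_TIMEOUT */
		return false;
	return running > connecting + CONND_RESV;
}

int main(void)
{
	printf("start? %d\n", should_start(0, 4, 3, 64, true));
	printf("stop?  %d\n", should_stop(10, 2, 4, 300));
	return 0;
}
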
-/*
- * Go through connd_routes queue looking for a route that we can process
- * right now; @timeout_p may be updated if we need to come back later
- */
-static struct ksock_route *
-ksocknal_connd_get_route_locked(signed long *timeout_p)
-{
- struct ksock_route *route;
- unsigned long now;
-
- now = cfs_time_current();
-
- /* connd_routes can contain both pending and ordinary routes */
- list_for_each_entry(route, &ksocknal_data.ksnd_connd_routes,
- ksnr_connd_list) {
- if (!route->ksnr_retry_interval ||
- cfs_time_aftereq(now, route->ksnr_timeout))
- return route;
-
- if (*timeout_p == MAX_SCHEDULE_TIMEOUT ||
- (int)*timeout_p > (int)(route->ksnr_timeout - now))
- *timeout_p = (int)(route->ksnr_timeout - now);
- }
-
- return NULL;
-}
-
-int
-ksocknal_connd(void *arg)
-{
- spinlock_t *connd_lock = &ksocknal_data.ksnd_connd_lock;
- struct ksock_connreq *cr;
- wait_queue_entry_t wait;
- int nloops = 0;
- int cons_retry = 0;
-
- init_waitqueue_entry(&wait, current);
-
- spin_lock_bh(connd_lock);
-
- LASSERT(ksocknal_data.ksnd_connd_starting > 0);
- ksocknal_data.ksnd_connd_starting--;
- ksocknal_data.ksnd_connd_running++;
-
- while (!ksocknal_data.ksnd_shuttingdown) {
- struct ksock_route *route = NULL;
- time64_t sec = ktime_get_real_seconds();
- long timeout = MAX_SCHEDULE_TIMEOUT;
- int dropped_lock = 0;
-
- if (ksocknal_connd_check_stop(sec, &timeout)) {
- /* wakeup another one to check stop */
- wake_up(&ksocknal_data.ksnd_connd_waitq);
- break;
- }
-
- if (ksocknal_connd_check_start(sec, &timeout)) {
- /* created new thread */
- dropped_lock = 1;
- }
-
- if (!list_empty(&ksocknal_data.ksnd_connd_connreqs)) {
- /* Connection accepted by the listener */
- cr = list_entry(ksocknal_data.ksnd_connd_connreqs.next,
- struct ksock_connreq, ksncr_list);
-
- list_del(&cr->ksncr_list);
- spin_unlock_bh(connd_lock);
- dropped_lock = 1;
-
- ksocknal_create_conn(cr->ksncr_ni, NULL,
- cr->ksncr_sock, SOCKLND_CONN_NONE);
- lnet_ni_decref(cr->ksncr_ni);
- kfree(cr);
-
- spin_lock_bh(connd_lock);
- }
-
- /*
- * Only handle an outgoing connection request if there
- * is a thread left to handle incoming connections and
-		 * to create new connds
- */
- if (ksocknal_data.ksnd_connd_connecting + SOCKNAL_CONND_RESV <
- ksocknal_data.ksnd_connd_running) {
- route = ksocknal_connd_get_route_locked(&timeout);
- }
- if (route) {
- list_del(&route->ksnr_connd_list);
- ksocknal_data.ksnd_connd_connecting++;
- spin_unlock_bh(connd_lock);
- dropped_lock = 1;
-
- if (ksocknal_connect(route)) {
- /* consecutive retry */
- if (cons_retry++ > SOCKNAL_INSANITY_RECONN) {
- CWARN("massive consecutive re-connecting to %pI4h\n",
- &route->ksnr_ipaddr);
- cons_retry = 0;
- }
- } else {
- cons_retry = 0;
- }
-
- ksocknal_route_decref(route);
-
- spin_lock_bh(connd_lock);
- ksocknal_data.ksnd_connd_connecting--;
- }
-
- if (dropped_lock) {
- if (++nloops < SOCKNAL_RESCHED)
- continue;
- spin_unlock_bh(connd_lock);
- nloops = 0;
- cond_resched();
- spin_lock_bh(connd_lock);
- continue;
- }
-
- /* Nothing to do for 'timeout' */
- set_current_state(TASK_INTERRUPTIBLE);
- add_wait_queue_exclusive(&ksocknal_data.ksnd_connd_waitq,
- &wait);
- spin_unlock_bh(connd_lock);
-
- nloops = 0;
- schedule_timeout(timeout);
-
- remove_wait_queue(&ksocknal_data.ksnd_connd_waitq, &wait);
- spin_lock_bh(connd_lock);
- }
- ksocknal_data.ksnd_connd_running--;
- spin_unlock_bh(connd_lock);
-
- ksocknal_thread_fini();
- return 0;
-}
-
-static struct ksock_conn *
-ksocknal_find_timed_out_conn(struct ksock_peer *peer)
-{
- /* We're called with a shared lock on ksnd_global_lock */
- struct ksock_conn *conn;
- struct list_head *ctmp;
-
- list_for_each(ctmp, &peer->ksnp_conns) {
- int error;
-
- conn = list_entry(ctmp, struct ksock_conn, ksnc_list);
-
- /* Don't need the {get,put}connsock dance to deref ksnc_sock */
- LASSERT(!conn->ksnc_closing);
-
- /*
-		 * SOCK_ERROR resets the socket's error code on
-		 * some platforms (e.g. Darwin 8.x)
- */
- error = conn->ksnc_sock->sk->sk_err;
- if (error) {
- ksocknal_conn_addref(conn);
-
- switch (error) {
- case ECONNRESET:
- CNETERR("A connection with %s (%pI4h:%d) was reset; it may have rebooted.\n",
- libcfs_id2str(peer->ksnp_id),
- &conn->ksnc_ipaddr,
- conn->ksnc_port);
- break;
- case ETIMEDOUT:
- CNETERR("A connection with %s (%pI4h:%d) timed out; the network or node may be down.\n",
- libcfs_id2str(peer->ksnp_id),
- &conn->ksnc_ipaddr,
- conn->ksnc_port);
- break;
- default:
- CNETERR("An unexpected network error %d occurred with %s (%pI4h:%d\n",
- error,
- libcfs_id2str(peer->ksnp_id),
- &conn->ksnc_ipaddr,
- conn->ksnc_port);
- break;
- }
-
- return conn;
- }
-
- if (conn->ksnc_rx_started &&
- cfs_time_aftereq(cfs_time_current(),
- conn->ksnc_rx_deadline)) {
- /* Timed out incomplete incoming message */
- ksocknal_conn_addref(conn);
- CNETERR("Timeout receiving from %s (%pI4h:%d), state %d wanted %zd left %d\n",
- libcfs_id2str(peer->ksnp_id),
- &conn->ksnc_ipaddr,
- conn->ksnc_port,
- conn->ksnc_rx_state,
- iov_iter_count(&conn->ksnc_rx_to),
- conn->ksnc_rx_nob_left);
- return conn;
- }
-
- if ((!list_empty(&conn->ksnc_tx_queue) ||
- conn->ksnc_sock->sk->sk_wmem_queued) &&
- cfs_time_aftereq(cfs_time_current(),
- conn->ksnc_tx_deadline)) {
- /*
- * Timed out messages queued for sending or
- * buffered in the socket's send buffer
- */
- ksocknal_conn_addref(conn);
- CNETERR("Timeout sending data to %s (%pI4h:%d) the network or that node may be down.\n",
- libcfs_id2str(peer->ksnp_id),
- &conn->ksnc_ipaddr,
- conn->ksnc_port);
- return conn;
- }
- }
-
- return NULL;
-}
-
-static inline void
-ksocknal_flush_stale_txs(struct ksock_peer *peer)
-{
- struct ksock_tx *tx;
- struct ksock_tx *tmp;
- LIST_HEAD(stale_txs);
-
- write_lock_bh(&ksocknal_data.ksnd_global_lock);
-
- list_for_each_entry_safe(tx, tmp, &peer->ksnp_tx_queue, tx_list) {
- if (!cfs_time_aftereq(cfs_time_current(),
- tx->tx_deadline))
- break;
-
- list_del(&tx->tx_list);
- list_add_tail(&tx->tx_list, &stale_txs);
- }
-
- write_unlock_bh(&ksocknal_data.ksnd_global_lock);
-
- ksocknal_txlist_done(peer->ksnp_ni, &stale_txs, 1);
-}
-
-static int
-ksocknal_send_keepalive_locked(struct ksock_peer *peer)
- __must_hold(&ksocknal_data.ksnd_global_lock)
-{
- struct ksock_sched *sched;
- struct ksock_conn *conn;
- struct ksock_tx *tx;
-
- /* last_alive will be updated by create_conn */
- if (list_empty(&peer->ksnp_conns))
- return 0;
-
- if (peer->ksnp_proto != &ksocknal_protocol_v3x)
- return 0;
-
- if (*ksocknal_tunables.ksnd_keepalive <= 0 ||
- time_before(cfs_time_current(),
- cfs_time_add(peer->ksnp_last_alive,
- *ksocknal_tunables.ksnd_keepalive * HZ)))
- return 0;
-
- if (time_before(cfs_time_current(), peer->ksnp_send_keepalive))
- return 0;
-
- /*
-	 * retry 10 seconds later, so we won't put pressure
-	 * on this peer if we failed to send a keepalive this time
- */
- peer->ksnp_send_keepalive = cfs_time_shift(10);
-
- conn = ksocknal_find_conn_locked(peer, NULL, 1);
- if (conn) {
- sched = conn->ksnc_scheduler;
-
- spin_lock_bh(&sched->kss_lock);
- if (!list_empty(&conn->ksnc_tx_queue)) {
- spin_unlock_bh(&sched->kss_lock);
-			/* there is a queued ACK; no keepalive needed */
- return 0;
- }
-
- spin_unlock_bh(&sched->kss_lock);
- }
-
- read_unlock(&ksocknal_data.ksnd_global_lock);
-
- /* cookie = 1 is reserved for keepalive PING */
- tx = ksocknal_alloc_tx_noop(1, 1);
- if (!tx) {
- read_lock(&ksocknal_data.ksnd_global_lock);
- return -ENOMEM;
- }
-
- if (!ksocknal_launch_packet(peer->ksnp_ni, tx, peer->ksnp_id)) {
- read_lock(&ksocknal_data.ksnd_global_lock);
- return 1;
- }
-
- ksocknal_free_tx(tx);
- read_lock(&ksocknal_data.ksnd_global_lock);
-
- return -EIO;
-}
-
-static void
-ksocknal_check_peer_timeouts(int idx)
-{
- struct list_head *peers = &ksocknal_data.ksnd_peers[idx];
- struct ksock_peer *peer;
- struct ksock_conn *conn;
- struct ksock_tx *tx;
-
- again:
- /*
- * NB. We expect to have a look at all the peers and not find any
- * connections to time out, so we just use a shared lock while we
- * take a look...
- */
- read_lock(&ksocknal_data.ksnd_global_lock);
-
- list_for_each_entry(peer, peers, ksnp_list) {
- unsigned long deadline = 0;
- struct ksock_tx *tx_stale;
- int resid = 0;
- int n = 0;
-
- if (ksocknal_send_keepalive_locked(peer)) {
- read_unlock(&ksocknal_data.ksnd_global_lock);
- goto again;
- }
-
- conn = ksocknal_find_timed_out_conn(peer);
-
- if (conn) {
- read_unlock(&ksocknal_data.ksnd_global_lock);
-
- ksocknal_close_conn_and_siblings(conn, -ETIMEDOUT);
-
- /*
- * NB we won't find this one again, but we can't
- * just proceed with the next peer, since we dropped
- * ksnd_global_lock and it might be dead already!
- */
- ksocknal_conn_decref(conn);
- goto again;
- }
-
- /*
- * we can't process stale txs right here because we're
-	 * holding only the shared lock
- */
- if (!list_empty(&peer->ksnp_tx_queue)) {
- tx = list_entry(peer->ksnp_tx_queue.next,
- struct ksock_tx, tx_list);
-
- if (cfs_time_aftereq(cfs_time_current(),
- tx->tx_deadline)) {
- ksocknal_peer_addref(peer);
- read_unlock(&ksocknal_data.ksnd_global_lock);
-
- ksocknal_flush_stale_txs(peer);
-
- ksocknal_peer_decref(peer);
- goto again;
- }
- }
-
- if (list_empty(&peer->ksnp_zc_req_list))
- continue;
-
- tx_stale = NULL;
- spin_lock(&peer->ksnp_lock);
- list_for_each_entry(tx, &peer->ksnp_zc_req_list, tx_zc_list) {
- if (!cfs_time_aftereq(cfs_time_current(),
- tx->tx_deadline))
- break;
- /* ignore the TX if connection is being closed */
- if (tx->tx_conn->ksnc_closing)
- continue;
- if (!tx_stale)
- tx_stale = tx;
- n++;
- }
-
- if (!tx_stale) {
- spin_unlock(&peer->ksnp_lock);
- continue;
- }
-
- deadline = tx_stale->tx_deadline;
- resid = tx_stale->tx_resid;
- conn = tx_stale->tx_conn;
- ksocknal_conn_addref(conn);
-
- spin_unlock(&peer->ksnp_lock);
- read_unlock(&ksocknal_data.ksnd_global_lock);
-
- CERROR("Total %d stale ZC_REQs for peer %s detected; the oldest(%p) timed out %ld secs ago, resid: %d, wmem: %d\n",
- n, libcfs_nid2str(peer->ksnp_id.nid), tx_stale,
- cfs_duration_sec(cfs_time_current() - deadline),
- resid, conn->ksnc_sock->sk->sk_wmem_queued);
-
- ksocknal_close_conn_and_siblings(conn, -ETIMEDOUT);
- ksocknal_conn_decref(conn);
- goto again;
- }
-
- read_unlock(&ksocknal_data.ksnd_global_lock);
-}
-
-int
-ksocknal_reaper(void *arg)
-{
- wait_queue_entry_t wait;
- struct ksock_conn *conn;
- struct ksock_sched *sched;
- struct list_head enomem_conns;
- int nenomem_conns;
- long timeout;
- int i;
- int peer_index = 0;
- unsigned long deadline = cfs_time_current();
-
- INIT_LIST_HEAD(&enomem_conns);
- init_waitqueue_entry(&wait, current);
-
- spin_lock_bh(&ksocknal_data.ksnd_reaper_lock);
-
- while (!ksocknal_data.ksnd_shuttingdown) {
- if (!list_empty(&ksocknal_data.ksnd_deathrow_conns)) {
- conn = list_entry(ksocknal_data.ksnd_deathrow_conns.next,
- struct ksock_conn, ksnc_list);
- list_del(&conn->ksnc_list);
-
- spin_unlock_bh(&ksocknal_data.ksnd_reaper_lock);
-
- ksocknal_terminate_conn(conn);
- ksocknal_conn_decref(conn);
-
- spin_lock_bh(&ksocknal_data.ksnd_reaper_lock);
- continue;
- }
-
- if (!list_empty(&ksocknal_data.ksnd_zombie_conns)) {
- conn = list_entry(ksocknal_data.ksnd_zombie_conns.next,
- struct ksock_conn, ksnc_list);
- list_del(&conn->ksnc_list);
-
- spin_unlock_bh(&ksocknal_data.ksnd_reaper_lock);
-
- ksocknal_destroy_conn(conn);
-
- spin_lock_bh(&ksocknal_data.ksnd_reaper_lock);
- continue;
- }
-
- if (!list_empty(&ksocknal_data.ksnd_enomem_conns)) {
- list_add(&enomem_conns,
- &ksocknal_data.ksnd_enomem_conns);
- list_del_init(&ksocknal_data.ksnd_enomem_conns);
- }
-
- spin_unlock_bh(&ksocknal_data.ksnd_reaper_lock);
-
- /* reschedule all the connections that stalled with ENOMEM... */
- nenomem_conns = 0;
- while (!list_empty(&enomem_conns)) {
- conn = list_entry(enomem_conns.next, struct ksock_conn,
- ksnc_tx_list);
- list_del(&conn->ksnc_tx_list);
-
- sched = conn->ksnc_scheduler;
-
- spin_lock_bh(&sched->kss_lock);
-
- LASSERT(conn->ksnc_tx_scheduled);
- conn->ksnc_tx_ready = 1;
- list_add_tail(&conn->ksnc_tx_list,
- &sched->kss_tx_conns);
- wake_up(&sched->kss_waitq);
-
- spin_unlock_bh(&sched->kss_lock);
- nenomem_conns++;
- }
-
- /* careful with the jiffy wrap... */
- while ((timeout = cfs_time_sub(deadline,
- cfs_time_current())) <= 0) {
- const int n = 4;
- const int p = 1;
- int chunk = ksocknal_data.ksnd_peer_hash_size;
-
- /*
- * Time to check for timeouts on a few more peers: I do
- * checks every 'p' seconds on a proportion of the peer
- * table and I need to check every connection 'n' times
- * within a timeout interval, to ensure I detect a
- * timeout on any connection within (n+1)/n times the
- * timeout interval.
- */
- if (*ksocknal_tunables.ksnd_timeout > n * p)
- chunk = (chunk * n * p) /
- *ksocknal_tunables.ksnd_timeout;
- if (!chunk)
- chunk = 1;
-
- for (i = 0; i < chunk; i++) {
- ksocknal_check_peer_timeouts(peer_index);
- peer_index = (peer_index + 1) %
- ksocknal_data.ksnd_peer_hash_size;
- }
-
- deadline = cfs_time_add(deadline, p * HZ);
- }
-
- if (nenomem_conns) {
- /*
- * Reduce my timeout if I rescheduled ENOMEM conns.
-			 * This also prevents me from being woken immediately
- * if any go back on my enomem list.
- */
- timeout = SOCKNAL_ENOMEM_RETRY;
- }
- ksocknal_data.ksnd_reaper_waketime =
- cfs_time_add(cfs_time_current(), timeout);
-
- set_current_state(TASK_INTERRUPTIBLE);
- add_wait_queue(&ksocknal_data.ksnd_reaper_waitq, &wait);
-
- if (!ksocknal_data.ksnd_shuttingdown &&
- list_empty(&ksocknal_data.ksnd_deathrow_conns) &&
- list_empty(&ksocknal_data.ksnd_zombie_conns))
- schedule_timeout(timeout);
-
- set_current_state(TASK_RUNNING);
- remove_wait_queue(&ksocknal_data.ksnd_reaper_waitq, &wait);
-
- spin_lock_bh(&ksocknal_data.ksnd_reaper_lock);
- }
-
- spin_unlock_bh(&ksocknal_data.ksnd_reaper_lock);
-
- ksocknal_thread_fini();
- return 0;
-}
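The chunk arithmetic in the reaper above scales the peer-table scan so
that, with hash size H, a pass every p seconds, and n checks wanted per
timeout interval T, each pass visits H*n*p/T buckets. Worked through
numerically (the table size below is illustrative; sock_timeout's default
of 50s is from socklnd_modparams.c):

#include <stdio.h>

int main(void)
{
	int hash_size = 101;	/* illustrative peer hash size */
	int n = 4, p = 1;	/* checks per timeout; seconds per pass */
	int timeout = 50;	/* sock_timeout default */
	int chunk = hash_size;

	if (timeout > n * p)
		chunk = (chunk * n * p) / timeout;
	if (!chunk)
		chunk = 1;

	/* 101*4/50 = 8 buckets/second: the whole table is covered in ~13s,
	 * so every conn is examined ~4 times within one 50s timeout. */
	printf("chunk = %d buckets per pass\n", chunk);
	return 0;
}
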
diff --git a/drivers/staging/lustre/lnet/klnds/socklnd/socklnd_lib.c b/drivers/staging/lustre/lnet/klnds/socklnd/socklnd_lib.c
deleted file mode 100644
index 7941cfa526bc..000000000000
--- a/drivers/staging/lustre/lnet/klnds/socklnd/socklnd_lib.c
+++ /dev/null
@@ -1,533 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/*
- * GPL HEADER START
- *
- * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 only,
- * as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * General Public License version 2 for more details (a copy is included
- * in the LICENSE file that accompanied this code).
- *
- * You should have received a copy of the GNU General Public License
- * version 2 along with this program; If not, see
- * http://www.gnu.org/licenses/gpl-2.0.html
- *
- * GPL HEADER END
- */
-/*
- * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
- * Use is subject to license terms.
- *
- * Copyright (c) 2011, 2012, Intel Corporation.
- */
-/*
- * This file is part of Lustre, http://www.lustre.org/
- * Lustre is a trademark of Sun Microsystems, Inc.
- */
-
-#include "socklnd.h"
-
-int
-ksocknal_lib_get_conn_addrs(struct ksock_conn *conn)
-{
- int rc = lnet_sock_getaddr(conn->ksnc_sock, 1, &conn->ksnc_ipaddr,
- &conn->ksnc_port);
-
- /* Didn't need the {get,put}connsock dance to deref ksnc_sock... */
- LASSERT(!conn->ksnc_closing);
-
- if (rc) {
- CERROR("Error %d getting sock peer IP\n", rc);
- return rc;
- }
-
- rc = lnet_sock_getaddr(conn->ksnc_sock, 0, &conn->ksnc_myipaddr, NULL);
- if (rc) {
- CERROR("Error %d getting sock local IP\n", rc);
- return rc;
- }
-
- return 0;
-}
-
-int
-ksocknal_lib_zc_capable(struct ksock_conn *conn)
-{
- int caps = conn->ksnc_sock->sk->sk_route_caps;
-
- if (conn->ksnc_proto == &ksocknal_protocol_v1x)
- return 0;
-
- /*
- * ZC if the socket supports scatter/gather and doesn't need software
- * checksums
- */
- return ((caps & NETIF_F_SG) && (caps & NETIF_F_CSUM_MASK));
-}
-
-int
-ksocknal_lib_send_iov(struct ksock_conn *conn, struct ksock_tx *tx)
-{
- struct msghdr msg = {.msg_flags = MSG_DONTWAIT};
- struct socket *sock = conn->ksnc_sock;
- int nob, i;
-
- if (*ksocknal_tunables.ksnd_enable_csum && /* checksum enabled */
- conn->ksnc_proto == &ksocknal_protocol_v2x && /* V2.x connection */
-	    tx->tx_nob == tx->tx_resid && /* first sending */
- !tx->tx_msg.ksm_csum) /* not checksummed */
- ksocknal_lib_csum_tx(tx);
-
- for (nob = i = 0; i < tx->tx_niov; i++)
- nob += tx->tx_iov[i].iov_len;
-
- if (!list_empty(&conn->ksnc_tx_queue) ||
- nob < tx->tx_resid)
- msg.msg_flags |= MSG_MORE;
-
- iov_iter_kvec(&msg.msg_iter, WRITE | ITER_KVEC,
- tx->tx_iov, tx->tx_niov, nob);
- return sock_sendmsg(sock, &msg);
-}
-
-int
-ksocknal_lib_send_kiov(struct ksock_conn *conn, struct ksock_tx *tx)
-{
- struct socket *sock = conn->ksnc_sock;
- struct bio_vec *kiov = tx->tx_kiov;
- int rc;
- int nob;
-
- /* Not NOOP message */
- LASSERT(tx->tx_lnetmsg);
-
- if (tx->tx_msg.ksm_zc_cookies[0]) {
- /* Zero copy is enabled */
- struct sock *sk = sock->sk;
- struct page *page = kiov->bv_page;
- int offset = kiov->bv_offset;
- int fragsize = kiov->bv_len;
- int msgflg = MSG_DONTWAIT;
-
- CDEBUG(D_NET, "page %p + offset %x for %d\n",
- page, offset, kiov->bv_len);
-
- if (!list_empty(&conn->ksnc_tx_queue) ||
- fragsize < tx->tx_resid)
- msgflg |= MSG_MORE;
-
- if (sk->sk_prot->sendpage) {
- rc = sk->sk_prot->sendpage(sk, page,
- offset, fragsize, msgflg);
- } else {
- rc = tcp_sendpage(sk, page, offset, fragsize, msgflg);
- }
- } else {
- struct msghdr msg = {.msg_flags = MSG_DONTWAIT};
- int i;
-
- for (nob = i = 0; i < tx->tx_nkiov; i++)
- nob += kiov[i].bv_len;
-
- if (!list_empty(&conn->ksnc_tx_queue) ||
- nob < tx->tx_resid)
- msg.msg_flags |= MSG_MORE;
-
- iov_iter_bvec(&msg.msg_iter, WRITE | ITER_BVEC,
- kiov, tx->tx_nkiov, nob);
- rc = sock_sendmsg(sock, &msg);
- }
- return rc;
-}
-
-void
-ksocknal_lib_eager_ack(struct ksock_conn *conn)
-{
- int opt = 1;
- struct socket *sock = conn->ksnc_sock;
-
- /*
- * Remind the socket to ACK eagerly. If I don't, the socket might
- * think I'm about to send something it could piggy-back the ACK
- * on, introducing delay in completing zero-copy sends in my
- * peer.
- */
- kernel_setsockopt(sock, SOL_TCP, TCP_QUICKACK, (char *)&opt,
- sizeof(opt));
-}
-
-static int lustre_csum(struct kvec *v, void *context)
-{
-	struct ksock_conn *conn = context;
-
- conn->ksnc_rx_csum = crc32_le(conn->ksnc_rx_csum,
- v->iov_base, v->iov_len);
- return 0;
-}
-
-int
-ksocknal_lib_recv(struct ksock_conn *conn)
-{
- struct msghdr msg = { .msg_iter = conn->ksnc_rx_to };
- __u32 saved_csum;
- int rc;
-
- rc = sock_recvmsg(conn->ksnc_sock, &msg, MSG_DONTWAIT);
- if (rc <= 0)
- return rc;
-
- saved_csum = conn->ksnc_msg.ksm_csum;
- if (!saved_csum)
- return rc;
-
-	/* header is included only in V2; V3 checksums only the bulk data */
- if (!(conn->ksnc_rx_to.type & ITER_BVEC) &&
- conn->ksnc_proto != &ksocknal_protocol_v2x)
- return rc;
-
- /* accumulate checksum */
- conn->ksnc_msg.ksm_csum = 0;
- iov_iter_for_each_range(&conn->ksnc_rx_to, rc, lustre_csum, conn);
- conn->ksnc_msg.ksm_csum = saved_csum;
-
- return rc;
-}
-
-void
-ksocknal_lib_csum_tx(struct ksock_tx *tx)
-{
- int i;
- __u32 csum;
- void *base;
-
- LASSERT(tx->tx_iov[0].iov_base == &tx->tx_msg);
- LASSERT(tx->tx_conn);
- LASSERT(tx->tx_conn->ksnc_proto == &ksocknal_protocol_v2x);
-
- tx->tx_msg.ksm_csum = 0;
-
- csum = crc32_le(~0, tx->tx_iov[0].iov_base,
- tx->tx_iov[0].iov_len);
-
- if (tx->tx_kiov) {
- for (i = 0; i < tx->tx_nkiov; i++) {
- base = kmap(tx->tx_kiov[i].bv_page) +
- tx->tx_kiov[i].bv_offset;
-
- csum = crc32_le(csum, base, tx->tx_kiov[i].bv_len);
-
- kunmap(tx->tx_kiov[i].bv_page);
- }
- } else {
- for (i = 1; i < tx->tx_niov; i++)
- csum = crc32_le(csum, tx->tx_iov[i].iov_base,
- tx->tx_iov[i].iov_len);
- }
-
- if (*ksocknal_tunables.ksnd_inject_csum_error) {
- csum++;
- *ksocknal_tunables.ksnd_inject_csum_error = 0;
- }
-
- tx->tx_msg.ksm_csum = csum;
-}
-
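ksocknal_lib_csum_tx() above zeroes ksm_csum, seeds crc32_le() with ~0
over the header, then folds each fragment in order; because CRC32 is a
pure fold, the piecewise result equals a single pass over the
concatenated bytes, which is what the receiver effectively computes. A
userspace sketch of that property, with a minimal bitwise CRC standing in
for the kernel's table-driven crc32_le() (same polynomial, no final
inversion):

#include <stdio.h>
#include <stdint.h>
#include <string.h>

static uint32_t crc32_le(uint32_t crc, const void *buf, size_t len)
{
	const uint8_t *p = buf;

	while (len--) {
		crc ^= *p++;
		for (int i = 0; i < 8; i++)
			crc = (crc >> 1) ^ (0xedb88320u & -(crc & 1));
	}
	return crc;
}

int main(void)
{
	const char *frags[] = { "hdr|", "payload-a|", "payload-b" };
	char whole[64] = "";
	uint32_t csum = ~0u;

	/* fold the fragments in order, as ksocknal_lib_csum_tx() does */
	for (int i = 0; i < 3; i++) {
		csum = crc32_le(csum, frags[i], strlen(frags[i]));
		strcat(whole, frags[i]);
	}
	printf("piecewise=%08x one-pass=%08x\n",
	       csum, crc32_le(~0u, whole, strlen(whole)));
	return 0;
}
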
-int
-ksocknal_lib_get_conn_tunables(struct ksock_conn *conn, int *txmem,
- int *rxmem, int *nagle)
-{
- struct socket *sock = conn->ksnc_sock;
- int len;
- int rc;
-
- rc = ksocknal_connsock_addref(conn);
- if (rc) {
- LASSERT(conn->ksnc_closing);
- *txmem = *rxmem = *nagle = 0;
- return -ESHUTDOWN;
- }
-
- rc = lnet_sock_getbuf(sock, txmem, rxmem);
- if (!rc) {
- len = sizeof(*nagle);
- rc = kernel_getsockopt(sock, SOL_TCP, TCP_NODELAY,
- (char *)nagle, &len);
- }
-
- ksocknal_connsock_decref(conn);
-
- if (!rc)
- *nagle = !*nagle;
- else
- *txmem = *rxmem = *nagle = 0;
-
- return rc;
-}
-
-int
-ksocknal_lib_setup_sock(struct socket *sock)
-{
- int rc;
- int option;
- int keep_idle;
- int keep_intvl;
- int keep_count;
- int do_keepalive;
- struct linger linger;
-
- sock->sk->sk_allocation = GFP_NOFS;
-
- /*
- * Ensure this socket aborts active sends immediately when we close
- * it.
- */
- linger.l_onoff = 0;
- linger.l_linger = 0;
-
- rc = kernel_setsockopt(sock, SOL_SOCKET, SO_LINGER, (char *)&linger,
- sizeof(linger));
- if (rc) {
- CERROR("Can't set SO_LINGER: %d\n", rc);
- return rc;
- }
-
- option = -1;
- rc = kernel_setsockopt(sock, SOL_TCP, TCP_LINGER2, (char *)&option,
- sizeof(option));
- if (rc) {
- CERROR("Can't set SO_LINGER2: %d\n", rc);
- return rc;
- }
-
- if (!*ksocknal_tunables.ksnd_nagle) {
- option = 1;
-
- rc = kernel_setsockopt(sock, SOL_TCP, TCP_NODELAY,
- (char *)&option, sizeof(option));
- if (rc) {
- CERROR("Can't disable nagle: %d\n", rc);
- return rc;
- }
- }
-
- rc = lnet_sock_setbuf(sock, *ksocknal_tunables.ksnd_tx_buffer_size,
- *ksocknal_tunables.ksnd_rx_buffer_size);
- if (rc) {
- CERROR("Can't set buffer tx %d, rx %d buffers: %d\n",
- *ksocknal_tunables.ksnd_tx_buffer_size,
- *ksocknal_tunables.ksnd_rx_buffer_size, rc);
- return rc;
- }
-
-/* TCP_BACKOFF_* sockopt tunables unsupported in stock kernels */
-
- /* snapshot tunables */
- keep_idle = *ksocknal_tunables.ksnd_keepalive_idle;
- keep_count = *ksocknal_tunables.ksnd_keepalive_count;
- keep_intvl = *ksocknal_tunables.ksnd_keepalive_intvl;
-
- do_keepalive = (keep_idle > 0 && keep_count > 0 && keep_intvl > 0);
-
- option = (do_keepalive ? 1 : 0);
- rc = kernel_setsockopt(sock, SOL_SOCKET, SO_KEEPALIVE, (char *)&option,
- sizeof(option));
- if (rc) {
- CERROR("Can't set SO_KEEPALIVE: %d\n", rc);
- return rc;
- }
-
- if (!do_keepalive)
- return 0;
-
- rc = kernel_setsockopt(sock, SOL_TCP, TCP_KEEPIDLE, (char *)&keep_idle,
- sizeof(keep_idle));
- if (rc) {
- CERROR("Can't set TCP_KEEPIDLE: %d\n", rc);
- return rc;
- }
-
- rc = kernel_setsockopt(sock, SOL_TCP, TCP_KEEPINTVL,
- (char *)&keep_intvl, sizeof(keep_intvl));
- if (rc) {
- CERROR("Can't set TCP_KEEPINTVL: %d\n", rc);
- return rc;
- }
-
- rc = kernel_setsockopt(sock, SOL_TCP, TCP_KEEPCNT, (char *)&keep_count,
- sizeof(keep_count));
- if (rc) {
- CERROR("Can't set TCP_KEEPCNT: %d\n", rc);
- return rc;
- }
-
- return 0;
-}
-
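The keepalive triple configured above (idle, interval, count) has a direct
userspace analogue via setsockopt(); a sketch using this module's defaults
(keepalive_idle=30, keepalive_intvl=5, keepalive_count=5, per
socklnd_modparams.c below) on a Linux TCP socket:

#include <stdio.h>
#include <sys/socket.h>
#include <netinet/in.h>
#include <netinet/tcp.h>

int main(void)
{
	int fd = socket(AF_INET, SOCK_STREAM, 0);
	int on = 1, idle = 30, intvl = 5, cnt = 5;

	if (fd < 0)
		return 1;
	/* enable keepalive, then tune the probe schedule */
	if (setsockopt(fd, SOL_SOCKET, SO_KEEPALIVE, &on, sizeof(on)) ||
	    setsockopt(fd, IPPROTO_TCP, TCP_KEEPIDLE, &idle, sizeof(idle)) ||
	    setsockopt(fd, IPPROTO_TCP, TCP_KEEPINTVL, &intvl, sizeof(intvl)) ||
	    setsockopt(fd, IPPROTO_TCP, TCP_KEEPCNT, &cnt, sizeof(cnt))) {
		perror("setsockopt");
		return 1;
	}
	/* a dead peer is declared after roughly idle + intvl * cnt seconds */
	printf("dead peer detected after ~%d s\n", idle + intvl * cnt);
	return 0;
}
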
-void
-ksocknal_lib_push_conn(struct ksock_conn *conn)
-{
- struct sock *sk;
- struct tcp_sock *tp;
- int nonagle;
- int val = 1;
- int rc;
-
- rc = ksocknal_connsock_addref(conn);
- if (rc) /* being shut down */
- return;
-
- sk = conn->ksnc_sock->sk;
- tp = tcp_sk(sk);
-
- lock_sock(sk);
- nonagle = tp->nonagle;
- tp->nonagle = 1;
- release_sock(sk);
-
- rc = kernel_setsockopt(conn->ksnc_sock, SOL_TCP, TCP_NODELAY,
- (char *)&val, sizeof(val));
- LASSERT(!rc);
-
- lock_sock(sk);
- tp->nonagle = nonagle;
- release_sock(sk);
-
- ksocknal_connsock_decref(conn);
-}
-
-/*
- * socket callbacks in Linux
- */
-static void
-ksocknal_data_ready(struct sock *sk)
-{
- struct ksock_conn *conn;
-
- /* interleave correctly with closing sockets... */
- LASSERT(!in_irq());
- read_lock(&ksocknal_data.ksnd_global_lock);
-
- conn = sk->sk_user_data;
- if (!conn) { /* raced with ksocknal_terminate_conn */
- LASSERT(sk->sk_data_ready != &ksocknal_data_ready);
- sk->sk_data_ready(sk);
- } else {
- ksocknal_read_callback(conn);
- }
-
- read_unlock(&ksocknal_data.ksnd_global_lock);
-}
-
-static void
-ksocknal_write_space(struct sock *sk)
-{
- struct ksock_conn *conn;
- int wspace;
-	int min_wspace;
-
- /* interleave correctly with closing sockets... */
- LASSERT(!in_irq());
- read_lock(&ksocknal_data.ksnd_global_lock);
-
- conn = sk->sk_user_data;
- wspace = sk_stream_wspace(sk);
-	min_wspace = sk_stream_min_wspace(sk);
-
- CDEBUG(D_NET, "sk %p wspace %d low water %d conn %p%s%s%s\n",
-	       sk, wspace, min_wspace, conn,
- !conn ? "" : (conn->ksnc_tx_ready ?
- " ready" : " blocked"),
- !conn ? "" : (conn->ksnc_tx_scheduled ?
- " scheduled" : " idle"),
- !conn ? "" : (list_empty(&conn->ksnc_tx_queue) ?
- " empty" : " queued"));
-
- if (!conn) { /* raced with ksocknal_terminate_conn */
- LASSERT(sk->sk_write_space != &ksocknal_write_space);
- sk->sk_write_space(sk);
-
- read_unlock(&ksocknal_data.ksnd_global_lock);
- return;
- }
-
-	if (wspace >= min_wspace) { /* got enough space */
- ksocknal_write_callback(conn);
-
- /*
- * Clear SOCK_NOSPACE _after_ ksocknal_write_callback so the
- * ENOMEM check in ksocknal_transmit is race-free (think about
- * it).
- */
- clear_bit(SOCK_NOSPACE, &sk->sk_socket->flags);
- }
-
- read_unlock(&ksocknal_data.ksnd_global_lock);
-}
-
-void
-ksocknal_lib_save_callback(struct socket *sock, struct ksock_conn *conn)
-{
- conn->ksnc_saved_data_ready = sock->sk->sk_data_ready;
- conn->ksnc_saved_write_space = sock->sk->sk_write_space;
-}
-
-void
-ksocknal_lib_set_callback(struct socket *sock, struct ksock_conn *conn)
-{
- sock->sk->sk_user_data = conn;
- sock->sk->sk_data_ready = ksocknal_data_ready;
- sock->sk->sk_write_space = ksocknal_write_space;
-}
-
-void
-ksocknal_lib_reset_callback(struct socket *sock, struct ksock_conn *conn)
-{
- /*
- * Remove conn's network callbacks.
- * NB I _have_ to restore the callback, rather than storing a noop,
- * since the socket could survive past this module being unloaded!!
- */
- sock->sk->sk_data_ready = conn->ksnc_saved_data_ready;
- sock->sk->sk_write_space = conn->ksnc_saved_write_space;
-
- /*
- * A callback could be in progress already; they hold a read lock
- * on ksnd_global_lock (to serialise with me) and NOOP if
- * sk_user_data is NULL.
- */
- sock->sk->sk_user_data = NULL;
-}
-
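save/set/reset above implement a reversible hook: stash the socket's
original callbacks, install the driver's, and restore the originals at
teardown so the socket can outlive the module. The pattern in miniature
(types and names below are invented for illustration):

#include <stdio.h>

struct sock_demo {
	void (*data_ready)(struct sock_demo *);
	void *user_data;
};

static void (*saved_ready)(struct sock_demo *);

static void stack_ready(struct sock_demo *sk)
{
	printf("stack's own handler\n");
}

static void hook_ready(struct sock_demo *sk)
{
	if (!sk->user_data) {	/* raced with teardown: defer to the original */
		saved_ready(sk);
		return;
	}
	printf("driver handler for conn %p\n", sk->user_data);
}

int main(void)
{
	struct sock_demo sk = { stack_ready, NULL };
	int conn = 42;

	saved_ready = sk.data_ready;	/* save_callback */
	sk.user_data = &conn;
	sk.data_ready = hook_ready;	/* set_callback */
	sk.data_ready(&sk);		/* runs the driver hook */

	sk.data_ready = saved_ready;	/* reset_callback restores the original */
	sk.user_data = NULL;
	sk.data_ready(&sk);		/* back to the stack's handler */
	return 0;
}
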
-int
-ksocknal_lib_memory_pressure(struct ksock_conn *conn)
-{
- int rc = 0;
- struct ksock_sched *sched;
-
- sched = conn->ksnc_scheduler;
- spin_lock_bh(&sched->kss_lock);
-
- if (!test_bit(SOCK_NOSPACE, &conn->ksnc_sock->flags) &&
- !conn->ksnc_tx_ready) {
- /*
- * SOCK_NOSPACE is set when the socket fills
- * and cleared in the write_space callback
- * (which also sets ksnc_tx_ready). If
- * SOCK_NOSPACE and ksnc_tx_ready are BOTH
- * zero, I didn't fill the socket and
- * write_space won't reschedule me, so I
- * return -ENOMEM to get my caller to retry
- * after a timeout
- */
- rc = -ENOMEM;
- }
-
- spin_unlock_bh(&sched->kss_lock);
-
- return rc;
-}
diff --git a/drivers/staging/lustre/lnet/klnds/socklnd/socklnd_modparams.c b/drivers/staging/lustre/lnet/klnds/socklnd/socklnd_modparams.c
deleted file mode 100644
index 5663a4ca94d4..000000000000
--- a/drivers/staging/lustre/lnet/klnds/socklnd/socklnd_modparams.c
+++ /dev/null
@@ -1,184 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/*
- * Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved.
- *
- * Copyright (c) 2011, 2012, Intel Corporation.
- *
- * Author: Eric Barton <eric@bartonsoftware.com>
- *
- * Portals is free software; you can redistribute it and/or
- * modify it under the terms of version 2 of the GNU General Public
- * License as published by the Free Software Foundation.
- *
- * Portals is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- */
-
-#include "socklnd.h"
-
-static int sock_timeout = 50;
-module_param(sock_timeout, int, 0644);
-MODULE_PARM_DESC(sock_timeout, "dead socket timeout (seconds)");
-
-static int credits = 256;
-module_param(credits, int, 0444);
-MODULE_PARM_DESC(credits, "# concurrent sends");
-
-static int peer_credits = 8;
-module_param(peer_credits, int, 0444);
-MODULE_PARM_DESC(peer_credits, "# concurrent sends to 1 peer");
-
-static int peer_buffer_credits;
-module_param(peer_buffer_credits, int, 0444);
-MODULE_PARM_DESC(peer_buffer_credits, "# per-peer router buffer credits");
-
-static int peer_timeout = 180;
-module_param(peer_timeout, int, 0444);
-MODULE_PARM_DESC(peer_timeout, "Seconds without aliveness news to declare peer dead (<=0 to disable)");
-
-/*
- * Number of daemons in each per-CPT thread pool; if it's not set,
- * we estimate a reasonable value based on the number of CPUs.
- */
-static unsigned int nscheds;
-module_param(nscheds, int, 0444);
-MODULE_PARM_DESC(nscheds, "# scheduler daemons in each pool while starting");
-
-static int nconnds = 4;
-module_param(nconnds, int, 0444);
-MODULE_PARM_DESC(nconnds, "# connection daemons while starting");
-
-static int nconnds_max = 64;
-module_param(nconnds_max, int, 0444);
-MODULE_PARM_DESC(nconnds_max, "max # connection daemons");
-
-static int min_reconnectms = 1000;
-module_param(min_reconnectms, int, 0644);
-MODULE_PARM_DESC(min_reconnectms, "min connection retry interval (mS)");
-
-static int max_reconnectms = 60000;
-module_param(max_reconnectms, int, 0644);
-MODULE_PARM_DESC(max_reconnectms, "max connection retry interval (mS)");
-
-#define DEFAULT_EAGER_ACK 0
-static int eager_ack = DEFAULT_EAGER_ACK;
-module_param(eager_ack, int, 0644);
-MODULE_PARM_DESC(eager_ack, "send tcp ack packets eagerly");
-
-static int typed_conns = 1;
-module_param(typed_conns, int, 0444);
-MODULE_PARM_DESC(typed_conns, "use different sockets for bulk");
-
-static int min_bulk = 1 << 10;
-module_param(min_bulk, int, 0644);
-MODULE_PARM_DESC(min_bulk, "smallest 'large' message");
-
-# define DEFAULT_BUFFER_SIZE 0
-static int tx_buffer_size = DEFAULT_BUFFER_SIZE;
-module_param(tx_buffer_size, int, 0644);
-MODULE_PARM_DESC(tx_buffer_size, "socket tx buffer size (0 for system default)");
-
-static int rx_buffer_size = DEFAULT_BUFFER_SIZE;
-module_param(rx_buffer_size, int, 0644);
-MODULE_PARM_DESC(rx_buffer_size, "socket rx buffer size (0 for system default)");
-
-static int nagle;
-module_param(nagle, int, 0644);
-MODULE_PARM_DESC(nagle, "enable NAGLE?");
-
-static int round_robin = 1;
-module_param(round_robin, int, 0644);
-MODULE_PARM_DESC(round_robin, "Round robin for multiple interfaces");
-
-static int keepalive = 30;
-module_param(keepalive, int, 0644);
-MODULE_PARM_DESC(keepalive, "# seconds before send keepalive");
-
-static int keepalive_idle = 30;
-module_param(keepalive_idle, int, 0644);
-MODULE_PARM_DESC(keepalive_idle, "# idle seconds before probe");
-
-#define DEFAULT_KEEPALIVE_COUNT 5
-static int keepalive_count = DEFAULT_KEEPALIVE_COUNT;
-module_param(keepalive_count, int, 0644);
-MODULE_PARM_DESC(keepalive_count, "# missed probes == dead");
-
-static int keepalive_intvl = 5;
-module_param(keepalive_intvl, int, 0644);
-MODULE_PARM_DESC(keepalive_intvl, "seconds between probes");
-
-static int enable_csum;
-module_param(enable_csum, int, 0644);
-MODULE_PARM_DESC(enable_csum, "enable check sum");
-
-static int inject_csum_error;
-module_param(inject_csum_error, int, 0644);
-MODULE_PARM_DESC(inject_csum_error, "set non-zero to inject a checksum error");
-
-static int nonblk_zcack = 1;
-module_param(nonblk_zcack, int, 0644);
-MODULE_PARM_DESC(nonblk_zcack, "always send ZC-ACK on non-blocking connection");
-
-static unsigned int zc_min_payload = 16 << 10;
-module_param(zc_min_payload, int, 0644);
-MODULE_PARM_DESC(zc_min_payload, "minimum payload size to zero copy");
-
-static unsigned int zc_recv;
-module_param(zc_recv, int, 0644);
-MODULE_PARM_DESC(zc_recv, "enable ZC recv for Chelsio driver");
-
-static unsigned int zc_recv_min_nfrags = 16;
-module_param(zc_recv_min_nfrags, int, 0644);
-MODULE_PARM_DESC(zc_recv_min_nfrags, "minimum # of fragments to enable ZC recv");
-
-#if SOCKNAL_VERSION_DEBUG
-static int protocol = 3;
-module_param(protocol, int, 0644);
-MODULE_PARM_DESC(protocol, "protocol version");
-#endif
-
-struct ksock_tunables ksocknal_tunables;
-
-int ksocknal_tunables_init(void)
-{
- /* initialize ksocknal_tunables structure */
- ksocknal_tunables.ksnd_timeout = &sock_timeout;
- ksocknal_tunables.ksnd_nscheds = &nscheds;
- ksocknal_tunables.ksnd_nconnds = &nconnds;
- ksocknal_tunables.ksnd_nconnds_max = &nconnds_max;
- ksocknal_tunables.ksnd_min_reconnectms = &min_reconnectms;
- ksocknal_tunables.ksnd_max_reconnectms = &max_reconnectms;
- ksocknal_tunables.ksnd_eager_ack = &eager_ack;
- ksocknal_tunables.ksnd_typed_conns = &typed_conns;
- ksocknal_tunables.ksnd_min_bulk = &min_bulk;
- ksocknal_tunables.ksnd_tx_buffer_size = &tx_buffer_size;
- ksocknal_tunables.ksnd_rx_buffer_size = &rx_buffer_size;
- ksocknal_tunables.ksnd_nagle = &nagle;
- ksocknal_tunables.ksnd_round_robin = &round_robin;
- ksocknal_tunables.ksnd_keepalive = &keepalive;
- ksocknal_tunables.ksnd_keepalive_idle = &keepalive_idle;
- ksocknal_tunables.ksnd_keepalive_count = &keepalive_count;
- ksocknal_tunables.ksnd_keepalive_intvl = &keepalive_intvl;
- ksocknal_tunables.ksnd_credits = &credits;
- ksocknal_tunables.ksnd_peertxcredits = &peer_credits;
- ksocknal_tunables.ksnd_peerrtrcredits = &peer_buffer_credits;
- ksocknal_tunables.ksnd_peertimeout = &peer_timeout;
- ksocknal_tunables.ksnd_enable_csum = &enable_csum;
- ksocknal_tunables.ksnd_inject_csum_error = &inject_csum_error;
- ksocknal_tunables.ksnd_nonblk_zcack = &nonblk_zcack;
- ksocknal_tunables.ksnd_zc_min_payload = &zc_min_payload;
- ksocknal_tunables.ksnd_zc_recv = &zc_recv;
- ksocknal_tunables.ksnd_zc_recv_min_nfrags = &zc_recv_min_nfrags;
-
-#if SOCKNAL_VERSION_DEBUG
- ksocknal_tunables.ksnd_protocol = &protocol;
-#endif
-
- if (*ksocknal_tunables.ksnd_zc_min_payload < (2 << 10))
- *ksocknal_tunables.ksnd_zc_min_payload = 2 << 10;
-
- return 0;
-}
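-
-/*
- * Editorial usage sketch, not part of the original file (the module
- * name "ksocklnd" is assumed from the socklnd Makefile): these
- * tunables are set at module load time, e.g.
- *
- * modprobe ksocklnd sock_timeout=100 credits=512 nagle=0
- *
- * Parameters declared 0444 above are read-only after load; 0644 ones
- * can also be changed later via /sys/module/ksocklnd/parameters/.
- */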
diff --git a/drivers/staging/lustre/lnet/klnds/socklnd/socklnd_proto.c b/drivers/staging/lustre/lnet/klnds/socklnd/socklnd_proto.c
deleted file mode 100644
index 05982dac781c..000000000000
--- a/drivers/staging/lustre/lnet/klnds/socklnd/socklnd_proto.c
+++ /dev/null
@@ -1,810 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/*
- * Copyright (c) 2009, 2010, Oracle and/or its affiliates. All rights reserved.
- *
- * Copyright (c) 2012, Intel Corporation.
- *
- * Author: Zach Brown <zab@zabbo.net>
- * Author: Peter J. Braam <braam@clusterfs.com>
- * Author: Phil Schwan <phil@clusterfs.com>
- * Author: Eric Barton <eric@bartonsoftware.com>
- *
- * This file is part of Portals, http://www.sf.net/projects/sandiaportals/
- *
- * Portals is free software; you can redistribute it and/or
- * modify it under the terms of version 2 of the GNU General Public
- * License as published by the Free Software Foundation.
- *
- * Portals is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- */
-
-#include "socklnd.h"
-
-/*
- * Protocol entries:
- * pro_send_hello() : send hello message
- * pro_recv_hello() : receive hello message
- * pro_pack() : pack message header
- * pro_unpack() : unpack message header
- * pro_queue_tx_zcack() : called holding BH lock: kss_lock;
- * returns 1 if the ACK is piggybacked, otherwise 0
- * pro_queue_tx_msg() : called holding BH lock: kss_lock;
- * returns the ACK piggybacked by my message, or NULL
- * pro_handle_zcreq() : handler for an incoming ZC-REQ
- * pro_handle_zcack() : handler for an incoming ZC-ACK
- * pro_match_tx() : called holding glock
- */
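-
-/*
- * Editorial sketch, not part of the original file: how a caller
- * typically dispatches through this table (example_try_piggyback is a
- * made-up name; kss_lock must be held, per the rules above).
- */
-static inline int
-example_try_piggyback(struct ksock_conn *conn, struct ksock_tx *tx_ack,
- __u64 cookie)
-{
- if (conn->ksnc_proto->pro_queue_tx_zcack)
- return conn->ksnc_proto->pro_queue_tx_zcack(conn, tx_ack, cookie);
- return 0; /* V1.x protocol: no ZC-ACK support */
-}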
-
-static struct ksock_tx *
-ksocknal_queue_tx_msg_v1(struct ksock_conn *conn, struct ksock_tx *tx_msg)
-{
- /* V1.x, just enqueue it */
- list_add_tail(&tx_msg->tx_list, &conn->ksnc_tx_queue);
- return NULL;
-}
-
-void
-ksocknal_next_tx_carrier(struct ksock_conn *conn)
-{
- struct ksock_tx *tx = conn->ksnc_tx_carrier;
-
- /* Called holding BH lock: conn->ksnc_scheduler->kss_lock */
- LASSERT(!list_empty(&conn->ksnc_tx_queue));
- LASSERT(tx);
-
- /* Next TX that can carry ZC-ACK or LNet message */
- if (tx->tx_list.next == &conn->ksnc_tx_queue) {
- /* no more packets queued */
- conn->ksnc_tx_carrier = NULL;
- } else {
- conn->ksnc_tx_carrier = list_next_entry(tx, tx_list);
- LASSERT(conn->ksnc_tx_carrier->tx_msg.ksm_type == tx->tx_msg.ksm_type);
- }
-}
-
-static int
-ksocknal_queue_tx_zcack_v2(struct ksock_conn *conn,
- struct ksock_tx *tx_ack, __u64 cookie)
-{
- struct ksock_tx *tx = conn->ksnc_tx_carrier;
-
- LASSERT(!tx_ack ||
- tx_ack->tx_msg.ksm_type == KSOCK_MSG_NOOP);
-
- /*
- * Enqueue or piggyback tx_ack / cookie:
- * . if no queued tx can piggyback the cookie of tx_ack (or cookie),
- * just enqueue tx_ack (if tx_ack != NULL) and return 0.
- * . if a queued tx can piggyback the cookie of tx_ack (or cookie),
- * piggyback the cookie and return 1.
- */
- if (!tx) {
- if (tx_ack) {
- list_add_tail(&tx_ack->tx_list,
- &conn->ksnc_tx_queue);
- conn->ksnc_tx_carrier = tx_ack;
- }
- return 0;
- }
-
- if (tx->tx_msg.ksm_type == KSOCK_MSG_NOOP) {
- /* tx is noop zc-ack, can't piggyback zc-ack cookie */
- if (tx_ack)
- list_add_tail(&tx_ack->tx_list,
- &conn->ksnc_tx_queue);
- return 0;
- }
-
- LASSERT(tx->tx_msg.ksm_type == KSOCK_MSG_LNET);
- LASSERT(!tx->tx_msg.ksm_zc_cookies[1]);
-
- if (tx_ack)
- cookie = tx_ack->tx_msg.ksm_zc_cookies[1];
-
- /* piggyback the zc-ack cookie */
- tx->tx_msg.ksm_zc_cookies[1] = cookie;
- /* move on to the next TX which can carry cookie */
- ksocknal_next_tx_carrier(conn);
-
- return 1;
-}
-
-static struct ksock_tx *
-ksocknal_queue_tx_msg_v2(struct ksock_conn *conn, struct ksock_tx *tx_msg)
-{
- struct ksock_tx *tx = conn->ksnc_tx_carrier;
-
- /*
- * Enqueue tx_msg:
- * . If there is no NOOP on the connection, just enqueue
- * tx_msg and return NULL.
- * . If there is a NOOP on the connection, piggyback its cookie
- * on tx_msg, replace the NOOP tx with tx_msg, and return the
- * NOOP tx.
- */
- if (!tx) { /* nothing on queue */
- list_add_tail(&tx_msg->tx_list, &conn->ksnc_tx_queue);
- conn->ksnc_tx_carrier = tx_msg;
- return NULL;
- }
-
- if (tx->tx_msg.ksm_type == KSOCK_MSG_LNET) { /* nothing to carry */
- list_add_tail(&tx_msg->tx_list, &conn->ksnc_tx_queue);
- return NULL;
- }
-
- LASSERT(tx->tx_msg.ksm_type == KSOCK_MSG_NOOP);
-
- /* There is a NOOP zc-ack whose cookie can be piggybacked */
- tx_msg->tx_msg.ksm_zc_cookies[1] = tx->tx_msg.ksm_zc_cookies[1];
- ksocknal_next_tx_carrier(conn);
-
- /* use new_tx to replace the noop zc-ack packet */
- list_add(&tx_msg->tx_list, &tx->tx_list);
- list_del(&tx->tx_list);
-
- return tx;
-}
-
-static int
-ksocknal_queue_tx_zcack_v3(struct ksock_conn *conn,
- struct ksock_tx *tx_ack, __u64 cookie)
-{
- struct ksock_tx *tx;
-
- if (conn->ksnc_type != SOCKLND_CONN_ACK)
- return ksocknal_queue_tx_zcack_v2(conn, tx_ack, cookie);
-
- /* non-blocking ZC-ACK (to router) */
- LASSERT(!tx_ack ||
- tx_ack->tx_msg.ksm_type == KSOCK_MSG_NOOP);
-
- tx = conn->ksnc_tx_carrier;
- if (!tx) {
- if (tx_ack) {
- list_add_tail(&tx_ack->tx_list,
- &conn->ksnc_tx_queue);
- conn->ksnc_tx_carrier = tx_ack;
- }
- return 0;
- }
-
- /* tx is the current carrier: conn->ksnc_tx_carrier */
-
- if (tx_ack)
- cookie = tx_ack->tx_msg.ksm_zc_cookies[1];
-
- if (cookie == SOCKNAL_KEEPALIVE_PING) /* ignore keepalive PING */
- return 1;
-
- if (tx->tx_msg.ksm_zc_cookies[1] == SOCKNAL_KEEPALIVE_PING) {
- /* replace the keepalive PING with a real ACK */
- LASSERT(!tx->tx_msg.ksm_zc_cookies[0]);
- tx->tx_msg.ksm_zc_cookies[1] = cookie;
- return 1;
- }
-
- if (cookie == tx->tx_msg.ksm_zc_cookies[0] ||
- cookie == tx->tx_msg.ksm_zc_cookies[1]) {
- CWARN("%s: duplicated ZC cookie: %llu\n",
- libcfs_id2str(conn->ksnc_peer->ksnp_id), cookie);
- return 1; /* XXX return error in the future */
- }
-
- if (!tx->tx_msg.ksm_zc_cookies[0]) {
- /*
- * The NOOP tx carries only one ZC-ACK cookie so far,
- * so it can carry at least one more
- */
- if (tx->tx_msg.ksm_zc_cookies[1] > cookie) {
- tx->tx_msg.ksm_zc_cookies[0] = tx->tx_msg.ksm_zc_cookies[1];
- tx->tx_msg.ksm_zc_cookies[1] = cookie;
- } else {
- tx->tx_msg.ksm_zc_cookies[0] = cookie;
- }
-
- if (tx->tx_msg.ksm_zc_cookies[0] - tx->tx_msg.ksm_zc_cookies[1] > 2) {
- /*
- * not likely to carry more ACKs, skip it
- * to simplify logic
- */
- ksocknal_next_tx_carrier(conn);
- }
-
- return 1;
- }
-
- /* the carrier already holds two or more cookies */
-
- if (tx->tx_msg.ksm_zc_cookies[0] > tx->tx_msg.ksm_zc_cookies[1]) {
- __u64 tmp = 0;
-
- /* two separated cookies: (a+2, a) or (a+1, a) */
- LASSERT(tx->tx_msg.ksm_zc_cookies[0] -
- tx->tx_msg.ksm_zc_cookies[1] <= 2);
-
- if (tx->tx_msg.ksm_zc_cookies[0] -
- tx->tx_msg.ksm_zc_cookies[1] == 2) {
- if (cookie == tx->tx_msg.ksm_zc_cookies[1] + 1)
- tmp = cookie;
- } else if (cookie == tx->tx_msg.ksm_zc_cookies[1] - 1) {
- tmp = tx->tx_msg.ksm_zc_cookies[1];
- } else if (cookie == tx->tx_msg.ksm_zc_cookies[0] + 1) {
- tmp = tx->tx_msg.ksm_zc_cookies[0];
- }
-
- if (tmp) {
- /* range of cookies */
- tx->tx_msg.ksm_zc_cookies[0] = tmp - 1;
- tx->tx_msg.ksm_zc_cookies[1] = tmp + 1;
- return 1;
- }
-
- } else {
- /*
- * ksm_zc_cookies[0] < ksm_zc_cookies[1],
- * it is a range of cookies
- */
- if (cookie >= tx->tx_msg.ksm_zc_cookies[0] &&
- cookie <= tx->tx_msg.ksm_zc_cookies[1]) {
- CWARN("%s: duplicated ZC cookie: %llu\n",
- libcfs_id2str(conn->ksnc_peer->ksnp_id), cookie);
- return 1; /* XXX: return error in the future */
- }
-
- if (cookie == tx->tx_msg.ksm_zc_cookies[1] + 1) {
- tx->tx_msg.ksm_zc_cookies[1] = cookie;
- return 1;
- }
-
- if (cookie == tx->tx_msg.ksm_zc_cookies[0] - 1) {
- tx->tx_msg.ksm_zc_cookies[0] = cookie;
- return 1;
- }
- }
-
- /* failed to piggyback ZC-ACK */
- if (tx_ack) {
- list_add_tail(&tx_ack->tx_list, &conn->ksnc_tx_queue);
- /* the next tx can piggyback at least 1 ACK */
- ksocknal_next_tx_carrier(conn);
- }
-
- return 0;
-}
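-
-/*
- * Worked example (editorial, not in the original file) of the cookie
- * encoding used above: a NOOP carrier holding the single cookie 7 has
- * ksm_zc_cookies[] = {0, 7}. When cookie 5 arrives it becomes the
- * separated pair {7, 5}; when cookie 6 then fills the gap, tmp == 6
- * turns it into the inclusive range {5, 7}; a later cookie 8 extends
- * the range to {5, 8}.
- */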
-
-static int
-ksocknal_match_tx(struct ksock_conn *conn, struct ksock_tx *tx, int nonblk)
-{
- int nob;
-
-#if SOCKNAL_VERSION_DEBUG
- if (!*ksocknal_tunables.ksnd_typed_conns)
- return SOCKNAL_MATCH_YES;
-#endif
-
- if (!tx || !tx->tx_lnetmsg) {
- /* noop packet */
- nob = offsetof(struct ksock_msg, ksm_u);
- } else {
- nob = tx->tx_lnetmsg->msg_len +
- ((conn->ksnc_proto == &ksocknal_protocol_v1x) ?
- sizeof(struct lnet_hdr) : sizeof(struct ksock_msg));
- }
-
- /* default checking for typed connection */
- switch (conn->ksnc_type) {
- default:
- CERROR("ksnc_type bad: %u\n", conn->ksnc_type);
- LBUG();
- case SOCKLND_CONN_ANY:
- return SOCKNAL_MATCH_YES;
-
- case SOCKLND_CONN_BULK_IN:
- return SOCKNAL_MATCH_MAY;
-
- case SOCKLND_CONN_BULK_OUT:
- if (nob < *ksocknal_tunables.ksnd_min_bulk)
- return SOCKNAL_MATCH_MAY;
- else
- return SOCKNAL_MATCH_YES;
-
- case SOCKLND_CONN_CONTROL:
- if (nob >= *ksocknal_tunables.ksnd_min_bulk)
- return SOCKNAL_MATCH_MAY;
- else
- return SOCKNAL_MATCH_YES;
- }
-}
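-
-/*
- * Editorial summary, not in the original file, of the default rules
- * above ("big" means nob >= *ksnd_min_bulk):
- * . SOCKLND_CONN_ANY always matches (YES);
- * . BULK_IN may take anything (MAY);
- * . BULK_OUT takes big messages (YES), may take small ones (MAY);
- * . CONTROL takes small messages (YES), may take big ones (MAY).
- */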
-
-static int
-ksocknal_match_tx_v3(struct ksock_conn *conn, struct ksock_tx *tx, int nonblk)
-{
- int nob;
-
- if (!tx || !tx->tx_lnetmsg)
- nob = offsetof(struct ksock_msg, ksm_u);
- else
- nob = tx->tx_lnetmsg->msg_len + sizeof(struct ksock_msg);
-
- switch (conn->ksnc_type) {
- default:
- CERROR("ksnc_type bad: %u\n", conn->ksnc_type);
- LBUG();
- case SOCKLND_CONN_ANY:
- return SOCKNAL_MATCH_NO;
-
- case SOCKLND_CONN_ACK:
- if (nonblk)
- return SOCKNAL_MATCH_YES;
- else if (!tx || !tx->tx_lnetmsg)
- return SOCKNAL_MATCH_MAY;
- else
- return SOCKNAL_MATCH_NO;
-
- case SOCKLND_CONN_BULK_OUT:
- if (nonblk)
- return SOCKNAL_MATCH_NO;
- else if (nob < *ksocknal_tunables.ksnd_min_bulk)
- return SOCKNAL_MATCH_MAY;
- else
- return SOCKNAL_MATCH_YES;
-
- case SOCKLND_CONN_CONTROL:
- if (nonblk)
- return SOCKNAL_MATCH_NO;
- else if (nob >= *ksocknal_tunables.ksnd_min_bulk)
- return SOCKNAL_MATCH_MAY;
- else
- return SOCKNAL_MATCH_YES;
- }
-}
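-
-/*
- * Editorial summary, not in the original file, of the V3 rules above;
- * nonblk TXs (ZC-ACKs to a router) only match the dedicated ACK
- * connection:
- * . SOCKLND_CONN_ANY never matches (NO);
- * . the ACK conn takes nonblk TXs (YES) and may take noops (MAY);
- * . BULK_OUT takes big messages (YES), may take small ones (MAY);
- * . CONTROL takes small messages (YES), may take big ones (MAY).
- */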
-
-/* (Sink) handle incoming ZC request from sender */
-static int
-ksocknal_handle_zcreq(struct ksock_conn *c, __u64 cookie, int remote)
-{
- struct ksock_peer *peer = c->ksnc_peer;
- struct ksock_conn *conn;
- struct ksock_tx *tx;
- int rc;
-
- read_lock(&ksocknal_data.ksnd_global_lock);
-
- conn = ksocknal_find_conn_locked(peer, NULL, !!remote);
- if (conn) {
- struct ksock_sched *sched = conn->ksnc_scheduler;
-
- LASSERT(conn->ksnc_proto->pro_queue_tx_zcack);
-
- spin_lock_bh(&sched->kss_lock);
-
- rc = conn->ksnc_proto->pro_queue_tx_zcack(conn, NULL, cookie);
-
- spin_unlock_bh(&sched->kss_lock);
-
- if (rc) { /* piggybacked */
- read_unlock(&ksocknal_data.ksnd_global_lock);
- return 0;
- }
- }
-
- read_unlock(&ksocknal_data.ksnd_global_lock);
-
- /* ACK connection is not ready, or can't piggyback the ACK */
- tx = ksocknal_alloc_tx_noop(cookie, !!remote);
- if (!tx)
- return -ENOMEM;
-
- rc = ksocknal_launch_packet(peer->ksnp_ni, tx, peer->ksnp_id);
- if (!rc)
- return 0;
-
- ksocknal_free_tx(tx);
- return rc;
-}
-
-/* (Sender) handle ZC_ACK from sink */
-static int
-ksocknal_handle_zcack(struct ksock_conn *conn, __u64 cookie1, __u64 cookie2)
-{
- struct ksock_peer *peer = conn->ksnc_peer;
- struct ksock_tx *tx;
- struct ksock_tx *temp;
- struct ksock_tx *tmp;
- LIST_HEAD(zlist);
- int count;
-
- if (!cookie1)
- cookie1 = cookie2;
-
- count = (cookie1 > cookie2) ? 2 : (cookie2 - cookie1 + 1);
-
- if (cookie2 == SOCKNAL_KEEPALIVE_PING &&
- conn->ksnc_proto == &ksocknal_protocol_v3x) {
- /* keepalive PING for V3.x, just ignore it */
- return count == 1 ? 0 : -EPROTO;
- }
-
- spin_lock(&peer->ksnp_lock);
-
- list_for_each_entry_safe(tx, tmp, &peer->ksnp_zc_req_list,
- tx_zc_list) {
- __u64 c = tx->tx_msg.ksm_zc_cookies[0];
-
- if (c == cookie1 || c == cookie2 ||
- (cookie1 < c && c < cookie2)) {
- tx->tx_msg.ksm_zc_cookies[0] = 0;
- list_del(&tx->tx_zc_list);
- list_add(&tx->tx_zc_list, &zlist);
-
- if (!--count)
- break;
- }
- }
-
- spin_unlock(&peer->ksnp_lock);
-
- list_for_each_entry_safe(tx, temp, &zlist, tx_zc_list) {
- list_del(&tx->tx_zc_list);
- ksocknal_tx_decref(tx);
- }
-
- return !count ? 0 : -EPROTO;
-}
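-
-/*
- * Editorial example, not in the original file: cookie1/cookie2 encode
- * either a single ACK (cookie1 == 0 gives count == 1), an inclusive
- * range (e.g. cookie1 == 5, cookie2 == 9 acknowledges count == 5 TXs),
- * or two separated cookies (cookie1 > cookie2 gives count == 2).
- */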
-
-static int
-ksocknal_send_hello_v1(struct ksock_conn *conn, struct ksock_hello_msg *hello)
-{
- struct socket *sock = conn->ksnc_sock;
- struct lnet_hdr *hdr;
- struct lnet_magicversion *hmv;
- int rc;
- int i;
-
- BUILD_BUG_ON(sizeof(struct lnet_magicversion) != offsetof(struct lnet_hdr, src_nid));
-
- hdr = kzalloc(sizeof(*hdr), GFP_NOFS);
- if (!hdr) {
- CERROR("Can't allocate struct lnet_hdr\n");
- return -ENOMEM;
- }
-
- hmv = (struct lnet_magicversion *)&hdr->dest_nid;
-
- /*
- * Reorganize the V2.x message header into a V1.x header
- * (struct lnet_hdr) and send it out
- */
- hmv->magic = cpu_to_le32(LNET_PROTO_TCP_MAGIC);
- hmv->version_major = cpu_to_le16(KSOCK_PROTO_V1_MAJOR);
- hmv->version_minor = cpu_to_le16(KSOCK_PROTO_V1_MINOR);
-
- if (the_lnet.ln_testprotocompat) {
- /* single-shot proto check */
- LNET_LOCK();
- if (the_lnet.ln_testprotocompat & 1) {
- hmv->version_major++; /* just different! */
- the_lnet.ln_testprotocompat &= ~1;
- }
- if (the_lnet.ln_testprotocompat & 2) {
- hmv->magic = LNET_PROTO_MAGIC;
- the_lnet.ln_testprotocompat &= ~2;
- }
- LNET_UNLOCK();
- }
-
- hdr->src_nid = cpu_to_le64(hello->kshm_src_nid);
- hdr->src_pid = cpu_to_le32(hello->kshm_src_pid);
- hdr->type = cpu_to_le32(LNET_MSG_HELLO);
- hdr->payload_length = cpu_to_le32(hello->kshm_nips * sizeof(__u32));
- hdr->msg.hello.type = cpu_to_le32(hello->kshm_ctype);
- hdr->msg.hello.incarnation = cpu_to_le64(hello->kshm_src_incarnation);
-
- rc = lnet_sock_write(sock, hdr, sizeof(*hdr), lnet_acceptor_timeout());
- if (rc) {
- CNETERR("Error %d sending HELLO hdr to %pI4h/%d\n",
- rc, &conn->ksnc_ipaddr, conn->ksnc_port);
- goto out;
- }
-
- if (!hello->kshm_nips)
- goto out;
-
- for (i = 0; i < (int)hello->kshm_nips; i++)
- hello->kshm_ips[i] = __cpu_to_le32(hello->kshm_ips[i]);
-
- rc = lnet_sock_write(sock, hello->kshm_ips,
- hello->kshm_nips * sizeof(__u32),
- lnet_acceptor_timeout());
- if (rc) {
- CNETERR("Error %d sending HELLO payload (%d) to %pI4h/%d\n",
- rc, hello->kshm_nips,
- &conn->ksnc_ipaddr, conn->ksnc_port);
- }
-out:
- kfree(hdr);
-
- return rc;
-}
-
-static int
-ksocknal_send_hello_v2(struct ksock_conn *conn, struct ksock_hello_msg *hello)
-{
- struct socket *sock = conn->ksnc_sock;
- int rc;
-
- hello->kshm_magic = LNET_PROTO_MAGIC;
- hello->kshm_version = conn->ksnc_proto->pro_version;
-
- if (the_lnet.ln_testprotocompat) {
- /* single-shot proto check */
- LNET_LOCK();
- if (the_lnet.ln_testprotocompat & 1) {
- hello->kshm_version++; /* just different! */
- the_lnet.ln_testprotocompat &= ~1;
- }
- LNET_UNLOCK();
- }
-
- rc = lnet_sock_write(sock, hello, offsetof(struct ksock_hello_msg, kshm_ips),
- lnet_acceptor_timeout());
- if (rc) {
- CNETERR("Error %d sending HELLO hdr to %pI4h/%d\n",
- rc, &conn->ksnc_ipaddr, conn->ksnc_port);
- return rc;
- }
-
- if (!hello->kshm_nips)
- return 0;
-
- rc = lnet_sock_write(sock, hello->kshm_ips,
- hello->kshm_nips * sizeof(__u32),
- lnet_acceptor_timeout());
- if (rc) {
- CNETERR("Error %d sending HELLO payload (%d) to %pI4h/%d\n",
- rc, hello->kshm_nips,
- &conn->ksnc_ipaddr, conn->ksnc_port);
- }
-
- return rc;
-}
-
-static int
-ksocknal_recv_hello_v1(struct ksock_conn *conn, struct ksock_hello_msg *hello,
- int timeout)
-{
- struct socket *sock = conn->ksnc_sock;
- struct lnet_hdr *hdr;
- int rc;
- int i;
-
- hdr = kzalloc(sizeof(*hdr), GFP_NOFS);
- if (!hdr) {
- CERROR("Can't allocate struct lnet_hdr\n");
- return -ENOMEM;
- }
-
- rc = lnet_sock_read(sock, &hdr->src_nid,
- sizeof(*hdr) - offsetof(struct lnet_hdr, src_nid),
- timeout);
- if (rc) {
- CERROR("Error %d reading rest of HELLO hdr from %pI4h\n",
- rc, &conn->ksnc_ipaddr);
- LASSERT(rc < 0 && rc != -EALREADY);
- goto out;
- }
-
- /* ...and check we got what we expected */
- if (hdr->type != cpu_to_le32(LNET_MSG_HELLO)) {
- CERROR("Expecting a HELLO hdr, but got type %d from %pI4h\n",
- le32_to_cpu(hdr->type),
- &conn->ksnc_ipaddr);
- rc = -EPROTO;
- goto out;
- }
-
- hello->kshm_src_nid = le64_to_cpu(hdr->src_nid);
- hello->kshm_src_pid = le32_to_cpu(hdr->src_pid);
- hello->kshm_src_incarnation = le64_to_cpu(hdr->msg.hello.incarnation);
- hello->kshm_ctype = le32_to_cpu(hdr->msg.hello.type);
- hello->kshm_nips = le32_to_cpu(hdr->payload_length) /
- sizeof(__u32);
-
- if (hello->kshm_nips > LNET_MAX_INTERFACES) {
- CERROR("Bad nips %d from ip %pI4h\n",
- hello->kshm_nips, &conn->ksnc_ipaddr);
- rc = -EPROTO;
- goto out;
- }
-
- if (!hello->kshm_nips)
- goto out;
-
- rc = lnet_sock_read(sock, hello->kshm_ips,
- hello->kshm_nips * sizeof(__u32), timeout);
- if (rc) {
- CERROR("Error %d reading IPs from ip %pI4h\n",
- rc, &conn->ksnc_ipaddr);
- LASSERT(rc < 0 && rc != -EALREADY);
- goto out;
- }
-
- for (i = 0; i < (int)hello->kshm_nips; i++) {
- hello->kshm_ips[i] = __le32_to_cpu(hello->kshm_ips[i]);
-
- if (!hello->kshm_ips[i]) {
- CERROR("Zero IP[%d] from ip %pI4h\n",
- i, &conn->ksnc_ipaddr);
- rc = -EPROTO;
- break;
- }
- }
-out:
- kfree(hdr);
-
- return rc;
-}
-
-static int
-ksocknal_recv_hello_v2(struct ksock_conn *conn, struct ksock_hello_msg *hello,
- int timeout)
-{
- struct socket *sock = conn->ksnc_sock;
- int rc;
- int i;
-
- if (hello->kshm_magic == LNET_PROTO_MAGIC)
- conn->ksnc_flip = 0;
- else
- conn->ksnc_flip = 1;
-
- rc = lnet_sock_read(sock, &hello->kshm_src_nid,
- offsetof(struct ksock_hello_msg, kshm_ips) -
- offsetof(struct ksock_hello_msg, kshm_src_nid),
- timeout);
- if (rc) {
- CERROR("Error %d reading HELLO from %pI4h\n",
- rc, &conn->ksnc_ipaddr);
- LASSERT(rc < 0 && rc != -EALREADY);
- return rc;
- }
-
- if (conn->ksnc_flip) {
- __swab32s(&hello->kshm_src_pid);
- __swab64s(&hello->kshm_src_nid);
- __swab32s(&hello->kshm_dst_pid);
- __swab64s(&hello->kshm_dst_nid);
- __swab64s(&hello->kshm_src_incarnation);
- __swab64s(&hello->kshm_dst_incarnation);
- __swab32s(&hello->kshm_ctype);
- __swab32s(&hello->kshm_nips);
- }
-
- if (hello->kshm_nips > LNET_MAX_INTERFACES) {
- CERROR("Bad nips %d from ip %pI4h\n",
- hello->kshm_nips, &conn->ksnc_ipaddr);
- return -EPROTO;
- }
-
- if (!hello->kshm_nips)
- return 0;
-
- rc = lnet_sock_read(sock, hello->kshm_ips,
- hello->kshm_nips * sizeof(__u32), timeout);
- if (rc) {
- CERROR("Error %d reading IPs from ip %pI4h\n",
- rc, &conn->ksnc_ipaddr);
- LASSERT(rc < 0 && rc != -EALREADY);
- return rc;
- }
-
- for (i = 0; i < (int)hello->kshm_nips; i++) {
- if (conn->ksnc_flip)
- __swab32s(&hello->kshm_ips[i]);
-
- if (!hello->kshm_ips[i]) {
- CERROR("Zero IP[%d] from ip %pI4h\n",
- i, &conn->ksnc_ipaddr);
- return -EPROTO;
- }
- }
-
- return 0;
-}
-
-static void
-ksocknal_pack_msg_v1(struct ksock_tx *tx)
-{
- /* V1.x has no KSOCK_MSG_NOOP */
- LASSERT(tx->tx_msg.ksm_type != KSOCK_MSG_NOOP);
- LASSERT(tx->tx_lnetmsg);
-
- tx->tx_iov[0].iov_base = &tx->tx_lnetmsg->msg_hdr;
- tx->tx_iov[0].iov_len = sizeof(struct lnet_hdr);
-
- tx->tx_nob = tx->tx_lnetmsg->msg_len + sizeof(struct lnet_hdr);
- tx->tx_resid = tx->tx_lnetmsg->msg_len + sizeof(struct lnet_hdr);
-}
-
-static void
-ksocknal_pack_msg_v2(struct ksock_tx *tx)
-{
- tx->tx_iov[0].iov_base = &tx->tx_msg;
-
- if (tx->tx_lnetmsg) {
- LASSERT(tx->tx_msg.ksm_type != KSOCK_MSG_NOOP);
-
- tx->tx_msg.ksm_u.lnetmsg.ksnm_hdr = tx->tx_lnetmsg->msg_hdr;
- tx->tx_iov[0].iov_len = sizeof(struct ksock_msg);
- tx->tx_nob = sizeof(struct ksock_msg) + tx->tx_lnetmsg->msg_len;
- tx->tx_resid = sizeof(struct ksock_msg) + tx->tx_lnetmsg->msg_len;
- } else {
- LASSERT(tx->tx_msg.ksm_type == KSOCK_MSG_NOOP);
-
- tx->tx_iov[0].iov_len = offsetof(struct ksock_msg, ksm_u.lnetmsg.ksnm_hdr);
- tx->tx_nob = offsetof(struct ksock_msg, ksm_u.lnetmsg.ksnm_hdr);
- tx->tx_resid = offsetof(struct ksock_msg, ksm_u.lnetmsg.ksnm_hdr);
- }
- /*
- * Don't checksum before sending starts, because the packet
- * can still be piggybacked with an ACK
- */
-}
-
-static void
-ksocknal_unpack_msg_v1(struct ksock_msg *msg)
-{
- msg->ksm_csum = 0;
- msg->ksm_type = KSOCK_MSG_LNET;
- msg->ksm_zc_cookies[0] = 0;
- msg->ksm_zc_cookies[1] = 0;
-}
-
-static void
-ksocknal_unpack_msg_v2(struct ksock_msg *msg)
-{
- /* nothing to do */
-}
-
-struct ksock_proto ksocknal_protocol_v1x = {
- .pro_version = KSOCK_PROTO_V1,
- .pro_send_hello = ksocknal_send_hello_v1,
- .pro_recv_hello = ksocknal_recv_hello_v1,
- .pro_pack = ksocknal_pack_msg_v1,
- .pro_unpack = ksocknal_unpack_msg_v1,
- .pro_queue_tx_msg = ksocknal_queue_tx_msg_v1,
- .pro_handle_zcreq = NULL,
- .pro_handle_zcack = NULL,
- .pro_queue_tx_zcack = NULL,
- .pro_match_tx = ksocknal_match_tx
-};
-
-struct ksock_proto ksocknal_protocol_v2x = {
- .pro_version = KSOCK_PROTO_V2,
- .pro_send_hello = ksocknal_send_hello_v2,
- .pro_recv_hello = ksocknal_recv_hello_v2,
- .pro_pack = ksocknal_pack_msg_v2,
- .pro_unpack = ksocknal_unpack_msg_v2,
- .pro_queue_tx_msg = ksocknal_queue_tx_msg_v2,
- .pro_queue_tx_zcack = ksocknal_queue_tx_zcack_v2,
- .pro_handle_zcreq = ksocknal_handle_zcreq,
- .pro_handle_zcack = ksocknal_handle_zcack,
- .pro_match_tx = ksocknal_match_tx
-};
-
-struct ksock_proto ksocknal_protocol_v3x = {
- .pro_version = KSOCK_PROTO_V3,
- .pro_send_hello = ksocknal_send_hello_v2,
- .pro_recv_hello = ksocknal_recv_hello_v2,
- .pro_pack = ksocknal_pack_msg_v2,
- .pro_unpack = ksocknal_unpack_msg_v2,
- .pro_queue_tx_msg = ksocknal_queue_tx_msg_v2,
- .pro_queue_tx_zcack = ksocknal_queue_tx_zcack_v3,
- .pro_handle_zcreq = ksocknal_handle_zcreq,
- .pro_handle_zcack = ksocknal_handle_zcack,
- .pro_match_tx = ksocknal_match_tx_v3
-};
diff --git a/drivers/staging/lustre/lnet/libcfs/Makefile b/drivers/staging/lustre/lnet/libcfs/Makefile
deleted file mode 100644
index b7dc7ac11cc5..000000000000
--- a/drivers/staging/lustre/lnet/libcfs/Makefile
+++ /dev/null
@@ -1,19 +0,0 @@
-# SPDX-License-Identifier: GPL-2.0
-subdir-ccflags-y += -I$(srctree)/drivers/staging/lustre/include
-subdir-ccflags-y += -I$(srctree)/drivers/staging/lustre/lustre/include
-
-obj-$(CONFIG_LNET) += libcfs.o
-
-libcfs-linux-objs := linux-tracefile.o linux-debug.o
-libcfs-linux-objs += linux-cpu.o
-libcfs-linux-objs += linux-module.o
-libcfs-linux-objs += linux-crypto.o
-libcfs-linux-objs += linux-crypto-adler.o
-
-libcfs-linux-objs := $(addprefix linux/,$(libcfs-linux-objs))
-
-libcfs-all-objs := debug.o fail.o module.o tracefile.o \
- libcfs_string.o hash.o \
- libcfs_cpu.o libcfs_mem.o libcfs_lock.o
-
-libcfs-objs := $(libcfs-linux-objs) $(libcfs-all-objs)
diff --git a/drivers/staging/lustre/lnet/libcfs/debug.c b/drivers/staging/lustre/lnet/libcfs/debug.c
deleted file mode 100644
index 1371224a8cb9..000000000000
--- a/drivers/staging/lustre/lnet/libcfs/debug.c
+++ /dev/null
@@ -1,458 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/*
- * GPL HEADER START
- *
- * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 only,
- * as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * General Public License version 2 for more details (a copy is included
- * in the LICENSE file that accompanied this code).
- *
- * You should have received a copy of the GNU General Public License
- * version 2 along with this program; If not, see
- * http://www.gnu.org/licenses/gpl-2.0.html
- *
- * GPL HEADER END
- */
-/*
- * Copyright (c) 2008, 2010, Oracle and/or its affiliates. All rights reserved.
- * Use is subject to license terms.
- *
- * Copyright (c) 2011, 2012, Intel Corporation.
- */
-/*
- * This file is part of Lustre, http://www.lustre.org/
- * Lustre is a trademark of Sun Microsystems, Inc.
- *
- * libcfs/libcfs/debug.c
- *
- * Author: Phil Schwan <phil@clusterfs.com>
- *
- */
-
-# define DEBUG_SUBSYSTEM S_LNET
-
-#include <linux/libcfs/libcfs.h>
-#include "tracefile.h"
-
-static char debug_file_name[1024];
-
-unsigned int libcfs_subsystem_debug = ~0;
-EXPORT_SYMBOL(libcfs_subsystem_debug);
-module_param(libcfs_subsystem_debug, int, 0644);
-MODULE_PARM_DESC(libcfs_subsystem_debug, "Lustre kernel debug subsystem mask");
-
-unsigned int libcfs_debug = (D_CANTMASK |
- D_NETERROR | D_HA | D_CONFIG | D_IOCTL);
-EXPORT_SYMBOL(libcfs_debug);
-module_param(libcfs_debug, int, 0644);
-MODULE_PARM_DESC(libcfs_debug, "Lustre kernel debug mask");
-
-static int libcfs_param_debug_mb_set(const char *val,
- const struct kernel_param *kp)
-{
- int rc;
- unsigned int num;
-
- rc = kstrtouint(val, 0, &num);
- if (rc < 0)
- return rc;
-
- if (!*((unsigned int *)kp->arg)) {
- *((unsigned int *)kp->arg) = num;
- return 0;
- }
-
- rc = cfs_trace_set_debug_mb(num);
-
- if (!rc)
- *((unsigned int *)kp->arg) = cfs_trace_get_debug_mb();
-
- return rc;
-}
-
-/* While the debug_mb setting looks like an unsigned int, it actually
- * needs quite a bit of extra processing, so we define a special
- * debugmb parameter type with corresponding methods to handle it
- */
-static const struct kernel_param_ops param_ops_debugmb = {
- .set = libcfs_param_debug_mb_set,
- .get = param_get_uint,
-};
-
-#define param_check_debugmb(name, p) \
- __param_check(name, p, unsigned int)
-
-static unsigned int libcfs_debug_mb;
-module_param(libcfs_debug_mb, debugmb, 0644);
-MODULE_PARM_DESC(libcfs_debug_mb, "Total debug buffer size.");
-
-unsigned int libcfs_printk = D_CANTMASK;
-module_param(libcfs_printk, uint, 0644);
-MODULE_PARM_DESC(libcfs_printk, "Lustre kernel debug console mask");
-
-unsigned int libcfs_console_ratelimit = 1;
-module_param(libcfs_console_ratelimit, uint, 0644);
-MODULE_PARM_DESC(libcfs_console_ratelimit, "Lustre kernel debug console ratelimit (0 to disable)");
-
-static int param_set_delay_minmax(const char *val,
- const struct kernel_param *kp,
- long min, long max)
-{
- long d;
- int sec;
- int rc;
-
- rc = kstrtoint(val, 0, &sec);
- if (rc)
- return -EINVAL;
-
- d = sec * HZ / 100;
- if (d < min || d > max)
- return -EINVAL;
-
- *((unsigned int *)kp->arg) = d;
-
- return 0;
-}
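-
-/*
- * Editorial worked example, not in the original file, assuming
- * HZ == 250: an input of 300 (hundredths of a second, i.e. 3 s) stores
- * d = 300 * 250 / 100 = 750 jiffies; param_get_delay() below then
- * reports cfs_duration_sec(750 * 100) = 75000 / 250 = 300 again.
- */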
-
-static int param_get_delay(char *buffer, const struct kernel_param *kp)
-{
- unsigned int d = *(unsigned int *)kp->arg;
-
- return sprintf(buffer, "%u", (unsigned int)cfs_duration_sec(d * 100));
-}
-
-unsigned int libcfs_console_max_delay;
-unsigned int libcfs_console_min_delay;
-
-static int param_set_console_max_delay(const char *val,
- const struct kernel_param *kp)
-{
- return param_set_delay_minmax(val, kp,
- libcfs_console_min_delay, INT_MAX);
-}
-
-static const struct kernel_param_ops param_ops_console_max_delay = {
- .set = param_set_console_max_delay,
- .get = param_get_delay,
-};
-
-#define param_check_console_max_delay(name, p) \
- __param_check(name, p, unsigned int)
-
-module_param(libcfs_console_max_delay, console_max_delay, 0644);
-MODULE_PARM_DESC(libcfs_console_max_delay, "Lustre kernel debug console max delay (jiffies)");
-
-static int param_set_console_min_delay(const char *val,
- const struct kernel_param *kp)
-{
- return param_set_delay_minmax(val, kp,
- 1, libcfs_console_max_delay);
-}
-
-static const struct kernel_param_ops param_ops_console_min_delay = {
- .set = param_set_console_min_delay,
- .get = param_get_delay,
-};
-
-#define param_check_console_min_delay(name, p) \
- __param_check(name, p, unsigned int)
-
-module_param(libcfs_console_min_delay, console_min_delay, 0644);
-MODULE_PARM_DESC(libcfs_console_min_delay, "Lustre kernel debug console min delay (jiffies)");
-
-static int param_set_uint_minmax(const char *val,
- const struct kernel_param *kp,
- unsigned int min, unsigned int max)
-{
- unsigned int num;
- int ret;
-
- if (!val)
- return -EINVAL;
- ret = kstrtouint(val, 0, &num);
- if (ret < 0 || num < min || num > max)
- return -EINVAL;
- *((unsigned int *)kp->arg) = num;
- return 0;
-}
-
-static int param_set_uintpos(const char *val, const struct kernel_param *kp)
-{
- return param_set_uint_minmax(val, kp, 1, -1);
-}
-
-static const struct kernel_param_ops param_ops_uintpos = {
- .set = param_set_uintpos,
- .get = param_get_uint,
-};
-
-#define param_check_uintpos(name, p) \
- __param_check(name, p, unsigned int)
-
-unsigned int libcfs_console_backoff = CDEBUG_DEFAULT_BACKOFF;
-module_param(libcfs_console_backoff, uintpos, 0644);
-MODULE_PARM_DESC(libcfs_console_backoff, "Lustre kernel debug console backoff factor");
-
-unsigned int libcfs_debug_binary = 1;
-
-unsigned int libcfs_stack = 3 * THREAD_SIZE / 4;
-EXPORT_SYMBOL(libcfs_stack);
-
-unsigned int libcfs_catastrophe;
-EXPORT_SYMBOL(libcfs_catastrophe);
-
-unsigned int libcfs_panic_on_lbug = 1;
-module_param(libcfs_panic_on_lbug, uint, 0644);
-MODULE_PARM_DESC(libcfs_panic_on_lbug, "Lustre kernel panic on LBUG");
-
-static wait_queue_head_t debug_ctlwq;
-
-char libcfs_debug_file_path_arr[PATH_MAX] = LIBCFS_DEBUG_FILE_PATH_DEFAULT;
-
-/* We need to pass a pointer here, but elsewhere this must be a const */
-static char *libcfs_debug_file_path;
-module_param(libcfs_debug_file_path, charp, 0644);
-MODULE_PARM_DESC(libcfs_debug_file_path,
- "Path for dumping debug logs, set 'NONE' to prevent log dumping");
-
-int libcfs_panic_in_progress;
-
-/* libcfs_debug_token2mask() expects the returned string in lower-case */
-static const char *
-libcfs_debug_subsys2str(int subsys)
-{
- static const char * const libcfs_debug_subsystems[] =
- LIBCFS_DEBUG_SUBSYS_NAMES;
-
- if (subsys >= ARRAY_SIZE(libcfs_debug_subsystems))
- return NULL;
-
- return libcfs_debug_subsystems[subsys];
-}
-
-/* libcfs_debug_token2mask() expects the returned string in lower-case */
-static const char *
-libcfs_debug_dbg2str(int debug)
-{
- static const char * const libcfs_debug_masks[] =
- LIBCFS_DEBUG_MASKS_NAMES;
-
- if (debug >= ARRAY_SIZE(libcfs_debug_masks))
- return NULL;
-
- return libcfs_debug_masks[debug];
-}
-
-int
-libcfs_debug_mask2str(char *str, int size, int mask, int is_subsys)
-{
- const char *(*fn)(int bit) = is_subsys ? libcfs_debug_subsys2str :
- libcfs_debug_dbg2str;
- int len = 0;
- const char *token;
- int i;
-
- if (!mask) { /* "0" */
- if (size > 0)
- str[0] = '0';
- len = 1;
- } else { /* space-separated tokens */
- for (i = 0; i < 32; i++) {
- if (!(mask & (1 << i)))
- continue;
-
- token = fn(i);
- if (!token) /* unused bit */
- continue;
-
- if (len > 0) { /* separator? */
- if (len < size)
- str[len] = ' ';
- len++;
- }
-
- while (*token) {
- if (len < size)
- str[len] = *token;
- token++;
- len++;
- }
- }
- }
-
- /* terminate 'str' */
- if (len < size)
- str[len] = 0;
- else
- str[size - 1] = 0;
-
- return len;
-}
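-
-/*
- * Editorial example, not in the original file: a zero mask yields "0";
- * a mask with e.g. the D_NETERROR and D_HA bits set yields their two
- * lower-case tokens joined by a space ("neterror ha", with the exact
- * spelling taken from LIBCFS_DEBUG_MASKS_NAMES).
- */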
-
-int
-libcfs_debug_str2mask(int *mask, const char *str, int is_subsys)
-{
- const char *(*fn)(int bit) = is_subsys ? libcfs_debug_subsys2str :
- libcfs_debug_dbg2str;
- int m = 0;
- int matched;
- int n;
- int t;
-
- /* Allow a number for backwards compatibility */
-
- for (n = strlen(str); n > 0; n--)
- if (!isspace(str[n - 1]))
- break;
- matched = n;
- t = sscanf(str, "%i%n", &m, &matched);
- if (t >= 1 && matched == n) {
- /* don't print warning for lctl set_param debug=0 or -1 */
- if (m && m != -1)
- CWARN("You are trying to use a numerical value for the mask - this will be deprecated in a future release.\n");
- *mask = m;
- return 0;
- }
-
- return cfs_str2mask(str, fn, mask, is_subsys ? 0 : D_CANTMASK,
- 0xffffffff);
-}
-
-/**
- * Dump Lustre log to ::debug_file_path by calling tracefile_dump_all_pages()
- */
-void libcfs_debug_dumplog_internal(void *arg)
-{
- static time64_t last_dump_time;
- time64_t current_time;
- void *journal_info;
-
- journal_info = current->journal_info;
- current->journal_info = NULL;
- current_time = ktime_get_real_seconds();
-
- if (strncmp(libcfs_debug_file_path_arr, "NONE", 4) &&
- current_time > last_dump_time) {
- last_dump_time = current_time;
- snprintf(debug_file_name, sizeof(debug_file_name) - 1,
- "%s.%lld.%ld", libcfs_debug_file_path_arr,
- (s64)current_time, (long)arg);
- pr_alert("LustreError: dumping log to %s\n", debug_file_name);
- cfs_tracefile_dump_all_pages(debug_file_name);
- libcfs_run_debug_log_upcall(debug_file_name);
- }
-
- current->journal_info = journal_info;
-}
-
-static int libcfs_debug_dumplog_thread(void *arg)
-{
- libcfs_debug_dumplog_internal(arg);
- wake_up(&debug_ctlwq);
- return 0;
-}
-
-void libcfs_debug_dumplog(void)
-{
- wait_queue_entry_t wait;
- struct task_struct *dumper;
-
- /* we're being careful to ensure that the kernel thread is
- * able to set our state to running as it exits before we
- * get to schedule()
- */
- init_waitqueue_entry(&wait, current);
- add_wait_queue(&debug_ctlwq, &wait);
-
- dumper = kthread_run(libcfs_debug_dumplog_thread,
- (void *)(long)current_pid(),
- "libcfs_debug_dumper");
- set_current_state(TASK_INTERRUPTIBLE);
- if (IS_ERR(dumper))
- pr_err("LustreError: cannot start log dump thread: %ld\n",
- PTR_ERR(dumper));
- else
- schedule();
-
- /* be sure to tear down if kthread_run() failed */
- remove_wait_queue(&debug_ctlwq, &wait);
- set_current_state(TASK_RUNNING);
-}
-EXPORT_SYMBOL(libcfs_debug_dumplog);
-
-int libcfs_debug_init(unsigned long bufsize)
-{
- unsigned int max = libcfs_debug_mb;
- int rc = 0;
-
- init_waitqueue_head(&debug_ctlwq);
-
- if (libcfs_console_max_delay <= 0 || /* not set by user or */
- libcfs_console_min_delay <= 0 || /* set to invalid values */
- libcfs_console_min_delay >= libcfs_console_max_delay) {
- libcfs_console_max_delay = CDEBUG_DEFAULT_MAX_DELAY;
- libcfs_console_min_delay = CDEBUG_DEFAULT_MIN_DELAY;
- }
-
- if (libcfs_debug_file_path) {
- strlcpy(libcfs_debug_file_path_arr,
- libcfs_debug_file_path,
- sizeof(libcfs_debug_file_path_arr));
- }
-
- /* If libcfs_debug_mb is uninitialized or set to an invalid value,
- * just make the total buffers smp_num_cpus * TCD_MAX_PAGES
- */
- if (max > cfs_trace_max_debug_mb() || max < num_possible_cpus()) {
- max = TCD_MAX_PAGES;
- } else {
- max = max / num_possible_cpus();
- max <<= (20 - PAGE_SHIFT);
- }
-
- rc = cfs_tracefile_init(max);
- if (!rc) {
- libcfs_register_panic_notifier();
- libcfs_debug_mb = cfs_trace_get_debug_mb();
- }
-
- return rc;
-}
-
-int libcfs_debug_cleanup(void)
-{
- libcfs_unregister_panic_notifier();
- cfs_tracefile_exit();
- return 0;
-}
-
-int libcfs_debug_clear_buffer(void)
-{
- cfs_trace_flush_pages();
- return 0;
-}
-
-/* Debug markers, although printed by S_LNET, should not be marked as such. */
-#undef DEBUG_SUBSYSTEM
-#define DEBUG_SUBSYSTEM S_UNDEFINED
-int libcfs_debug_mark_buffer(const char *text)
-{
- CDEBUG(D_TRACE,
- "***************************************************\n");
- LCONSOLE(D_WARNING, "DEBUG MARKER: %s\n", text);
- CDEBUG(D_TRACE,
- "***************************************************\n");
-
- return 0;
-}
-
-#undef DEBUG_SUBSYSTEM
-#define DEBUG_SUBSYSTEM S_LNET
diff --git a/drivers/staging/lustre/lnet/libcfs/fail.c b/drivers/staging/lustre/lnet/libcfs/fail.c
deleted file mode 100644
index d3f1e866c6a7..000000000000
--- a/drivers/staging/lustre/lnet/libcfs/fail.c
+++ /dev/null
@@ -1,142 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/*
- * GPL HEADER START
- *
- * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 only,
- * as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * General Public License version 2 for more details (a copy is included
- * in the LICENSE file that accompanied this code).
- *
- * You should have received a copy of the GNU General Public License
- * version 2 along with this program; If not, see http://www.gnu.org/licenses
- *
- * GPL HEADER END
- */
-/*
- * Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved.
- * Use is subject to license terms.
- *
- * Copyright (c) 2011, 2015, Intel Corporation.
- */
-/*
- * This file is part of Lustre, http://www.lustre.org/
- * Lustre is a trademark of Oracle Corporation, Inc.
- */
-
-#include <linux/libcfs/libcfs.h>
-
-unsigned long cfs_fail_loc;
-EXPORT_SYMBOL(cfs_fail_loc);
-
-unsigned int cfs_fail_val;
-EXPORT_SYMBOL(cfs_fail_val);
-
-int cfs_fail_err;
-EXPORT_SYMBOL(cfs_fail_err);
-
-DECLARE_WAIT_QUEUE_HEAD(cfs_race_waitq);
-EXPORT_SYMBOL(cfs_race_waitq);
-
-int cfs_race_state;
-EXPORT_SYMBOL(cfs_race_state);
-
-int __cfs_fail_check_set(u32 id, u32 value, int set)
-{
- static atomic_t cfs_fail_count = ATOMIC_INIT(0);
-
- LASSERT(!(id & CFS_FAIL_ONCE));
-
- if ((cfs_fail_loc & (CFS_FAILED | CFS_FAIL_ONCE)) ==
- (CFS_FAILED | CFS_FAIL_ONCE)) {
- atomic_set(&cfs_fail_count, 0); /* paranoia */
- return 0;
- }
-
- /* Fail 1/cfs_fail_val times */
- if (cfs_fail_loc & CFS_FAIL_RAND) {
- if (cfs_fail_val < 2 || prandom_u32_max(cfs_fail_val) > 0)
- return 0;
- }
-
- /* Skip the first cfs_fail_val, then fail */
- if (cfs_fail_loc & CFS_FAIL_SKIP) {
- if (atomic_inc_return(&cfs_fail_count) <= cfs_fail_val)
- return 0;
- }
-
- /* check cfs_fail_val... */
- if (set == CFS_FAIL_LOC_VALUE) {
- if (cfs_fail_val != -1 && cfs_fail_val != value)
- return 0;
- }
-
- /* Fail cfs_fail_val times, overridden by FAIL_ONCE */
- if (cfs_fail_loc & CFS_FAIL_SOME &&
- (!(cfs_fail_loc & CFS_FAIL_ONCE) || cfs_fail_val <= 1)) {
- int count = atomic_inc_return(&cfs_fail_count);
-
- if (count >= cfs_fail_val) {
- set_bit(CFS_FAIL_ONCE_BIT, &cfs_fail_loc);
- atomic_set(&cfs_fail_count, 0);
- /* we lost the race to increment */
- if (count > cfs_fail_val)
- return 0;
- }
- }
-
- /* Take the current call into account for FAIL_ONCE for ORSET only;
- * since RESET sets a new fail_loc, it does not affect the current call
- */
- if ((set == CFS_FAIL_LOC_ORSET) && (value & CFS_FAIL_ONCE))
- set_bit(CFS_FAIL_ONCE_BIT, &cfs_fail_loc);
- /* Lost race to set CFS_FAILED_BIT. */
- if (test_and_set_bit(CFS_FAILED_BIT, &cfs_fail_loc)) {
- /* If CFS_FAIL_ONCE is set, only one process may fail;
- * otherwise multiple processes may fail at the same time.
- */
- if (cfs_fail_loc & CFS_FAIL_ONCE)
- return 0;
- }
-
- switch (set) {
- case CFS_FAIL_LOC_NOSET:
- case CFS_FAIL_LOC_VALUE:
- break;
- case CFS_FAIL_LOC_ORSET:
- cfs_fail_loc |= value & ~(CFS_FAILED | CFS_FAIL_ONCE);
- break;
- case CFS_FAIL_LOC_RESET:
- cfs_fail_loc = value;
- atomic_set(&cfs_fail_count, 0);
- break;
- default:
- LASSERTF(0, "called with bad set %u\n", set);
- break;
- }
-
- return 1;
-}
-EXPORT_SYMBOL(__cfs_fail_check_set);
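-
-/*
- * Editorial sketch, not part of the original file: a hypothetical
- * instrumentation site using the CFS_FAIL_CHECK() wrapper that
- * libcfs_fail.h builds on top of __cfs_fail_check_set().
- * OBD_FAIL_SOME_SITE is a made-up site id.
- */
-static int example_fail_site(void)
-{
- if (CFS_FAIL_CHECK(OBD_FAIL_SOME_SITE))
- return -EIO; /* simulated failure */
- return 0;
-}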
-
-int __cfs_fail_timeout_set(u32 id, u32 value, int ms, int set)
-{
- int ret;
-
- ret = __cfs_fail_check_set(id, value, set);
- if (ret && likely(ms > 0)) {
- CERROR("cfs_fail_timeout id %x sleeping for %dms\n",
- id, ms);
- set_current_state(TASK_UNINTERRUPTIBLE);
- schedule_timeout(ms * HZ / 1000);
- CERROR("cfs_fail_timeout id %x awake\n", id);
- }
- return ret;
-}
-EXPORT_SYMBOL(__cfs_fail_timeout_set);
diff --git a/drivers/staging/lustre/lnet/libcfs/hash.c b/drivers/staging/lustre/lnet/libcfs/hash.c
deleted file mode 100644
index f7b3c9306456..000000000000
--- a/drivers/staging/lustre/lnet/libcfs/hash.c
+++ /dev/null
@@ -1,2064 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/*
- * GPL HEADER START
- *
- * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 only,
- * as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * General Public License version 2 for more details (a copy is included
- * in the LICENSE file that accompanied this code).
- *
- * You should have received a copy of the GNU General Public License
- * version 2 along with this program; If not, see
- * http://www.gnu.org/licenses/gpl-2.0.html
- *
- * GPL HEADER END
- */
-/*
- * Copyright (c) 2009, 2010, Oracle and/or its affiliates. All rights reserved.
- * Use is subject to license terms.
- *
- * Copyright (c) 2011, 2012, Intel Corporation.
- */
-/*
- * This file is part of Lustre, http://www.lustre.org/
- * Lustre is a trademark of Sun Microsystems, Inc.
- *
- * libcfs/libcfs/hash.c
- *
- * Implement a hash class for hash process in lustre system.
- *
- * Author: YuZhangyong <yzy@clusterfs.com>
- *
- * 2008-08-15: Brian Behlendorf <behlendorf1@llnl.gov>
- * - Simplified API and improved documentation
- * - Added per-hash feature flags:
- * * CFS_HASH_DEBUG additional validation
- * * CFS_HASH_REHASH dynamic rehashing
- * - Added per-hash statistics
- * - General performance enhancements
- *
- * 2009-07-31: Liang Zhen <zhen.liang@sun.com>
- * - move all stuff to libcfs
- * - don't allow cur_bits != max_bits without setting of CFS_HASH_REHASH
- * - ignore hs_rwlock if without CFS_HASH_REHASH setting
- * - buckets are allocated one by one (instead of as contiguous memory),
- * to avoid unnecessary cacheline conflicts
- *
- * 2010-03-01: Liang Zhen <zhen.liang@sun.com>
- * - "bucket" is a group of hlist_head now, user can specify bucket size
- * by bkt_bits of cfs_hash_create(), all hlist_heads in a bucket share
- * one lock for reducing memory overhead.
- *
- * - support lockless hash, caller will take care of locks:
- * avoid lock overhead for hash tables that are already protected
- * by locking in the caller for another reason
- *
- * - support both spin_lock/rwlock for bucket:
- * overhead of spinlock contention is lower than read/write
- * contention of rwlock, so using spinlock to serialize operations on
- * bucket is more reasonable for those frequently changed hash tables
- *
- * - support one-single lock mode:
- * one lock to protect all hash operations to avoid overhead of
- * multiple locks if hash table is always small
- *
- * - removed a lot of unnecessary addref & decref on hash elements:
- * addref & decref are atomic operations, which are expensive in
- * many use-cases.
- *
- * - support non-blocking cfs_hash_add() and cfs_hash_findadd():
- * some lustre use-cases require these functions to be strictly
- * non-blocking; in those cases we need to schedule any required
- * rehash on a different thread.
- *
- * - safer rehash on large hash tables
- * In the old implementation, the rehash function exclusively locked
- * the hash table and finished the rehash in one batch; that is
- * dangerous on an SMP system because rehashing millions of elements
- * could take a long time. The new rehash implementation can release
- * the lock and relax the CPU in the middle of a rehash, so it is
- * safe for another thread to search/change the hash table even
- * while it is rehashing.
- *
- * - support two different refcount modes
- * . hash table has refcount on element
- * . hash table doesn't change refcount on adding/removing element
- *
- * - support long name hash table (for param-tree)
- *
- * - fix a bug in cfs_hash_rehash_key:
- * in the old implementation, cfs_hash_rehash_key could corrupt the
- * hash table because @key was overwritten without any protection.
- * Now the user must define hs_keycpy for rehash-enabled hash
- * tables; cfs_hash_rehash_key overwrites the hash key under lock
- * by calling hs_keycpy.
- *
- * - better hash iteration:
- * Now we support both locked and lockless iteration of the hash
- * table. Also, the user can break the iteration by returning 1
- * from the callback.
- */
-#include <linux/seq_file.h>
-#include <linux/log2.h>
-
-#include <linux/libcfs/libcfs.h>
-
-#if CFS_HASH_DEBUG_LEVEL >= CFS_HASH_DEBUG_1
-static unsigned int warn_on_depth = 8;
-module_param(warn_on_depth, uint, 0644);
-MODULE_PARM_DESC(warn_on_depth, "warn when hash depth is high.");
-#endif
-
-struct workqueue_struct *cfs_rehash_wq;
-
-static inline void
-cfs_hash_nl_lock(union cfs_hash_lock *lock, int exclusive) {}
-
-static inline void
-cfs_hash_nl_unlock(union cfs_hash_lock *lock, int exclusive) {}
-
-static inline void
-cfs_hash_spin_lock(union cfs_hash_lock *lock, int exclusive)
- __acquires(&lock->spin)
-{
- spin_lock(&lock->spin);
-}
-
-static inline void
-cfs_hash_spin_unlock(union cfs_hash_lock *lock, int exclusive)
- __releases(&lock->spin)
-{
- spin_unlock(&lock->spin);
-}
-
-static inline void
-cfs_hash_rw_lock(union cfs_hash_lock *lock, int exclusive)
- __acquires(&lock->rw)
-{
- if (!exclusive)
- read_lock(&lock->rw);
- else
- write_lock(&lock->rw);
-}
-
-static inline void
-cfs_hash_rw_unlock(union cfs_hash_lock *lock, int exclusive)
- __releases(&lock->rw)
-{
- if (!exclusive)
- read_unlock(&lock->rw);
- else
- write_unlock(&lock->rw);
-}
-
-/** No lock hash */
-static struct cfs_hash_lock_ops cfs_hash_nl_lops = {
- .hs_lock = cfs_hash_nl_lock,
- .hs_unlock = cfs_hash_nl_unlock,
- .hs_bkt_lock = cfs_hash_nl_lock,
- .hs_bkt_unlock = cfs_hash_nl_unlock,
-};
-
-/** no bucket lock, one spinlock to protect everything */
-static struct cfs_hash_lock_ops cfs_hash_nbl_lops = {
- .hs_lock = cfs_hash_spin_lock,
- .hs_unlock = cfs_hash_spin_unlock,
- .hs_bkt_lock = cfs_hash_nl_lock,
- .hs_bkt_unlock = cfs_hash_nl_unlock,
-};
-
-/** spin bucket lock, rehash is enabled */
-static struct cfs_hash_lock_ops cfs_hash_bkt_spin_lops = {
- .hs_lock = cfs_hash_rw_lock,
- .hs_unlock = cfs_hash_rw_unlock,
- .hs_bkt_lock = cfs_hash_spin_lock,
- .hs_bkt_unlock = cfs_hash_spin_unlock,
-};
-
-/** rw bucket lock, rehash is enabled */
-static struct cfs_hash_lock_ops cfs_hash_bkt_rw_lops = {
- .hs_lock = cfs_hash_rw_lock,
- .hs_unlock = cfs_hash_rw_unlock,
- .hs_bkt_lock = cfs_hash_rw_lock,
- .hs_bkt_unlock = cfs_hash_rw_unlock,
-};
-
-/** spin bucket lock, rehash is disabled */
-static struct cfs_hash_lock_ops cfs_hash_nr_bkt_spin_lops = {
- .hs_lock = cfs_hash_nl_lock,
- .hs_unlock = cfs_hash_nl_unlock,
- .hs_bkt_lock = cfs_hash_spin_lock,
- .hs_bkt_unlock = cfs_hash_spin_unlock,
-};
-
-/** rw bucket lock, rehash is disabled */
-static struct cfs_hash_lock_ops cfs_hash_nr_bkt_rw_lops = {
- .hs_lock = cfs_hash_nl_lock,
- .hs_unlock = cfs_hash_nl_unlock,
- .hs_bkt_lock = cfs_hash_rw_lock,
- .hs_bkt_unlock = cfs_hash_rw_unlock,
-};
-
-static void
-cfs_hash_lock_setup(struct cfs_hash *hs)
-{
- if (cfs_hash_with_no_lock(hs)) {
- hs->hs_lops = &cfs_hash_nl_lops;
-
- } else if (cfs_hash_with_no_bktlock(hs)) {
- hs->hs_lops = &cfs_hash_nbl_lops;
- spin_lock_init(&hs->hs_lock.spin);
-
- } else if (cfs_hash_with_rehash(hs)) {
- rwlock_init(&hs->hs_lock.rw);
-
- if (cfs_hash_with_rw_bktlock(hs))
- hs->hs_lops = &cfs_hash_bkt_rw_lops;
- else if (cfs_hash_with_spin_bktlock(hs))
- hs->hs_lops = &cfs_hash_bkt_spin_lops;
- else
- LBUG();
- } else {
- if (cfs_hash_with_rw_bktlock(hs))
- hs->hs_lops = &cfs_hash_nr_bkt_rw_lops;
- else if (cfs_hash_with_spin_bktlock(hs))
- hs->hs_lops = &cfs_hash_nr_bkt_spin_lops;
- else
- LBUG();
- }
-}
-
-/**
- * Simple hash head without depth tracking;
- * new elements are always added at the head of the hlist
- */
-struct cfs_hash_head {
- struct hlist_head hh_head; /**< entries list */
-};
-
-static int
-cfs_hash_hh_hhead_size(struct cfs_hash *hs)
-{
- return sizeof(struct cfs_hash_head);
-}
-
-static struct hlist_head *
-cfs_hash_hh_hhead(struct cfs_hash *hs, struct cfs_hash_bd *bd)
-{
- struct cfs_hash_head *head;
-
- head = (struct cfs_hash_head *)&bd->bd_bucket->hsb_head[0];
- return &head[bd->bd_offset].hh_head;
-}
-
-static int
-cfs_hash_hh_hnode_add(struct cfs_hash *hs, struct cfs_hash_bd *bd,
- struct hlist_node *hnode)
-{
- hlist_add_head(hnode, cfs_hash_hh_hhead(hs, bd));
- return -1; /* unknown depth */
-}
-
-static int
-cfs_hash_hh_hnode_del(struct cfs_hash *hs, struct cfs_hash_bd *bd,
- struct hlist_node *hnode)
-{
- hlist_del_init(hnode);
- return -1; /* unknown depth */
-}
-
-/**
- * Simple hash head with depth tracking;
- * new elements are always added at the head of the hlist
- */
-struct cfs_hash_head_dep {
- struct hlist_head hd_head; /**< entries list */
- unsigned int hd_depth; /**< list length */
-};
-
-static int
-cfs_hash_hd_hhead_size(struct cfs_hash *hs)
-{
- return sizeof(struct cfs_hash_head_dep);
-}
-
-static struct hlist_head *
-cfs_hash_hd_hhead(struct cfs_hash *hs, struct cfs_hash_bd *bd)
-{
- struct cfs_hash_head_dep *head;
-
- head = (struct cfs_hash_head_dep *)&bd->bd_bucket->hsb_head[0];
- return &head[bd->bd_offset].hd_head;
-}
-
-static int
-cfs_hash_hd_hnode_add(struct cfs_hash *hs, struct cfs_hash_bd *bd,
- struct hlist_node *hnode)
-{
- struct cfs_hash_head_dep *hh;
-
- hh = container_of(cfs_hash_hd_hhead(hs, bd),
- struct cfs_hash_head_dep, hd_head);
- hlist_add_head(hnode, &hh->hd_head);
- return ++hh->hd_depth;
-}
-
-static int
-cfs_hash_hd_hnode_del(struct cfs_hash *hs, struct cfs_hash_bd *bd,
- struct hlist_node *hnode)
-{
- struct cfs_hash_head_dep *hh;
-
- hh = container_of(cfs_hash_hd_hhead(hs, bd),
- struct cfs_hash_head_dep, hd_head);
- hlist_del_init(hnode);
- return --hh->hd_depth;
-}
-
-/**
- * Doubly linked hash head without depth tracking;
- * new elements are always added at the tail of the hlist
- */
-struct cfs_hash_dhead {
- struct hlist_head dh_head; /**< entries list */
- struct hlist_node *dh_tail; /**< the last entry */
-};
-
-static int
-cfs_hash_dh_hhead_size(struct cfs_hash *hs)
-{
- return sizeof(struct cfs_hash_dhead);
-}
-
-static struct hlist_head *
-cfs_hash_dh_hhead(struct cfs_hash *hs, struct cfs_hash_bd *bd)
-{
- struct cfs_hash_dhead *head;
-
- head = (struct cfs_hash_dhead *)&bd->bd_bucket->hsb_head[0];
- return &head[bd->bd_offset].dh_head;
-}
-
-static int
-cfs_hash_dh_hnode_add(struct cfs_hash *hs, struct cfs_hash_bd *bd,
- struct hlist_node *hnode)
-{
- struct cfs_hash_dhead *dh;
-
- dh = container_of(cfs_hash_dh_hhead(hs, bd),
- struct cfs_hash_dhead, dh_head);
- if (dh->dh_tail) /* not empty */
- hlist_add_behind(hnode, dh->dh_tail);
- else /* empty list */
- hlist_add_head(hnode, &dh->dh_head);
- dh->dh_tail = hnode;
- return -1; /* unknown depth */
-}
-
-static int
-cfs_hash_dh_hnode_del(struct cfs_hash *hs, struct cfs_hash_bd *bd,
- struct hlist_node *hnd)
-{
- struct cfs_hash_dhead *dh;
-
- dh = container_of(cfs_hash_dh_hhead(hs, bd),
- struct cfs_hash_dhead, dh_head);
- if (!hnd->next) { /* it's the tail */
- dh->dh_tail = (hnd->pprev == &dh->dh_head.first) ? NULL :
- container_of(hnd->pprev, struct hlist_node, next);
- }
- hlist_del_init(hnd);
- return -1; /* unknown depth */
-}
-
-/**
- * Doubly linked hash head with depth tracking;
- * new elements are always added at the tail of the hlist
- */
-struct cfs_hash_dhead_dep {
- struct hlist_head dd_head; /**< entries list */
- struct hlist_node *dd_tail; /**< the last entry */
- unsigned int dd_depth; /**< list length */
-};
-
-static int
-cfs_hash_dd_hhead_size(struct cfs_hash *hs)
-{
- return sizeof(struct cfs_hash_dhead_dep);
-}
-
-static struct hlist_head *
-cfs_hash_dd_hhead(struct cfs_hash *hs, struct cfs_hash_bd *bd)
-{
- struct cfs_hash_dhead_dep *head;
-
- head = (struct cfs_hash_dhead_dep *)&bd->bd_bucket->hsb_head[0];
- return &head[bd->bd_offset].dd_head;
-}
-
-static int
-cfs_hash_dd_hnode_add(struct cfs_hash *hs, struct cfs_hash_bd *bd,
- struct hlist_node *hnode)
-{
- struct cfs_hash_dhead_dep *dh;
-
- dh = container_of(cfs_hash_dd_hhead(hs, bd),
- struct cfs_hash_dhead_dep, dd_head);
- if (dh->dd_tail) /* not empty */
- hlist_add_behind(hnode, dh->dd_tail);
- else /* empty list */
- hlist_add_head(hnode, &dh->dd_head);
- dh->dd_tail = hnode;
- return ++dh->dd_depth;
-}
-
-static int
-cfs_hash_dd_hnode_del(struct cfs_hash *hs, struct cfs_hash_bd *bd,
- struct hlist_node *hnd)
-{
- struct cfs_hash_dhead_dep *dh;
-
- dh = container_of(cfs_hash_dd_hhead(hs, bd),
- struct cfs_hash_dhead_dep, dd_head);
- if (!hnd->next) { /* it's the tail */
- dh->dd_tail = (hnd->pprev == &dh->dd_head.first) ? NULL :
- container_of(hnd->pprev, struct hlist_node, next);
- }
- hlist_del_init(hnd);
- return --dh->dd_depth;
-}
-
-static struct cfs_hash_hlist_ops cfs_hash_hh_hops = {
- .hop_hhead = cfs_hash_hh_hhead,
- .hop_hhead_size = cfs_hash_hh_hhead_size,
- .hop_hnode_add = cfs_hash_hh_hnode_add,
- .hop_hnode_del = cfs_hash_hh_hnode_del,
-};
-
-static struct cfs_hash_hlist_ops cfs_hash_hd_hops = {
- .hop_hhead = cfs_hash_hd_hhead,
- .hop_hhead_size = cfs_hash_hd_hhead_size,
- .hop_hnode_add = cfs_hash_hd_hnode_add,
- .hop_hnode_del = cfs_hash_hd_hnode_del,
-};
-
-static struct cfs_hash_hlist_ops cfs_hash_dh_hops = {
- .hop_hhead = cfs_hash_dh_hhead,
- .hop_hhead_size = cfs_hash_dh_hhead_size,
- .hop_hnode_add = cfs_hash_dh_hnode_add,
- .hop_hnode_del = cfs_hash_dh_hnode_del,
-};
-
-static struct cfs_hash_hlist_ops cfs_hash_dd_hops = {
- .hop_hhead = cfs_hash_dd_hhead,
- .hop_hhead_size = cfs_hash_dd_hhead_size,
- .hop_hnode_add = cfs_hash_dd_hnode_add,
- .hop_hnode_del = cfs_hash_dd_hnode_del,
-};
-
-static void
-cfs_hash_hlist_setup(struct cfs_hash *hs)
-{
- if (cfs_hash_with_add_tail(hs)) {
- hs->hs_hops = cfs_hash_with_depth(hs) ?
- &cfs_hash_dd_hops : &cfs_hash_dh_hops;
- } else {
- hs->hs_hops = cfs_hash_with_depth(hs) ?
- &cfs_hash_hd_hops : &cfs_hash_hh_hops;
- }
-}
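/*
 * [Editorial sketch, not part of the original file.]  The four hop
 * tables above trade per-slot memory for features.  This helper mirrors
 * the selection in cfs_hash_hlist_setup() and reports the per-slot size
 * it would pick; the cfs_hash_with_*() flag helpers and the plain
 * cfs_hash_head/cfs_hash_head_dep structs are assumed to come from
 * earlier in this file and from libcfs_hash.h.
 */
static size_t cfs_hash_slot_size_sketch(struct cfs_hash *hs)
{
	if (cfs_hash_with_add_tail(hs))
		return cfs_hash_with_depth(hs) ?
		       sizeof(struct cfs_hash_dhead_dep) : /* 2 ptrs + depth */
		       sizeof(struct cfs_hash_dhead);      /* 2 ptrs */
	return cfs_hash_with_depth(hs) ?
	       sizeof(struct cfs_hash_head_dep) :          /* 1 ptr + depth */
	       sizeof(struct cfs_hash_head);               /* 1 ptr */
}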
-
-static void
-cfs_hash_bd_from_key(struct cfs_hash *hs, struct cfs_hash_bucket **bkts,
- unsigned int bits, const void *key, struct cfs_hash_bd *bd)
-{
- unsigned int index = cfs_hash_id(hs, key, (1U << bits) - 1);
-
- LASSERT(bits == hs->hs_cur_bits || bits == hs->hs_rehash_bits);
-
- bd->bd_bucket = bkts[index & ((1U << (bits - hs->hs_bkt_bits)) - 1)];
- bd->bd_offset = index >> (bits - hs->hs_bkt_bits);
-}
-
-void
-cfs_hash_bd_get(struct cfs_hash *hs, const void *key, struct cfs_hash_bd *bd)
-{
- /* NB: caller should hold hs->hs_rwlock if REHASH is set */
- if (likely(!hs->hs_rehash_buckets)) {
- cfs_hash_bd_from_key(hs, hs->hs_buckets,
- hs->hs_cur_bits, key, bd);
- } else {
- LASSERT(hs->hs_rehash_bits);
- cfs_hash_bd_from_key(hs, hs->hs_rehash_buckets,
- hs->hs_rehash_bits, key, bd);
- }
-}
-EXPORT_SYMBOL(cfs_hash_bd_get);
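/*
 * [Editorial sketch, not part of the original file.]  How
 * cfs_hash_bd_from_key() above splits a hashed index: the low
 * (bits - bkt_bits) bits select the bucket, the remaining high bits
 * select the hlist slot (bd_offset) inside that bucket.
 */
static void bd_split_sketch(unsigned int index, unsigned int bits,
			    unsigned int bkt_bits,
			    unsigned int *bkt, unsigned int *off)
{
	*bkt = index & ((1U << (bits - bkt_bits)) - 1);	/* bucket index */
	*off = index >> (bits - bkt_bits);		/* slot in bucket */
}
/* e.g. bits = 10, bkt_bits = 3: 128 buckets with 8 hlist slots each */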
-
-static inline void
-cfs_hash_bd_dep_record(struct cfs_hash *hs, struct cfs_hash_bd *bd, int dep_cur)
-{
- if (likely(dep_cur <= bd->bd_bucket->hsb_depmax))
- return;
-
- bd->bd_bucket->hsb_depmax = dep_cur;
-# if CFS_HASH_DEBUG_LEVEL >= CFS_HASH_DEBUG_1
- if (likely(!warn_on_depth ||
- max(warn_on_depth, hs->hs_dep_max) >= dep_cur))
- return;
-
- spin_lock(&hs->hs_dep_lock);
- hs->hs_dep_max = dep_cur;
- hs->hs_dep_bkt = bd->bd_bucket->hsb_index;
- hs->hs_dep_off = bd->bd_offset;
- hs->hs_dep_bits = hs->hs_cur_bits;
- spin_unlock(&hs->hs_dep_lock);
-
- queue_work(cfs_rehash_wq, &hs->hs_dep_work);
-# endif
-}
-
-void
-cfs_hash_bd_add_locked(struct cfs_hash *hs, struct cfs_hash_bd *bd,
- struct hlist_node *hnode)
-{
- int rc;
-
- rc = hs->hs_hops->hop_hnode_add(hs, bd, hnode);
- cfs_hash_bd_dep_record(hs, bd, rc);
- bd->bd_bucket->hsb_version++;
- if (unlikely(!bd->bd_bucket->hsb_version))
- bd->bd_bucket->hsb_version++;
- bd->bd_bucket->hsb_count++;
-
- if (cfs_hash_with_counter(hs))
- atomic_inc(&hs->hs_count);
- if (!cfs_hash_with_no_itemref(hs))
- cfs_hash_get(hs, hnode);
-}
-EXPORT_SYMBOL(cfs_hash_bd_add_locked);
-
-void
-cfs_hash_bd_del_locked(struct cfs_hash *hs, struct cfs_hash_bd *bd,
- struct hlist_node *hnode)
-{
- hs->hs_hops->hop_hnode_del(hs, bd, hnode);
-
- LASSERT(bd->bd_bucket->hsb_count > 0);
- bd->bd_bucket->hsb_count--;
- bd->bd_bucket->hsb_version++;
- if (unlikely(!bd->bd_bucket->hsb_version))
- bd->bd_bucket->hsb_version++;
-
- if (cfs_hash_with_counter(hs)) {
- LASSERT(atomic_read(&hs->hs_count) > 0);
- atomic_dec(&hs->hs_count);
- }
- if (!cfs_hash_with_no_itemref(hs))
- cfs_hash_put_locked(hs, hnode);
-}
-EXPORT_SYMBOL(cfs_hash_bd_del_locked);
-
-void
-cfs_hash_bd_move_locked(struct cfs_hash *hs, struct cfs_hash_bd *bd_old,
- struct cfs_hash_bd *bd_new, struct hlist_node *hnode)
-{
- struct cfs_hash_bucket *obkt = bd_old->bd_bucket;
- struct cfs_hash_bucket *nbkt = bd_new->bd_bucket;
- int rc;
-
- if (!cfs_hash_bd_compare(bd_old, bd_new))
- return;
-
- /* call hop_hnode_add/del directly, to avoid the atomic & refcount ops
- * in cfs_hash_bd_del/add_locked
- */
- hs->hs_hops->hop_hnode_del(hs, bd_old, hnode);
- rc = hs->hs_hops->hop_hnode_add(hs, bd_new, hnode);
- cfs_hash_bd_dep_record(hs, bd_new, rc);
-
- LASSERT(obkt->hsb_count > 0);
- obkt->hsb_count--;
- obkt->hsb_version++;
- if (unlikely(!obkt->hsb_version))
- obkt->hsb_version++;
- nbkt->hsb_count++;
- nbkt->hsb_version++;
- if (unlikely(!nbkt->hsb_version))
- nbkt->hsb_version++;
-}
-
-enum {
- /** always set, for sanity (avoid ZERO intent) */
- CFS_HS_LOOKUP_MASK_FIND = BIT(0),
- /** return entry with a ref */
- CFS_HS_LOOKUP_MASK_REF = BIT(1),
- /** add entry if not existing */
- CFS_HS_LOOKUP_MASK_ADD = BIT(2),
- /** delete entry, ignore other masks */
- CFS_HS_LOOKUP_MASK_DEL = BIT(3),
-};
-
-enum cfs_hash_lookup_intent {
- /** return item w/o refcount */
- CFS_HS_LOOKUP_IT_PEEK = CFS_HS_LOOKUP_MASK_FIND,
- /** return item with refcount */
- CFS_HS_LOOKUP_IT_FIND = (CFS_HS_LOOKUP_MASK_FIND |
- CFS_HS_LOOKUP_MASK_REF),
- /** return item w/o refcount if existed, otherwise add */
- CFS_HS_LOOKUP_IT_ADD = (CFS_HS_LOOKUP_MASK_FIND |
- CFS_HS_LOOKUP_MASK_ADD),
- /** return item with refcount if existed, otherwise add */
- CFS_HS_LOOKUP_IT_FINDADD = (CFS_HS_LOOKUP_IT_FIND |
- CFS_HS_LOOKUP_MASK_ADD),
- /** delete if existed */
- CFS_HS_LOOKUP_IT_FINDDEL = (CFS_HS_LOOKUP_MASK_FIND |
- CFS_HS_LOOKUP_MASK_DEL)
-};
-
-static struct hlist_node *
-cfs_hash_bd_lookup_intent(struct cfs_hash *hs, struct cfs_hash_bd *bd,
- const void *key, struct hlist_node *hnode,
- enum cfs_hash_lookup_intent intent)
-
-{
- struct hlist_head *hhead = cfs_hash_bd_hhead(hs, bd);
- struct hlist_node *ehnode;
- struct hlist_node *match;
- int intent_add = intent & CFS_HS_LOOKUP_MASK_ADD;
-
- /* with this function, we can avoid a lot of useless refcount ops,
- * which are expensive atomic operations most of the time.
- */
- match = intent_add ? NULL : hnode;
- hlist_for_each(ehnode, hhead) {
- if (!cfs_hash_keycmp(hs, key, ehnode))
- continue;
-
- if (match && match != ehnode) /* can't match */
- continue;
-
- /* match and ... */
- if (intent & CFS_HS_LOOKUP_MASK_DEL) {
- cfs_hash_bd_del_locked(hs, bd, ehnode);
- return ehnode;
- }
-
- /* caller wants refcount? */
- if (intent & CFS_HS_LOOKUP_MASK_REF)
- cfs_hash_get(hs, ehnode);
- return ehnode;
- }
- /* no match item */
- if (!intent_add)
- return NULL;
-
- LASSERT(hnode);
- cfs_hash_bd_add_locked(hs, bd, hnode);
- return hnode;
-}
-
-struct hlist_node *
-cfs_hash_bd_lookup_locked(struct cfs_hash *hs, struct cfs_hash_bd *bd,
- const void *key)
-{
- return cfs_hash_bd_lookup_intent(hs, bd, key, NULL,
- CFS_HS_LOOKUP_IT_FIND);
-}
-EXPORT_SYMBOL(cfs_hash_bd_lookup_locked);
-
-struct hlist_node *
-cfs_hash_bd_peek_locked(struct cfs_hash *hs, struct cfs_hash_bd *bd,
- const void *key)
-{
- return cfs_hash_bd_lookup_intent(hs, bd, key, NULL,
- CFS_HS_LOOKUP_IT_PEEK);
-}
-EXPORT_SYMBOL(cfs_hash_bd_peek_locked);
-
-static void
-cfs_hash_multi_bd_lock(struct cfs_hash *hs, struct cfs_hash_bd *bds,
- unsigned int n, int excl)
-{
- struct cfs_hash_bucket *prev = NULL;
- int i;
-
- /**
- * bds must be sorted in ascending order of bd->bd_bucket->hsb_index.
- * NB: several bds may point to the same bucket while having different
- * bd::bd_offset, so take care not to lock the same bucket twice
- * (deadlock).
- */
- cfs_hash_for_each_bd(bds, n, i) {
- if (prev == bds[i].bd_bucket)
- continue;
-
- LASSERT(!prev || prev->hsb_index < bds[i].bd_bucket->hsb_index);
- cfs_hash_bd_lock(hs, &bds[i], excl);
- prev = bds[i].bd_bucket;
- }
-}
-
-static void
-cfs_hash_multi_bd_unlock(struct cfs_hash *hs, struct cfs_hash_bd *bds,
- unsigned int n, int excl)
-{
- struct cfs_hash_bucket *prev = NULL;
- int i;
-
- cfs_hash_for_each_bd(bds, n, i) {
- if (prev != bds[i].bd_bucket) {
- cfs_hash_bd_unlock(hs, &bds[i], excl);
- prev = bds[i].bd_bucket;
- }
- }
-}
-
-static struct hlist_node *
-cfs_hash_multi_bd_lookup_locked(struct cfs_hash *hs, struct cfs_hash_bd *bds,
- unsigned int n, const void *key)
-{
- struct hlist_node *ehnode;
- unsigned int i;
-
- cfs_hash_for_each_bd(bds, n, i) {
- ehnode = cfs_hash_bd_lookup_intent(hs, &bds[i], key, NULL,
- CFS_HS_LOOKUP_IT_FIND);
- if (ehnode)
- return ehnode;
- }
- return NULL;
-}
-
-static struct hlist_node *
-cfs_hash_multi_bd_findadd_locked(struct cfs_hash *hs, struct cfs_hash_bd *bds,
- unsigned int n, const void *key,
- struct hlist_node *hnode, int noref)
-{
- struct hlist_node *ehnode;
- int intent;
- unsigned int i;
-
- LASSERT(hnode);
- intent = (!noref * CFS_HS_LOOKUP_MASK_REF) | CFS_HS_LOOKUP_IT_PEEK;
-
- cfs_hash_for_each_bd(bds, n, i) {
- ehnode = cfs_hash_bd_lookup_intent(hs, &bds[i], key,
- NULL, intent);
- if (ehnode)
- return ehnode;
- }
-
- if (i == 1) { /* only one bucket */
- cfs_hash_bd_add_locked(hs, &bds[0], hnode);
- } else {
- struct cfs_hash_bd mybd;
-
- cfs_hash_bd_get(hs, key, &mybd);
- cfs_hash_bd_add_locked(hs, &mybd, hnode);
- }
-
- return hnode;
-}
-
-static struct hlist_node *
-cfs_hash_multi_bd_finddel_locked(struct cfs_hash *hs, struct cfs_hash_bd *bds,
- unsigned int n, const void *key,
- struct hlist_node *hnode)
-{
- struct hlist_node *ehnode;
- unsigned int i;
-
- cfs_hash_for_each_bd(bds, n, i) {
- ehnode = cfs_hash_bd_lookup_intent(hs, &bds[i], key, hnode,
- CFS_HS_LOOKUP_IT_FINDDEL);
- if (ehnode)
- return ehnode;
- }
- return NULL;
-}
-
-static void
-cfs_hash_bd_order(struct cfs_hash_bd *bd1, struct cfs_hash_bd *bd2)
-{
- int rc;
-
- if (!bd2->bd_bucket)
- return;
-
- if (!bd1->bd_bucket) {
- *bd1 = *bd2;
- bd2->bd_bucket = NULL;
- return;
- }
-
- rc = cfs_hash_bd_compare(bd1, bd2);
- if (!rc)
- bd2->bd_bucket = NULL;
- else if (rc > 0)
- swap(*bd1, *bd2); /* swap bd1 and bd2 */
-}
-
-void
-cfs_hash_dual_bd_get(struct cfs_hash *hs, const void *key,
- struct cfs_hash_bd *bds)
-{
- /* NB: caller should hold hs_lock.rw if REHASH is set */
- cfs_hash_bd_from_key(hs, hs->hs_buckets,
- hs->hs_cur_bits, key, &bds[0]);
- if (likely(!hs->hs_rehash_buckets)) {
- /* no rehash or not rehashing */
- bds[1].bd_bucket = NULL;
- return;
- }
-
- LASSERT(hs->hs_rehash_bits);
- cfs_hash_bd_from_key(hs, hs->hs_rehash_buckets,
- hs->hs_rehash_bits, key, &bds[1]);
-
- cfs_hash_bd_order(&bds[0], &bds[1]);
-}
-
-void
-cfs_hash_dual_bd_lock(struct cfs_hash *hs, struct cfs_hash_bd *bds, int excl)
-{
- cfs_hash_multi_bd_lock(hs, bds, 2, excl);
-}
-
-void
-cfs_hash_dual_bd_unlock(struct cfs_hash *hs, struct cfs_hash_bd *bds, int excl)
-{
- cfs_hash_multi_bd_unlock(hs, bds, 2, excl);
-}
-
-struct hlist_node *
-cfs_hash_dual_bd_lookup_locked(struct cfs_hash *hs, struct cfs_hash_bd *bds,
- const void *key)
-{
- return cfs_hash_multi_bd_lookup_locked(hs, bds, 2, key);
-}
-
-struct hlist_node *
-cfs_hash_dual_bd_findadd_locked(struct cfs_hash *hs, struct cfs_hash_bd *bds,
- const void *key, struct hlist_node *hnode,
- int noref)
-{
- return cfs_hash_multi_bd_findadd_locked(hs, bds, 2, key,
- hnode, noref);
-}
-
-struct hlist_node *
-cfs_hash_dual_bd_finddel_locked(struct cfs_hash *hs, struct cfs_hash_bd *bds,
- const void *key, struct hlist_node *hnode)
-{
- return cfs_hash_multi_bd_finddel_locked(hs, bds, 2, key, hnode);
-}
-
-static void
-cfs_hash_buckets_free(struct cfs_hash_bucket **buckets,
- int bkt_size, int prev_size, int size)
-{
- int i;
-
- for (i = prev_size; i < size; i++)
- kfree(buckets[i]);
-
- kvfree(buckets);
-}
-
-/*
- * Create or grow bucket memory. Return old_buckets if no allocation was
- * needed, the newly allocated buckets if allocation was needed and
- * successful, and NULL on error.
- */
-static struct cfs_hash_bucket **
-cfs_hash_buckets_realloc(struct cfs_hash *hs, struct cfs_hash_bucket **old_bkts,
- unsigned int old_size, unsigned int new_size)
-{
- struct cfs_hash_bucket **new_bkts;
- int i;
-
- LASSERT(!old_size || old_bkts);
-
- if (old_bkts && old_size == new_size)
- return old_bkts;
-
- new_bkts = kvmalloc_array(new_size, sizeof(new_bkts[0]), GFP_KERNEL);
- if (!new_bkts)
- return NULL;
-
- if (old_bkts) {
- memcpy(new_bkts, old_bkts,
- min(old_size, new_size) * sizeof(*old_bkts));
- }
-
- for (i = old_size; i < new_size; i++) {
- struct hlist_head *hhead;
- struct cfs_hash_bd bd;
-
- new_bkts[i] = kzalloc(cfs_hash_bkt_size(hs), GFP_KERNEL);
- if (!new_bkts[i]) {
- cfs_hash_buckets_free(new_bkts, cfs_hash_bkt_size(hs),
- old_size, new_size);
- return NULL;
- }
-
- new_bkts[i]->hsb_index = i;
- new_bkts[i]->hsb_version = 1; /* shouldn't be zero */
- new_bkts[i]->hsb_depmax = -1; /* unknown */
- bd.bd_bucket = new_bkts[i];
- cfs_hash_bd_for_each_hlist(hs, &bd, hhead)
- INIT_HLIST_HEAD(hhead);
-
- if (cfs_hash_with_no_lock(hs) ||
- cfs_hash_with_no_bktlock(hs))
- continue;
-
- if (cfs_hash_with_rw_bktlock(hs))
- rwlock_init(&new_bkts[i]->hsb_lock.rw);
- else if (cfs_hash_with_spin_bktlock(hs))
- spin_lock_init(&new_bkts[i]->hsb_lock.spin);
- else
- LBUG(); /* invalid use-case */
- }
- return new_bkts;
-}
-
-/**
- * Initialize new libcfs hash, where:
- * @name - Descriptive hash name
- * @cur_bits - Initial hash table size, in bits
- * @max_bits - Maximum allowed hash table resize, in bits
- * @bkt_bits - Hash slots per bucket, in bits
- * @extra_bytes - Extra bytes to allocate per bucket
- * @min_theta - Minimum load factor before the table may shrink
- * @max_theta - Maximum load factor before the table will grow
- * @ops - Registered hash table operations
- * @flags - CFS_HASH_REHASH enable dynamic hash resizing
- * - CFS_HASH_SORT enable chained hash sort
- */
-static void cfs_hash_rehash_worker(struct work_struct *work);
-
-#if CFS_HASH_DEBUG_LEVEL >= CFS_HASH_DEBUG_1
-static void cfs_hash_dep_print(struct work_struct *work)
-{
- struct cfs_hash *hs = container_of(work, struct cfs_hash, hs_dep_work);
- int dep;
- int bkt;
- int off;
- int bits;
-
- spin_lock(&hs->hs_dep_lock);
- dep = hs->hs_dep_max;
- bkt = hs->hs_dep_bkt;
- off = hs->hs_dep_off;
- bits = hs->hs_dep_bits;
- spin_unlock(&hs->hs_dep_lock);
-
- LCONSOLE_WARN("#### HASH %s (bits: %d): max depth %d at bucket %d/%d\n",
- hs->hs_name, bits, dep, bkt, off);
- spin_lock(&hs->hs_dep_lock);
- hs->hs_dep_bits = 0; /* mark as workitem done */
- spin_unlock(&hs->hs_dep_lock);
-}
-
-static void cfs_hash_depth_wi_init(struct cfs_hash *hs)
-{
- spin_lock_init(&hs->hs_dep_lock);
- INIT_WORK(&hs->hs_dep_work, cfs_hash_dep_print);
-}
-
-static void cfs_hash_depth_wi_cancel(struct cfs_hash *hs)
-{
- cancel_work_sync(&hs->hs_dep_work);
-}
-
-#else /* CFS_HASH_DEBUG_LEVEL < CFS_HASH_DEBUG_1 */
-
-static inline void cfs_hash_depth_wi_init(struct cfs_hash *hs) {}
-static inline void cfs_hash_depth_wi_cancel(struct cfs_hash *hs) {}
-
-#endif /* CFS_HASH_DEBUG_LEVEL >= CFS_HASH_DEBUG_1 */
-
-struct cfs_hash *
-cfs_hash_create(char *name, unsigned int cur_bits, unsigned int max_bits,
- unsigned int bkt_bits, unsigned int extra_bytes,
- unsigned int min_theta, unsigned int max_theta,
- struct cfs_hash_ops *ops, unsigned int flags)
-{
- struct cfs_hash *hs;
- int len;
-
- BUILD_BUG_ON(CFS_HASH_THETA_BITS >= 15);
-
- LASSERT(name);
- LASSERT(ops->hs_key);
- LASSERT(ops->hs_hash);
- LASSERT(ops->hs_object);
- LASSERT(ops->hs_keycmp);
- LASSERT(ops->hs_get);
- LASSERT(ops->hs_put || ops->hs_put_locked);
-
- if (flags & CFS_HASH_REHASH)
- flags |= CFS_HASH_COUNTER; /* must have counter */
-
- LASSERT(cur_bits > 0);
- LASSERT(cur_bits >= bkt_bits);
- LASSERT(max_bits >= cur_bits && max_bits < 31);
- LASSERT(ergo(!(flags & CFS_HASH_REHASH), cur_bits == max_bits));
- LASSERT(ergo(flags & CFS_HASH_REHASH, !(flags & CFS_HASH_NO_LOCK)));
- LASSERT(ergo(flags & CFS_HASH_REHASH_KEY, ops->hs_keycpy));
-
- len = !(flags & CFS_HASH_BIGNAME) ?
- CFS_HASH_NAME_LEN : CFS_HASH_BIGNAME_LEN;
- hs = kzalloc(offsetof(struct cfs_hash, hs_name[len]), GFP_KERNEL);
- if (!hs)
- return NULL;
-
- strlcpy(hs->hs_name, name, len);
- hs->hs_flags = flags;
-
- atomic_set(&hs->hs_refcount, 1);
- atomic_set(&hs->hs_count, 0);
-
- cfs_hash_lock_setup(hs);
- cfs_hash_hlist_setup(hs);
-
- hs->hs_cur_bits = (u8)cur_bits;
- hs->hs_min_bits = (u8)cur_bits;
- hs->hs_max_bits = (u8)max_bits;
- hs->hs_bkt_bits = (u8)bkt_bits;
-
- hs->hs_ops = ops;
- hs->hs_extra_bytes = extra_bytes;
- hs->hs_rehash_bits = 0;
- INIT_WORK(&hs->hs_rehash_work, cfs_hash_rehash_worker);
- cfs_hash_depth_wi_init(hs);
-
- if (cfs_hash_with_rehash(hs))
- __cfs_hash_set_theta(hs, min_theta, max_theta);
-
- hs->hs_buckets = cfs_hash_buckets_realloc(hs, NULL, 0,
- CFS_HASH_NBKT(hs));
- if (hs->hs_buckets)
- return hs;
-
- kfree(hs);
- return NULL;
-}
-EXPORT_SYMBOL(cfs_hash_create);
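/*
 * [Editorial usage sketch, not part of the original file.]  A minimal
 * caller of cfs_hash_create().  The demo_* callbacks are hypothetical;
 * their signatures and the CFS_HASH_MIN_THETA/CFS_HASH_MAX_THETA and
 * CFS_HASH_DEFAULT macros are assumed to match libcfs_hash.h.
 */
static unsigned int demo_hash(struct cfs_hash *hs, const void *key,
			      unsigned int mask);	/* hash key into [0, mask] */
static void *demo_key(struct hlist_node *hnode);	/* key address of hnode */
static int demo_keycmp(const void *key, struct hlist_node *hnode);
static void *demo_object(struct hlist_node *hnode);	/* object of hnode */
static void demo_get(struct cfs_hash *hs, struct hlist_node *hnode);
static void demo_put_locked(struct cfs_hash *hs, struct hlist_node *hnode);

static struct cfs_hash_ops demo_hash_ops = {
	.hs_hash	= demo_hash,
	.hs_key		= demo_key,
	.hs_keycmp	= demo_keycmp,
	.hs_object	= demo_object,
	.hs_get		= demo_get,
	.hs_put_locked	= demo_put_locked,
};

static struct cfs_hash *demo_hash_create(void)
{
	/* 2^7 hlist slots initially, growable to 2^12, 2^4 slots/bucket */
	return cfs_hash_create("demo", 7, 12, 4, 0,
			       CFS_HASH_MIN_THETA, CFS_HASH_MAX_THETA,
			       &demo_hash_ops, CFS_HASH_DEFAULT);
}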
-
-/**
- * Cleanup libcfs hash @hs.
- */
-static void
-cfs_hash_destroy(struct cfs_hash *hs)
-{
- struct hlist_node *hnode;
- struct hlist_node *pos;
- struct cfs_hash_bd bd;
- int i;
-
- LASSERT(hs);
- LASSERT(!cfs_hash_is_exiting(hs) &&
- !cfs_hash_is_iterating(hs));
-
- /**
- * prohibit further rehashes; no lock is needed because
- * I'm the only (last) one who can change it.
- */
- hs->hs_exiting = 1;
- if (cfs_hash_with_rehash(hs))
- cfs_hash_rehash_cancel(hs);
-
- cfs_hash_depth_wi_cancel(hs);
- /* rehash should be done/canceled */
- LASSERT(hs->hs_buckets && !hs->hs_rehash_buckets);
-
- cfs_hash_for_each_bucket(hs, &bd, i) {
- struct hlist_head *hhead;
-
- LASSERT(bd.bd_bucket);
- /* no need to take this lock, just for code consistency */
- cfs_hash_bd_lock(hs, &bd, 1);
-
- cfs_hash_bd_for_each_hlist(hs, &bd, hhead) {
- hlist_for_each_safe(hnode, pos, hhead) {
- LASSERTF(!cfs_hash_with_assert_empty(hs),
- "hash %s bucket %u(%u) is not empty: %u items left\n",
- hs->hs_name, bd.bd_bucket->hsb_index,
- bd.bd_offset, bd.bd_bucket->hsb_count);
- /* can't assert that the key is valid, because we
- * may have interrupted a rehash
- */
- cfs_hash_bd_del_locked(hs, &bd, hnode);
- cfs_hash_exit(hs, hnode);
- }
- }
- LASSERT(!bd.bd_bucket->hsb_count);
- cfs_hash_bd_unlock(hs, &bd, 1);
- cond_resched();
- }
-
- LASSERT(!atomic_read(&hs->hs_count));
-
- cfs_hash_buckets_free(hs->hs_buckets, cfs_hash_bkt_size(hs),
- 0, CFS_HASH_NBKT(hs));
- kfree(hs);
-}
-
-struct cfs_hash *cfs_hash_getref(struct cfs_hash *hs)
-{
- if (atomic_inc_not_zero(&hs->hs_refcount))
- return hs;
- return NULL;
-}
-EXPORT_SYMBOL(cfs_hash_getref);
-
-void cfs_hash_putref(struct cfs_hash *hs)
-{
- if (atomic_dec_and_test(&hs->hs_refcount))
- cfs_hash_destroy(hs);
-}
-EXPORT_SYMBOL(cfs_hash_putref);
-
-static inline int
-cfs_hash_rehash_bits(struct cfs_hash *hs)
-{
- if (cfs_hash_with_no_lock(hs) ||
- !cfs_hash_with_rehash(hs))
- return -EOPNOTSUPP;
-
- if (unlikely(cfs_hash_is_exiting(hs)))
- return -ESRCH;
-
- if (unlikely(cfs_hash_is_rehashing(hs)))
- return -EALREADY;
-
- if (unlikely(cfs_hash_is_iterating(hs)))
- return -EAGAIN;
-
- /* XXX: need to handle case with max_theta != 2.0
- * and the case with min_theta != 0.5
- */
- if ((hs->hs_cur_bits < hs->hs_max_bits) &&
- (__cfs_hash_theta(hs) > hs->hs_max_theta))
- return hs->hs_cur_bits + 1;
-
- if (!cfs_hash_with_shrink(hs))
- return 0;
-
- if ((hs->hs_cur_bits > hs->hs_min_bits) &&
- (__cfs_hash_theta(hs) < hs->hs_min_theta))
- return hs->hs_cur_bits - 1;
-
- return 0;
-}
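/*
 * [Editorial note, not part of the original file.]  Theta is the load
 * factor (items per hlist slot) in fixed point with CFS_HASH_THETA_BITS
 * fractional bits; assuming the header's definition it is computed as
 *
 *	theta = (hs_count << CFS_HASH_THETA_BITS) >> hs_cur_bits;
 *
 * With CFS_HASH_THETA_BITS = 10, a table of 2^7 slots holding 300 items
 * has theta = (300 << 10) >> 7 = 2400 (~2.34), which exceeds a
 * max_theta of 2.0 (2048), so cfs_hash_rehash_bits() above returns
 * cur_bits + 1 and the table grows.
 */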
-
-/**
- * don't allow inline rehash if:
- * - user wants non-blocking change (add/del) on hash table
- * - too many elements
- */
-static inline int
-cfs_hash_rehash_inline(struct cfs_hash *hs)
-{
- return !cfs_hash_with_nblk_change(hs) &&
- atomic_read(&hs->hs_count) < CFS_HASH_LOOP_HOG;
-}
-
-/**
- * Add item @hnode to libcfs hash @hs using @key. The registered
- * ops->hs_get function will be called when the item is added.
- */
-void
-cfs_hash_add(struct cfs_hash *hs, const void *key, struct hlist_node *hnode)
-{
- struct cfs_hash_bd bd;
- int bits;
-
- LASSERT(hlist_unhashed(hnode));
-
- cfs_hash_lock(hs, 0);
- cfs_hash_bd_get_and_lock(hs, key, &bd, 1);
-
- cfs_hash_key_validate(hs, key, hnode);
- cfs_hash_bd_add_locked(hs, &bd, hnode);
-
- cfs_hash_bd_unlock(hs, &bd, 1);
-
- bits = cfs_hash_rehash_bits(hs);
- cfs_hash_unlock(hs, 0);
- if (bits > 0)
- cfs_hash_rehash(hs, cfs_hash_rehash_inline(hs));
-}
-EXPORT_SYMBOL(cfs_hash_add);
-
-static struct hlist_node *
-cfs_hash_find_or_add(struct cfs_hash *hs, const void *key,
- struct hlist_node *hnode, int noref)
-{
- struct hlist_node *ehnode;
- struct cfs_hash_bd bds[2];
- int bits = 0;
-
- LASSERTF(hlist_unhashed(hnode), "hnode = %p\n", hnode);
-
- cfs_hash_lock(hs, 0);
- cfs_hash_dual_bd_get_and_lock(hs, key, bds, 1);
-
- cfs_hash_key_validate(hs, key, hnode);
- ehnode = cfs_hash_dual_bd_findadd_locked(hs, bds, key,
- hnode, noref);
- cfs_hash_dual_bd_unlock(hs, bds, 1);
-
- if (ehnode == hnode) /* new item added */
- bits = cfs_hash_rehash_bits(hs);
- cfs_hash_unlock(hs, 0);
- if (bits > 0)
- cfs_hash_rehash(hs, cfs_hash_rehash_inline(hs));
-
- return ehnode;
-}
-
-/**
- * Add item @hnode to libcfs hash @hs using @key. The registered
- * ops->hs_get function will be called if the item was added.
- * Returns 0 on success or -EALREADY on key collisions.
- */
-int
-cfs_hash_add_unique(struct cfs_hash *hs, const void *key,
- struct hlist_node *hnode)
-{
- return cfs_hash_find_or_add(hs, key, hnode, 1) != hnode ?
- -EALREADY : 0;
-}
-EXPORT_SYMBOL(cfs_hash_add_unique);
-
-/**
- * Add item @hnode to libcfs hash @hs using @key. If this @key
- * already exists in the hash then ops->hs_get will be called on the
- * conflicting entry and that entry will be returned to the caller.
- * Otherwise ops->hs_get is called on the item which was added.
- */
-void *
-cfs_hash_findadd_unique(struct cfs_hash *hs, const void *key,
- struct hlist_node *hnode)
-{
- hnode = cfs_hash_find_or_add(hs, key, hnode, 0);
-
- return cfs_hash_object(hs, hnode);
-}
-EXPORT_SYMBOL(cfs_hash_findadd_unique);
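/*
 * [Editorial usage sketch, not part of the original file.]  Typical
 * insert paths, assuming a hypothetical object that embeds its hash
 * linkage and is keyed by an integer id.
 */
struct demo_obj {
	u64			do_id;		/* hash key */
	struct hlist_node	do_hnode;	/* linkage into the hash */
};

static int demo_insert_unique(struct cfs_hash *hs, struct demo_obj *obj)
{
	/* fails with -EALREADY if do_id is already hashed */
	return cfs_hash_add_unique(hs, &obj->do_id, &obj->do_hnode);
}

static struct demo_obj *demo_find_or_insert(struct cfs_hash *hs,
					    struct demo_obj *obj)
{
	/* returns the existing object (referenced) or @obj if it was added */
	return cfs_hash_findadd_unique(hs, &obj->do_id, &obj->do_hnode);
}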
-
-/**
- * Delete item @hnode from the libcfs hash @hs using @key. The @key
- * is required to ensure the correct hash bucket is locked since there
- * is no direct linkage from the item to the bucket. The object
- * removed from the hash will be returned and ops->hs_put is called
- * on the removed object.
- */
-void *
-cfs_hash_del(struct cfs_hash *hs, const void *key, struct hlist_node *hnode)
-{
- void *obj = NULL;
- int bits = 0;
- struct cfs_hash_bd bds[2];
-
- cfs_hash_lock(hs, 0);
- cfs_hash_dual_bd_get_and_lock(hs, key, bds, 1);
-
- /* NB: do nothing if @hnode is not in hash table */
- if (!hnode || !hlist_unhashed(hnode)) {
- if (!bds[1].bd_bucket && hnode) {
- cfs_hash_bd_del_locked(hs, &bds[0], hnode);
- } else {
- hnode = cfs_hash_dual_bd_finddel_locked(hs, bds,
- key, hnode);
- }
- }
-
- if (hnode) {
- obj = cfs_hash_object(hs, hnode);
- bits = cfs_hash_rehash_bits(hs);
- }
-
- cfs_hash_dual_bd_unlock(hs, bds, 1);
- cfs_hash_unlock(hs, 0);
- if (bits > 0)
- cfs_hash_rehash(hs, cfs_hash_rehash_inline(hs));
-
- return obj;
-}
-EXPORT_SYMBOL(cfs_hash_del);
-
-/**
- * Delete the item matching @key in libcfs hash @hs. The first @key found
- * in the hash will be removed; if the key exists multiple times in the
- * hash @hs, this function must be called once per instance. The removed object
- * will be returned and ops->hs_put is called on the removed object.
- */
-void *
-cfs_hash_del_key(struct cfs_hash *hs, const void *key)
-{
- return cfs_hash_del(hs, key, NULL);
-}
-EXPORT_SYMBOL(cfs_hash_del_key);
-
-/**
- * Lookup an item using @key in the libcfs hash @hs and return it.
- * If the @key is found in the hash, ops->hs_get() is called and the
- * matching object is returned. It is the caller's responsibility
- * to call the counterpart ops->hs_put using the cfs_hash_put() macro
- * when finished with the object. If the @key was not found
- * in the hash @hs, NULL is returned.
- */
-void *
-cfs_hash_lookup(struct cfs_hash *hs, const void *key)
-{
- void *obj = NULL;
- struct hlist_node *hnode;
- struct cfs_hash_bd bds[2];
-
- cfs_hash_lock(hs, 0);
- cfs_hash_dual_bd_get_and_lock(hs, key, bds, 0);
-
- hnode = cfs_hash_dual_bd_lookup_locked(hs, bds, key);
- if (hnode)
- obj = cfs_hash_object(hs, hnode);
-
- cfs_hash_dual_bd_unlock(hs, bds, 0);
- cfs_hash_unlock(hs, 0);
-
- return obj;
-}
-EXPORT_SYMBOL(cfs_hash_lookup);
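/*
 * [Editorial usage sketch, not part of the original file.]  A lookup
 * takes a reference via ops->hs_get, so the caller must drop it;
 * demo_obj is the hypothetical object from the sketch above.
 */
static void demo_lookup(struct cfs_hash *hs, u64 id)
{
	struct demo_obj *obj = cfs_hash_lookup(hs, &id);

	if (!obj)
		return;
	/* ... use obj ... */
	cfs_hash_put(hs, &obj->do_hnode);	/* drop the lookup ref */
}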
-
-static void
-cfs_hash_for_each_enter(struct cfs_hash *hs)
-{
- LASSERT(!cfs_hash_is_exiting(hs));
-
- if (!cfs_hash_with_rehash(hs))
- return;
- /*
- * NB: there is a race on cfs_hash::hs_iterating, but it doesn't matter
- * because it's just an unreliable hint to the rehash thread, which
- * will try to finish the rehash ASAP when it sees this.
- */
- hs->hs_iterating = 1;
-
- cfs_hash_lock(hs, 1);
- hs->hs_iterators++;
- cfs_hash_unlock(hs, 1);
-
- /* NB: iteration is mostly called by service threads; instead of
- * blocking them we cancel any pending rehash request and will
- * relaunch it after the iteration
- */
- if (cfs_hash_is_rehashing(hs))
- cfs_hash_rehash_cancel(hs);
-}
-
-static void
-cfs_hash_for_each_exit(struct cfs_hash *hs)
-{
- int remained;
- int bits;
-
- if (!cfs_hash_with_rehash(hs))
- return;
- cfs_hash_lock(hs, 1);
- remained = --hs->hs_iterators;
- bits = cfs_hash_rehash_bits(hs);
- cfs_hash_unlock(hs, 1);
- /* NB: there is a race on cfs_hash::hs_iterating, see above */
- if (!remained)
- hs->hs_iterating = 0;
- if (bits > 0) {
- cfs_hash_rehash(hs, atomic_read(&hs->hs_count) <
- CFS_HASH_LOOP_HOG);
- }
-}
-
-/**
- * For each item in the libcfs hash @hs call the passed callback @func
- * and pass to it as an argument each hash item and the private @data.
- *
- * a) the function may sleep!
- * b) during the callback:
- * . the bucket lock is held so the callback must never sleep.
- * . if @remove_safe is true, the user can remove the current item via
- * cfs_hash_bd_del_locked
- */
-static u64
-cfs_hash_for_each_tight(struct cfs_hash *hs, cfs_hash_for_each_cb_t func,
- void *data, int remove_safe)
-{
- struct hlist_node *hnode;
- struct hlist_node *pos;
- struct cfs_hash_bd bd;
- u64 count = 0;
- int excl = !!remove_safe;
- int loop = 0;
- int i;
-
- cfs_hash_for_each_enter(hs);
-
- cfs_hash_lock(hs, 0);
- LASSERT(!cfs_hash_is_rehashing(hs));
-
- cfs_hash_for_each_bucket(hs, &bd, i) {
- struct hlist_head *hhead;
-
- cfs_hash_bd_lock(hs, &bd, excl);
- if (!func) { /* only glimpse size */
- count += bd.bd_bucket->hsb_count;
- cfs_hash_bd_unlock(hs, &bd, excl);
- continue;
- }
-
- cfs_hash_bd_for_each_hlist(hs, &bd, hhead) {
- hlist_for_each_safe(hnode, pos, hhead) {
- cfs_hash_bucket_validate(hs, &bd, hnode);
- count++;
- loop++;
- if (func(hs, &bd, hnode, data)) {
- cfs_hash_bd_unlock(hs, &bd, excl);
- goto out;
- }
- }
- }
- cfs_hash_bd_unlock(hs, &bd, excl);
- if (loop < CFS_HASH_LOOP_HOG)
- continue;
- loop = 0;
- cfs_hash_unlock(hs, 0);
- cond_resched();
- cfs_hash_lock(hs, 0);
- }
- out:
- cfs_hash_unlock(hs, 0);
-
- cfs_hash_for_each_exit(hs);
- return count;
-}
-
-struct cfs_hash_cond_arg {
- cfs_hash_cond_opt_cb_t func;
- void *arg;
-};
-
-static int
-cfs_hash_cond_del_locked(struct cfs_hash *hs, struct cfs_hash_bd *bd,
- struct hlist_node *hnode, void *data)
-{
- struct cfs_hash_cond_arg *cond = data;
-
- if (cond->func(cfs_hash_object(hs, hnode), cond->arg))
- cfs_hash_bd_del_locked(hs, bd, hnode);
- return 0;
-}
-
-/**
- * Delete items from the libcfs hash @hs for which @func returns true.
- * The write lock is held while looping over each bucket, so no object
- * can be referenced concurrently.
- */
-void
-cfs_hash_cond_del(struct cfs_hash *hs, cfs_hash_cond_opt_cb_t func, void *data)
-{
- struct cfs_hash_cond_arg arg = {
- .func = func,
- .arg = data,
- };
-
- cfs_hash_for_each_tight(hs, cfs_hash_cond_del_locked, &arg, 1);
-}
-EXPORT_SYMBOL(cfs_hash_cond_del);
-
-void
-cfs_hash_for_each(struct cfs_hash *hs, cfs_hash_for_each_cb_t func,
- void *data)
-{
- cfs_hash_for_each_tight(hs, func, data, 0);
-}
-EXPORT_SYMBOL(cfs_hash_for_each);
-
-void
-cfs_hash_for_each_safe(struct cfs_hash *hs, cfs_hash_for_each_cb_t func,
- void *data)
-{
- cfs_hash_for_each_tight(hs, func, data, 1);
-}
-EXPORT_SYMBOL(cfs_hash_for_each_safe);
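/*
 * [Editorial usage sketch, not part of the original file.]  A callback
 * for cfs_hash_for_each(); returning non-zero stops the iteration.
 * The bucket lock is held here, so the callback must not sleep.
 */
static int demo_count_cb(struct cfs_hash *hs, struct cfs_hash_bd *bd,
			 struct hlist_node *hnode, void *data)
{
	(*(u64 *)data)++;
	return 0;			/* keep iterating */
}

/* usage: u64 n = 0; cfs_hash_for_each(hs, demo_count_cb, &n); */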
-
-static int
-cfs_hash_peek(struct cfs_hash *hs, struct cfs_hash_bd *bd,
- struct hlist_node *hnode, void *data)
-{
- *(int *)data = 0;
- return 1; /* return 1 to break the loop */
-}
-
-int
-cfs_hash_is_empty(struct cfs_hash *hs)
-{
- int empty = 1;
-
- cfs_hash_for_each_tight(hs, cfs_hash_peek, &empty, 0);
- return empty;
-}
-EXPORT_SYMBOL(cfs_hash_is_empty);
-
-u64
-cfs_hash_size_get(struct cfs_hash *hs)
-{
- return cfs_hash_with_counter(hs) ?
- atomic_read(&hs->hs_count) :
- cfs_hash_for_each_tight(hs, NULL, NULL, 0);
-}
-EXPORT_SYMBOL(cfs_hash_size_get);
-
-/*
- * cfs_hash_for_each_relax:
- * Iterate the hash table and call @func on each item without
- * holding any lock. This function cannot guarantee that the
- * iteration will finish if these features are enabled:
- *
- * a. if rehash_key is enabled, an item can be moved from
- * one bucket to another
- * b. the user can remove an item with a non-zero refcount from the
- * hash table; even worse, the user may change its key and insert
- * it into another hash bucket.
- * There is no way to finish the iteration correctly in either case,
- * so the iteration has to stop on change.
- */
-static int
-cfs_hash_for_each_relax(struct cfs_hash *hs, cfs_hash_for_each_cb_t func,
- void *data, int start)
-{
- struct hlist_node *next = NULL;
- struct hlist_node *hnode;
- struct cfs_hash_bd bd;
- u32 version;
- int count = 0;
- int stop_on_change;
- int has_put_locked;
- int end = -1;
- int rc = 0;
- int i;
-
- stop_on_change = cfs_hash_with_rehash_key(hs) ||
- !cfs_hash_with_no_itemref(hs);
- has_put_locked = hs->hs_ops->hs_put_locked != NULL;
- cfs_hash_lock(hs, 0);
-again:
- LASSERT(!cfs_hash_is_rehashing(hs));
-
- cfs_hash_for_each_bucket(hs, &bd, i) {
- struct hlist_head *hhead;
-
- if (i < start)
- continue;
- else if (end > 0 && i >= end)
- break;
-
- cfs_hash_bd_lock(hs, &bd, 0);
- version = cfs_hash_bd_version_get(&bd);
-
- cfs_hash_bd_for_each_hlist(hs, &bd, hhead) {
- hnode = hhead->first;
- if (!hnode)
- continue;
- cfs_hash_get(hs, hnode);
-
- for (; hnode; hnode = next) {
- cfs_hash_bucket_validate(hs, &bd, hnode);
- next = hnode->next;
- if (next)
- cfs_hash_get(hs, next);
- cfs_hash_bd_unlock(hs, &bd, 0);
- cfs_hash_unlock(hs, 0);
-
- rc = func(hs, &bd, hnode, data);
- if (stop_on_change || !has_put_locked)
- cfs_hash_put(hs, hnode);
- cond_resched();
- count++;
-
- cfs_hash_lock(hs, 0);
- cfs_hash_bd_lock(hs, &bd, 0);
- if (stop_on_change) {
- if (version !=
- cfs_hash_bd_version_get(&bd))
- rc = -EINTR;
- } else if (has_put_locked) {
- cfs_hash_put_locked(hs, hnode);
- }
- if (rc) /* callback wants to break iteration */
- break;
- }
- if (next) {
- if (has_put_locked) {
- cfs_hash_put_locked(hs, next);
- next = NULL;
- }
- break;
- } else if (rc) {
- break;
- }
- }
- cfs_hash_bd_unlock(hs, &bd, 0);
- if (next && !has_put_locked) {
- cfs_hash_put(hs, next);
- next = NULL;
- }
- if (rc) /* callback wants to break iteration */
- break;
- }
- if (start > 0 && !rc) {
- end = start;
- start = 0;
- goto again;
- }
-
- cfs_hash_unlock(hs, 0);
- return count;
-}
-
-int
-cfs_hash_for_each_nolock(struct cfs_hash *hs, cfs_hash_for_each_cb_t func,
- void *data, int start)
-{
- if (cfs_hash_with_no_lock(hs) ||
- cfs_hash_with_rehash_key(hs) ||
- !cfs_hash_with_no_itemref(hs))
- return -EOPNOTSUPP;
-
- if (!hs->hs_ops->hs_get ||
- (!hs->hs_ops->hs_put && !hs->hs_ops->hs_put_locked))
- return -EOPNOTSUPP;
-
- cfs_hash_for_each_enter(hs);
- cfs_hash_for_each_relax(hs, func, data, start);
- cfs_hash_for_each_exit(hs);
-
- return 0;
-}
-EXPORT_SYMBOL(cfs_hash_for_each_nolock);
-
-/**
- * For each hash bucket in the libcfs hash @hs call the passed callback
- * @func until all the hash buckets are empty. The passed callback @func
- * or the previously registered callback hs->hs_put must remove the item
- * from the hash. You may either use the cfs_hash_del() or hlist_del()
- * functions. No rwlocks will be held during the callback @func, so it
- * is safe to sleep if needed. This function will not terminate until
- * the hash is empty. Note it is still possible to concurrently add new
- * items into the hash. It is the caller's responsibility to ensure
- * the required locking is in place to prevent concurrent insertions.
- */
-int
-cfs_hash_for_each_empty(struct cfs_hash *hs, cfs_hash_for_each_cb_t func,
- void *data)
-{
- unsigned int i = 0;
-
- if (cfs_hash_with_no_lock(hs))
- return -EOPNOTSUPP;
-
- if (!hs->hs_ops->hs_get ||
- (!hs->hs_ops->hs_put && !hs->hs_ops->hs_put_locked))
- return -EOPNOTSUPP;
-
- cfs_hash_for_each_enter(hs);
- while (cfs_hash_for_each_relax(hs, func, data, 0)) {
- CDEBUG(D_INFO, "Try to empty hash: %s, loop: %u\n",
- hs->hs_name, i++);
- }
- cfs_hash_for_each_exit(hs);
- return 0;
-}
-EXPORT_SYMBOL(cfs_hash_for_each_empty);
-
-void
-cfs_hash_hlist_for_each(struct cfs_hash *hs, unsigned int hindex,
- cfs_hash_for_each_cb_t func, void *data)
-{
- struct hlist_head *hhead;
- struct hlist_node *hnode;
- struct cfs_hash_bd bd;
-
- cfs_hash_for_each_enter(hs);
- cfs_hash_lock(hs, 0);
- if (hindex >= CFS_HASH_NHLIST(hs))
- goto out;
-
- cfs_hash_bd_index_set(hs, hindex, &bd);
-
- cfs_hash_bd_lock(hs, &bd, 0);
- hhead = cfs_hash_bd_hhead(hs, &bd);
- hlist_for_each(hnode, hhead) {
- if (func(hs, &bd, hnode, data))
- break;
- }
- cfs_hash_bd_unlock(hs, &bd, 0);
-out:
- cfs_hash_unlock(hs, 0);
- cfs_hash_for_each_exit(hs);
-}
-EXPORT_SYMBOL(cfs_hash_hlist_for_each);
-
-/*
- * For each item in the libcfs hash @hs which matches the @key call
- * the passed callback @func and pass to it as an argument each hash
- * item and the private @data. During the callback the bucket lock
- * is held so the callback must never sleep.
- */
-void
-cfs_hash_for_each_key(struct cfs_hash *hs, const void *key,
- cfs_hash_for_each_cb_t func, void *data)
-{
- struct hlist_node *hnode;
- struct cfs_hash_bd bds[2];
- unsigned int i;
-
- cfs_hash_lock(hs, 0);
-
- cfs_hash_dual_bd_get_and_lock(hs, key, bds, 0);
-
- cfs_hash_for_each_bd(bds, 2, i) {
- struct hlist_head *hlist = cfs_hash_bd_hhead(hs, &bds[i]);
-
- hlist_for_each(hnode, hlist) {
- cfs_hash_bucket_validate(hs, &bds[i], hnode);
-
- if (cfs_hash_keycmp(hs, key, hnode)) {
- if (func(hs, &bds[i], hnode, data))
- break;
- }
- }
- }
-
- cfs_hash_dual_bd_unlock(hs, bds, 0);
- cfs_hash_unlock(hs, 0);
-}
-EXPORT_SYMBOL(cfs_hash_for_each_key);
-
-/**
- * Rehash the libcfs hash @hs to the given @bits. This can be used
- * to grow the hash size when excessive chaining is detected, or to
- * shrink the hash when it is larger than needed. When the CFS_HASH_REHASH
- * flag is set in @hs the libcfs hash may be dynamically rehashed
- * during addition or removal if the hash's theta value exceeds
- * either the hs->hs_min_theta or hs->hs_max_theta values. By default
- * these values are tuned to keep the chained hash depth small, and
- * this approach assumes a reasonably uniform hashing function. The
- * theta thresholds for @hs are tunable via cfs_hash_set_theta().
- */
-void
-cfs_hash_rehash_cancel(struct cfs_hash *hs)
-{
- LASSERT(cfs_hash_with_rehash(hs));
- cancel_work_sync(&hs->hs_rehash_work);
-}
-
-void
-cfs_hash_rehash(struct cfs_hash *hs, int do_rehash)
-{
- int rc;
-
- LASSERT(cfs_hash_with_rehash(hs) && !cfs_hash_with_no_lock(hs));
-
- cfs_hash_lock(hs, 1);
-
- rc = cfs_hash_rehash_bits(hs);
- if (rc <= 0) {
- cfs_hash_unlock(hs, 1);
- return;
- }
-
- hs->hs_rehash_bits = rc;
- if (!do_rehash) {
- /* launch and return */
- queue_work(cfs_rehash_wq, &hs->hs_rehash_work);
- cfs_hash_unlock(hs, 1);
- return;
- }
-
- /* rehash right now */
- cfs_hash_unlock(hs, 1);
-
- cfs_hash_rehash_worker(&hs->hs_rehash_work);
-}
-
-static int
-cfs_hash_rehash_bd(struct cfs_hash *hs, struct cfs_hash_bd *old)
-{
- struct cfs_hash_bd new;
- struct hlist_head *hhead;
- struct hlist_node *hnode;
- struct hlist_node *pos;
- void *key;
- int c = 0;
-
- /* caller holds cfs_hash_lock(hs, 1), so no bucket lock is needed */
- cfs_hash_bd_for_each_hlist(hs, old, hhead) {
- hlist_for_each_safe(hnode, pos, hhead) {
- key = cfs_hash_key(hs, hnode);
- LASSERT(key);
- /* Validate hnode is in the correct bucket. */
- cfs_hash_bucket_validate(hs, old, hnode);
- /*
- * Delete from old hash bucket; move to new bucket.
- * ops->hs_key must be defined.
- */
- cfs_hash_bd_from_key(hs, hs->hs_rehash_buckets,
- hs->hs_rehash_bits, key, &new);
- cfs_hash_bd_move_locked(hs, old, &new, hnode);
- c++;
- }
- }
-
- return c;
-}
-
-static void
-cfs_hash_rehash_worker(struct work_struct *work)
-{
- struct cfs_hash *hs = container_of(work, struct cfs_hash, hs_rehash_work);
- struct cfs_hash_bucket **bkts;
- struct cfs_hash_bd bd;
- unsigned int old_size;
- unsigned int new_size;
- int bsize;
- int count = 0;
- int rc = 0;
- int i;
-
- LASSERT(hs && cfs_hash_with_rehash(hs));
-
- cfs_hash_lock(hs, 0);
- LASSERT(cfs_hash_is_rehashing(hs));
-
- old_size = CFS_HASH_NBKT(hs);
- new_size = CFS_HASH_RH_NBKT(hs);
-
- cfs_hash_unlock(hs, 0);
-
- /*
- * don't need hs::hs_rwlock for hs::hs_buckets,
- * because nobody can change bkt-table except me.
- */
- bkts = cfs_hash_buckets_realloc(hs, hs->hs_buckets,
- old_size, new_size);
- cfs_hash_lock(hs, 1);
- if (!bkts) {
- rc = -ENOMEM;
- goto out;
- }
-
- if (bkts == hs->hs_buckets) {
- bkts = NULL; /* do nothing */
- goto out;
- }
-
- rc = __cfs_hash_theta(hs);
- if ((rc >= hs->hs_min_theta) && (rc <= hs->hs_max_theta)) {
- /* free the newly allocated bkt-table */
- old_size = new_size;
- new_size = CFS_HASH_NBKT(hs);
- rc = -EALREADY;
- goto out;
- }
-
- LASSERT(!hs->hs_rehash_buckets);
- hs->hs_rehash_buckets = bkts;
-
- rc = 0;
- cfs_hash_for_each_bucket(hs, &bd, i) {
- if (cfs_hash_is_exiting(hs)) {
- rc = -ESRCH;
- /* someone wants to destroy the hash, abort now */
- if (old_size < new_size) /* OK to free old bkt-table */
- break;
- /* it's shrinking, need to free the new bkt-table */
- hs->hs_rehash_buckets = NULL;
- old_size = new_size;
- new_size = CFS_HASH_NBKT(hs);
- goto out;
- }
-
- count += cfs_hash_rehash_bd(hs, &bd);
- if (count < CFS_HASH_LOOP_HOG ||
- cfs_hash_is_iterating(hs)) { /* need to finish ASAP */
- continue;
- }
-
- count = 0;
- cfs_hash_unlock(hs, 1);
- cond_resched();
- cfs_hash_lock(hs, 1);
- }
-
- hs->hs_rehash_count++;
-
- bkts = hs->hs_buckets;
- hs->hs_buckets = hs->hs_rehash_buckets;
- hs->hs_rehash_buckets = NULL;
-
- hs->hs_cur_bits = hs->hs_rehash_bits;
-out:
- hs->hs_rehash_bits = 0;
- bsize = cfs_hash_bkt_size(hs);
- cfs_hash_unlock(hs, 1);
- /* can't refer to @hs anymore because it could be destroyed */
- if (bkts)
- cfs_hash_buckets_free(bkts, bsize, new_size, old_size);
- if (rc)
- CDEBUG(D_INFO, "early quit of rehashing: %d\n", rc);
-}
-
-/**
- * Rehash the object referenced by @hnode in the libcfs hash @hs. The
- * @old_key must be provided to locate the objects previous location
- * in the hash, and the @new_key will be used to reinsert the object.
- * Use this function instead of a cfs_hash_add() + cfs_hash_del()
- * combo when it is critical that there is no window in time where the
- * object is missing from the hash. When an object is being rehashed
- * the registered cfs_hash_get() and cfs_hash_put() functions will
- * not be called.
- */
-void cfs_hash_rehash_key(struct cfs_hash *hs, const void *old_key,
- void *new_key, struct hlist_node *hnode)
-{
- struct cfs_hash_bd bds[3];
- struct cfs_hash_bd old_bds[2];
- struct cfs_hash_bd new_bd;
-
- LASSERT(!hlist_unhashed(hnode));
-
- cfs_hash_lock(hs, 0);
-
- cfs_hash_dual_bd_get(hs, old_key, old_bds);
- cfs_hash_bd_get(hs, new_key, &new_bd);
-
- bds[0] = old_bds[0];
- bds[1] = old_bds[1];
- bds[2] = new_bd;
-
- /* NB: bds[0] and bds[1] are ordered already */
- cfs_hash_bd_order(&bds[1], &bds[2]);
- cfs_hash_bd_order(&bds[0], &bds[1]);
-
- cfs_hash_multi_bd_lock(hs, bds, 3, 1);
- if (likely(!old_bds[1].bd_bucket)) {
- cfs_hash_bd_move_locked(hs, &old_bds[0], &new_bd, hnode);
- } else {
- cfs_hash_dual_bd_finddel_locked(hs, old_bds, old_key, hnode);
- cfs_hash_bd_add_locked(hs, &new_bd, hnode);
- }
- /* overwrite the key inside the locks, otherwise it may race with
- * other operations, e.g. rehash
- */
- cfs_hash_keycpy(hs, hnode, new_key);
-
- cfs_hash_multi_bd_unlock(hs, bds, 3, 1);
- cfs_hash_unlock(hs, 0);
-}
-EXPORT_SYMBOL(cfs_hash_rehash_key);
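/*
 * [Editorial usage sketch, not part of the original file.]  Re-keying in
 * place requires CFS_HASH_REHASH_KEY (so ops->hs_keycpy is set); demo_obj
 * is the hypothetical object from the sketches above.
 */
static void demo_change_key(struct cfs_hash *hs, struct demo_obj *obj,
			    u64 new_id)
{
	/* moves the object without any window where it is unhashed;
	 * hs_get/hs_put are not called
	 */
	cfs_hash_rehash_key(hs, &obj->do_id, &new_id, &obj->do_hnode);
}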
-
-void cfs_hash_debug_header(struct seq_file *m)
-{
- seq_printf(m, "%-*s cur min max theta t-min t-max flags rehash count maxdep maxdepb distribution\n",
- CFS_HASH_BIGNAME_LEN, "name");
-}
-EXPORT_SYMBOL(cfs_hash_debug_header);
-
-static struct cfs_hash_bucket **
-cfs_hash_full_bkts(struct cfs_hash *hs)
-{
- /* NB: caller should hold hs->hs_rwlock if REHASH is set */
- if (!hs->hs_rehash_buckets)
- return hs->hs_buckets;
-
- LASSERT(hs->hs_rehash_bits);
- return hs->hs_rehash_bits > hs->hs_cur_bits ?
- hs->hs_rehash_buckets : hs->hs_buckets;
-}
-
-static unsigned int
-cfs_hash_full_nbkt(struct cfs_hash *hs)
-{
- /* NB: caller should hold hs->hs_rwlock if REHASH is set */
- if (!hs->hs_rehash_buckets)
- return CFS_HASH_NBKT(hs);
-
- LASSERT(hs->hs_rehash_bits);
- return hs->hs_rehash_bits > hs->hs_cur_bits ?
- CFS_HASH_RH_NBKT(hs) : CFS_HASH_NBKT(hs);
-}
-
-void cfs_hash_debug_str(struct cfs_hash *hs, struct seq_file *m)
-{
- int dist[8] = { 0, };
- int maxdep = -1;
- int maxdepb = -1;
- int total = 0;
- int theta;
- int i;
-
- cfs_hash_lock(hs, 0);
- theta = __cfs_hash_theta(hs);
-
- seq_printf(m, "%-*s %5d %5d %5d %d.%03d %d.%03d %d.%03d 0x%02x %6d ",
- CFS_HASH_BIGNAME_LEN, hs->hs_name,
- 1 << hs->hs_cur_bits, 1 << hs->hs_min_bits,
- 1 << hs->hs_max_bits,
- __cfs_hash_theta_int(theta), __cfs_hash_theta_frac(theta),
- __cfs_hash_theta_int(hs->hs_min_theta),
- __cfs_hash_theta_frac(hs->hs_min_theta),
- __cfs_hash_theta_int(hs->hs_max_theta),
- __cfs_hash_theta_frac(hs->hs_max_theta),
- hs->hs_flags, hs->hs_rehash_count);
-
- /*
- * The distribution is a summary of the chained hash depth in
- * each of the libcfs hash buckets. Each bucket's hsb_count is
- * divided by the hash theta value and used to generate a
- * histogram of the hash distribution. A uniform hash will
- * result in all hash buckets being close to the average, thus
- * only the first few entries in the histogram will be non-zero.
- * If your hash function results in a non-uniform hash, this will
- * be observable as outlier buckets in the distribution histogram.
- *
- * Uniform hash distribution: 128/128/0/0/0/0/0/0
- * Non-Uniform hash distribution: 128/125/0/0/0/0/2/1
- */
- for (i = 0; i < cfs_hash_full_nbkt(hs); i++) {
- struct cfs_hash_bd bd;
-
- bd.bd_bucket = cfs_hash_full_bkts(hs)[i];
- cfs_hash_bd_lock(hs, &bd, 0);
- if (maxdep < bd.bd_bucket->hsb_depmax) {
- maxdep = bd.bd_bucket->hsb_depmax;
- maxdepb = ffz(~maxdep);
- }
- total += bd.bd_bucket->hsb_count;
- dist[min(fls(bd.bd_bucket->hsb_count / max(theta, 1)), 7)]++;
- cfs_hash_bd_unlock(hs, &bd, 0);
- }
-
- seq_printf(m, "%7d %7d %7d ", total, maxdep, maxdepb);
- for (i = 0; i < 8; i++)
- seq_printf(m, "%d%c", dist[i], (i == 7) ? '\n' : '/');
-
- cfs_hash_unlock(hs, 0);
-}
-EXPORT_SYMBOL(cfs_hash_debug_str);
diff --git a/drivers/staging/lustre/lnet/libcfs/libcfs_cpu.c b/drivers/staging/lustre/lnet/libcfs/libcfs_cpu.c
deleted file mode 100644
index 76291a350406..000000000000
--- a/drivers/staging/lustre/lnet/libcfs/libcfs_cpu.c
+++ /dev/null
@@ -1,228 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/*
- * GPL HEADER START
- *
- * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 only,
- * as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * General Public License version 2 for more details (a copy is included
- * in the LICENSE file that accompanied this code).
- *
- * GPL HEADER END
- */
-/*
- * Copyright (c) 2010, Oracle and/or its affiliates. All rights reserved.
- * Copyright (c) 2012, Intel Corporation.
- */
-/*
- * This file is part of Lustre, http://www.lustre.org/
- * Lustre is a trademark of Sun Microsystems, Inc.
- *
- * Please see comments in libcfs/include/libcfs/libcfs_cpu.h for introduction
- *
- * Author: liang@whamcloud.com
- */
-
-#define DEBUG_SUBSYSTEM S_LNET
-
-#include <linux/libcfs/libcfs.h>
-
-/** Global CPU partition table */
-struct cfs_cpt_table *cfs_cpt_table __read_mostly;
-EXPORT_SYMBOL(cfs_cpt_table);
-
-#ifndef HAVE_LIBCFS_CPT
-
-#define CFS_CPU_VERSION_MAGIC 0xbabecafe
-
-struct cfs_cpt_table *
-cfs_cpt_table_alloc(unsigned int ncpt)
-{
- struct cfs_cpt_table *cptab;
-
- if (ncpt != 1) {
- CERROR("Can't support cpu partition number %d\n", ncpt);
- return NULL;
- }
-
- cptab = kzalloc(sizeof(*cptab), GFP_NOFS);
- if (cptab) {
- cptab->ctb_version = CFS_CPU_VERSION_MAGIC;
- node_set(0, cptab->ctb_nodemask);
- cptab->ctb_nparts = ncpt;
- }
-
- return cptab;
-}
-EXPORT_SYMBOL(cfs_cpt_table_alloc);
-
-void
-cfs_cpt_table_free(struct cfs_cpt_table *cptab)
-{
- LASSERT(cptab->ctb_version == CFS_CPU_VERSION_MAGIC);
-
- kfree(cptab);
-}
-EXPORT_SYMBOL(cfs_cpt_table_free);
-
-#ifdef CONFIG_SMP
-int
-cfs_cpt_table_print(struct cfs_cpt_table *cptab, char *buf, int len)
-{
- int rc;
-
- rc = snprintf(buf, len, "%d\t: %d\n", 0, 0);
- len -= rc;
- if (len <= 0)
- return -EFBIG;
-
- return rc;
-}
-EXPORT_SYMBOL(cfs_cpt_table_print);
-#endif /* CONFIG_SMP */
-
-int
-cfs_cpt_number(struct cfs_cpt_table *cptab)
-{
- return 1;
-}
-EXPORT_SYMBOL(cfs_cpt_number);
-
-int
-cfs_cpt_weight(struct cfs_cpt_table *cptab, int cpt)
-{
- return 1;
-}
-EXPORT_SYMBOL(cfs_cpt_weight);
-
-int
-cfs_cpt_online(struct cfs_cpt_table *cptab, int cpt)
-{
- return 1;
-}
-EXPORT_SYMBOL(cfs_cpt_online);
-
-nodemask_t *
-cfs_cpt_nodemask(struct cfs_cpt_table *cptab, int cpt)
-{
- return &cptab->ctb_nodemask;
-}
-EXPORT_SYMBOL(cfs_cpt_nodemask);
-
-int
-cfs_cpt_set_cpu(struct cfs_cpt_table *cptab, int cpt, int cpu)
-{
- return 1;
-}
-EXPORT_SYMBOL(cfs_cpt_set_cpu);
-
-void
-cfs_cpt_unset_cpu(struct cfs_cpt_table *cptab, int cpt, int cpu)
-{
-}
-EXPORT_SYMBOL(cfs_cpt_unset_cpu);
-
-int
-cfs_cpt_set_cpumask(struct cfs_cpt_table *cptab, int cpt, cpumask_t *mask)
-{
- return 1;
-}
-EXPORT_SYMBOL(cfs_cpt_set_cpumask);
-
-void
-cfs_cpt_unset_cpumask(struct cfs_cpt_table *cptab, int cpt, cpumask_t *mask)
-{
-}
-EXPORT_SYMBOL(cfs_cpt_unset_cpumask);
-
-int
-cfs_cpt_set_node(struct cfs_cpt_table *cptab, int cpt, int node)
-{
- return 1;
-}
-EXPORT_SYMBOL(cfs_cpt_set_node);
-
-void
-cfs_cpt_unset_node(struct cfs_cpt_table *cptab, int cpt, int node)
-{
-}
-EXPORT_SYMBOL(cfs_cpt_unset_node);
-
-int
-cfs_cpt_set_nodemask(struct cfs_cpt_table *cptab, int cpt, nodemask_t *mask)
-{
- return 1;
-}
-EXPORT_SYMBOL(cfs_cpt_set_nodemask);
-
-void
-cfs_cpt_unset_nodemask(struct cfs_cpt_table *cptab, int cpt, nodemask_t *mask)
-{
-}
-EXPORT_SYMBOL(cfs_cpt_unset_nodemask);
-
-void
-cfs_cpt_clear(struct cfs_cpt_table *cptab, int cpt)
-{
-}
-EXPORT_SYMBOL(cfs_cpt_clear);
-
-int
-cfs_cpt_spread_node(struct cfs_cpt_table *cptab, int cpt)
-{
- return 0;
-}
-EXPORT_SYMBOL(cfs_cpt_spread_node);
-
-int
-cfs_cpu_ht_nsiblings(int cpu)
-{
- return 1;
-}
-EXPORT_SYMBOL(cfs_cpu_ht_nsiblings);
-
-int
-cfs_cpt_current(struct cfs_cpt_table *cptab, int remap)
-{
- return 0;
-}
-EXPORT_SYMBOL(cfs_cpt_current);
-
-int
-cfs_cpt_of_cpu(struct cfs_cpt_table *cptab, int cpu)
-{
- return 0;
-}
-EXPORT_SYMBOL(cfs_cpt_of_cpu);
-
-int
-cfs_cpt_bind(struct cfs_cpt_table *cptab, int cpt)
-{
- return 0;
-}
-EXPORT_SYMBOL(cfs_cpt_bind);
-
-void
-cfs_cpu_fini(void)
-{
- if (cfs_cpt_table) {
- cfs_cpt_table_free(cfs_cpt_table);
- cfs_cpt_table = NULL;
- }
-}
-
-int
-cfs_cpu_init(void)
-{
- cfs_cpt_table = cfs_cpt_table_alloc(1);
-
- return cfs_cpt_table ? 0 : -1;
-}
-
-#endif /* HAVE_LIBCFS_CPT */
diff --git a/drivers/staging/lustre/lnet/libcfs/libcfs_lock.c b/drivers/staging/lustre/lnet/libcfs/libcfs_lock.c
deleted file mode 100644
index 670ad5a34224..000000000000
--- a/drivers/staging/lustre/lnet/libcfs/libcfs_lock.c
+++ /dev/null
@@ -1,152 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/*
- * GPL HEADER START
- *
- * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 only,
- * as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * General Public License version 2 for more details (a copy is included
- * in the LICENSE file that accompanied this code).
- *
- * GPL HEADER END
- */
-/* Copyright (c) 2010, Oracle and/or its affiliates. All rights reserved.
- * Copyright (c) 2012, 2015 Intel Corporation.
- */
-/*
- * This file is part of Lustre, http://www.lustre.org/
- * Lustre is a trademark of Sun Microsystems, Inc.
- *
- * Author: liang@whamcloud.com
- */
-
-#define DEBUG_SUBSYSTEM S_LNET
-
-#include <linux/libcfs/libcfs.h>
-
-/** destroy cpu-partition lock, see libcfs_private.h for more detail */
-void
-cfs_percpt_lock_free(struct cfs_percpt_lock *pcl)
-{
- LASSERT(pcl->pcl_locks);
- LASSERT(!pcl->pcl_locked);
-
- cfs_percpt_free(pcl->pcl_locks);
- kfree(pcl);
-}
-EXPORT_SYMBOL(cfs_percpt_lock_free);
-
-/**
- * create cpu-partition lock, see libcfs_private.h for more detail.
- *
- * cpu-partition lock is designed for large-scale SMP systems, so we need
- * to reduce cacheline conflicts as much as we can; that's the
- * reason we always allocate cacheline-aligned memory blocks.
- */
-struct cfs_percpt_lock *
-cfs_percpt_lock_create(struct cfs_cpt_table *cptab,
- struct lock_class_key *keys)
-{
- struct cfs_percpt_lock *pcl;
- spinlock_t *lock;
- int i;
-
- /* NB: cptab can be NULL; pcl will be for HW CPUs in that case */
- pcl = kzalloc(sizeof(*pcl), GFP_NOFS);
- if (!pcl)
- return NULL;
-
- pcl->pcl_cptab = cptab;
- pcl->pcl_locks = cfs_percpt_alloc(cptab, sizeof(*lock));
- if (!pcl->pcl_locks) {
- kfree(pcl);
- return NULL;
- }
-
- if (!keys)
- CWARN("Cannot setup class key for percpt lock, you may see recursive locking warnings which are actually fake.\n");
-
- cfs_percpt_for_each(lock, i, pcl->pcl_locks) {
- spin_lock_init(lock);
- if (keys)
- lockdep_set_class(lock, &keys[i]);
- }
-
- return pcl;
-}
-EXPORT_SYMBOL(cfs_percpt_lock_create);
-
-/**
- * lock a CPU partition
- *
- * \a index != CFS_PERCPT_LOCK_EX
- * hold private lock indexed by \a index
- *
- * \a index == CFS_PERCPT_LOCK_EX
- * exclusively lock @pcl and nobody can take private lock
- */
-void
-cfs_percpt_lock(struct cfs_percpt_lock *pcl, int index)
- __acquires(pcl->pcl_locks)
-{
- int ncpt = cfs_cpt_number(pcl->pcl_cptab);
- int i;
-
- LASSERT(index >= CFS_PERCPT_LOCK_EX && index < ncpt);
-
- if (ncpt == 1) {
- index = 0;
- } else { /* serialize with exclusive lock */
- while (pcl->pcl_locked)
- cpu_relax();
- }
-
- if (likely(index != CFS_PERCPT_LOCK_EX)) {
- spin_lock(pcl->pcl_locks[index]);
- return;
- }
-
- /* exclusive lock request */
- for (i = 0; i < ncpt; i++) {
- spin_lock(pcl->pcl_locks[i]);
- if (!i) {
- LASSERT(!pcl->pcl_locked);
- /* nobody should take a private lock after this,
- * so I won't be starved for too long
- */
- pcl->pcl_locked = 1;
- }
- }
-}
-EXPORT_SYMBOL(cfs_percpt_lock);
-
-/** unlock a CPU partition */
-void
-cfs_percpt_unlock(struct cfs_percpt_lock *pcl, int index)
- __releases(pcl->pcl_locks)
-{
- int ncpt = cfs_cpt_number(pcl->pcl_cptab);
- int i;
-
- index = ncpt == 1 ? 0 : index;
-
- if (likely(index != CFS_PERCPT_LOCK_EX)) {
- spin_unlock(pcl->pcl_locks[index]);
- return;
- }
-
- for (i = ncpt - 1; i >= 0; i--) {
- if (!i) {
- LASSERT(pcl->pcl_locked);
- pcl->pcl_locked = 0;
- }
- spin_unlock(pcl->pcl_locks[i]);
- }
-}
-EXPORT_SYMBOL(cfs_percpt_unlock);
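/*
 * [Editorial usage sketch, not part of the original file.]  Private vs
 * exclusive locking; @cpt would normally come from cfs_cpt_current().
 */
static void demo_percpt_lock_use(struct cfs_percpt_lock *pcl, int cpt)
{
	cfs_percpt_lock(pcl, cpt);		/* one partition only */
	/* ... touch state private to partition @cpt ... */
	cfs_percpt_unlock(pcl, cpt);

	cfs_percpt_lock(pcl, CFS_PERCPT_LOCK_EX); /* all partitions */
	/* ... touch state shared across partitions ... */
	cfs_percpt_unlock(pcl, CFS_PERCPT_LOCK_EX);
}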
diff --git a/drivers/staging/lustre/lnet/libcfs/libcfs_mem.c b/drivers/staging/lustre/lnet/libcfs/libcfs_mem.c
deleted file mode 100644
index 7faed94994ea..000000000000
--- a/drivers/staging/lustre/lnet/libcfs/libcfs_mem.c
+++ /dev/null
@@ -1,167 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/*
- * GPL HEADER START
- *
- * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 only,
- * as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * General Public License version 2 for more details (a copy is included
- * in the LICENSE file that accompanied this code).
- *
- * GPL HEADER END
- */
-/*
- * Copyright (c) 2010, Oracle and/or its affiliates. All rights reserved.
- * Copyright (c) 2012, Intel Corporation.
- */
-/*
- * This file is part of Lustre, http://www.lustre.org/
- * Lustre is a trademark of Sun Microsystems, Inc.
- *
- * Author: liang@whamcloud.com
- */
-
-#define DEBUG_SUBSYSTEM S_LNET
-
-#include <linux/libcfs/libcfs.h>
-
-struct cfs_var_array {
- unsigned int va_count; /* # of buffers */
- unsigned int va_size; /* size of each var */
- struct cfs_cpt_table *va_cptab; /* cpu partition table */
- void *va_ptrs[0]; /* buffer addresses */
-};
-
-/*
- * free per-cpu data, see more detail in cfs_percpt_alloc
- */
-void
-cfs_percpt_free(void *vars)
-{
- struct cfs_var_array *arr;
- int i;
-
- arr = container_of(vars, struct cfs_var_array, va_ptrs[0]);
-
- for (i = 0; i < arr->va_count; i++)
- kfree(arr->va_ptrs[i]);
-
- kvfree(arr);
-}
-EXPORT_SYMBOL(cfs_percpt_free);
-
-/*
- * allocate per cpu-partition variables; the returned value is an array of
- * pointers that can be indexed by CPU partition ID, i.e.:
- *
- * arr = cfs_percpt_alloc(cfs_cpu_pt, size);
- * then caller can access memory block for CPU 0 by arr[0],
- * memory block for CPU 1 by arr[1]...
- * memory block for CPU N by arr[N]...
- *
- * All blocks are cacheline aligned.
- */
-void *
-cfs_percpt_alloc(struct cfs_cpt_table *cptab, unsigned int size)
-{
- struct cfs_var_array *arr;
- int count;
- int i;
-
- count = cfs_cpt_number(cptab);
-
- arr = kvzalloc(offsetof(struct cfs_var_array, va_ptrs[count]),
- GFP_KERNEL);
- if (!arr)
- return NULL;
-
- size = L1_CACHE_ALIGN(size);
- arr->va_size = size;
- arr->va_count = count;
- arr->va_cptab = cptab;
-
- for (i = 0; i < count; i++) {
- arr->va_ptrs[i] = kzalloc_node(size, GFP_KERNEL,
- cfs_cpt_spread_node(cptab, i));
- if (!arr->va_ptrs[i]) {
- cfs_percpt_free((void *)&arr->va_ptrs[0]);
- return NULL;
- }
- }
-
- return (void *)&arr->va_ptrs[0];
-}
-EXPORT_SYMBOL(cfs_percpt_alloc);
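/*
 * [Editorial usage sketch, not part of the original file.]  Per-partition
 * counters; the returned value is an array of pointers, one block per
 * partition, walked with the cfs_percpt_for_each() iterator used
 * elsewhere in libcfs.
 */
static u64 **demo_counters_alloc(struct cfs_cpt_table *cptab)
{
	u64 **cnt;
	u64 *p;
	int i;

	cnt = cfs_percpt_alloc(cptab, sizeof(**cnt));
	if (!cnt)
		return NULL;

	cfs_percpt_for_each(p, i, cnt)
		*p = 0;
	return cnt;		/* release with cfs_percpt_free(cnt) */
}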
-
-/*
- * return the number of CPUs (or the number of elements in the per-cpu
- * data) according to the cptab of @vars
- */
-int
-cfs_percpt_number(void *vars)
-{
- struct cfs_var_array *arr;
-
- arr = container_of(vars, struct cfs_var_array, va_ptrs[0]);
-
- return arr->va_count;
-}
-EXPORT_SYMBOL(cfs_percpt_number);
-
-/*
- * free variable array, see more detail in cfs_array_alloc
- */
-void
-cfs_array_free(void *vars)
-{
- struct cfs_var_array *arr;
- int i;
-
- arr = container_of(vars, struct cfs_var_array, va_ptrs[0]);
-
- for (i = 0; i < arr->va_count; i++) {
- if (!arr->va_ptrs[i])
- continue;
-
- kvfree(arr->va_ptrs[i]);
- }
- kvfree(arr);
-}
-EXPORT_SYMBOL(cfs_array_free);
-
-/*
- * allocate a variable array, returned value is an array of pointers.
- * Caller can specify length of array by @count, @size is size of each
- * memory block in array.
- */
-void *
-cfs_array_alloc(int count, unsigned int size)
-{
- struct cfs_var_array *arr;
- int i;
-
- arr = kvmalloc(offsetof(struct cfs_var_array, va_ptrs[count]), GFP_KERNEL);
- if (!arr)
- return NULL;
-
- arr->va_count = count;
- arr->va_size = size;
-
- for (i = 0; i < count; i++) {
- arr->va_ptrs[i] = kvzalloc(size, GFP_KERNEL);
-
- if (!arr->va_ptrs[i]) {
- cfs_array_free((void *)&arr->va_ptrs[0]);
- return NULL;
- }
- }
-
- return (void *)&arr->va_ptrs[0];
-}
-EXPORT_SYMBOL(cfs_array_alloc);
diff --git a/drivers/staging/lustre/lnet/libcfs/libcfs_string.c b/drivers/staging/lustre/lnet/libcfs/libcfs_string.c
deleted file mode 100644
index 442889a3d729..000000000000
--- a/drivers/staging/lustre/lnet/libcfs/libcfs_string.c
+++ /dev/null
@@ -1,556 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/*
- * GPL HEADER START
- *
- * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 only,
- * as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * General Public License version 2 for more details (a copy is included
- * in the LICENSE file that accompanied this code).
- *
- * You should have received a copy of the GNU General Public License
- * version 2 along with this program; If not, see
- * http://www.gnu.org/licenses/gpl-2.0.html
- *
- * GPL HEADER END
- */
-/*
- * Copyright (c) 2008, 2010, Oracle and/or its affiliates. All rights reserved.
- * Use is subject to license terms.
- *
- * Copyright (c) 2012, 2015 Intel Corporation.
- */
-/*
- * This file is part of Lustre, http://www.lustre.org/
- * Lustre is a trademark of Sun Microsystems, Inc.
- *
- * String manipulation functions.
- *
- * libcfs/libcfs/libcfs_string.c
- *
- * Author: Nathan Rutman <nathan.rutman@sun.com>
- */
-
-#include <linux/libcfs/libcfs.h>
-
-/* Convert a text string to a bitmask */
-int cfs_str2mask(const char *str, const char *(*bit2str)(int bit),
- int *oldmask, int minmask, int allmask)
-{
- const char *debugstr;
- char op = '\0';
- int newmask = minmask, i, len, found = 0;
-
- /* <str> must be a list of tokens separated by whitespace
- * and optionally an operator ('+' or '-'). If an operator
- * appears first in <str>, '*oldmask' is used as the starting point
- * (relative), otherwise minmask is used (absolute). An operator
- * applies to all following tokens up to the next operator.
- */
- while (*str != '\0') {
- while (isspace(*str))
- str++;
- if (*str == '\0')
- break;
- if (*str == '+' || *str == '-') {
- op = *str++;
- if (!found)
- /* only if first token is relative */
- newmask = *oldmask;
- while (isspace(*str))
- str++;
- if (*str == '\0') /* trailing op */
- return -EINVAL;
- }
-
- /* find token length */
- len = 0;
- while (str[len] != '\0' && !isspace(str[len]) &&
- str[len] != '+' && str[len] != '-')
- len++;
-
- /* match token */
- found = 0;
- for (i = 0; i < 32; i++) {
- debugstr = bit2str(i);
- if (debugstr && strlen(debugstr) == len &&
- !strncasecmp(str, debugstr, len)) {
- if (op == '-')
- newmask &= ~(1 << i);
- else
- newmask |= (1 << i);
- found = 1;
- break;
- }
- }
- if (!found && len == 3 &&
- !strncasecmp(str, "ALL", len)) {
- if (op == '-')
- newmask = minmask;
- else
- newmask = allmask;
- found = 1;
- }
- if (!found) {
- CWARN("unknown mask '%.*s'.\n"
- "mask usage: [+|-]<all|type> ...\n", len, str);
- return -EINVAL;
- }
- str += len;
- }
-
- *oldmask = newmask;
- return 0;
-}
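
To make the operator semantics concrete, a hedged sketch with a hypothetical bit2str callback (demo_bit2str, demo_str2mask and the token names are illustrative, not part of libcfs):

/* hypothetical bit2str callback mapping bits 0..2 to token names */
static const char *demo_bit2str(int bit)
{
	static const char * const names[] = { "net", "trace", "malloc" };

	return bit < 3 ? names[bit] : NULL;
}

static void demo_str2mask(void)
{
	int mask = 0;

	/* absolute form: no leading operator, mask is rebuilt from minmask */
	cfs_str2mask("net malloc", demo_bit2str, &mask, 0, 0x7);
	/* mask == 0x5: bits for "net" and "malloc" */

	/* relative form: a leading '+'/'-' starts from the current mask */
	cfs_str2mask("-net +trace", demo_bit2str, &mask, 0, 0x7);
	/* mask == 0x6: "net" cleared, "trace" added */
}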
-
-/* get the first string out of @str */
-char *cfs_firststr(char *str, size_t size)
-{
- size_t i = 0;
- char *end;
-
- /* trim leading spaces */
- while (i < size && *str && isspace(*str)) {
- ++i;
- ++str;
- }
-
- /* string with all spaces */
- if (*str == '\0')
- goto out;
-
- end = str;
- while (i < size && *end != '\0' && !isspace(*end)) {
- ++i;
- ++end;
- }
-
- *end = '\0';
-out:
- return str;
-}
-EXPORT_SYMBOL(cfs_firststr);
-
-/**
- * Extracts tokens from strings.
- *
- * Looks for \a delim in string \a next, sets \a res to point to
- * substring before the delimiter, sets \a next right after the found
- * delimiter.
- *
- * \retval 1 if \a res points to a string of non-whitespace characters
- * \retval 0 otherwise
- */
-int
-cfs_gettok(struct cfs_lstr *next, char delim, struct cfs_lstr *res)
-{
- char *end;
-
- if (!next->ls_str)
- return 0;
-
- /* skip leading whitespace */
- while (next->ls_len) {
- if (!isspace(*next->ls_str))
- break;
- next->ls_str++;
- next->ls_len--;
- }
-
- if (!next->ls_len) /* whitespace only */
- return 0;
-
- if (*next->ls_str == delim) {
- /* first non-whitespace character is the delimiter */
- return 0;
- }
-
- res->ls_str = next->ls_str;
- end = memchr(next->ls_str, delim, next->ls_len);
- if (!end) {
- /* the delimiter is not present in the string */
- end = next->ls_str + next->ls_len;
- next->ls_str = NULL;
- } else {
- next->ls_str = end + 1;
- next->ls_len -= (end - res->ls_str + 1);
- }
-
- /* skip trailing whitespace */
- while (--end != res->ls_str) {
- if (!isspace(*end))
- break;
- }
-
- res->ls_len = end - res->ls_str + 1;
- return 1;
-}
-EXPORT_SYMBOL(cfs_gettok);
-
-/**
- * Converts string to integer.
- *
- * Accepts decimal and hexadecimal number recordings.
- *
- * \retval 1 if first \a nob chars of \a str convert to decimal or
- * hexadecimal integer in the range [\a min, \a max]
- * \retval 0 otherwise
- */
-int
-cfs_str2num_check(char *str, int nob, unsigned int *num,
- unsigned int min, unsigned int max)
-{
- bool all_numbers = true;
- char *endp, cache;
- int rc;
-
- /*
- * kstrtouint() can only handle strings composed
- * entirely of digits. We need to scan the string
- * passed in for the first non-digit character
- * and end the string at that location. If we
- * don't find any non-digit character we still
- * need to place a '\0' at position nob, since
- * we are not interested in the rest of the
- * string, which is longer than nob in size.
- * Afterwards, the character at the position
- * where we placed '\0' must be restored.
- */
- for (endp = str; endp < str + nob; endp++) {
- if (!isdigit(*endp)) {
- all_numbers = false;
- break;
- }
- }
- cache = *endp;
- *endp = '\0';
-
- rc = kstrtouint(str, 10, num);
- *endp = cache;
- if (rc || !all_numbers)
- return 0;
-
- return (*num >= min && *num <= max);
-}
-EXPORT_SYMBOL(cfs_str2num_check);
-
-/**
- * Parses \<range_expr\> token of the syntax. If \a bracketed is false,
- * \a src should only have a single token which can be \<number\> or \*
- *
- * \retval 0 if \a src parses to
- * \<number\> |
- * \<number\> '-' \<number\> |
- * \<number\> '-' \<number\> '/' \<number\>
- * in which case \a expr is set to the allocated range_expr with
- * range_expr::re_lo, range_expr::re_hi and range_expr::re_stride
- * initialized
- * \retval -EINVAL or -ENOMEM otherwise
- */
-static int
-cfs_range_expr_parse(struct cfs_lstr *src, unsigned int min, unsigned int max,
- int bracketed, struct cfs_range_expr **expr)
-{
- struct cfs_range_expr *re;
- struct cfs_lstr tok;
-
- re = kzalloc(sizeof(*re), GFP_NOFS);
- if (!re)
- return -ENOMEM;
-
- if (src->ls_len == 1 && src->ls_str[0] == '*') {
- re->re_lo = min;
- re->re_hi = max;
- re->re_stride = 1;
- goto out;
- }
-
- if (cfs_str2num_check(src->ls_str, src->ls_len,
- &re->re_lo, min, max)) {
- /* <number> is parsed */
- re->re_hi = re->re_lo;
- re->re_stride = 1;
- goto out;
- }
-
- if (!bracketed || !cfs_gettok(src, '-', &tok))
- goto failed;
-
- if (!cfs_str2num_check(tok.ls_str, tok.ls_len,
- &re->re_lo, min, max))
- goto failed;
-
- /* <number> - */
- if (cfs_str2num_check(src->ls_str, src->ls_len,
- &re->re_hi, min, max)) {
- /* <number> - <number> is parsed */
- re->re_stride = 1;
- goto out;
- }
-
- /* go to check <number> '-' <number> '/' <number> */
- if (cfs_gettok(src, '/', &tok)) {
- if (!cfs_str2num_check(tok.ls_str, tok.ls_len,
- &re->re_hi, min, max))
- goto failed;
-
- /* <number> - <number> / ... */
- if (cfs_str2num_check(src->ls_str, src->ls_len,
- &re->re_stride, min, max)) {
- /* <number> - <number> / <number> is parsed */
- goto out;
- }
- }
-
- out:
- *expr = re;
- return 0;
-
- failed:
- kfree(re);
- return -EINVAL;
-}
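
For illustration, a hedged sketch of parsing a strided range (demo_parse is hypothetical; since cfs_range_expr_parse() is static, such a caller would live in this file):

static int demo_parse(void)
{
	struct cfs_range_expr *expr;
	char buf[] = "0-12/4";	/* must be writable: parsing patches in '\0' */
	struct cfs_lstr src = { .ls_str = buf, .ls_len = sizeof(buf) - 1 };
	int rc;

	/* parses <number> '-' <number> '/' <number> within [0, 15] */
	rc = cfs_range_expr_parse(&src, 0, 15, 1, &expr);
	if (rc)
		return rc;

	/* expr->re_lo == 0, expr->re_hi == 12, expr->re_stride == 4;
	 * the expression matches 0, 4, 8 and 12
	 */
	kfree(expr);
	return 0;
}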
-
-/**
- * Print the range expression \a re into specified \a buffer.
- * If \a bracketed is true, expression does not need additional
- * brackets.
- *
- * \retval number of characters written
- */
-static int
-cfs_range_expr_print(char *buffer, int count, struct cfs_range_expr *expr,
- bool bracketed)
-{
- int i;
- char s[] = "[";
- char e[] = "]";
-
- if (bracketed) {
- s[0] = '\0';
- e[0] = '\0';
- }
-
- if (expr->re_lo == expr->re_hi)
- i = scnprintf(buffer, count, "%u", expr->re_lo);
- else if (expr->re_stride == 1)
- i = scnprintf(buffer, count, "%s%u-%u%s",
- s, expr->re_lo, expr->re_hi, e);
- else
- i = scnprintf(buffer, count, "%s%u-%u/%u%s",
- s, expr->re_lo, expr->re_hi, expr->re_stride, e);
- return i;
-}
-
-/**
- * Print a list of range expressions (\a expr_list) into specified \a buffer.
- * If the list contains several expressions, separate them with comma
- * and surround the list with brackets.
- *
- * \retval number of characters written
- */
-int
-cfs_expr_list_print(char *buffer, int count, struct cfs_expr_list *expr_list)
-{
- struct cfs_range_expr *expr;
- int i = 0, j = 0;
- int numexprs = 0;
-
- if (count <= 0)
- return 0;
-
- list_for_each_entry(expr, &expr_list->el_exprs, re_link)
- numexprs++;
-
- if (numexprs > 1)
- i += scnprintf(buffer + i, count - i, "[");
-
- list_for_each_entry(expr, &expr_list->el_exprs, re_link) {
- if (j++)
- i += scnprintf(buffer + i, count - i, ",");
- i += cfs_range_expr_print(buffer + i, count - i, expr,
- numexprs > 1);
- }
-
- if (numexprs > 1)
- i += scnprintf(buffer + i, count - i, "]");
-
- return i;
-}
-EXPORT_SYMBOL(cfs_expr_list_print);
-
-/**
- * Matches value (\a value) against ranges expression list \a expr_list.
- *
- * \retval 1 if \a value matches
- * \retval 0 otherwise
- */
-int
-cfs_expr_list_match(u32 value, struct cfs_expr_list *expr_list)
-{
- struct cfs_range_expr *expr;
-
- list_for_each_entry(expr, &expr_list->el_exprs, re_link) {
- if (value >= expr->re_lo && value <= expr->re_hi &&
- !((value - expr->re_lo) % expr->re_stride))
- return 1;
- }
-
- return 0;
-}
-EXPORT_SYMBOL(cfs_expr_list_match);
-
-/**
- * Convert an expression list (\a expr_list) to an array of all matched values
- *
- * \retval N total number of matched values
- * \retval 0 if the expression list is empty
- * \retval < 0 on failure
- */
-int
-cfs_expr_list_values(struct cfs_expr_list *expr_list, int max, u32 **valpp)
-{
- struct cfs_range_expr *expr;
- u32 *val;
- int count = 0;
- int i;
-
- list_for_each_entry(expr, &expr_list->el_exprs, re_link) {
- for (i = expr->re_lo; i <= expr->re_hi; i++) {
- if (!((i - expr->re_lo) % expr->re_stride))
- count++;
- }
- }
-
- if (!count) /* empty expression list */
- return 0;
-
- if (count > max) {
- CERROR("Number of values %d exceeds max allowed %d\n",
- max, count);
- return -EINVAL;
- }
-
- val = kvmalloc_array(count, sizeof(val[0]), GFP_KERNEL | __GFP_ZERO);
- if (!val)
- return -ENOMEM;
-
- count = 0;
- list_for_each_entry(expr, &expr_list->el_exprs, re_link) {
- for (i = expr->re_lo; i <= expr->re_hi; i++) {
- if (!((i - expr->re_lo) % expr->re_stride))
- val[count++] = i;
- }
- }
-
- *valpp = val;
- return count;
-}
-EXPORT_SYMBOL(cfs_expr_list_values);
-
-/**
- * Frees cfs_range_expr structures of \a expr_list.
- *
- * \retval none
- */
-void
-cfs_expr_list_free(struct cfs_expr_list *expr_list)
-{
- while (!list_empty(&expr_list->el_exprs)) {
- struct cfs_range_expr *expr;
-
- expr = list_entry(expr_list->el_exprs.next,
- struct cfs_range_expr, re_link);
- list_del(&expr->re_link);
- kfree(expr);
- }
-
- kfree(expr_list);
-}
-EXPORT_SYMBOL(cfs_expr_list_free);
-
-/**
- * Parses \<cfs_expr_list\> token of the syntax.
- *
- * \retval 0 if \a str parses to \<number\> | \<expr_list\>
- * \retval -errno otherwise
- */
-int
-cfs_expr_list_parse(char *str, int len, unsigned int min, unsigned int max,
- struct cfs_expr_list **elpp)
-{
- struct cfs_expr_list *expr_list;
- struct cfs_range_expr *expr;
- struct cfs_lstr src;
- int rc;
-
- expr_list = kzalloc(sizeof(*expr_list), GFP_NOFS);
- if (!expr_list)
- return -ENOMEM;
-
- src.ls_str = str;
- src.ls_len = len;
-
- INIT_LIST_HEAD(&expr_list->el_exprs);
-
- if (src.ls_str[0] == '[' &&
- src.ls_str[src.ls_len - 1] == ']') {
- src.ls_str++;
- src.ls_len -= 2;
-
- rc = -EINVAL;
- while (src.ls_str) {
- struct cfs_lstr tok;
-
- if (!cfs_gettok(&src, ',', &tok)) {
- rc = -EINVAL;
- break;
- }
-
- rc = cfs_range_expr_parse(&tok, min, max, 1, &expr);
- if (rc)
- break;
-
- list_add_tail(&expr->re_link, &expr_list->el_exprs);
- }
- } else {
- rc = cfs_range_expr_parse(&src, min, max, 0, &expr);
- if (!rc)
- list_add_tail(&expr->re_link, &expr_list->el_exprs);
- }
-
- if (rc)
- cfs_expr_list_free(expr_list);
- else
- *elpp = expr_list;
-
- return rc;
-}
-EXPORT_SYMBOL(cfs_expr_list_parse);
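
An end-to-end hedged sketch tying the parse, match, expansion and free helpers together (demo_expr_list is hypothetical):

static int demo_expr_list(void)
{
	struct cfs_expr_list *el;
	char str[] = "[0,2-6/2,9]";	/* matches 0, 2, 4, 6 and 9 */
	u32 *vals;
	int rc;

	rc = cfs_expr_list_parse(str, sizeof(str) - 1, 0, 15, &el);
	if (rc)
		return rc;

	LASSERT(cfs_expr_list_match(4, el));	/* in 2-6/2 */
	LASSERT(!cfs_expr_list_match(5, el));	/* off-stride */

	/* expand into an array: vals = { 0, 2, 4, 6, 9 }, rc = 5 */
	rc = cfs_expr_list_values(el, 16, &vals);
	if (rc > 0)
		kvfree(vals);

	cfs_expr_list_free(el);
	return rc < 0 ? rc : 0;
}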
-
-/**
- * Frees cfs_expr_list structures of \a list.
- *
- * For each struct cfs_expr_list structure found on \a list it frees
- * range_expr list attached to it and frees the cfs_expr_list itself.
- *
- * \retval none
- */
-void
-cfs_expr_list_free_list(struct list_head *list)
-{
- struct cfs_expr_list *el;
-
- while (!list_empty(list)) {
- el = list_entry(list->next, struct cfs_expr_list, el_link);
- list_del(&el->el_link);
- cfs_expr_list_free(el);
- }
-}
-EXPORT_SYMBOL(cfs_expr_list_free_list);
diff --git a/drivers/staging/lustre/lnet/libcfs/linux/linux-cpu.c b/drivers/staging/lustre/lnet/libcfs/linux/linux-cpu.c
deleted file mode 100644
index 388521e4e354..000000000000
--- a/drivers/staging/lustre/lnet/libcfs/linux/linux-cpu.c
+++ /dev/null
@@ -1,1079 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/*
- * GPL HEADER START
- *
- * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 only,
- * as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * General Public License version 2 for more details (a copy is included
- * in the LICENSE file that accompanied this code).
- *
- * GPL HEADER END
- */
-/*
- * Copyright (c) 2010, Oracle and/or its affiliates. All rights reserved.
- *
- * Copyright (c) 2012, 2015 Intel Corporation.
- */
-/*
- * This file is part of Lustre, http://www.lustre.org/
- * Lustre is a trademark of Sun Microsystems, Inc.
- *
- * Author: liang@whamcloud.com
- */
-
-#define DEBUG_SUBSYSTEM S_LNET
-
-#include <linux/cpu.h>
-#include <linux/sched.h>
-#include <linux/libcfs/libcfs.h>
-
-#ifdef CONFIG_SMP
-
-/**
- * modparam for setting number of partitions
- *
- * 0 : estimate best value based on cores or NUMA nodes
- * 1 : disable multiple partitions
- * >1 : specify number of partitions
- */
-static int cpu_npartitions;
-module_param(cpu_npartitions, int, 0444);
-MODULE_PARM_DESC(cpu_npartitions, "# of CPU partitions");
-
-/**
- * modparam for setting CPU partition patterns:
- *
- * e.g. "0[0,1,2,3] 1[4,5,6,7]": the number before each bracket is a CPU
- * partition ID, the numbers inside the brackets are processor IDs (core or HT)
- *
- * e.g. "N 0[0,1] 1[2,3]": the leading 'N' means the numbers inside the
- * brackets are NUMA node IDs; the number before each bracket is still a
- * CPU partition ID.
- *
- * e.g. "N": shortcut expression to create CPTs from the NUMA & CPU topology
- *
- * NB: if the user specifies cpu_pattern, cpu_npartitions is ignored
- */
-static char *cpu_pattern = "N";
-module_param(cpu_pattern, charp, 0444);
-MODULE_PARM_DESC(cpu_pattern, "CPU partitions pattern");
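
For a concrete (hedged) illustration on a hypothetical 8-CPU, two-node machine, the three forms above would lay partitions out as follows:

/*
 * cpu_pattern="0[0,1,2,3] 1[4,5,6,7]"
 *	CPT 0 gets cores 0-3, CPT 1 gets cores 4-7
 *
 * cpu_pattern="N 0[0] 1[1]"
 *	CPT 0 gets all CPUs of NUMA node 0, CPT 1 those of node 1
 *
 * cpu_pattern="N"
 *	one CPT per online NUMA node, CPUs follow the topology
 */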
-
-struct cfs_cpt_data {
- /* serialize hotplug etc */
- spinlock_t cpt_lock;
- /* reserved for hotplug */
- unsigned long cpt_version;
- /* mutex to protect cpt_cpumask */
- struct mutex cpt_mutex;
- /* scratch buffer for set/unset_node */
- cpumask_var_t cpt_cpumask;
-};
-
-static struct cfs_cpt_data cpt_data;
-
-static void
-cfs_node_to_cpumask(int node, cpumask_t *mask)
-{
- const cpumask_t *tmp = cpumask_of_node(node);
-
- if (tmp)
- cpumask_copy(mask, tmp);
- else
- cpumask_clear(mask);
-}
-
-void
-cfs_cpt_table_free(struct cfs_cpt_table *cptab)
-{
- int i;
-
- kvfree(cptab->ctb_cpu2cpt);
-
- for (i = 0; cptab->ctb_parts && i < cptab->ctb_nparts; i++) {
- struct cfs_cpu_partition *part = &cptab->ctb_parts[i];
-
- kfree(part->cpt_nodemask);
- free_cpumask_var(part->cpt_cpumask);
- }
-
- kvfree(cptab->ctb_parts);
-
- kfree(cptab->ctb_nodemask);
- free_cpumask_var(cptab->ctb_cpumask);
-
- kfree(cptab);
-}
-EXPORT_SYMBOL(cfs_cpt_table_free);
-
-struct cfs_cpt_table *
-cfs_cpt_table_alloc(unsigned int ncpt)
-{
- struct cfs_cpt_table *cptab;
- int i;
-
- cptab = kzalloc(sizeof(*cptab), GFP_NOFS);
- if (!cptab)
- return NULL;
-
- cptab->ctb_nparts = ncpt;
-
- cptab->ctb_nodemask = kzalloc(sizeof(*cptab->ctb_nodemask),
- GFP_NOFS);
- if (!zalloc_cpumask_var(&cptab->ctb_cpumask, GFP_NOFS) ||
- !cptab->ctb_nodemask)
- goto failed;
-
- cptab->ctb_cpu2cpt = kvmalloc_array(num_possible_cpus(),
- sizeof(cptab->ctb_cpu2cpt[0]),
- GFP_KERNEL);
- if (!cptab->ctb_cpu2cpt)
- goto failed;
-
- memset(cptab->ctb_cpu2cpt, -1,
- num_possible_cpus() * sizeof(cptab->ctb_cpu2cpt[0]));
-
- cptab->ctb_parts = kvmalloc_array(ncpt, sizeof(cptab->ctb_parts[0]),
- GFP_KERNEL);
- if (!cptab->ctb_parts)
- goto failed;
-
- for (i = 0; i < ncpt; i++) {
- struct cfs_cpu_partition *part = &cptab->ctb_parts[i];
-
- part->cpt_nodemask = kzalloc(sizeof(*part->cpt_nodemask),
- GFP_NOFS);
- if (!zalloc_cpumask_var(&part->cpt_cpumask, GFP_NOFS) ||
- !part->cpt_nodemask)
- goto failed;
- }
-
- spin_lock(&cpt_data.cpt_lock);
- /* Reserved for hotplug */
- cptab->ctb_version = cpt_data.cpt_version;
- spin_unlock(&cpt_data.cpt_lock);
-
- return cptab;
-
- failed:
- cfs_cpt_table_free(cptab);
- return NULL;
-}
-EXPORT_SYMBOL(cfs_cpt_table_alloc);
-
-int
-cfs_cpt_table_print(struct cfs_cpt_table *cptab, char *buf, int len)
-{
- char *tmp = buf;
- int rc = 0;
- int i;
- int j;
-
- for (i = 0; i < cptab->ctb_nparts; i++) {
- if (len > 0) {
- rc = snprintf(tmp, len, "%d\t: ", i);
- len -= rc;
- }
-
- if (len <= 0) {
- rc = -EFBIG;
- goto out;
- }
-
- tmp += rc;
- for_each_cpu(j, cptab->ctb_parts[i].cpt_cpumask) {
- rc = snprintf(tmp, len, "%d ", j);
- len -= rc;
- if (len <= 0) {
- rc = -EFBIG;
- goto out;
- }
- tmp += rc;
- }
-
- *tmp = '\n';
- tmp++;
- len--;
- }
-
- out:
- if (rc < 0)
- return rc;
-
- return tmp - buf;
-}
-EXPORT_SYMBOL(cfs_cpt_table_print);
-
-int
-cfs_cpt_number(struct cfs_cpt_table *cptab)
-{
- return cptab->ctb_nparts;
-}
-EXPORT_SYMBOL(cfs_cpt_number);
-
-int
-cfs_cpt_weight(struct cfs_cpt_table *cptab, int cpt)
-{
- LASSERT(cpt == CFS_CPT_ANY || (cpt >= 0 && cpt < cptab->ctb_nparts));
-
- return cpt == CFS_CPT_ANY ?
- cpumask_weight(cptab->ctb_cpumask) :
- cpumask_weight(cptab->ctb_parts[cpt].cpt_cpumask);
-}
-EXPORT_SYMBOL(cfs_cpt_weight);
-
-int
-cfs_cpt_online(struct cfs_cpt_table *cptab, int cpt)
-{
- LASSERT(cpt == CFS_CPT_ANY || (cpt >= 0 && cpt < cptab->ctb_nparts));
-
- return cpt == CFS_CPT_ANY ?
- cpumask_any_and(cptab->ctb_cpumask,
- cpu_online_mask) < nr_cpu_ids :
- cpumask_any_and(cptab->ctb_parts[cpt].cpt_cpumask,
- cpu_online_mask) < nr_cpu_ids;
-}
-EXPORT_SYMBOL(cfs_cpt_online);
-
-cpumask_var_t *
-cfs_cpt_cpumask(struct cfs_cpt_table *cptab, int cpt)
-{
- LASSERT(cpt == CFS_CPT_ANY || (cpt >= 0 && cpt < cptab->ctb_nparts));
-
- return cpt == CFS_CPT_ANY ?
- &cptab->ctb_cpumask : &cptab->ctb_parts[cpt].cpt_cpumask;
-}
-EXPORT_SYMBOL(cfs_cpt_cpumask);
-
-nodemask_t *
-cfs_cpt_nodemask(struct cfs_cpt_table *cptab, int cpt)
-{
- LASSERT(cpt == CFS_CPT_ANY || (cpt >= 0 && cpt < cptab->ctb_nparts));
-
- return cpt == CFS_CPT_ANY ?
- cptab->ctb_nodemask : cptab->ctb_parts[cpt].cpt_nodemask;
-}
-EXPORT_SYMBOL(cfs_cpt_nodemask);
-
-int
-cfs_cpt_set_cpu(struct cfs_cpt_table *cptab, int cpt, int cpu)
-{
- int node;
-
- LASSERT(cpt >= 0 && cpt < cptab->ctb_nparts);
-
- if (cpu < 0 || cpu >= nr_cpu_ids || !cpu_online(cpu)) {
- CDEBUG(D_INFO, "CPU %d is invalid or it's offline\n", cpu);
- return 0;
- }
-
- if (cptab->ctb_cpu2cpt[cpu] != -1) {
- CDEBUG(D_INFO, "CPU %d is already in partition %d\n",
- cpu, cptab->ctb_cpu2cpt[cpu]);
- return 0;
- }
-
- cptab->ctb_cpu2cpt[cpu] = cpt;
-
- LASSERT(!cpumask_test_cpu(cpu, cptab->ctb_cpumask));
- LASSERT(!cpumask_test_cpu(cpu, cptab->ctb_parts[cpt].cpt_cpumask));
-
- cpumask_set_cpu(cpu, cptab->ctb_cpumask);
- cpumask_set_cpu(cpu, cptab->ctb_parts[cpt].cpt_cpumask);
-
- node = cpu_to_node(cpu);
-
- /* first CPU of @node in this CPT table */
- if (!node_isset(node, *cptab->ctb_nodemask))
- node_set(node, *cptab->ctb_nodemask);
-
- /* first CPU of @node in this partition */
- if (!node_isset(node, *cptab->ctb_parts[cpt].cpt_nodemask))
- node_set(node, *cptab->ctb_parts[cpt].cpt_nodemask);
-
- return 1;
-}
-EXPORT_SYMBOL(cfs_cpt_set_cpu);
-
-void
-cfs_cpt_unset_cpu(struct cfs_cpt_table *cptab, int cpt, int cpu)
-{
- int node;
- int i;
-
- LASSERT(cpt == CFS_CPT_ANY || (cpt >= 0 && cpt < cptab->ctb_nparts));
-
- if (cpu < 0 || cpu >= nr_cpu_ids) {
- CDEBUG(D_INFO, "Invalid CPU id %d\n", cpu);
- return;
- }
-
- if (cpt == CFS_CPT_ANY) {
- /* caller doesn't know the partition ID */
- cpt = cptab->ctb_cpu2cpt[cpu];
- if (cpt < 0) { /* not set in this CPT-table */
- CDEBUG(D_INFO, "Try to unset cpu %d which is not in CPT-table %p\n",
- cpt, cptab);
- return;
- }
-
- } else if (cpt != cptab->ctb_cpu2cpt[cpu]) {
- CDEBUG(D_INFO,
- "CPU %d is not in cpu-partition %d\n", cpu, cpt);
- return;
- }
-
- LASSERT(cpumask_test_cpu(cpu, cptab->ctb_parts[cpt].cpt_cpumask));
- LASSERT(cpumask_test_cpu(cpu, cptab->ctb_cpumask));
-
- cpumask_clear_cpu(cpu, cptab->ctb_parts[cpt].cpt_cpumask);
- cpumask_clear_cpu(cpu, cptab->ctb_cpumask);
- cptab->ctb_cpu2cpt[cpu] = -1;
-
- node = cpu_to_node(cpu);
-
- LASSERT(node_isset(node, *cptab->ctb_parts[cpt].cpt_nodemask));
- LASSERT(node_isset(node, *cptab->ctb_nodemask));
-
- for_each_cpu(i, cptab->ctb_parts[cpt].cpt_cpumask) {
- /* does this CPT have another CPU belonging to this node? */
- if (cpu_to_node(i) == node)
- break;
- }
-
- if (i >= nr_cpu_ids)
- node_clear(node, *cptab->ctb_parts[cpt].cpt_nodemask);
-
- for_each_cpu(i, cptab->ctb_cpumask) {
- /* does this CPT-table have another CPU belonging to this node? */
- if (cpu_to_node(i) == node)
- break;
- }
-
- if (i >= nr_cpu_ids)
- node_clear(node, *cptab->ctb_nodemask);
-}
-EXPORT_SYMBOL(cfs_cpt_unset_cpu);
-
-int
-cfs_cpt_set_cpumask(struct cfs_cpt_table *cptab, int cpt, cpumask_t *mask)
-{
- int i;
-
- if (!cpumask_weight(mask) ||
- cpumask_any_and(mask, cpu_online_mask) >= nr_cpu_ids) {
- CDEBUG(D_INFO, "No online CPU is found in the CPU mask for CPU partition %d\n",
- cpt);
- return 0;
- }
-
- for_each_cpu(i, mask) {
- if (!cfs_cpt_set_cpu(cptab, cpt, i))
- return 0;
- }
-
- return 1;
-}
-EXPORT_SYMBOL(cfs_cpt_set_cpumask);
-
-void
-cfs_cpt_unset_cpumask(struct cfs_cpt_table *cptab, int cpt, cpumask_t *mask)
-{
- int i;
-
- for_each_cpu(i, mask)
- cfs_cpt_unset_cpu(cptab, cpt, i);
-}
-EXPORT_SYMBOL(cfs_cpt_unset_cpumask);
-
-int
-cfs_cpt_set_node(struct cfs_cpt_table *cptab, int cpt, int node)
-{
- int rc;
-
- if (node < 0 || node >= MAX_NUMNODES) {
- CDEBUG(D_INFO,
- "Invalid NUMA id %d for CPU partition %d\n", node, cpt);
- return 0;
- }
-
- mutex_lock(&cpt_data.cpt_mutex);
-
- cfs_node_to_cpumask(node, cpt_data.cpt_cpumask);
-
- rc = cfs_cpt_set_cpumask(cptab, cpt, cpt_data.cpt_cpumask);
-
- mutex_unlock(&cpt_data.cpt_mutex);
-
- return rc;
-}
-EXPORT_SYMBOL(cfs_cpt_set_node);
-
-void
-cfs_cpt_unset_node(struct cfs_cpt_table *cptab, int cpt, int node)
-{
- if (node < 0 || node >= MAX_NUMNODES) {
- CDEBUG(D_INFO,
- "Invalid NUMA id %d for CPU partition %d\n", node, cpt);
- return;
- }
-
- mutex_lock(&cpt_data.cpt_mutex);
-
- cfs_node_to_cpumask(node, cpt_data.cpt_cpumask);
-
- cfs_cpt_unset_cpumask(cptab, cpt, cpt_data.cpt_cpumask);
-
- mutex_unlock(&cpt_data.cpt_mutex);
-}
-EXPORT_SYMBOL(cfs_cpt_unset_node);
-
-int
-cfs_cpt_set_nodemask(struct cfs_cpt_table *cptab, int cpt, nodemask_t *mask)
-{
- int i;
-
- for_each_node_mask(i, *mask) {
- if (!cfs_cpt_set_node(cptab, cpt, i))
- return 0;
- }
-
- return 1;
-}
-EXPORT_SYMBOL(cfs_cpt_set_nodemask);
-
-void
-cfs_cpt_unset_nodemask(struct cfs_cpt_table *cptab, int cpt, nodemask_t *mask)
-{
- int i;
-
- for_each_node_mask(i, *mask)
- cfs_cpt_unset_node(cptab, cpt, i);
-}
-EXPORT_SYMBOL(cfs_cpt_unset_nodemask);
-
-void
-cfs_cpt_clear(struct cfs_cpt_table *cptab, int cpt)
-{
- int last;
- int i;
-
- if (cpt == CFS_CPT_ANY) {
- last = cptab->ctb_nparts - 1;
- cpt = 0;
- } else {
- last = cpt;
- }
-
- for (; cpt <= last; cpt++) {
- for_each_cpu(i, cptab->ctb_parts[cpt].cpt_cpumask)
- cfs_cpt_unset_cpu(cptab, cpt, i);
- }
-}
-EXPORT_SYMBOL(cfs_cpt_clear);
-
-int
-cfs_cpt_spread_node(struct cfs_cpt_table *cptab, int cpt)
-{
- nodemask_t *mask;
- int weight;
- int rotor;
- int node;
-
- /* convert CPU partition ID to HW node id */
-
- if (cpt < 0 || cpt >= cptab->ctb_nparts) {
- mask = cptab->ctb_nodemask;
- rotor = cptab->ctb_spread_rotor++;
- } else {
- mask = cptab->ctb_parts[cpt].cpt_nodemask;
- rotor = cptab->ctb_parts[cpt].cpt_spread_rotor++;
- }
-
- weight = nodes_weight(*mask);
- LASSERT(weight > 0);
-
- rotor %= weight;
-
- for_each_node_mask(node, *mask) {
- if (!rotor--)
- return node;
- }
-
- LBUG();
- return 0;
-}
-EXPORT_SYMBOL(cfs_cpt_spread_node);
-
-int
-cfs_cpt_current(struct cfs_cpt_table *cptab, int remap)
-{
- int cpu;
- int cpt;
-
- preempt_disable();
- cpu = smp_processor_id();
- cpt = cptab->ctb_cpu2cpt[cpu];
-
- if (cpt < 0 && remap) {
- /* don't return a negative value, for the safety of upper layers;
- * instead, map the unknown CPU to a valid partition ID
- */
- cpt = cpu % cptab->ctb_nparts;
- }
- preempt_enable();
- return cpt;
-}
-EXPORT_SYMBOL(cfs_cpt_current);
-
-int
-cfs_cpt_of_cpu(struct cfs_cpt_table *cptab, int cpu)
-{
- LASSERT(cpu >= 0 && cpu < nr_cpu_ids);
-
- return cptab->ctb_cpu2cpt[cpu];
-}
-EXPORT_SYMBOL(cfs_cpt_of_cpu);
-
-int
-cfs_cpt_bind(struct cfs_cpt_table *cptab, int cpt)
-{
- cpumask_var_t *cpumask;
- nodemask_t *nodemask;
- int rc;
- int i;
-
- LASSERT(cpt == CFS_CPT_ANY || (cpt >= 0 && cpt < cptab->ctb_nparts));
-
- if (cpt == CFS_CPT_ANY) {
- cpumask = &cptab->ctb_cpumask;
- nodemask = cptab->ctb_nodemask;
- } else {
- cpumask = &cptab->ctb_parts[cpt].cpt_cpumask;
- nodemask = cptab->ctb_parts[cpt].cpt_nodemask;
- }
-
- if (cpumask_any_and(*cpumask, cpu_online_mask) >= nr_cpu_ids) {
- CERROR("No online CPU found in CPU partition %d, did someone do CPU hotplug on system? You might need to reload Lustre modules to keep system working well.\n",
- cpt);
- return -EINVAL;
- }
-
- for_each_online_cpu(i) {
- if (cpumask_test_cpu(i, *cpumask))
- continue;
-
- rc = set_cpus_allowed_ptr(current, *cpumask);
- set_mems_allowed(*nodemask);
- if (!rc)
- schedule(); /* switch to allowed CPU */
-
- return rc;
- }
-
- /* don't need to set affinity because all online CPUs are covered */
- return 0;
-}
-EXPORT_SYMBOL(cfs_cpt_bind);
-
-/**
- * Choose at most \a number CPUs from \a node and set them in \a cpt.
- * We always prefer to choose CPUs in the same core/socket.
- */
-static int
-cfs_cpt_choose_ncpus(struct cfs_cpt_table *cptab, int cpt,
- cpumask_t *node, int number)
-{
- cpumask_var_t socket;
- cpumask_var_t core;
- int rc = 0;
- int cpu;
-
- LASSERT(number > 0);
-
- if (number >= cpumask_weight(node)) {
- while (!cpumask_empty(node)) {
- cpu = cpumask_first(node);
-
- rc = cfs_cpt_set_cpu(cptab, cpt, cpu);
- if (!rc)
- return -EINVAL;
- cpumask_clear_cpu(cpu, node);
- }
- return 0;
- }
-
- /*
- * Allocate scratch buffers
- * As a cpumask_var_t cannot be statically initialized, we need
- * to allocate both before we can risk trying to free either
- */
- if (!zalloc_cpumask_var(&socket, GFP_NOFS))
- rc = -ENOMEM;
- if (!zalloc_cpumask_var(&core, GFP_NOFS))
- rc = -ENOMEM;
- if (rc)
- goto out;
-
- while (!cpumask_empty(node)) {
- cpu = cpumask_first(node);
-
- /* get cpumask for cores in the same socket */
- cpumask_copy(socket, topology_core_cpumask(cpu));
- cpumask_and(socket, socket, node);
-
- LASSERT(!cpumask_empty(socket));
-
- while (!cpumask_empty(socket)) {
- int i;
-
- /* get cpumask for hts in the same core */
- cpumask_copy(core, topology_sibling_cpumask(cpu));
- cpumask_and(core, core, node);
-
- LASSERT(!cpumask_empty(core));
-
- for_each_cpu(i, core) {
- cpumask_clear_cpu(i, socket);
- cpumask_clear_cpu(i, node);
-
- rc = cfs_cpt_set_cpu(cptab, cpt, i);
- if (!rc) {
- rc = -EINVAL;
- goto out;
- }
-
- if (!--number)
- goto out;
- }
- cpu = cpumask_first(socket);
- }
- }
-
-out:
- free_cpumask_var(socket);
- free_cpumask_var(core);
- return rc;
-}
-
-#define CPT_WEIGHT_MIN 4u
-
-static unsigned int
-cfs_cpt_num_estimate(void)
-{
- unsigned int nnode = num_online_nodes();
- unsigned int ncpu = num_online_cpus();
- unsigned int ncpt;
-
- if (ncpu <= CPT_WEIGHT_MIN) {
- ncpt = 1;
- goto out;
- }
-
- /* generate a reasonable number of CPU partitions based on the total
- * number of CPUs; the preferred N is a power of 2 and satisfies:
- * 2 * (N - 1)^2 < NCPUS <= 2 * N^2
- */
- for (ncpt = 2; ncpu > 2 * ncpt * ncpt; ncpt <<= 1)
- ;
-
- if (ncpt <= nnode) { /* fat numa system */
- while (nnode > ncpt)
- nnode >>= 1;
-
- } else { /* ncpt > nnode */
- while ((nnode << 1) <= ncpt)
- nnode <<= 1;
- }
-
- ncpt = nnode;
-
-out:
-#if (BITS_PER_LONG == 32)
- /* configuring many CPU partitions on a 32-bit system could consume
- * too much memory
- */
- ncpt = min(2U, ncpt);
-#endif
- while (ncpu % ncpt)
- ncpt--; /* worst case is 1 */
-
- return ncpt;
-}
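
To see what the estimate produces, here is a hedged userspace mirror of the routine above (illustrative only; CPT_WEIGHT_MIN is inlined as 4 and the 32-bit clamp is omitted):

#include <stdio.h>

/* userspace re-implementation of cfs_cpt_num_estimate(), for illustration */
static unsigned int estimate(unsigned int ncpu, unsigned int nnode)
{
	unsigned int ncpt;

	if (ncpu <= 4)			/* CPT_WEIGHT_MIN */
		return 1;

	/* smallest power-of-2 N with ncpu <= 2 * N^2 */
	for (ncpt = 2; ncpu > 2 * ncpt * ncpt; ncpt <<= 1)
		;

	if (ncpt <= nnode) {		/* fat NUMA system */
		while (nnode > ncpt)
			nnode >>= 1;
	} else {			/* ncpt > nnode */
		while ((nnode << 1) <= ncpt)
			nnode <<= 1;
	}

	ncpt = nnode;
	while (ncpu % ncpt)
		ncpt--;			/* worst case is 1 */

	return ncpt;
}

int main(void)
{
	printf("%u\n", estimate(16, 2));	/* prints 4 */
	printf("%u\n", estimate(64, 2));	/* prints 8 */
	return 0;
}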
-
-static struct cfs_cpt_table *
-cfs_cpt_table_create(int ncpt)
-{
- struct cfs_cpt_table *cptab = NULL;
- cpumask_var_t mask;
- int cpt = 0;
- int num;
- int rc;
- int i;
-
- rc = cfs_cpt_num_estimate();
- if (ncpt <= 0)
- ncpt = rc;
-
- if (ncpt > num_online_cpus() || ncpt > 4 * rc) {
- CWARN("CPU partition number %d is larger than suggested value (%d), your system may have performance issue or run out of memory while under pressure\n",
- ncpt, rc);
- }
-
- if (num_online_cpus() % ncpt) {
- CERROR("CPU number %d is not multiple of cpu_npartition %d, please try different cpu_npartitions value or set pattern string by cpu_pattern=STRING\n",
- (int)num_online_cpus(), ncpt);
- goto failed;
- }
-
- cptab = cfs_cpt_table_alloc(ncpt);
- if (!cptab) {
- CERROR("Failed to allocate CPU map(%d)\n", ncpt);
- goto failed;
- }
-
- num = num_online_cpus() / ncpt;
- if (!num) {
- CERROR("CPU changed while setting CPU partition\n");
- goto failed;
- }
-
- if (!zalloc_cpumask_var(&mask, GFP_NOFS)) {
- CERROR("Failed to allocate scratch cpumask\n");
- goto failed;
- }
-
- for_each_online_node(i) {
- cfs_node_to_cpumask(i, mask);
-
- while (!cpumask_empty(mask)) {
- struct cfs_cpu_partition *part;
- int n;
-
- /*
- * Each emulated NUMA node has all allowed CPUs in
- * the mask.
- * End loop when all partitions have assigned CPUs.
- */
- if (cpt == ncpt)
- break;
-
- part = &cptab->ctb_parts[cpt];
-
- n = num - cpumask_weight(part->cpt_cpumask);
- LASSERT(n > 0);
-
- rc = cfs_cpt_choose_ncpus(cptab, cpt, mask, n);
- if (rc < 0)
- goto failed_mask;
-
- LASSERT(num >= cpumask_weight(part->cpt_cpumask));
- if (num == cpumask_weight(part->cpt_cpumask))
- cpt++;
- }
- }
-
- if (cpt != ncpt ||
- num != cpumask_weight(cptab->ctb_parts[ncpt - 1].cpt_cpumask)) {
- CERROR("Expect %d(%d) CPU partitions but got %d(%d), CPU hotplug/unplug while setting?\n",
- cptab->ctb_nparts, num, cpt,
- cpumask_weight(cptab->ctb_parts[ncpt - 1].cpt_cpumask));
- goto failed_mask;
- }
-
- free_cpumask_var(mask);
-
- return cptab;
-
- failed_mask:
- free_cpumask_var(mask);
- failed:
- CERROR("Failed to setup CPU-partition-table with %d CPU-partitions, online HW nodes: %d, HW cpus: %d.\n",
- ncpt, num_online_nodes(), num_online_cpus());
-
- if (cptab)
- cfs_cpt_table_free(cptab);
-
- return NULL;
-}
-
-static struct cfs_cpt_table *
-cfs_cpt_table_create_pattern(char *pattern)
-{
- struct cfs_cpt_table *cptab;
- char *str;
- int node = 0;
- int high;
- int ncpt = 0;
- int cpt;
- int rc;
- int c;
- int i;
-
- str = strim(pattern);
- if (*str == 'n' || *str == 'N') {
- pattern = str + 1;
- if (*pattern != '\0') {
- node = 1;
- } else { /* shortcut to create CPT from NUMA & CPU topology */
- node = -1;
- ncpt = num_online_nodes();
- }
- }
-
- if (!ncpt) { /* scan for brackets; each one marks a partition */
- for (str = pattern;; str++, ncpt++) {
- str = strchr(str, '[');
- if (!str)
- break;
- }
- }
-
- if (!ncpt ||
- (node && ncpt > num_online_nodes()) ||
- (!node && ncpt > num_online_cpus())) {
- CERROR("Invalid pattern %s, or too many partitions %d\n",
- pattern, ncpt);
- return NULL;
- }
-
- cptab = cfs_cpt_table_alloc(ncpt);
- if (!cptab) {
- CERROR("Failed to allocate cpu partition table\n");
- return NULL;
- }
-
- if (node < 0) { /* shortcut to create CPT from NUMA & CPU topology */
- cpt = 0;
-
- for_each_online_node(i) {
- if (cpt >= ncpt) {
- CERROR("CPU changed while setting CPU partition table, %d/%d\n",
- cpt, ncpt);
- goto failed;
- }
-
- rc = cfs_cpt_set_node(cptab, cpt++, i);
- if (!rc)
- goto failed;
- }
- return cptab;
- }
-
- high = node ? MAX_NUMNODES - 1 : nr_cpu_ids - 1;
-
- for (str = strim(pattern), c = 0;; c++) {
- struct cfs_range_expr *range;
- struct cfs_expr_list *el;
- char *bracket = strchr(str, '[');
- int n;
-
- if (!bracket) {
- if (*str) {
- CERROR("Invalid pattern %s\n", str);
- goto failed;
- }
- if (c != ncpt) {
- CERROR("expect %d partitions but found %d\n",
- ncpt, c);
- goto failed;
- }
- break;
- }
-
- if (sscanf(str, "%d%n", &cpt, &n) < 1) {
- CERROR("Invalid cpu pattern %s\n", str);
- goto failed;
- }
-
- if (cpt < 0 || cpt >= ncpt) {
- CERROR("Invalid partition id %d, total partitions %d\n",
- cpt, ncpt);
- goto failed;
- }
-
- if (cfs_cpt_weight(cptab, cpt)) {
- CERROR("Partition %d has already been set.\n", cpt);
- goto failed;
- }
-
- str = strim(str + n);
- if (str != bracket) {
- CERROR("Invalid pattern %s\n", str);
- goto failed;
- }
-
- bracket = strchr(str, ']');
- if (!bracket) {
- CERROR("missing right bracket for cpt %d, %s\n",
- cpt, str);
- goto failed;
- }
-
- if (cfs_expr_list_parse(str, (bracket - str) + 1,
- 0, high, &el)) {
- CERROR("Can't parse number range: %s\n", str);
- goto failed;
- }
-
- list_for_each_entry(range, &el->el_exprs, re_link) {
- for (i = range->re_lo; i <= range->re_hi; i++) {
- if ((i - range->re_lo) % range->re_stride)
- continue;
-
- rc = node ? cfs_cpt_set_node(cptab, cpt, i) :
- cfs_cpt_set_cpu(cptab, cpt, i);
- if (!rc) {
- cfs_expr_list_free(el);
- goto failed;
- }
- }
- }
-
- cfs_expr_list_free(el);
-
- if (!cfs_cpt_online(cptab, cpt)) {
- CERROR("No online CPU is found on partition %d\n", cpt);
- goto failed;
- }
-
- str = strim(bracket + 1);
- }
-
- return cptab;
-
- failed:
- cfs_cpt_table_free(cptab);
- return NULL;
-}
-
-#ifdef CONFIG_HOTPLUG_CPU
-static enum cpuhp_state lustre_cpu_online;
-
-static void cfs_cpu_incr_cpt_version(void)
-{
- spin_lock(&cpt_data.cpt_lock);
- cpt_data.cpt_version++;
- spin_unlock(&cpt_data.cpt_lock);
-}
-
-static int cfs_cpu_online(unsigned int cpu)
-{
- cfs_cpu_incr_cpt_version();
- return 0;
-}
-
-static int cfs_cpu_dead(unsigned int cpu)
-{
- bool warn;
-
- cfs_cpu_incr_cpt_version();
-
- mutex_lock(&cpt_data.cpt_mutex);
- /* if all HTs in a core are offline, it may break affinity */
- cpumask_copy(cpt_data.cpt_cpumask, topology_sibling_cpumask(cpu));
- warn = cpumask_any_and(cpt_data.cpt_cpumask,
- cpu_online_mask) >= nr_cpu_ids;
- mutex_unlock(&cpt_data.cpt_mutex);
- CDEBUG(warn ? D_WARNING : D_INFO,
- "Lustre: can't support CPU plug-out well now, performance and stability could be impacted [CPU %u]\n",
- cpu);
- return 0;
-}
-#endif
-
-void
-cfs_cpu_fini(void)
-{
- if (cfs_cpt_table)
- cfs_cpt_table_free(cfs_cpt_table);
-
-#ifdef CONFIG_HOTPLUG_CPU
- if (lustre_cpu_online > 0)
- cpuhp_remove_state_nocalls(lustre_cpu_online);
- cpuhp_remove_state_nocalls(CPUHP_LUSTRE_CFS_DEAD);
-#endif
- free_cpumask_var(cpt_data.cpt_cpumask);
-}
-
-int
-cfs_cpu_init(void)
-{
- int ret = 0;
-
- LASSERT(!cfs_cpt_table);
-
- memset(&cpt_data, 0, sizeof(cpt_data));
-
- if (!zalloc_cpumask_var(&cpt_data.cpt_cpumask, GFP_NOFS)) {
- CERROR("Failed to allocate scratch buffer\n");
- return -1;
- }
-
- spin_lock_init(&cpt_data.cpt_lock);
- mutex_init(&cpt_data.cpt_mutex);
-
-#ifdef CONFIG_HOTPLUG_CPU
- ret = cpuhp_setup_state_nocalls(CPUHP_LUSTRE_CFS_DEAD,
- "staging/lustre/cfe:dead", NULL,
- cfs_cpu_dead);
- if (ret < 0)
- goto failed;
- ret = cpuhp_setup_state_nocalls(CPUHP_AP_ONLINE_DYN,
- "staging/lustre/cfe:online",
- cfs_cpu_online, NULL);
- if (ret < 0)
- goto failed;
- lustre_cpu_online = ret;
-#endif
- ret = -EINVAL;
-
- if (*cpu_pattern) {
- char *cpu_pattern_dup = kstrdup(cpu_pattern, GFP_KERNEL);
-
- if (!cpu_pattern_dup) {
- CERROR("Failed to duplicate cpu_pattern\n");
- goto failed;
- }
-
- cfs_cpt_table = cfs_cpt_table_create_pattern(cpu_pattern_dup);
- kfree(cpu_pattern_dup);
- if (!cfs_cpt_table) {
- CERROR("Failed to create cptab from pattern %s\n",
- cpu_pattern);
- goto failed;
- }
-
- } else {
- cfs_cpt_table = cfs_cpt_table_create(cpu_npartitions);
- if (!cfs_cpt_table) {
- CERROR("Failed to create ptable with npartitions %d\n",
- cpu_npartitions);
- goto failed;
- }
- }
-
- spin_lock(&cpt_data.cpt_lock);
- if (cfs_cpt_table->ctb_version != cpt_data.cpt_version) {
- spin_unlock(&cpt_data.cpt_lock);
- CERROR("CPU hotplug/unplug during setup\n");
- goto failed;
- }
- spin_unlock(&cpt_data.cpt_lock);
-
- LCONSOLE(0, "HW nodes: %d, HW CPU cores: %d, npartitions: %d\n",
- num_online_nodes(), num_online_cpus(),
- cfs_cpt_number(cfs_cpt_table));
- return 0;
-
- failed:
- cfs_cpu_fini();
- return ret;
-}
-
-#endif
diff --git a/drivers/staging/lustre/lnet/libcfs/linux/linux-crypto-adler.c b/drivers/staging/lustre/lnet/libcfs/linux/linux-crypto-adler.c
deleted file mode 100644
index db81ed527452..000000000000
--- a/drivers/staging/lustre/lnet/libcfs/linux/linux-crypto-adler.c
+++ /dev/null
@@ -1,139 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/* GPL HEADER START
- *
- * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 only,
- * as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * General Public License version 2 for more details (a copy is included
- * in the LICENSE file that accompanied this code).
- *
- * You should have received a copy of the GNU General Public License
- * version 2 along with this program; If not, see http://www.gnu.org/licenses
- *
- * Please visit http://www.xyratex.com/contact if you need additional
- * information or have any questions.
- *
- * GPL HEADER END
- */
-
-/*
- * Copyright 2012 Xyratex Technology Limited
- */
-
-/*
- * These are crypto API shash wrappers around zlib_adler32.
- */
-
-#include <linux/module.h>
-#include <linux/zutil.h>
-#include <crypto/internal/hash.h>
-#include "linux-crypto.h"
-
-#define CHKSUM_BLOCK_SIZE 1
-#define CHKSUM_DIGEST_SIZE 4
-
-static int adler32_cra_init(struct crypto_tfm *tfm)
-{
- u32 *key = crypto_tfm_ctx(tfm);
-
- *key = 1;
-
- return 0;
-}
-
-static int adler32_setkey(struct crypto_shash *hash, const u8 *key,
- unsigned int keylen)
-{
- u32 *mctx = crypto_shash_ctx(hash);
-
- if (keylen != sizeof(u32)) {
- crypto_shash_set_flags(hash, CRYPTO_TFM_RES_BAD_KEY_LEN);
- return -EINVAL;
- }
- *mctx = *(u32 *)key;
- return 0;
-}
-
-static int adler32_init(struct shash_desc *desc)
-{
- u32 *mctx = crypto_shash_ctx(desc->tfm);
- u32 *cksump = shash_desc_ctx(desc);
-
- *cksump = *mctx;
-
- return 0;
-}
-
-static int adler32_update(struct shash_desc *desc, const u8 *data,
- unsigned int len)
-{
- u32 *cksump = shash_desc_ctx(desc);
-
- *cksump = zlib_adler32(*cksump, data, len);
- return 0;
-}
-
-static int __adler32_finup(u32 *cksump, const u8 *data, unsigned int len,
- u8 *out)
-{
- *(u32 *)out = zlib_adler32(*cksump, data, len);
- return 0;
-}
-
-static int adler32_finup(struct shash_desc *desc, const u8 *data,
- unsigned int len, u8 *out)
-{
- return __adler32_finup(shash_desc_ctx(desc), data, len, out);
-}
-
-static int adler32_final(struct shash_desc *desc, u8 *out)
-{
- u32 *cksump = shash_desc_ctx(desc);
-
- *(u32 *)out = *cksump;
- return 0;
-}
-
-static int adler32_digest(struct shash_desc *desc, const u8 *data,
- unsigned int len, u8 *out)
-{
- return __adler32_finup(crypto_shash_ctx(desc->tfm), data, len,
- out);
-}
-
-static struct shash_alg alg = {
- .setkey = adler32_setkey,
- .init = adler32_init,
- .update = adler32_update,
- .final = adler32_final,
- .finup = adler32_finup,
- .digest = adler32_digest,
- .descsize = sizeof(u32),
- .digestsize = CHKSUM_DIGEST_SIZE,
- .base = {
- .cra_name = "adler32",
- .cra_driver_name = "adler32-zlib",
- .cra_priority = 100,
- .cra_flags = CRYPTO_ALG_OPTIONAL_KEY,
- .cra_blocksize = CHKSUM_BLOCK_SIZE,
- .cra_ctxsize = sizeof(u32),
- .cra_module = THIS_MODULE,
- .cra_init = adler32_cra_init,
- }
-};
-
-int cfs_crypto_adler32_register(void)
-{
- return crypto_register_shash(&alg);
-}
-
-void cfs_crypto_adler32_unregister(void)
-{
- crypto_unregister_shash(&alg);
-}
diff --git a/drivers/staging/lustre/lnet/libcfs/linux/linux-crypto.c b/drivers/staging/lustre/lnet/libcfs/linux/linux-crypto.c
deleted file mode 100644
index b55006264155..000000000000
--- a/drivers/staging/lustre/lnet/libcfs/linux/linux-crypto.c
+++ /dev/null
@@ -1,443 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/* GPL HEADER START
- *
- * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 only,
- * as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * General Public License version 2 for more details (a copy is included
- * in the LICENSE file that accompanied this code).
- *
- * You should have received a copy of the GNU General Public License
- * version 2 along with this program; If not, see http://www.gnu.org/licenses
- *
- * Please visit http://www.xyratex.com/contact if you need additional
- * information or have any questions.
- *
- * GPL HEADER END
- */
-
-/*
- * Copyright 2012 Xyratex Technology Limited
- *
- * Copyright (c) 2012, Intel Corporation.
- */
-
-#include <crypto/hash.h>
-#include <linux/scatterlist.h>
-#include <linux/libcfs/libcfs.h>
-#include <linux/libcfs/libcfs_crypto.h>
-#include "linux-crypto.h"
-
-/**
- * Array of hash algorithm speeds in MB/s
- */
-static int cfs_crypto_hash_speeds[CFS_HASH_ALG_MAX];
-
-/**
- * Initialize the state descriptor for the specified hash algorithm.
- *
- * An internal routine to allocate the hash-specific state in \a req for
- * use with cfs_crypto_hash_digest() to compute the hash of a single message,
- * though possibly in multiple chunks. The descriptor internal state should
- * be freed with cfs_crypto_hash_final().
- *
- * \param[in] hash_alg hash algorithm id (CFS_HASH_ALG_*)
- * \param[out] type pointer to the hash description in hash_types[]
- * array
- * \param[in,out] req hash state descriptor to be initialized
- * \param[in] key initial hash value/state, NULL to use default
- * value
- * \param[in] key_len length of \a key
- *
- * \retval 0 on success
- * \retval negative errno on failure
- */
-static int cfs_crypto_hash_alloc(enum cfs_crypto_hash_alg hash_alg,
- const struct cfs_crypto_hash_type **type,
- struct ahash_request **req,
- unsigned char *key,
- unsigned int key_len)
-{
- struct crypto_ahash *tfm;
- int err = 0;
-
- *type = cfs_crypto_hash_type(hash_alg);
-
- if (!*type) {
- CWARN("Unsupported hash algorithm id = %d, max id is %d\n",
- hash_alg, CFS_HASH_ALG_MAX);
- return -EINVAL;
- }
- tfm = crypto_alloc_ahash((*type)->cht_name, 0, CRYPTO_ALG_ASYNC);
-
- if (IS_ERR(tfm)) {
- CDEBUG(D_INFO, "Failed to alloc crypto hash %s\n",
- (*type)->cht_name);
- return PTR_ERR(tfm);
- }
-
- *req = ahash_request_alloc(tfm, GFP_KERNEL);
- if (!*req) {
- CDEBUG(D_INFO, "Failed to alloc ahash_request for %s\n",
- (*type)->cht_name);
- crypto_free_ahash(tfm);
- return -ENOMEM;
- }
-
- ahash_request_set_callback(*req, 0, NULL, NULL);
-
- if (key)
- err = crypto_ahash_setkey(tfm, key, key_len);
- else if ((*type)->cht_key)
- err = crypto_ahash_setkey(tfm,
- (unsigned char *)&((*type)->cht_key),
- (*type)->cht_size);
-
- if (err) {
- ahash_request_free(*req);
- crypto_free_ahash(tfm);
- return err;
- }
-
- CDEBUG(D_INFO, "Using crypto hash: %s (%s) speed %d MB/s\n",
- crypto_ahash_alg_name(tfm), crypto_ahash_driver_name(tfm),
- cfs_crypto_hash_speeds[hash_alg]);
-
- err = crypto_ahash_init(*req);
- if (err) {
- ahash_request_free(*req);
- crypto_free_ahash(tfm);
- }
- return err;
-}
-
-/**
- * Calculate hash digest for the passed buffer.
- *
- * This should be used when computing the hash on a single contiguous buffer.
- * It combines the hash initialization, computation, and cleanup.
- *
- * \param[in] hash_alg id of hash algorithm (CFS_HASH_ALG_*)
- * \param[in] buf data buffer on which to compute hash
- * \param[in] buf_len length of \a buf in bytes
- * \param[in] key initial value/state for algorithm,
- * if \a key = NULL use default initial value
- * \param[in] key_len length of \a key in bytes
- * \param[out] hash pointer to computed hash value;
- *        if \a hash is NULL then \a hash_len is set to the
- *        digest size in bytes and -ENOSPC is returned
- * \param[in,out] hash_len size of \a hash buffer
- *
- * \retval -EINVAL \a buf, \a buf_len, \a hash_len,
- * \a hash_alg invalid
- * \retval -ENOENT \a hash_alg is unsupported
- * \retval -ENOSPC \a hash is NULL, or \a hash_len less than
- * digest size
- * \retval 0 for success
- * \retval negative errno for other errors from lower
- * layers.
- */
-int cfs_crypto_hash_digest(enum cfs_crypto_hash_alg hash_alg,
- const void *buf, unsigned int buf_len,
- unsigned char *key, unsigned int key_len,
- unsigned char *hash, unsigned int *hash_len)
-{
- struct scatterlist sl;
- struct ahash_request *req;
- int err;
- const struct cfs_crypto_hash_type *type;
-
- if (!buf || !buf_len || !hash_len)
- return -EINVAL;
-
- err = cfs_crypto_hash_alloc(hash_alg, &type, &req, key, key_len);
- if (err)
- return err;
-
- if (!hash || *hash_len < type->cht_size) {
- *hash_len = type->cht_size;
- crypto_free_ahash(crypto_ahash_reqtfm(req));
- ahash_request_free(req);
- return -ENOSPC;
- }
- sg_init_one(&sl, buf, buf_len);
-
- ahash_request_set_crypt(req, &sl, hash, sl.length);
- err = crypto_ahash_digest(req);
- crypto_free_ahash(crypto_ahash_reqtfm(req));
- ahash_request_free(req);
-
- return err;
-}
-EXPORT_SYMBOL(cfs_crypto_hash_digest);
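
A hedged caller sketch for this one-shot interface (demo_digest is hypothetical; CFS_HASH_ALG_CRC32 is used purely for illustration, any supported algorithm id works):

static int demo_digest(const void *buf, unsigned int len)
{
	unsigned char hash[CFS_CRYPTO_HASH_DIGESTSIZE_MAX];
	unsigned int hash_len = sizeof(hash);
	int rc;

	/* NULL key with key_len == 0 selects the algorithm's default seed */
	rc = cfs_crypto_hash_digest(CFS_HASH_ALG_CRC32, buf, len,
				    NULL, 0, hash, &hash_len);
	/* if hash were NULL or too small, rc would be -ENOSPC and
	 * hash_len would be set to the required digest size
	 */
	return rc;
}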
-
-/**
- * Allocate and initialize descriptor for hash algorithm.
- *
- * This should be used to initialize a hash descriptor for multiple calls
- * to a single hash function when computing the hash across multiple
- * separate buffers or pages using cfs_crypto_hash_update{,_page}().
- *
- * The hash descriptor should be freed with cfs_crypto_hash_final().
- *
- * \param[in] hash_alg algorithm id (CFS_HASH_ALG_*)
- * \param[in] key initial value/state for algorithm, if \a key = NULL
- * use default initial value
- * \param[in] key_len length of \a key in bytes
- *
- * \retval pointer to descriptor of hash instance
- * \retval ERR_PTR(errno) in case of error
- */
-struct ahash_request *
-cfs_crypto_hash_init(enum cfs_crypto_hash_alg hash_alg,
- unsigned char *key, unsigned int key_len)
-{
- struct ahash_request *req;
- int err;
- const struct cfs_crypto_hash_type *type;
-
- err = cfs_crypto_hash_alloc(hash_alg, &type, &req, key, key_len);
-
- if (err)
- return ERR_PTR(err);
- return req;
-}
-EXPORT_SYMBOL(cfs_crypto_hash_init);
-
-/**
- * Update hash digest computed on data within the given \a page
- *
- * \param[in] hreq hash state descriptor
- * \param[in] page data page on which to compute the hash
- * \param[in] offset offset within \a page at which to start hash
- * \param[in] len length of data on which to compute hash
- *
- * \retval 0 for success
- * \retval negative errno on failure
- */
-int cfs_crypto_hash_update_page(struct ahash_request *req,
- struct page *page, unsigned int offset,
- unsigned int len)
-{
- struct scatterlist sl;
-
- sg_init_table(&sl, 1);
- sg_set_page(&sl, page, len, offset & ~PAGE_MASK);
-
- ahash_request_set_crypt(req, &sl, NULL, sl.length);
- return crypto_ahash_update(req);
-}
-EXPORT_SYMBOL(cfs_crypto_hash_update_page);
-
-/**
- * Update hash digest computed on the specified data
- *
- * \param[in] req hash state descriptor
- * \param[in] buf data buffer on which to compute the hash
- * \param[in] buf_len length of \a buf on which to compute hash
- *
- * \retval 0 for success
- * \retval negative errno on failure
- */
-int cfs_crypto_hash_update(struct ahash_request *req,
- const void *buf, unsigned int buf_len)
-{
- struct scatterlist sl;
-
- sg_init_one(&sl, buf, buf_len);
-
- ahash_request_set_crypt(req, &sl, NULL, sl.length);
- return crypto_ahash_update(req);
-}
-EXPORT_SYMBOL(cfs_crypto_hash_update);
-
-/**
- * Finish hash calculation, copy hash digest to buffer, clean up hash descriptor
- *
- * \param[in] req hash descriptor
- * \param[out] hash pointer to hash buffer to store hash digest
- * \param[in,out] hash_len pointer to hash buffer size; if \a hash or
- *        \a hash_len is NULL, only free \a req instead of computing
- *        the hash
- *
- * \retval 0 for success
- * \retval -EOVERFLOW if hash_len is too small for the hash digest
- * \retval negative errno for other errors from lower layers
- */
-int cfs_crypto_hash_final(struct ahash_request *req,
- unsigned char *hash, unsigned int *hash_len)
-{
- int err;
- int size = crypto_ahash_digestsize(crypto_ahash_reqtfm(req));
-
- if (!hash || !hash_len) {
- err = 0;
- goto free_ahash;
- }
- if (*hash_len < size) {
- err = -EOVERFLOW;
- goto free_ahash;
- }
-
- ahash_request_set_crypt(req, NULL, hash, 0);
- err = crypto_ahash_final(req);
- if (!err)
- *hash_len = size;
-free_ahash:
- crypto_free_ahash(crypto_ahash_reqtfm(req));
- ahash_request_free(req);
- return err;
-}
-EXPORT_SYMBOL(cfs_crypto_hash_final);
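
Taken together with cfs_crypto_hash_init() and cfs_crypto_hash_update(), this supports hashing over scattered buffers; a hedged sketch (demo_hash_pieces is hypothetical, CFS_HASH_ALG_ADLER32 chosen for illustration):

static int demo_hash_pieces(const void *a, unsigned int a_len,
			    const void *b, unsigned int b_len,
			    unsigned char *hash, unsigned int *hash_len)
{
	struct ahash_request *req;
	int rc;

	req = cfs_crypto_hash_init(CFS_HASH_ALG_ADLER32, NULL, 0);
	if (IS_ERR(req))
		return PTR_ERR(req);

	rc = cfs_crypto_hash_update(req, a, a_len);
	if (!rc)
		rc = cfs_crypto_hash_update(req, b, b_len);
	if (rc) {
		/* passing NULL hash only releases the descriptor */
		cfs_crypto_hash_final(req, NULL, NULL);
		return rc;
	}

	/* computes the digest and frees req in one step */
	return cfs_crypto_hash_final(req, hash, hash_len);
}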
-
-/**
- * Compute the speed of the specified hash function
- *
- * Run a speed test on the given hash algorithm on an internally allocated
- * buffer of at least 1 MiB. The speed is stored internally in the
- * cfs_crypto_hash_speeds[] array, and is available through the
- * cfs_crypto_hash_speed() function.
- *
- * \param[in] hash_alg hash algorithm id (CFS_HASH_ALG_*)
- */
-static void cfs_crypto_performance_test(enum cfs_crypto_hash_alg hash_alg)
-{
- int buf_len = max(PAGE_SIZE, 1048576UL);
- void *buf;
- unsigned long start, end;
- int bcount, err = 0;
- struct page *page;
- unsigned char hash[CFS_CRYPTO_HASH_DIGESTSIZE_MAX];
- unsigned int hash_len = sizeof(hash);
-
- page = alloc_page(GFP_KERNEL);
- if (!page) {
- err = -ENOMEM;
- goto out_err;
- }
-
- buf = kmap(page);
- memset(buf, 0xAD, PAGE_SIZE);
- kunmap(page);
-
- for (start = jiffies, end = start + msecs_to_jiffies(MSEC_PER_SEC),
- bcount = 0; time_before(jiffies, end); bcount++) {
- struct ahash_request *hdesc;
- int i;
-
- hdesc = cfs_crypto_hash_init(hash_alg, NULL, 0);
- if (IS_ERR(hdesc)) {
- err = PTR_ERR(hdesc);
- break;
- }
-
- for (i = 0; i < buf_len / PAGE_SIZE; i++) {
- err = cfs_crypto_hash_update_page(hdesc, page, 0,
- PAGE_SIZE);
- if (err)
- break;
- }
-
- err = cfs_crypto_hash_final(hdesc, hash, &hash_len);
- if (err)
- break;
- }
- end = jiffies;
- __free_page(page);
-out_err:
- if (err) {
- cfs_crypto_hash_speeds[hash_alg] = err;
- CDEBUG(D_INFO, "Crypto hash algorithm %s test error: rc = %d\n",
- cfs_crypto_hash_name(hash_alg), err);
- } else {
- unsigned long tmp;
-
- tmp = ((bcount * buf_len / jiffies_to_msecs(end - start)) *
- 1000) / (1024 * 1024);
- cfs_crypto_hash_speeds[hash_alg] = (int)tmp;
- CDEBUG(D_CONFIG, "Crypto hash algorithm %s speed = %d MB/s\n",
- cfs_crypto_hash_name(hash_alg),
- cfs_crypto_hash_speeds[hash_alg]);
- }
-}
-
-/**
- * Hash speed in MB/s for a valid hash algorithm
- *
- * Return the performance of the specified \a hash_alg that was previously
- * computed using cfs_crypto_performance_test().
- *
- * \param[in] hash_alg hash algorithm id (CFS_HASH_ALG_*)
- *
- * \retval positive speed of the hash function in MB/s
- * \retval -ENOENT if \a hash_alg is unsupported
- * \retval negative errno if \a hash_alg speed is unavailable
- */
-int cfs_crypto_hash_speed(enum cfs_crypto_hash_alg hash_alg)
-{
- if (hash_alg < CFS_HASH_ALG_MAX)
- return cfs_crypto_hash_speeds[hash_alg];
- return -ENOENT;
-}
-EXPORT_SYMBOL(cfs_crypto_hash_speed);
-
-/**
- * Run the performance test for all hash algorithms.
- *
- * Run the cfs_crypto_performance_test() benchmark for all of the available
- * hash functions using a 1MB buffer size. This is a reasonable buffer size
- * for Lustre RPCs, even if the actual RPC size is larger or smaller.
- *
- * Since the setup cost and computation speed of various hash algorithms is
- * a function of the buffer size (and possibly internal contention of offload
- * engines), this speed only represents an estimate of the actual speed under
- * actual usage, but is reasonable for comparing available algorithms.
- *
- * The actual speeds are available via cfs_crypto_hash_speed() for later
- * comparison.
- *
- * \retval 0 on success
- * \retval -ENOMEM if no memory is available for test buffer
- */
-static int cfs_crypto_test_hashes(void)
-{
- enum cfs_crypto_hash_alg hash_alg;
-
- for (hash_alg = 0; hash_alg < CFS_HASH_ALG_MAX; hash_alg++)
- cfs_crypto_performance_test(hash_alg);
-
- return 0;
-}
-
-static int adler32;
-
-/**
- * Register available hash functions
- *
- * \retval 0
- */
-int cfs_crypto_register(void)
-{
- request_module("crc32c");
-
- adler32 = cfs_crypto_adler32_register();
-
- /* check all algorithms and do performance test */
- cfs_crypto_test_hashes();
- return 0;
-}
-
-/**
- * Unregister previously registered hash functions
- */
-void cfs_crypto_unregister(void)
-{
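-	/* adler32 holds the registration result: 0 means it succeeded */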
- if (!adler32)
- cfs_crypto_adler32_unregister();
-}
diff --git a/drivers/staging/lustre/lnet/libcfs/linux/linux-crypto.h b/drivers/staging/lustre/lnet/libcfs/linux/linux-crypto.h
deleted file mode 100644
index 5616e9ea1450..000000000000
--- a/drivers/staging/lustre/lnet/libcfs/linux/linux-crypto.h
+++ /dev/null
@@ -1,30 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/*
- * GPL HEADER START
- *
- * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 only,
- * as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * General Public License version 2 for more details (a copy is included
- * in the LICENSE file that accompanied this code).
- *
- * You should have received a copy of the GNU General Public License
- * version 2 along with this program; If not, see http://www.gnu.org/licenses
- *
- * Please visit http://www.xyratex.com/contact if you need additional
- * information or have any questions.
- *
- * GPL HEADER END
- */
-
-/**
- * Functions to register/unregister the shash adler32 algorithm.
- */
-int cfs_crypto_adler32_register(void);
-void cfs_crypto_adler32_unregister(void);
diff --git a/drivers/staging/lustre/lnet/libcfs/linux/linux-debug.c b/drivers/staging/lustre/lnet/libcfs/linux/linux-debug.c
deleted file mode 100644
index 0092166af258..000000000000
--- a/drivers/staging/lustre/lnet/libcfs/linux/linux-debug.c
+++ /dev/null
@@ -1,145 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/*
- * GPL HEADER START
- *
- * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 only,
- * as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * General Public License version 2 for more details (a copy is included
- * in the LICENSE file that accompanied this code).
- *
- * You should have received a copy of the GNU General Public License
- * version 2 along with this program; If not, see
- * http://www.gnu.org/licenses/gpl-2.0.html
- *
- * GPL HEADER END
- */
-/*
- * Copyright (c) 2008, 2010, Oracle and/or its affiliates. All rights reserved.
- * Use is subject to license terms.
- *
- * Copyright (c) 2012, Intel Corporation.
- */
-/*
- * This file is part of Lustre, http://www.lustre.org/
- * Lustre is a trademark of Sun Microsystems, Inc.
- *
- * libcfs/libcfs/linux/linux-debug.c
- *
- * Author: Phil Schwan <phil@clusterfs.com>
- */
-
-#include <linux/module.h>
-#include <linux/kmod.h>
-#include <linux/notifier.h>
-#include <linux/kernel.h>
-#include <linux/mm.h>
-#include <linux/string.h>
-#include <linux/stat.h>
-#include <linux/errno.h>
-#include <linux/unistd.h>
-#include <linux/interrupt.h>
-#include <linux/completion.h>
-#include <linux/fs.h>
-#include <linux/uaccess.h>
-#include <linux/miscdevice.h>
-
-# define DEBUG_SUBSYSTEM S_LNET
-
-#include <linux/libcfs/libcfs.h>
-
-#include "../tracefile.h"
-
-#include <linux/kallsyms.h>
-
-char lnet_debug_log_upcall[1024] = "/usr/lib/lustre/lnet_debug_log_upcall";
-
-/**
- * Upcall function once a Lustre log has been dumped.
- *
- * \param file path of the dumped log
- */
-void libcfs_run_debug_log_upcall(char *file)
-{
- char *argv[3];
- int rc;
- static const char * const envp[] = {
- "HOME=/",
- "PATH=/sbin:/bin:/usr/sbin:/usr/bin",
- NULL
- };
-
- argv[0] = lnet_debug_log_upcall;
-
- LASSERTF(file, "called on a null filename\n");
- argv[1] = file; /* only need to pass the path of the file */
-
- argv[2] = NULL;
-
- rc = call_usermodehelper(argv[0], argv, (char **)envp, 1);
- if (rc < 0 && rc != -ENOENT) {
- CERROR("Error %d invoking LNET debug log upcall %s %s; check /sys/kernel/debug/lnet/debug_log_upcall\n",
- rc, argv[0], argv[1]);
- } else {
- CDEBUG(D_HA, "Invoked LNET debug log upcall %s %s\n",
- argv[0], argv[1]);
- }
-}
-
-/* coverity[+kill] */
-void __noreturn lbug_with_loc(struct libcfs_debug_msg_data *msgdata)
-{
- libcfs_catastrophe = 1;
- libcfs_debug_msg(msgdata, "LBUG\n");
-
- if (in_interrupt()) {
- panic("LBUG in interrupt.\n");
- /* not reached */
- }
-
- dump_stack();
- if (!libcfs_panic_on_lbug)
- libcfs_debug_dumplog();
- if (libcfs_panic_on_lbug)
- panic("LBUG");
- set_current_state(TASK_UNINTERRUPTIBLE);
- while (1)
- schedule();
-}
-EXPORT_SYMBOL(lbug_with_loc);
-
-static int panic_notifier(struct notifier_block *self, unsigned long unused1,
- void *unused2)
-{
- if (libcfs_panic_in_progress)
- return 0;
-
- libcfs_panic_in_progress = 1;
- mb();
-
- return 0;
-}
-
-static struct notifier_block libcfs_panic_notifier = {
- .notifier_call = panic_notifier,
- .next = NULL,
- .priority = 10000,
-};
-
-void libcfs_register_panic_notifier(void)
-{
- atomic_notifier_chain_register(&panic_notifier_list,
- &libcfs_panic_notifier);
-}
-
-void libcfs_unregister_panic_notifier(void)
-{
- atomic_notifier_chain_unregister(&panic_notifier_list,
- &libcfs_panic_notifier);
-}
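One detail worth spelling out: libcfs_run_debug_log_upcall() above passes the bare constant 1 as the wait argument of call_usermodehelper(). Assuming the flag values from linux/kmod.h of this era, that is UMH_WAIT_EXEC, i.e. wait until the helper has been exec'd, but not until it exits. The call could equivalently be written with the named flag:

	/* drop-in replacement for the call above; UMH_WAIT_EXEC == 1 means
	 * "wait for exec(), not for the helper process to finish"
	 */
	rc = call_usermodehelper(argv[0], argv, (char **)envp, UMH_WAIT_EXEC);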
diff --git a/drivers/staging/lustre/lnet/libcfs/linux/linux-module.c b/drivers/staging/lustre/lnet/libcfs/linux/linux-module.c
deleted file mode 100644
index ddf625669bff..000000000000
--- a/drivers/staging/lustre/lnet/libcfs/linux/linux-module.c
+++ /dev/null
@@ -1,197 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/*
- * GPL HEADER START
- *
- * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 only,
- * as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * General Public License version 2 for more details (a copy is included
- * in the LICENSE file that accompanied this code).
- *
- * You should have received a copy of the GNU General Public License
- * version 2 along with this program; If not, see
- * http://www.gnu.org/licenses/gpl-2.0.html
- *
- * GPL HEADER END
- */
-/*
- * Copyright (c) 2008, 2010, Oracle and/or its affiliates. All rights reserved.
- * Use is subject to license terms.
- *
- * Copyright (c) 2012, Intel Corporation.
- */
-/*
- * This file is part of Lustre, http://www.lustre.org/
- * Lustre is a trademark of Sun Microsystems, Inc.
- */
-
-#define DEBUG_SUBSYSTEM S_LNET
-
-#include <linux/libcfs/libcfs.h>
-
-#define LNET_MINOR 240
-
-static inline size_t libcfs_ioctl_packlen(struct libcfs_ioctl_data *data)
-{
- size_t len = sizeof(*data);
-
- len += cfs_size_round(data->ioc_inllen1);
- len += cfs_size_round(data->ioc_inllen2);
- return len;
-}
-
-static inline bool libcfs_ioctl_is_invalid(struct libcfs_ioctl_data *data)
-{
- if (data->ioc_hdr.ioc_len > BIT(30)) {
- CERROR("LIBCFS ioctl: ioc_len larger than 1<<30\n");
- return true;
- }
- if (data->ioc_inllen1 > BIT(30)) {
- CERROR("LIBCFS ioctl: ioc_inllen1 larger than 1<<30\n");
- return true;
- }
- if (data->ioc_inllen2 > BIT(30)) {
- CERROR("LIBCFS ioctl: ioc_inllen2 larger than 1<<30\n");
- return true;
- }
- if (data->ioc_inlbuf1 && !data->ioc_inllen1) {
- CERROR("LIBCFS ioctl: inlbuf1 pointer but 0 length\n");
- return true;
- }
- if (data->ioc_inlbuf2 && !data->ioc_inllen2) {
- CERROR("LIBCFS ioctl: inlbuf2 pointer but 0 length\n");
- return true;
- }
- if (data->ioc_pbuf1 && !data->ioc_plen1) {
- CERROR("LIBCFS ioctl: pbuf1 pointer but 0 length\n");
- return true;
- }
- if (data->ioc_pbuf2 && !data->ioc_plen2) {
- CERROR("LIBCFS ioctl: pbuf2 pointer but 0 length\n");
- return true;
- }
- if (data->ioc_plen1 && !data->ioc_pbuf1) {
- CERROR("LIBCFS ioctl: plen1 nonzero but no pbuf1 pointer\n");
- return true;
- }
- if (data->ioc_plen2 && !data->ioc_pbuf2) {
- CERROR("LIBCFS ioctl: plen2 nonzero but no pbuf2 pointer\n");
- return true;
- }
- if ((u32)libcfs_ioctl_packlen(data) != data->ioc_hdr.ioc_len) {
- CERROR("LIBCFS ioctl: packlen != ioc_len\n");
- return true;
- }
- if (data->ioc_inllen1 &&
- data->ioc_bulk[data->ioc_inllen1 - 1] != '\0') {
- CERROR("LIBCFS ioctl: inlbuf1 not 0 terminated\n");
- return true;
- }
- if (data->ioc_inllen2 &&
- data->ioc_bulk[cfs_size_round(data->ioc_inllen1) +
- data->ioc_inllen2 - 1] != '\0') {
- CERROR("LIBCFS ioctl: inlbuf2 not 0 terminated\n");
- return true;
- }
- return false;
-}
-
-int libcfs_ioctl_data_adjust(struct libcfs_ioctl_data *data)
-{
- if (libcfs_ioctl_is_invalid(data)) {
- CERROR("libcfs ioctl: parameter not correctly formatted\n");
- return -EINVAL;
- }
-
- if (data->ioc_inllen1)
- data->ioc_inlbuf1 = &data->ioc_bulk[0];
-
- if (data->ioc_inllen2)
- data->ioc_inlbuf2 = &data->ioc_bulk[0] +
- cfs_size_round(data->ioc_inllen1);
-
- return 0;
-}
-
-int libcfs_ioctl_getdata(struct libcfs_ioctl_hdr **hdr_pp,
- const struct libcfs_ioctl_hdr __user *uhdr)
-{
- struct libcfs_ioctl_hdr hdr;
- int err;
-
- if (copy_from_user(&hdr, uhdr, sizeof(hdr)))
- return -EFAULT;
-
- if (hdr.ioc_version != LIBCFS_IOCTL_VERSION &&
- hdr.ioc_version != LIBCFS_IOCTL_VERSION2) {
- CERROR("libcfs ioctl: version mismatch expected %#x, got %#x\n",
- LIBCFS_IOCTL_VERSION, hdr.ioc_version);
- return -EINVAL;
- }
-
- if (hdr.ioc_len < sizeof(hdr)) {
- CERROR("libcfs ioctl: user buffer too small for ioctl\n");
- return -EINVAL;
- }
-
- if (hdr.ioc_len > LIBCFS_IOC_DATA_MAX) {
- CERROR("libcfs ioctl: user buffer is too large %d/%d\n",
- hdr.ioc_len, LIBCFS_IOC_DATA_MAX);
- return -EINVAL;
- }
-
- *hdr_pp = kvmalloc(hdr.ioc_len, GFP_KERNEL);
- if (!*hdr_pp)
- return -ENOMEM;
-
- if (copy_from_user(*hdr_pp, uhdr, hdr.ioc_len)) {
- err = -EFAULT;
- goto free;
- }
-
- if ((*hdr_pp)->ioc_version != hdr.ioc_version ||
- (*hdr_pp)->ioc_len != hdr.ioc_len) {
- err = -EINVAL;
- goto free;
- }
-
- return 0;
-
-free:
- kvfree(*hdr_pp);
- return err;
-}
-
-static long
-libcfs_psdev_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
-{
- if (!capable(CAP_SYS_ADMIN))
- return -EACCES;
-
- if (_IOC_TYPE(cmd) != IOC_LIBCFS_TYPE ||
- _IOC_NR(cmd) < IOC_LIBCFS_MIN_NR ||
- _IOC_NR(cmd) > IOC_LIBCFS_MAX_NR) {
- CDEBUG(D_IOCTL, "invalid ioctl ( type %d, nr %d, size %d )\n",
- _IOC_TYPE(cmd), _IOC_NR(cmd), _IOC_SIZE(cmd));
- return -EINVAL;
- }
-
- return libcfs_ioctl(cmd, (void __user *)arg);
-}
-
-static const struct file_operations libcfs_fops = {
- .owner = THIS_MODULE,
- .unlocked_ioctl = libcfs_psdev_ioctl,
-};
-
-struct miscdevice libcfs_dev = {
- .minor = LNET_MINOR,
- .name = "lnet",
- .fops = &libcfs_fops,
-};
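libcfs_ioctl_getdata() above is a textbook double-fetch guard: the header is copied from userspace once to learn and bound the size, the full buffer is copied second, and the version and length fields of the second copy are compared against the first, so a userspace writer racing between the two reads cannot smuggle in a different size. A condensed sketch of the pattern (all names hypothetical; MAX_BLOB stands in for LIBCFS_IOC_DATA_MAX):

#include <linux/uaccess.h>
#include <linux/mm.h>

struct blob_hdr { u32 version; u32 len; };

#define MAX_BLOB	(80 * 1024)	/* hypothetical upper bound */

static int fetch_blob(struct blob_hdr **out,
		      const struct blob_hdr __user *uptr)
{
	struct blob_hdr h;

	if (copy_from_user(&h, uptr, sizeof(h)))
		return -EFAULT;
	/* bound the allocation using the first fetch */
	if (h.len < sizeof(h) || h.len > MAX_BLOB)
		return -EINVAL;

	*out = kvmalloc(h.len, GFP_KERNEL);
	if (!*out)
		return -ENOMEM;

	if (copy_from_user(*out, uptr, h.len)) {
		kvfree(*out);
		return -EFAULT;
	}
	/* re-check: reject if userspace changed the header between copies */
	if ((*out)->version != h.version || (*out)->len != h.len) {
		kvfree(*out);
		return -EINVAL;
	}
	return 0;
}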
diff --git a/drivers/staging/lustre/lnet/libcfs/linux/linux-tracefile.c b/drivers/staging/lustre/lnet/libcfs/linux/linux-tracefile.c
deleted file mode 100644
index 7928d7182634..000000000000
--- a/drivers/staging/lustre/lnet/libcfs/linux/linux-tracefile.c
+++ /dev/null
@@ -1,257 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/*
- * GPL HEADER START
- *
- * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 only,
- * as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * General Public License version 2 for more details (a copy is included
- * in the LICENSE file that accompanied this code).
- *
- * You should have received a copy of the GNU General Public License
- * version 2 along with this program; If not, see
- * http://www.gnu.org/licenses/gpl-2.0.html
- *
- * GPL HEADER END
- */
-/*
- * Copyright (c) 2008, 2010, Oracle and/or its affiliates. All rights reserved.
- * Use is subject to license terms.
- *
- * Copyright (c) 2012, Intel Corporation.
- */
-/*
- * This file is part of Lustre, http://www.lustre.org/
- * Lustre is a trademark of Sun Microsystems, Inc.
- */
-
-#define DEBUG_SUBSYSTEM S_LNET
-#define LUSTRE_TRACEFILE_PRIVATE
-
-#include <linux/libcfs/libcfs.h>
-#include "../tracefile.h"
-
-/* percentage shares of the total debug memory for each type */
-static unsigned int pages_factor[CFS_TCD_TYPE_MAX] = {
- 80, /* 80% pages for CFS_TCD_TYPE_PROC */
- 10, /* 10% pages for CFS_TCD_TYPE_SOFTIRQ */
- 10 /* 10% pages for CFS_TCD_TYPE_IRQ */
-};
-
-char *cfs_trace_console_buffers[NR_CPUS][CFS_TCD_TYPE_MAX];
-
-static DECLARE_RWSEM(cfs_tracefile_sem);
-
-int cfs_tracefile_init_arch(void)
-{
- int i;
- int j;
- struct cfs_trace_cpu_data *tcd;
-
- /* initialize trace_data */
- memset(cfs_trace_data, 0, sizeof(cfs_trace_data));
- for (i = 0; i < CFS_TCD_TYPE_MAX; i++) {
- cfs_trace_data[i] =
- kmalloc_array(num_possible_cpus(),
- sizeof(union cfs_trace_data_union),
- GFP_KERNEL);
- if (!cfs_trace_data[i])
- goto out;
- }
-
-	/* initialize arch-related info */
- cfs_tcd_for_each(tcd, i, j) {
- spin_lock_init(&tcd->tcd_lock);
- tcd->tcd_pages_factor = pages_factor[i];
- tcd->tcd_type = i;
- tcd->tcd_cpu = j;
- }
-
- for (i = 0; i < num_possible_cpus(); i++)
- for (j = 0; j < 3; j++) {
- cfs_trace_console_buffers[i][j] =
- kmalloc(CFS_TRACE_CONSOLE_BUFFER_SIZE,
- GFP_KERNEL);
-
- if (!cfs_trace_console_buffers[i][j])
- goto out;
- }
-
- return 0;
-
-out:
- cfs_tracefile_fini_arch();
- pr_err("lnet: Not enough memory\n");
- return -ENOMEM;
-}
-
-void cfs_tracefile_fini_arch(void)
-{
- int i;
- int j;
-
- for (i = 0; i < num_possible_cpus(); i++)
- for (j = 0; j < 3; j++) {
- kfree(cfs_trace_console_buffers[i][j]);
- cfs_trace_console_buffers[i][j] = NULL;
- }
-
- for (i = 0; cfs_trace_data[i]; i++) {
- kfree(cfs_trace_data[i]);
- cfs_trace_data[i] = NULL;
- }
-}
-
-void cfs_tracefile_read_lock(void)
-{
- down_read(&cfs_tracefile_sem);
-}
-
-void cfs_tracefile_read_unlock(void)
-{
- up_read(&cfs_tracefile_sem);
-}
-
-void cfs_tracefile_write_lock(void)
-{
- down_write(&cfs_tracefile_sem);
-}
-
-void cfs_tracefile_write_unlock(void)
-{
- up_write(&cfs_tracefile_sem);
-}
-
-enum cfs_trace_buf_type cfs_trace_buf_idx_get(void)
-{
- if (in_irq())
- return CFS_TCD_TYPE_IRQ;
- if (in_softirq())
- return CFS_TCD_TYPE_SOFTIRQ;
- return CFS_TCD_TYPE_PROC;
-}
-
-/*
- * The walking argument indicates that the caller is iterating over all
- * tcd types; in that case we must take the lock and disable local irqs
- * to avoid deadlocks with interrupt locks that might be taken
- * concurrently. See LU-1311 for details.
- */
-int cfs_trace_lock_tcd(struct cfs_trace_cpu_data *tcd, int walking)
- __acquires(&tcd->tc_lock)
-{
- __LASSERT(tcd->tcd_type < CFS_TCD_TYPE_MAX);
- if (tcd->tcd_type == CFS_TCD_TYPE_IRQ)
- spin_lock_irqsave(&tcd->tcd_lock, tcd->tcd_lock_flags);
- else if (tcd->tcd_type == CFS_TCD_TYPE_SOFTIRQ)
- spin_lock_bh(&tcd->tcd_lock);
- else if (unlikely(walking))
- spin_lock_irq(&tcd->tcd_lock);
- else
- spin_lock(&tcd->tcd_lock);
- return 1;
-}
-
-void cfs_trace_unlock_tcd(struct cfs_trace_cpu_data *tcd, int walking)
- __releases(&tcd->tcd_lock)
-{
- __LASSERT(tcd->tcd_type < CFS_TCD_TYPE_MAX);
- if (tcd->tcd_type == CFS_TCD_TYPE_IRQ)
- spin_unlock_irqrestore(&tcd->tcd_lock, tcd->tcd_lock_flags);
- else if (tcd->tcd_type == CFS_TCD_TYPE_SOFTIRQ)
- spin_unlock_bh(&tcd->tcd_lock);
- else if (unlikely(walking))
- spin_unlock_irq(&tcd->tcd_lock);
- else
- spin_unlock(&tcd->tcd_lock);
-}
-
-void
-cfs_set_ptldebug_header(struct ptldebug_header *header,
- struct libcfs_debug_msg_data *msgdata,
- unsigned long stack)
-{
- struct timespec64 ts;
-
- ktime_get_real_ts64(&ts);
-
- header->ph_subsys = msgdata->msg_subsys;
- header->ph_mask = msgdata->msg_mask;
- header->ph_cpu_id = smp_processor_id();
- header->ph_type = cfs_trace_buf_idx_get();
- /* y2038 safe since all user space treats this as unsigned, but
- * will overflow in 2106
- */
- header->ph_sec = (u32)ts.tv_sec;
- header->ph_usec = ts.tv_nsec / NSEC_PER_USEC;
- header->ph_stack = stack;
- header->ph_pid = current->pid;
- header->ph_line_num = msgdata->msg_line;
- header->ph_extern_pid = 0;
-}
-
-static char *
-dbghdr_to_err_string(struct ptldebug_header *hdr)
-{
- switch (hdr->ph_subsys) {
- case S_LND:
- case S_LNET:
- return "LNetError";
- default:
- return "LustreError";
- }
-}
-
-static char *
-dbghdr_to_info_string(struct ptldebug_header *hdr)
-{
- switch (hdr->ph_subsys) {
- case S_LND:
- case S_LNET:
- return "LNet";
- default:
- return "Lustre";
- }
-}
-
-void cfs_print_to_console(struct ptldebug_header *hdr, int mask,
- const char *buf, int len, const char *file,
- const char *fn)
-{
- char *prefix = "Lustre", *ptype = NULL;
-
- if (mask & D_EMERG) {
- prefix = dbghdr_to_err_string(hdr);
- ptype = KERN_EMERG;
- } else if (mask & D_ERROR) {
- prefix = dbghdr_to_err_string(hdr);
- ptype = KERN_ERR;
- } else if (mask & D_WARNING) {
- prefix = dbghdr_to_info_string(hdr);
- ptype = KERN_WARNING;
- } else if (mask & (D_CONSOLE | libcfs_printk)) {
- prefix = dbghdr_to_info_string(hdr);
- ptype = KERN_INFO;
- }
-
- if (mask & D_CONSOLE) {
- pr_info("%s%s: %.*s", ptype, prefix, len, buf);
- } else {
- pr_info("%s%s: %d:%d:(%s:%d:%s()) %.*s", ptype, prefix,
- hdr->ph_pid, hdr->ph_extern_pid, file,
- hdr->ph_line_num, fn, len, buf);
- }
-}
-
-int cfs_trace_max_debug_mb(void)
-{
- int total_mb = (totalram_pages >> (20 - PAGE_SHIFT));
-
- return max(512, (total_mb * 80) / 100);
-}
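cfs_trace_max_debug_mb() caps the total trace-buffer size at 80% of RAM but never below 512 MB, and the pages_factor table at the top of this file splits whatever amount is configured 80/10/10 between process, softirq and irq context. The arithmetic, as a worked sketch built from the constants above:

	/* totalram_pages counts PAGE_SIZE units; the shift converts
	 * pages to megabytes (20 - PAGE_SHIFT bits)
	 */
	int total_mb = totalram_pages >> (20 - PAGE_SHIFT);
	int cap_mb   = max(512, (total_mb * 80) / 100);

	/* e.g. with 16 GiB of RAM and 4 KiB pages: total_mb = 16384,
	 * so cap_mb = 13107. Of whatever ends up configured, 80% of the
	 * pages go to CFS_TCD_TYPE_PROC, 10% to CFS_TCD_TYPE_SOFTIRQ and
	 * 10% to CFS_TCD_TYPE_IRQ.
	 */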
diff --git a/drivers/staging/lustre/lnet/libcfs/module.c b/drivers/staging/lustre/lnet/libcfs/module.c
deleted file mode 100644
index a03f924f1d7c..000000000000
--- a/drivers/staging/lustre/lnet/libcfs/module.c
+++ /dev/null
@@ -1,604 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/*
- * GPL HEADER START
- *
- * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 only,
- * as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * General Public License version 2 for more details (a copy is included
- * in the LICENSE file that accompanied this code).
- *
- * You should have received a copy of the GNU General Public License
- * version 2 along with this program; If not, see
- * http://www.gnu.org/licenses/gpl-2.0.html
- *
- * GPL HEADER END
- */
-/*
- * Copyright (c) 2008, 2010, Oracle and/or its affiliates. All rights reserved.
- * Use is subject to license terms.
- *
- * Copyright (c) 2012, 2015 Intel Corporation.
- */
-/*
- * This file is part of Lustre, http://www.lustre.org/
- * Lustre is a trademark of Sun Microsystems, Inc.
- */
-#include <linux/module.h>
-#include <linux/kernel.h>
-#include <linux/mm.h>
-#include <linux/string.h>
-#include <linux/stat.h>
-#include <linux/errno.h>
-#include <linux/unistd.h>
-#include <net/sock.h>
-#include <linux/uio.h>
-
-#include <linux/uaccess.h>
-
-#include <linux/fs.h>
-#include <linux/file.h>
-#include <linux/list.h>
-
-#include <linux/sysctl.h>
-#include <linux/debugfs.h>
-
-# define DEBUG_SUBSYSTEM S_LNET
-
-#include <linux/libcfs/libcfs.h>
-#include <asm/div64.h>
-
-#include <linux/libcfs/libcfs_crypto.h>
-#include <linux/lnet/lib-lnet.h>
-#include <uapi/linux/lnet/lnet-dlc.h>
-#include "tracefile.h"
-
-static struct dentry *lnet_debugfs_root;
-
-static DECLARE_RWSEM(ioctl_list_sem);
-static LIST_HEAD(ioctl_list);
-
-int libcfs_register_ioctl(struct libcfs_ioctl_handler *hand)
-{
- int rc = 0;
-
- down_write(&ioctl_list_sem);
- if (!list_empty(&hand->item))
- rc = -EBUSY;
- else
- list_add_tail(&hand->item, &ioctl_list);
- up_write(&ioctl_list_sem);
-
- return rc;
-}
-EXPORT_SYMBOL(libcfs_register_ioctl);
-
-int libcfs_deregister_ioctl(struct libcfs_ioctl_handler *hand)
-{
- int rc = 0;
-
- down_write(&ioctl_list_sem);
- if (list_empty(&hand->item))
- rc = -ENOENT;
- else
- list_del_init(&hand->item);
- up_write(&ioctl_list_sem);
-
- return rc;
-}
-EXPORT_SYMBOL(libcfs_deregister_ioctl);
-
-int libcfs_ioctl(unsigned long cmd, void __user *uparam)
-{
- struct libcfs_ioctl_data *data = NULL;
- struct libcfs_ioctl_hdr *hdr;
- int err;
-
- /* 'cmd' and permissions get checked in our arch-specific caller */
- err = libcfs_ioctl_getdata(&hdr, uparam);
- if (err) {
- CDEBUG_LIMIT(D_ERROR,
- "libcfs ioctl: data header error %d\n", err);
- return err;
- }
-
- if (hdr->ioc_version == LIBCFS_IOCTL_VERSION) {
-		/*
-		 * libcfs_ioctl_data_adjust() fixes up the embedded buffer
-		 * pointers in the libcfs_ioctl_data structure so the rest
-		 * of the code can use it. It does not need to be called
-		 * for newly added data structures.
-		 */
- data = container_of(hdr, struct libcfs_ioctl_data, ioc_hdr);
- err = libcfs_ioctl_data_adjust(data);
- if (err)
- goto out;
- }
-
- CDEBUG(D_IOCTL, "libcfs ioctl cmd %lu\n", cmd);
- switch (cmd) {
- case IOC_LIBCFS_CLEAR_DEBUG:
- libcfs_debug_clear_buffer();
- break;
-
- case IOC_LIBCFS_MARK_DEBUG:
- if (!data || !data->ioc_inlbuf1 ||
- data->ioc_inlbuf1[data->ioc_inllen1 - 1] != '\0') {
- err = -EINVAL;
- goto out;
- }
- libcfs_debug_mark_buffer(data->ioc_inlbuf1);
- break;
-
- default: {
- struct libcfs_ioctl_handler *hand;
-
- err = -EINVAL;
- down_read(&ioctl_list_sem);
- list_for_each_entry(hand, &ioctl_list, item) {
- err = hand->handle_ioctl(cmd, hdr);
- if (err == -EINVAL)
- continue;
-
- if (!err) {
- if (copy_to_user(uparam, hdr, hdr->ioc_len))
- err = -EFAULT;
- }
- break;
- }
- up_read(&ioctl_list_sem);
- break; }
- }
-out:
- kvfree(hdr);
- return err;
-}
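Other modules hook into libcfs_ioctl() through the handler list managed by libcfs_register_ioctl(): each handler is tried in turn, a return of -EINVAL means "not my command, keep looking", and any other return value ends the walk (with the header copied back to userspace on success). A minimal registration sketch follows; the command number is hypothetical and the struct layout is inferred from its use in this file:

static int my_handle_ioctl(unsigned int cmd, struct libcfs_ioctl_hdr *hdr)
{
	if (cmd != IOC_LIBCFS_MY_CMD)	/* hypothetical command number */
		return -EINVAL;		/* let the next handler try */

	/* ... operate on hdr ... */
	return 0;			/* success: hdr is copied back */
}

static struct libcfs_ioctl_handler my_handler = {
	/* must start empty, or libcfs_register_ioctl() returns -EBUSY */
	.item		= LIST_HEAD_INIT(my_handler.item),
	.handle_ioctl	= my_handle_ioctl,
};

/* at module init/exit:
 *	libcfs_register_ioctl(&my_handler);
 *	libcfs_deregister_ioctl(&my_handler);
 */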
-
-int lprocfs_call_handler(void *data, int write, loff_t *ppos,
- void __user *buffer, size_t *lenp,
- int (*handler)(void *data, int write, loff_t pos,
- void __user *buffer, int len))
-{
- int rc = handler(data, write, *ppos, buffer, *lenp);
-
- if (rc < 0)
- return rc;
-
- if (write) {
- *ppos += *lenp;
- } else {
- *lenp = rc;
- *ppos += rc;
- }
- return 0;
-}
-EXPORT_SYMBOL(lprocfs_call_handler);
-
-static int __proc_dobitmasks(void *data, int write,
- loff_t pos, void __user *buffer, int nob)
-{
- const int tmpstrlen = 512;
- char *tmpstr;
- int rc;
- unsigned int *mask = data;
- int is_subsys = (mask == &libcfs_subsystem_debug) ? 1 : 0;
- int is_printk = (mask == &libcfs_printk) ? 1 : 0;
-
- rc = cfs_trace_allocate_string_buffer(&tmpstr, tmpstrlen);
- if (rc < 0)
- return rc;
-
- if (!write) {
- libcfs_debug_mask2str(tmpstr, tmpstrlen, *mask, is_subsys);
- rc = strlen(tmpstr);
-
- if (pos >= rc) {
- rc = 0;
- } else {
- rc = cfs_trace_copyout_string(buffer, nob,
- tmpstr + pos, "\n");
- }
- } else {
- rc = cfs_trace_copyin_string(tmpstr, tmpstrlen, buffer, nob);
- if (rc < 0) {
- kfree(tmpstr);
- return rc;
- }
-
- rc = libcfs_debug_str2mask(mask, tmpstr, is_subsys);
- /* Always print LBUG/LASSERT to console, so keep this mask */
- if (is_printk)
- *mask |= D_EMERG;
- }
-
- kfree(tmpstr);
- return rc;
-}
-
-static int proc_dobitmasks(struct ctl_table *table, int write,
- void __user *buffer, size_t *lenp, loff_t *ppos)
-{
- return lprocfs_call_handler(table->data, write, ppos, buffer, lenp,
- __proc_dobitmasks);
-}
-
-static int __proc_dump_kernel(void *data, int write,
- loff_t pos, void __user *buffer, int nob)
-{
- if (!write)
- return 0;
-
- return cfs_trace_dump_debug_buffer_usrstr(buffer, nob);
-}
-
-static int proc_dump_kernel(struct ctl_table *table, int write,
- void __user *buffer, size_t *lenp, loff_t *ppos)
-{
- return lprocfs_call_handler(table->data, write, ppos, buffer, lenp,
- __proc_dump_kernel);
-}
-
-static int __proc_daemon_file(void *data, int write,
- loff_t pos, void __user *buffer, int nob)
-{
- if (!write) {
- int len = strlen(cfs_tracefile);
-
- if (pos >= len)
- return 0;
-
- return cfs_trace_copyout_string(buffer, nob,
- cfs_tracefile + pos, "\n");
- }
-
- return cfs_trace_daemon_command_usrstr(buffer, nob);
-}
-
-static int proc_daemon_file(struct ctl_table *table, int write,
- void __user *buffer, size_t *lenp, loff_t *ppos)
-{
- return lprocfs_call_handler(table->data, write, ppos, buffer, lenp,
- __proc_daemon_file);
-}
-
-static int libcfs_force_lbug(struct ctl_table *table, int write,
- void __user *buffer,
- size_t *lenp, loff_t *ppos)
-{
- if (write)
- LBUG();
- return 0;
-}
-
-static int proc_fail_loc(struct ctl_table *table, int write,
- void __user *buffer,
- size_t *lenp, loff_t *ppos)
-{
- int rc;
- long old_fail_loc = cfs_fail_loc;
-
- rc = proc_doulongvec_minmax(table, write, buffer, lenp, ppos);
- if (old_fail_loc != cfs_fail_loc)
- wake_up(&cfs_race_waitq);
- return rc;
-}
-
-static int __proc_cpt_table(void *data, int write,
- loff_t pos, void __user *buffer, int nob)
-{
- char *buf = NULL;
- int len = 4096;
- int rc = 0;
-
- if (write)
- return -EPERM;
-
- LASSERT(cfs_cpt_table);
-
- while (1) {
- buf = kzalloc(len, GFP_KERNEL);
- if (!buf)
- return -ENOMEM;
-
- rc = cfs_cpt_table_print(cfs_cpt_table, buf, len);
- if (rc >= 0)
- break;
-
- if (rc == -EFBIG) {
- kfree(buf);
- len <<= 1;
- continue;
- }
- goto out;
- }
-
- if (pos >= rc) {
- rc = 0;
- goto out;
- }
-
- rc = cfs_trace_copyout_string(buffer, nob, buf + pos, NULL);
- out:
- kfree(buf);
- return rc;
-}
-
-static int proc_cpt_table(struct ctl_table *table, int write,
- void __user *buffer, size_t *lenp, loff_t *ppos)
-{
- return lprocfs_call_handler(table->data, write, ppos, buffer, lenp,
- __proc_cpt_table);
-}
-
-static struct ctl_table lnet_table[] = {
- {
- .procname = "debug",
- .data = &libcfs_debug,
- .maxlen = sizeof(int),
- .mode = 0644,
- .proc_handler = &proc_dobitmasks,
- },
- {
- .procname = "subsystem_debug",
- .data = &libcfs_subsystem_debug,
- .maxlen = sizeof(int),
- .mode = 0644,
- .proc_handler = &proc_dobitmasks,
- },
- {
- .procname = "printk",
- .data = &libcfs_printk,
- .maxlen = sizeof(int),
- .mode = 0644,
- .proc_handler = &proc_dobitmasks,
- },
- {
- .procname = "cpu_partition_table",
- .maxlen = 128,
- .mode = 0444,
- .proc_handler = &proc_cpt_table,
- },
- {
- .procname = "debug_log_upcall",
- .data = lnet_debug_log_upcall,
- .maxlen = sizeof(lnet_debug_log_upcall),
- .mode = 0644,
- .proc_handler = &proc_dostring,
- },
- {
- .procname = "catastrophe",
- .data = &libcfs_catastrophe,
- .maxlen = sizeof(int),
- .mode = 0444,
- .proc_handler = &proc_dointvec,
- },
- {
- .procname = "dump_kernel",
- .maxlen = 256,
- .mode = 0200,
- .proc_handler = &proc_dump_kernel,
- },
- {
- .procname = "daemon_file",
- .mode = 0644,
- .maxlen = 256,
- .proc_handler = &proc_daemon_file,
- },
- {
- .procname = "force_lbug",
- .data = NULL,
- .maxlen = 0,
- .mode = 0200,
- .proc_handler = &libcfs_force_lbug
- },
- {
- .procname = "fail_loc",
- .data = &cfs_fail_loc,
- .maxlen = sizeof(cfs_fail_loc),
- .mode = 0644,
- .proc_handler = &proc_fail_loc
- },
- {
- .procname = "fail_val",
- .data = &cfs_fail_val,
- .maxlen = sizeof(int),
- .mode = 0644,
- .proc_handler = &proc_dointvec
- },
- {
- .procname = "fail_err",
- .data = &cfs_fail_err,
- .maxlen = sizeof(cfs_fail_err),
- .mode = 0644,
- .proc_handler = &proc_dointvec,
- },
- {
- }
-};
-
-static const struct lnet_debugfs_symlink_def lnet_debugfs_symlinks[] = {
- { "console_ratelimit",
- "/sys/module/libcfs/parameters/libcfs_console_ratelimit"},
- { "debug_path",
- "/sys/module/libcfs/parameters/libcfs_debug_file_path"},
- { "panic_on_lbug",
- "/sys/module/libcfs/parameters/libcfs_panic_on_lbug"},
- { "libcfs_console_backoff",
- "/sys/module/libcfs/parameters/libcfs_console_backoff"},
- { "debug_mb",
- "/sys/module/libcfs/parameters/libcfs_debug_mb"},
- { "console_min_delay_centisecs",
- "/sys/module/libcfs/parameters/libcfs_console_min_delay"},
- { "console_max_delay_centisecs",
- "/sys/module/libcfs/parameters/libcfs_console_max_delay"},
- {},
-};
-
-static ssize_t lnet_debugfs_read(struct file *filp, char __user *buf,
- size_t count, loff_t *ppos)
-{
- struct ctl_table *table = filp->private_data;
- int error;
-
- error = table->proc_handler(table, 0, (void __user *)buf, &count, ppos);
- if (!error)
- error = count;
-
- return error;
-}
-
-static ssize_t lnet_debugfs_write(struct file *filp, const char __user *buf,
- size_t count, loff_t *ppos)
-{
- struct ctl_table *table = filp->private_data;
- int error;
-
- error = table->proc_handler(table, 1, (void __user *)buf, &count, ppos);
- if (!error)
- error = count;
-
- return error;
-}
-
-static const struct file_operations lnet_debugfs_file_operations_rw = {
- .open = simple_open,
- .read = lnet_debugfs_read,
- .write = lnet_debugfs_write,
- .llseek = default_llseek,
-};
-
-static const struct file_operations lnet_debugfs_file_operations_ro = {
- .open = simple_open,
- .read = lnet_debugfs_read,
- .llseek = default_llseek,
-};
-
-static const struct file_operations lnet_debugfs_file_operations_wo = {
- .open = simple_open,
- .write = lnet_debugfs_write,
- .llseek = default_llseek,
-};
-
-static const struct file_operations *lnet_debugfs_fops_select(umode_t mode)
-{
- if (!(mode & 0222))
- return &lnet_debugfs_file_operations_ro;
-
- if (!(mode & 0444))
- return &lnet_debugfs_file_operations_wo;
-
- return &lnet_debugfs_file_operations_rw;
-}
-
-void lustre_insert_debugfs(struct ctl_table *table,
- const struct lnet_debugfs_symlink_def *symlinks)
-{
- if (!lnet_debugfs_root)
- lnet_debugfs_root = debugfs_create_dir("lnet", NULL);
-
-	/* Even if we cannot create it, just ignore it altogether */
- if (IS_ERR_OR_NULL(lnet_debugfs_root))
- return;
-
-	/* We don't save the dentries returned by the next two calls,
-	 * because we never call debugfs_remove() on individual entries,
-	 * but rather debugfs_remove_recursive() on the whole directory
-	 */
- for (; table->procname; table++)
- debugfs_create_file(table->procname, table->mode,
- lnet_debugfs_root, table,
- lnet_debugfs_fops_select(table->mode));
-
- for (; symlinks && symlinks->name; symlinks++)
- debugfs_create_symlink(symlinks->name, lnet_debugfs_root,
- symlinks->target);
-}
-EXPORT_SYMBOL_GPL(lustre_insert_debugfs);
-
-static void lustre_remove_debugfs(void)
-{
- debugfs_remove_recursive(lnet_debugfs_root);
-
- lnet_debugfs_root = NULL;
-}
-
-static int libcfs_init(void)
-{
- int rc;
-
- rc = libcfs_debug_init(5 * 1024 * 1024);
- if (rc < 0) {
- pr_err("LustreError: libcfs_debug_init: %d\n", rc);
- return rc;
- }
-
- rc = cfs_cpu_init();
- if (rc)
- goto cleanup_debug;
-
- rc = misc_register(&libcfs_dev);
- if (rc) {
- CERROR("misc_register: error %d\n", rc);
- goto cleanup_cpu;
- }
-
- cfs_rehash_wq = alloc_workqueue("cfs_rh", WQ_SYSFS, 4);
- if (!cfs_rehash_wq) {
- CERROR("Failed to start rehash workqueue.\n");
- rc = -ENOMEM;
- goto cleanup_deregister;
- }
-
- rc = cfs_crypto_register();
- if (rc) {
- CERROR("cfs_crypto_register: error %d\n", rc);
- goto cleanup_deregister;
- }
-
- lustre_insert_debugfs(lnet_table, lnet_debugfs_symlinks);
-
- CDEBUG(D_OTHER, "portals setup OK\n");
- return 0;
- cleanup_deregister:
- misc_deregister(&libcfs_dev);
-cleanup_cpu:
- cfs_cpu_fini();
- cleanup_debug:
- libcfs_debug_cleanup();
- return rc;
-}
-
-static void libcfs_exit(void)
-{
- int rc;
-
- lustre_remove_debugfs();
-
- if (cfs_rehash_wq) {
- destroy_workqueue(cfs_rehash_wq);
- cfs_rehash_wq = NULL;
- }
-
- cfs_crypto_unregister();
-
- misc_deregister(&libcfs_dev);
-
- cfs_cpu_fini();
-
- rc = libcfs_debug_cleanup();
- if (rc)
- pr_err("LustreError: libcfs_debug_cleanup: %d\n", rc);
-}
-
-MODULE_AUTHOR("OpenSFS, Inc. <http://www.lustre.org/>");
-MODULE_DESCRIPTION("Lustre helper library");
-MODULE_VERSION(LIBCFS_VERSION);
-MODULE_LICENSE("GPL");
-
-module_init(libcfs_init);
-module_exit(libcfs_exit);
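The debugfs glue above reuses struct ctl_table entries as file descriptions: lnet_debugfs_read()/lnet_debugfs_write() forward to the entry's proc_handler, and lnet_debugfs_fops_select() picks read-only, write-only or read-write file_operations from the mode bits. A sketch of one additional (hypothetical) entry wired through the same machinery:

static int example_flag;	/* hypothetical backing variable */

static struct ctl_table example_table[] = {
	{
		.procname	= "example_flag",	/* hypothetical */
		.data		= &example_flag,
		.maxlen		= sizeof(int),
		.mode		= 0644,	/* 0444 and 0222 both set: rw fops */
		.proc_handler	= &proc_dointvec,
	},
	{ }
};

/* lustre_insert_debugfs(example_table, NULL) would then create
 * /sys/kernel/debug/lnet/example_flag, served through
 * lnet_debugfs_file_operations_rw.
 */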
diff --git a/drivers/staging/lustre/lnet/libcfs/tracefile.c b/drivers/staging/lustre/lnet/libcfs/tracefile.c
deleted file mode 100644
index 4affca750bc5..000000000000
--- a/drivers/staging/lustre/lnet/libcfs/tracefile.c
+++ /dev/null
@@ -1,1191 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/*
- * GPL HEADER START
- *
- * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 only,
- * as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * General Public License version 2 for more details (a copy is included
- * in the LICENSE file that accompanied this code).
- *
- * You should have received a copy of the GNU General Public License
- * version 2 along with this program; If not, see
- * http://www.gnu.org/licenses/gpl-2.0.html
- *
- * GPL HEADER END
- */
-/*
- * Copyright (c) 2008, 2010, Oracle and/or its affiliates. All rights reserved.
- * Use is subject to license terms.
- *
- * Copyright (c) 2012, Intel Corporation.
- */
-/*
- * This file is part of Lustre, http://www.lustre.org/
- * Lustre is a trademark of Sun Microsystems, Inc.
- *
- * libcfs/libcfs/tracefile.c
- *
- * Author: Zach Brown <zab@clusterfs.com>
- * Author: Phil Schwan <phil@clusterfs.com>
- */
-
-#define DEBUG_SUBSYSTEM S_LNET
-#define LUSTRE_TRACEFILE_PRIVATE
-#define pr_fmt(fmt) "Lustre: " fmt
-#include "tracefile.h"
-
-#include <linux/libcfs/libcfs.h>
-
-/* XXX move things up to the top, comment */
-union cfs_trace_data_union (*cfs_trace_data[TCD_MAX_TYPES])[NR_CPUS] __cacheline_aligned;
-
-char cfs_tracefile[TRACEFILE_NAME_SIZE];
-long long cfs_tracefile_size = CFS_TRACEFILE_SIZE;
-static struct tracefiled_ctl trace_tctl;
-static DEFINE_MUTEX(cfs_trace_thread_mutex);
-static int thread_running;
-
-static atomic_t cfs_tage_allocated = ATOMIC_INIT(0);
-
-struct page_collection {
- struct list_head pc_pages;
-	/*
-	 * if this flag is set, collect_pages() will spill both
-	 * ->tcd_daemon_pages and ->tcd_pages onto ->pc_pages. Otherwise,
-	 * only ->tcd_pages are spilled.
-	 */
- int pc_want_daemon_pages;
-};
-
-struct tracefiled_ctl {
- struct completion tctl_start;
- struct completion tctl_stop;
- wait_queue_head_t tctl_waitq;
- pid_t tctl_pid;
- atomic_t tctl_shutdown;
-};
-
-/*
- * small data-structure for each page owned by tracefiled.
- */
-struct cfs_trace_page {
- /*
- * page itself
- */
- struct page *page;
- /*
- * linkage into one of the lists in trace_data_union or
- * page_collection
- */
- struct list_head linkage;
- /*
- * number of bytes used within this page
- */
- unsigned int used;
- /*
- * cpu that owns this page
- */
- unsigned short cpu;
- /*
- * type(context) of this page
- */
- unsigned short type;
-};
-
-static void put_pages_on_tcd_daemon_list(struct page_collection *pc,
- struct cfs_trace_cpu_data *tcd);
-
-static inline struct cfs_trace_page *
-cfs_tage_from_list(struct list_head *list)
-{
- return list_entry(list, struct cfs_trace_page, linkage);
-}
-
-static struct cfs_trace_page *cfs_tage_alloc(gfp_t gfp)
-{
- struct page *page;
- struct cfs_trace_page *tage;
-
- /* My caller is trying to free memory */
- if (!in_interrupt() && memory_pressure_get())
- return NULL;
-
-	/*
-	 * Don't spam the console with allocation failures: they will be
-	 * reported by the upper layer anyway.
-	 */
- gfp |= __GFP_NOWARN;
- page = alloc_page(gfp);
- if (!page)
- return NULL;
-
- tage = kmalloc(sizeof(*tage), gfp);
- if (!tage) {
- __free_page(page);
- return NULL;
- }
-
- tage->page = page;
- atomic_inc(&cfs_tage_allocated);
- return tage;
-}
-
-static void cfs_tage_free(struct cfs_trace_page *tage)
-{
- __free_page(tage->page);
- kfree(tage);
- atomic_dec(&cfs_tage_allocated);
-}
-
-static void cfs_tage_to_tail(struct cfs_trace_page *tage,
- struct list_head *queue)
-{
- list_move_tail(&tage->linkage, queue);
-}
-
-int cfs_trace_refill_stock(struct cfs_trace_cpu_data *tcd, gfp_t gfp,
- struct list_head *stock)
-{
- int i;
-
- /*
- * XXX nikita: do NOT call portals_debug_msg() (CDEBUG/ENTRY/EXIT)
- * from here: this will lead to infinite recursion.
- */
-
- for (i = 0; i + tcd->tcd_cur_stock_pages < TCD_STOCK_PAGES ; ++i) {
- struct cfs_trace_page *tage;
-
- tage = cfs_tage_alloc(gfp);
- if (!tage)
- break;
- list_add_tail(&tage->linkage, stock);
- }
- return i;
-}
-
-/* return a page that has 'len' bytes left at the end */
-static struct cfs_trace_page *
-cfs_trace_get_tage_try(struct cfs_trace_cpu_data *tcd, unsigned long len)
-{
- struct cfs_trace_page *tage;
-
- if (tcd->tcd_cur_pages > 0) {
- __LASSERT(!list_empty(&tcd->tcd_pages));
- tage = cfs_tage_from_list(tcd->tcd_pages.prev);
- if (tage->used + len <= PAGE_SIZE)
- return tage;
- }
-
- if (tcd->tcd_cur_pages < tcd->tcd_max_pages) {
- if (tcd->tcd_cur_stock_pages > 0) {
- tage = cfs_tage_from_list(tcd->tcd_stock_pages.prev);
- --tcd->tcd_cur_stock_pages;
- list_del_init(&tage->linkage);
- } else {
- tage = cfs_tage_alloc(GFP_ATOMIC);
- if (unlikely(!tage)) {
- if (!memory_pressure_get() || in_interrupt())
- pr_warn_ratelimited("cannot allocate a tage (%ld)\n",
- tcd->tcd_cur_pages);
- return NULL;
- }
- }
-
- tage->used = 0;
- tage->cpu = smp_processor_id();
- tage->type = tcd->tcd_type;
- list_add_tail(&tage->linkage, &tcd->tcd_pages);
- tcd->tcd_cur_pages++;
-
- if (tcd->tcd_cur_pages > 8 && thread_running) {
- struct tracefiled_ctl *tctl = &trace_tctl;
- /*
- * wake up tracefiled to process some pages.
- */
- wake_up(&tctl->tctl_waitq);
- }
- return tage;
- }
- return NULL;
-}
-
-static void cfs_tcd_shrink(struct cfs_trace_cpu_data *tcd)
-{
- int pgcount = tcd->tcd_cur_pages / 10;
- struct page_collection pc;
- struct cfs_trace_page *tage;
- struct cfs_trace_page *tmp;
-
- /*
- * XXX nikita: do NOT call portals_debug_msg() (CDEBUG/ENTRY/EXIT)
- * from here: this will lead to infinite recursion.
- */
-
- pr_warn_ratelimited("debug daemon buffer overflowed; discarding 10%% of pages (%d of %ld)\n",
- pgcount + 1, tcd->tcd_cur_pages);
-
- INIT_LIST_HEAD(&pc.pc_pages);
-
- list_for_each_entry_safe(tage, tmp, &tcd->tcd_pages, linkage) {
- if (!pgcount--)
- break;
-
- list_move_tail(&tage->linkage, &pc.pc_pages);
- tcd->tcd_cur_pages--;
- }
- put_pages_on_tcd_daemon_list(&pc, tcd);
-}
-
-/* return a page that has 'len' bytes left at the end */
-static struct cfs_trace_page *cfs_trace_get_tage(struct cfs_trace_cpu_data *tcd,
- unsigned long len)
-{
- struct cfs_trace_page *tage;
-
- /*
- * XXX nikita: do NOT call portals_debug_msg() (CDEBUG/ENTRY/EXIT)
- * from here: this will lead to infinite recursion.
- */
-
- if (len > PAGE_SIZE) {
- pr_err("cowardly refusing to write %lu bytes in a page\n", len);
- return NULL;
- }
-
- tage = cfs_trace_get_tage_try(tcd, len);
- if (tage)
- return tage;
- if (thread_running)
- cfs_tcd_shrink(tcd);
- if (tcd->tcd_cur_pages > 0) {
- tage = cfs_tage_from_list(tcd->tcd_pages.next);
- tage->used = 0;
- cfs_tage_to_tail(tage, &tcd->tcd_pages);
- }
- return tage;
-}
-
-int libcfs_debug_msg(struct libcfs_debug_msg_data *msgdata,
- const char *format, ...)
-{
- va_list args;
- int rc;
-
- va_start(args, format);
- rc = libcfs_debug_vmsg2(msgdata, format, args, NULL);
- va_end(args);
-
- return rc;
-}
-EXPORT_SYMBOL(libcfs_debug_msg);
-
-int libcfs_debug_vmsg2(struct libcfs_debug_msg_data *msgdata,
- const char *format1, va_list args,
- const char *format2, ...)
-{
- struct cfs_trace_cpu_data *tcd = NULL;
- struct ptldebug_header header = { 0 };
- struct cfs_trace_page *tage;
- /* string_buf is used only if tcd != NULL, and is always set then */
- char *string_buf = NULL;
- char *debug_buf;
- int known_size;
- int needed = 85; /* average message length */
- int max_nob;
- va_list ap;
- int depth;
- int i;
- int remain;
- int mask = msgdata->msg_mask;
- const char *file = kbasename(msgdata->msg_file);
- struct cfs_debug_limit_state *cdls = msgdata->msg_cdls;
-
- tcd = cfs_trace_get_tcd();
-
- /* cfs_trace_get_tcd() grabs a lock, which disables preemption and
- * pins us to a particular CPU. This avoids an smp_processor_id()
- * warning on Linux when debugging is enabled.
- */
- cfs_set_ptldebug_header(&header, msgdata, CDEBUG_STACK());
-
- if (!tcd) /* arch may not log in IRQ context */
- goto console;
-
- if (!tcd->tcd_cur_pages)
- header.ph_flags |= PH_FLAG_FIRST_RECORD;
-
- if (tcd->tcd_shutting_down) {
- cfs_trace_put_tcd(tcd);
- tcd = NULL;
- goto console;
- }
-
- depth = __current_nesting_level();
- known_size = strlen(file) + 1 + depth;
- if (msgdata->msg_fn)
- known_size += strlen(msgdata->msg_fn) + 1;
-
- if (libcfs_debug_binary)
- known_size += sizeof(header);
-
-	/*
-	 * Two passes are used because vsnprintf returns the real size
-	 * required for the output _without_ the terminating NUL, so a
-	 * second attempt is made if 'needed' turns out to be too small
-	 * for this format.
-	 */
- for (i = 0; i < 2; i++) {
- tage = cfs_trace_get_tage(tcd, needed + known_size + 1);
- if (!tage) {
- if (needed + known_size > PAGE_SIZE)
- mask |= D_ERROR;
-
- cfs_trace_put_tcd(tcd);
- tcd = NULL;
- goto console;
- }
-
- string_buf = (char *)page_address(tage->page) +
- tage->used + known_size;
-
- max_nob = PAGE_SIZE - tage->used - known_size;
- if (max_nob <= 0) {
- pr_emerg("negative max_nob: %d\n", max_nob);
- mask |= D_ERROR;
- cfs_trace_put_tcd(tcd);
- tcd = NULL;
- goto console;
- }
-
- needed = 0;
- if (format1) {
- va_copy(ap, args);
- needed = vsnprintf(string_buf, max_nob, format1, ap);
- va_end(ap);
- }
-
- if (format2) {
- remain = max_nob - needed;
- if (remain < 0)
- remain = 0;
-
- va_start(ap, format2);
- needed += vsnprintf(string_buf + needed, remain,
- format2, ap);
- va_end(ap);
- }
-
- if (needed < max_nob) /* well. printing ok.. */
- break;
- }
-
- if (*(string_buf + needed - 1) != '\n')
- pr_info("format at %s:%d:%s doesn't end in newline\n", file,
- msgdata->msg_line, msgdata->msg_fn);
-
- header.ph_len = known_size + needed;
- debug_buf = (char *)page_address(tage->page) + tage->used;
-
- if (libcfs_debug_binary) {
- memcpy(debug_buf, &header, sizeof(header));
- tage->used += sizeof(header);
- debug_buf += sizeof(header);
- }
-
- /* indent message according to the nesting level */
- while (depth-- > 0) {
- *(debug_buf++) = '.';
- ++tage->used;
- }
-
- strcpy(debug_buf, file);
- tage->used += strlen(file) + 1;
- debug_buf += strlen(file) + 1;
-
- if (msgdata->msg_fn) {
- strcpy(debug_buf, msgdata->msg_fn);
- tage->used += strlen(msgdata->msg_fn) + 1;
- debug_buf += strlen(msgdata->msg_fn) + 1;
- }
-
- __LASSERT(debug_buf == string_buf);
-
- tage->used += needed;
- __LASSERT(tage->used <= PAGE_SIZE);
-
-console:
- if (!(mask & libcfs_printk)) {
- /* no console output requested */
- if (tcd)
- cfs_trace_put_tcd(tcd);
- return 1;
- }
-
- if (cdls) {
- if (libcfs_console_ratelimit &&
- cdls->cdls_next && /* not first time ever */
- !cfs_time_after(cfs_time_current(), cdls->cdls_next)) {
- /* skipping a console message */
- cdls->cdls_count++;
- if (tcd)
- cfs_trace_put_tcd(tcd);
- return 1;
- }
-
- if (cfs_time_after(cfs_time_current(),
- cdls->cdls_next + libcfs_console_max_delay +
- 10 * HZ)) {
- /* last timeout was a long time ago */
- cdls->cdls_delay /= libcfs_console_backoff * 4;
- } else {
- cdls->cdls_delay *= libcfs_console_backoff;
- }
-
- if (cdls->cdls_delay < libcfs_console_min_delay)
- cdls->cdls_delay = libcfs_console_min_delay;
- else if (cdls->cdls_delay > libcfs_console_max_delay)
- cdls->cdls_delay = libcfs_console_max_delay;
-
- /* ensure cdls_next is never zero after it's been seen */
- cdls->cdls_next = (cfs_time_current() + cdls->cdls_delay) | 1;
- }
-
- if (tcd) {
- cfs_print_to_console(&header, mask, string_buf, needed, file,
- msgdata->msg_fn);
- cfs_trace_put_tcd(tcd);
- } else {
- string_buf = cfs_trace_get_console_buffer();
-
- needed = 0;
- if (format1) {
- va_copy(ap, args);
- needed = vsnprintf(string_buf,
- CFS_TRACE_CONSOLE_BUFFER_SIZE,
- format1, ap);
- va_end(ap);
- }
- if (format2) {
- remain = CFS_TRACE_CONSOLE_BUFFER_SIZE - needed;
- if (remain > 0) {
- va_start(ap, format2);
- needed += vsnprintf(string_buf + needed, remain,
- format2, ap);
- va_end(ap);
- }
- }
- cfs_print_to_console(&header, mask,
- string_buf, needed, file, msgdata->msg_fn);
-
- put_cpu();
- }
-
- if (cdls && cdls->cdls_count) {
- string_buf = cfs_trace_get_console_buffer();
-
- needed = snprintf(string_buf, CFS_TRACE_CONSOLE_BUFFER_SIZE,
- "Skipped %d previous similar message%s\n",
- cdls->cdls_count,
- (cdls->cdls_count > 1) ? "s" : "");
-
- cfs_print_to_console(&header, mask,
- string_buf, needed, file, msgdata->msg_fn);
-
- put_cpu();
- cdls->cdls_count = 0;
- }
-
- return 0;
-}
-EXPORT_SYMBOL(libcfs_debug_vmsg2);
-
-void
-cfs_trace_assertion_failed(const char *str,
- struct libcfs_debug_msg_data *msgdata)
-{
- struct ptldebug_header hdr;
-
- libcfs_panic_in_progress = 1;
- libcfs_catastrophe = 1;
- mb();
-
- cfs_set_ptldebug_header(&hdr, msgdata, CDEBUG_STACK());
-
- cfs_print_to_console(&hdr, D_EMERG, str, strlen(str),
- msgdata->msg_file, msgdata->msg_fn);
-
- panic("Lustre debug assertion failure\n");
-
- /* not reached */
-}
-
-static void
-panic_collect_pages(struct page_collection *pc)
-{
- /* Do the collect_pages job on a single CPU: assumes that all other
- * CPUs have been stopped during a panic. If this isn't true for some
- * arch, this will have to be implemented separately in each arch.
- */
- struct cfs_trace_cpu_data *tcd;
- int i;
- int j;
-
- INIT_LIST_HEAD(&pc->pc_pages);
-
- cfs_tcd_for_each(tcd, i, j) {
- list_splice_init(&tcd->tcd_pages, &pc->pc_pages);
- tcd->tcd_cur_pages = 0;
-
- if (pc->pc_want_daemon_pages) {
- list_splice_init(&tcd->tcd_daemon_pages, &pc->pc_pages);
- tcd->tcd_cur_daemon_pages = 0;
- }
- }
-}
-
-static void collect_pages_on_all_cpus(struct page_collection *pc)
-{
- struct cfs_trace_cpu_data *tcd;
- int i, cpu;
-
- for_each_possible_cpu(cpu) {
- cfs_tcd_for_each_type_lock(tcd, i, cpu) {
- list_splice_init(&tcd->tcd_pages, &pc->pc_pages);
- tcd->tcd_cur_pages = 0;
- if (pc->pc_want_daemon_pages) {
- list_splice_init(&tcd->tcd_daemon_pages,
- &pc->pc_pages);
- tcd->tcd_cur_daemon_pages = 0;
- }
- }
- }
-}
-
-static void collect_pages(struct page_collection *pc)
-{
- INIT_LIST_HEAD(&pc->pc_pages);
-
- if (libcfs_panic_in_progress)
- panic_collect_pages(pc);
- else
- collect_pages_on_all_cpus(pc);
-}
-
-static void put_pages_back_on_all_cpus(struct page_collection *pc)
-{
- struct cfs_trace_cpu_data *tcd;
- struct list_head *cur_head;
- struct cfs_trace_page *tage;
- struct cfs_trace_page *tmp;
- int i, cpu;
-
- for_each_possible_cpu(cpu) {
- cfs_tcd_for_each_type_lock(tcd, i, cpu) {
- cur_head = tcd->tcd_pages.next;
-
- list_for_each_entry_safe(tage, tmp, &pc->pc_pages,
- linkage) {
- __LASSERT_TAGE_INVARIANT(tage);
-
- if (tage->cpu != cpu || tage->type != i)
- continue;
-
- cfs_tage_to_tail(tage, cur_head);
- tcd->tcd_cur_pages++;
- }
- }
- }
-}
-
-static void put_pages_back(struct page_collection *pc)
-{
- if (!libcfs_panic_in_progress)
- put_pages_back_on_all_cpus(pc);
-}
-
-/* Add pages to a per-cpu debug daemon ringbuffer. This buffer makes sure that
- * we have a good amount of data at all times for dumping during an LBUG, even
- * if we have been steadily writing (and otherwise discarding) pages via the
- * debug daemon.
- */
-static void put_pages_on_tcd_daemon_list(struct page_collection *pc,
- struct cfs_trace_cpu_data *tcd)
-{
- struct cfs_trace_page *tage;
- struct cfs_trace_page *tmp;
-
- list_for_each_entry_safe(tage, tmp, &pc->pc_pages, linkage) {
- __LASSERT_TAGE_INVARIANT(tage);
-
- if (tage->cpu != tcd->tcd_cpu || tage->type != tcd->tcd_type)
- continue;
-
- cfs_tage_to_tail(tage, &tcd->tcd_daemon_pages);
- tcd->tcd_cur_daemon_pages++;
-
- if (tcd->tcd_cur_daemon_pages > tcd->tcd_max_pages) {
- struct cfs_trace_page *victim;
-
- __LASSERT(!list_empty(&tcd->tcd_daemon_pages));
- victim = cfs_tage_from_list(tcd->tcd_daemon_pages.next);
-
- __LASSERT_TAGE_INVARIANT(victim);
-
- list_del(&victim->linkage);
- cfs_tage_free(victim);
- tcd->tcd_cur_daemon_pages--;
- }
- }
-}
-
-static void put_pages_on_daemon_list(struct page_collection *pc)
-{
- struct cfs_trace_cpu_data *tcd;
- int i, cpu;
-
- for_each_possible_cpu(cpu) {
- cfs_tcd_for_each_type_lock(tcd, i, cpu)
- put_pages_on_tcd_daemon_list(pc, tcd);
- }
-}
-
-void cfs_trace_debug_print(void)
-{
- struct page_collection pc;
- struct cfs_trace_page *tage;
- struct cfs_trace_page *tmp;
-
- pc.pc_want_daemon_pages = 1;
- collect_pages(&pc);
- list_for_each_entry_safe(tage, tmp, &pc.pc_pages, linkage) {
- char *p, *file, *fn;
- struct page *page;
-
- __LASSERT_TAGE_INVARIANT(tage);
-
- page = tage->page;
- p = page_address(page);
- while (p < ((char *)page_address(page) + tage->used)) {
- struct ptldebug_header *hdr;
- int len;
-
- hdr = (void *)p;
- p += sizeof(*hdr);
- file = p;
- p += strlen(file) + 1;
- fn = p;
- p += strlen(fn) + 1;
- len = hdr->ph_len - (int)(p - (char *)hdr);
-
- cfs_print_to_console(hdr, D_EMERG, p, len, file, fn);
-
- p += len;
- }
-
- list_del(&tage->linkage);
- cfs_tage_free(tage);
- }
-}
-
-int cfs_tracefile_dump_all_pages(char *filename)
-{
- struct page_collection pc;
- struct file *filp;
- struct cfs_trace_page *tage;
- struct cfs_trace_page *tmp;
- char *buf;
- mm_segment_t __oldfs;
- int rc;
-
- cfs_tracefile_write_lock();
-
- filp = filp_open(filename, O_CREAT | O_EXCL | O_WRONLY | O_LARGEFILE,
- 0600);
- if (IS_ERR(filp)) {
- rc = PTR_ERR(filp);
- filp = NULL;
- pr_err("LustreError: can't open %s for dump: rc %d\n",
- filename, rc);
- goto out;
- }
-
- pc.pc_want_daemon_pages = 1;
- collect_pages(&pc);
- if (list_empty(&pc.pc_pages)) {
- rc = 0;
- goto close;
- }
- __oldfs = get_fs();
- set_fs(get_ds());
-
-	/* OK, for now just write the pages. In the future we'll build
-	 * iobufs from the pages and call generic_direct_IO
-	 */
- list_for_each_entry_safe(tage, tmp, &pc.pc_pages, linkage) {
- __LASSERT_TAGE_INVARIANT(tage);
-
- buf = kmap(tage->page);
- rc = kernel_write(filp, buf, tage->used, &filp->f_pos);
- kunmap(tage->page);
-
- if (rc != (int)tage->used) {
- pr_warn("wanted to write %u but wrote %d\n", tage->used,
- rc);
- put_pages_back(&pc);
- __LASSERT(list_empty(&pc.pc_pages));
- break;
- }
- list_del(&tage->linkage);
- cfs_tage_free(tage);
- }
- set_fs(__oldfs);
- rc = vfs_fsync(filp, 1);
- if (rc)
- pr_err("sync returns %d\n", rc);
-close:
- filp_close(filp, NULL);
-out:
- cfs_tracefile_write_unlock();
- return rc;
-}
-
-void cfs_trace_flush_pages(void)
-{
- struct page_collection pc;
- struct cfs_trace_page *tage;
- struct cfs_trace_page *tmp;
-
- pc.pc_want_daemon_pages = 1;
- collect_pages(&pc);
- list_for_each_entry_safe(tage, tmp, &pc.pc_pages, linkage) {
- __LASSERT_TAGE_INVARIANT(tage);
-
- list_del(&tage->linkage);
- cfs_tage_free(tage);
- }
-}
-
-int cfs_trace_copyin_string(char *knl_buffer, int knl_buffer_nob,
- const char __user *usr_buffer, int usr_buffer_nob)
-{
- int nob;
-
- if (usr_buffer_nob > knl_buffer_nob)
- return -EOVERFLOW;
-
- if (copy_from_user((void *)knl_buffer,
- usr_buffer, usr_buffer_nob))
- return -EFAULT;
-
- nob = strnlen(knl_buffer, usr_buffer_nob);
- while (--nob >= 0) /* strip trailing whitespace */
- if (!isspace(knl_buffer[nob]))
- break;
-
- if (nob < 0) /* empty string */
- return -EINVAL;
-
- if (nob == knl_buffer_nob) /* no space to terminate */
- return -EOVERFLOW;
-
- knl_buffer[nob + 1] = 0; /* terminate */
- return 0;
-}
-EXPORT_SYMBOL(cfs_trace_copyin_string);
-
-int cfs_trace_copyout_string(char __user *usr_buffer, int usr_buffer_nob,
- const char *knl_buffer, char *append)
-{
- /*
- * NB if 'append' != NULL, it's a single character to append to the
- * copied out string - usually "\n" or "" (i.e. a terminating zero byte)
- */
- int nob = strlen(knl_buffer);
-
- if (nob > usr_buffer_nob)
- nob = usr_buffer_nob;
-
- if (copy_to_user(usr_buffer, knl_buffer, nob))
- return -EFAULT;
-
- if (append && nob < usr_buffer_nob) {
- if (copy_to_user(usr_buffer + nob, append, 1))
- return -EFAULT;
-
- nob++;
- }
-
- return nob;
-}
-EXPORT_SYMBOL(cfs_trace_copyout_string);
-
-int cfs_trace_allocate_string_buffer(char **str, int nob)
-{
- if (nob > 2 * PAGE_SIZE) /* string must be "sensible" */
- return -EINVAL;
-
- *str = kmalloc(nob, GFP_KERNEL | __GFP_ZERO);
- if (!*str)
- return -ENOMEM;
-
- return 0;
-}
-
-int cfs_trace_dump_debug_buffer_usrstr(void __user *usr_str, int usr_str_nob)
-{
- char *str;
- int rc;
-
- rc = cfs_trace_allocate_string_buffer(&str, usr_str_nob + 1);
- if (rc)
- return rc;
-
- rc = cfs_trace_copyin_string(str, usr_str_nob + 1,
- usr_str, usr_str_nob);
- if (rc)
- goto out;
-
- if (str[0] != '/') {
- rc = -EINVAL;
- goto out;
- }
- rc = cfs_tracefile_dump_all_pages(str);
-out:
- kfree(str);
- return rc;
-}
-
-int cfs_trace_daemon_command(char *str)
-{
- int rc = 0;
-
- cfs_tracefile_write_lock();
-
- if (!strcmp(str, "stop")) {
- cfs_tracefile_write_unlock();
- cfs_trace_stop_thread();
- cfs_tracefile_write_lock();
- memset(cfs_tracefile, 0, sizeof(cfs_tracefile));
-
- } else if (!strncmp(str, "size=", 5)) {
- unsigned long tmp;
-
- rc = kstrtoul(str + 5, 10, &tmp);
- if (!rc) {
- if (tmp < 10 || tmp > 20480)
- cfs_tracefile_size = CFS_TRACEFILE_SIZE;
- else
- cfs_tracefile_size = tmp << 20;
- }
- } else if (strlen(str) >= sizeof(cfs_tracefile)) {
- rc = -ENAMETOOLONG;
- } else if (str[0] != '/') {
- rc = -EINVAL;
- } else {
- strcpy(cfs_tracefile, str);
-
- pr_info("debug daemon will attempt to start writing to %s (%lukB max)\n",
- cfs_tracefile,
- (long)(cfs_tracefile_size >> 10));
-
- cfs_trace_start_thread();
- }
-
- cfs_tracefile_write_unlock();
- return rc;
-}
-
-int cfs_trace_daemon_command_usrstr(void __user *usr_str, int usr_str_nob)
-{
- char *str;
- int rc;
-
- rc = cfs_trace_allocate_string_buffer(&str, usr_str_nob + 1);
- if (rc)
- return rc;
-
- rc = cfs_trace_copyin_string(str, usr_str_nob + 1,
- usr_str, usr_str_nob);
- if (!rc)
- rc = cfs_trace_daemon_command(str);
-
- kfree(str);
- return rc;
-}
-
-int cfs_trace_set_debug_mb(int mb)
-{
- int i;
- int j;
- int pages;
- int limit = cfs_trace_max_debug_mb();
- struct cfs_trace_cpu_data *tcd;
-
- if (mb < num_possible_cpus()) {
- pr_warn("%d MB is too small for debug buffer size, setting it to %d MB.\n",
- mb, num_possible_cpus());
- mb = num_possible_cpus();
- }
-
- if (mb > limit) {
- pr_warn("%d MB is too large for debug buffer size, setting it to %d MB.\n",
- mb, limit);
- mb = limit;
- }
-
- mb /= num_possible_cpus();
- pages = mb << (20 - PAGE_SHIFT);
-
- cfs_tracefile_write_lock();
-
- cfs_tcd_for_each(tcd, i, j)
- tcd->tcd_max_pages = (pages * tcd->tcd_pages_factor) / 100;
-
- cfs_tracefile_write_unlock();
-
- return 0;
-}
-
-int cfs_trace_get_debug_mb(void)
-{
- int i;
- int j;
- struct cfs_trace_cpu_data *tcd;
- int total_pages = 0;
-
- cfs_tracefile_read_lock();
-
- cfs_tcd_for_each(tcd, i, j)
- total_pages += tcd->tcd_max_pages;
-
- cfs_tracefile_read_unlock();
-
- return (total_pages >> (20 - PAGE_SHIFT)) + 1;
-}
-
-static int tracefiled(void *arg)
-{
- struct page_collection pc;
- struct tracefiled_ctl *tctl = arg;
- struct cfs_trace_page *tage;
- struct cfs_trace_page *tmp;
- struct file *filp;
- char *buf;
- int last_loop = 0;
- int rc;
-
- /* we're started late enough that we pick up init's fs context */
- /* this is so broken in uml? what on earth is going on? */
-
- complete(&tctl->tctl_start);
-
- while (1) {
- wait_queue_entry_t __wait;
-
- pc.pc_want_daemon_pages = 0;
- collect_pages(&pc);
- if (list_empty(&pc.pc_pages))
- goto end_loop;
-
- filp = NULL;
- cfs_tracefile_read_lock();
- if (cfs_tracefile[0]) {
- filp = filp_open(cfs_tracefile,
- O_CREAT | O_RDWR | O_LARGEFILE,
- 0600);
- if (IS_ERR(filp)) {
- rc = PTR_ERR(filp);
- filp = NULL;
- pr_warn("couldn't open %s: %d\n", cfs_tracefile,
- rc);
- }
- }
- cfs_tracefile_read_unlock();
- if (!filp) {
- put_pages_on_daemon_list(&pc);
- __LASSERT(list_empty(&pc.pc_pages));
- goto end_loop;
- }
-
- list_for_each_entry_safe(tage, tmp, &pc.pc_pages, linkage) {
- static loff_t f_pos;
-
- __LASSERT_TAGE_INVARIANT(tage);
-
- if (f_pos >= (off_t)cfs_tracefile_size)
- f_pos = 0;
- else if (f_pos > i_size_read(file_inode(filp)))
- f_pos = i_size_read(file_inode(filp));
-
- buf = kmap(tage->page);
- rc = kernel_write(filp, buf, tage->used, &f_pos);
- kunmap(tage->page);
-
- if (rc != (int)tage->used) {
- pr_warn("wanted to write %u but wrote %d\n",
- tage->used, rc);
- put_pages_back(&pc);
- __LASSERT(list_empty(&pc.pc_pages));
- break;
- }
- }
-
- filp_close(filp, NULL);
- put_pages_on_daemon_list(&pc);
- if (!list_empty(&pc.pc_pages)) {
- int i;
-
- pr_alert("trace pages aren't empty\n");
- pr_err("total cpus(%d): ", num_possible_cpus());
- for (i = 0; i < num_possible_cpus(); i++)
- if (cpu_online(i))
- pr_cont("%d(on) ", i);
- else
- pr_cont("%d(off) ", i);
- pr_cont("\n");
-
- i = 0;
- list_for_each_entry_safe(tage, tmp, &pc.pc_pages,
- linkage)
- pr_err("page %d belongs to cpu %d\n",
- ++i, tage->cpu);
- pr_err("There are %d pages unwritten\n", i);
- }
- __LASSERT(list_empty(&pc.pc_pages));
-end_loop:
- if (atomic_read(&tctl->tctl_shutdown)) {
- if (!last_loop) {
- last_loop = 1;
- continue;
- } else {
- break;
- }
- }
- init_waitqueue_entry(&__wait, current);
- add_wait_queue(&tctl->tctl_waitq, &__wait);
- set_current_state(TASK_INTERRUPTIBLE);
- schedule_timeout(HZ);
- remove_wait_queue(&tctl->tctl_waitq, &__wait);
- }
- complete(&tctl->tctl_stop);
- return 0;
-}
-
-int cfs_trace_start_thread(void)
-{
- struct tracefiled_ctl *tctl = &trace_tctl;
- struct task_struct *task;
- int rc = 0;
-
- mutex_lock(&cfs_trace_thread_mutex);
- if (thread_running)
- goto out;
-
- init_completion(&tctl->tctl_start);
- init_completion(&tctl->tctl_stop);
- init_waitqueue_head(&tctl->tctl_waitq);
- atomic_set(&tctl->tctl_shutdown, 0);
-
- task = kthread_run(tracefiled, tctl, "ktracefiled");
- if (IS_ERR(task)) {
- rc = PTR_ERR(task);
- goto out;
- }
-
- wait_for_completion(&tctl->tctl_start);
- thread_running = 1;
-out:
- mutex_unlock(&cfs_trace_thread_mutex);
- return rc;
-}
-
-void cfs_trace_stop_thread(void)
-{
- struct tracefiled_ctl *tctl = &trace_tctl;
-
- mutex_lock(&cfs_trace_thread_mutex);
- if (thread_running) {
- pr_info("shutting down debug daemon thread...\n");
- atomic_set(&tctl->tctl_shutdown, 1);
- wait_for_completion(&tctl->tctl_stop);
- thread_running = 0;
- }
- mutex_unlock(&cfs_trace_thread_mutex);
-}
-
-int cfs_tracefile_init(int max_pages)
-{
- struct cfs_trace_cpu_data *tcd;
- int i;
- int j;
- int rc;
- int factor;
-
- rc = cfs_tracefile_init_arch();
- if (rc)
- return rc;
-
- cfs_tcd_for_each(tcd, i, j) {
-		/* tcd_pages_factor is initialized in cfs_tracefile_init_arch(). */
- factor = tcd->tcd_pages_factor;
- INIT_LIST_HEAD(&tcd->tcd_pages);
- INIT_LIST_HEAD(&tcd->tcd_stock_pages);
- INIT_LIST_HEAD(&tcd->tcd_daemon_pages);
- tcd->tcd_cur_pages = 0;
- tcd->tcd_cur_stock_pages = 0;
- tcd->tcd_cur_daemon_pages = 0;
- tcd->tcd_max_pages = (max_pages * factor) / 100;
- LASSERT(tcd->tcd_max_pages > 0);
- tcd->tcd_shutting_down = 0;
- }
-
- return 0;
-}
-
-static void trace_cleanup_on_all_cpus(void)
-{
- struct cfs_trace_cpu_data *tcd;
- struct cfs_trace_page *tage;
- struct cfs_trace_page *tmp;
- int i, cpu;
-
- for_each_possible_cpu(cpu) {
- cfs_tcd_for_each_type_lock(tcd, i, cpu) {
- tcd->tcd_shutting_down = 1;
-
- list_for_each_entry_safe(tage, tmp, &tcd->tcd_pages,
- linkage) {
- __LASSERT_TAGE_INVARIANT(tage);
-
- list_del(&tage->linkage);
- cfs_tage_free(tage);
- }
-
- tcd->tcd_cur_pages = 0;
- }
- }
-}
-
-static void cfs_trace_cleanup(void)
-{
- struct page_collection pc;
-
- INIT_LIST_HEAD(&pc.pc_pages);
-
- trace_cleanup_on_all_cpus();
-
- cfs_tracefile_fini_arch();
-}
-
-void cfs_tracefile_exit(void)
-{
- cfs_trace_stop_thread();
- cfs_trace_cleanup();
-}
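The cdls logic in libcfs_debug_vmsg2() above is a multiplicative-backoff rate limiter for console output: within the delay window messages are only counted, after a long quiet spell the delay is relaxed by a factor of 4 * backoff, otherwise it is multiplied by backoff, and the result is always clamped to [min, max]. A stand-alone sketch of the same state machine (names are hypothetical; jiffies-based like the original):

#include <linux/jiffies.h>
#include <linux/kernel.h>

struct console_limiter {
	unsigned long next;	/* jiffies of next permitted message */
	long delay;		/* current backoff, in jiffies */
	int count;		/* messages suppressed since last print */
};

static bool console_allowed(struct console_limiter *s, long backoff,
			    long min_delay, long max_delay)
{
	/* inside the window (and not the first message ever): suppress */
	if (s->next && time_before(jiffies, s->next)) {
		s->count++;
		return false;
	}

	if (time_after(jiffies, s->next + max_delay + 10 * HZ))
		s->delay /= backoff * 4;	/* long quiet spell: relax */
	else
		s->delay *= backoff;		/* still noisy: back off */

	s->delay = clamp(s->delay, min_delay, max_delay);

	/* OR with 1 so the stored value is never zero ("first time") */
	s->next = (jiffies + s->delay) | 1;
	return true;
}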
diff --git a/drivers/staging/lustre/lnet/libcfs/tracefile.h b/drivers/staging/lustre/lnet/libcfs/tracefile.h
deleted file mode 100644
index a29d6eb3a785..000000000000
--- a/drivers/staging/lustre/lnet/libcfs/tracefile.h
+++ /dev/null
@@ -1,263 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/*
- * GPL HEADER START
- *
- * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 only,
- * as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * General Public License version 2 for more details (a copy is included
- * in the LICENSE file that accompanied this code).
- *
- * You should have received a copy of the GNU General Public License
- * version 2 along with this program; If not, see
- * http://www.gnu.org/licenses/gpl-2.0.html
- *
- * GPL HEADER END
- */
-/*
- * Copyright (c) 2008, 2010, Oracle and/or its affiliates. All rights reserved.
- * Use is subject to license terms.
- *
- * Copyright (c) 2012, Intel Corporation.
- */
-/*
- * This file is part of Lustre, http://www.lustre.org/
- * Lustre is a trademark of Sun Microsystems, Inc.
- */
-
-#ifndef __LIBCFS_TRACEFILE_H__
-#define __LIBCFS_TRACEFILE_H__
-
-#include <linux/libcfs/libcfs.h>
-
-enum cfs_trace_buf_type {
- CFS_TCD_TYPE_PROC = 0,
- CFS_TCD_TYPE_SOFTIRQ,
- CFS_TCD_TYPE_IRQ,
- CFS_TCD_TYPE_MAX
-};
-
-/* trace file lock routines */
-
-#define TRACEFILE_NAME_SIZE 1024
-extern char cfs_tracefile[TRACEFILE_NAME_SIZE];
-extern long long cfs_tracefile_size;
-
-void libcfs_run_debug_log_upcall(char *file);
-
-int cfs_tracefile_init_arch(void);
-void cfs_tracefile_fini_arch(void);
-
-void cfs_tracefile_read_lock(void);
-void cfs_tracefile_read_unlock(void);
-void cfs_tracefile_write_lock(void);
-void cfs_tracefile_write_unlock(void);
-
-int cfs_tracefile_dump_all_pages(char *filename);
-void cfs_trace_debug_print(void);
-void cfs_trace_flush_pages(void);
-int cfs_trace_start_thread(void);
-void cfs_trace_stop_thread(void);
-int cfs_tracefile_init(int max_pages);
-void cfs_tracefile_exit(void);
-
-int cfs_trace_copyin_string(char *knl_buffer, int knl_buffer_nob,
- const char __user *usr_buffer, int usr_buffer_nob);
-int cfs_trace_copyout_string(char __user *usr_buffer, int usr_buffer_nob,
- const char *knl_str, char *append);
-int cfs_trace_allocate_string_buffer(char **str, int nob);
-int cfs_trace_dump_debug_buffer_usrstr(void __user *usr_str, int usr_str_nob);
-int cfs_trace_daemon_command(char *str);
-int cfs_trace_daemon_command_usrstr(void __user *usr_str, int usr_str_nob);
-int cfs_trace_set_debug_mb(int mb);
-int cfs_trace_get_debug_mb(void);
-
-void libcfs_debug_dumplog_internal(void *arg);
-void libcfs_register_panic_notifier(void);
-void libcfs_unregister_panic_notifier(void);
-extern int libcfs_panic_in_progress;
-int cfs_trace_max_debug_mb(void);
-
-#define TCD_MAX_PAGES (5 << (20 - PAGE_SHIFT))
-#define TCD_STOCK_PAGES (TCD_MAX_PAGES)
-#define CFS_TRACEFILE_SIZE (500 << 20)
-
-#ifdef LUSTRE_TRACEFILE_PRIVATE
-
-/*
- * Private declarations for tracefile
- */
-#define TCD_MAX_PAGES (5 << (20 - PAGE_SHIFT))
-#define TCD_STOCK_PAGES (TCD_MAX_PAGES)
-
-#define CFS_TRACEFILE_SIZE (500 << 20)
-
-/*
- * Size of a buffer for sprintf'ing console messages if we can't get a
- * page from the system
- */
-#define CFS_TRACE_CONSOLE_BUFFER_SIZE 1024
-
-union cfs_trace_data_union {
- struct cfs_trace_cpu_data {
- /*
- * Even though this structure is meant to be per-CPU, locking
- * is needed because in some places the data may be accessed
- * from other CPUs. This lock is directly used in trace_get_tcd
- * and trace_put_tcd, which are called in libcfs_debug_vmsg2 and
- * tcd_for_each_type_lock
- */
- spinlock_t tcd_lock;
- unsigned long tcd_lock_flags;
-
- /*
- * pages with trace records not yet processed by tracefiled.
- */
- struct list_head tcd_pages;
- /* number of pages on ->tcd_pages */
- unsigned long tcd_cur_pages;
-
- /*
- * pages with trace records already processed by
- * tracefiled. These pages are kept in memory, so that some
- * portion of log can be written in the event of LBUG. This
- * list is maintained in LRU order.
- *
- * Pages are moved to ->tcd_daemon_pages by tracefiled()
- * (put_pages_on_daemon_list()). LRU pages from this list are
- * discarded when list grows too large.
- */
- struct list_head tcd_daemon_pages;
- /* number of pages on ->tcd_daemon_pages */
- unsigned long tcd_cur_daemon_pages;
-
- /*
- * Maximal number of pages allowed on ->tcd_pages and
- * ->tcd_daemon_pages each.
- * Always TCD_MAX_PAGES * tcd_pages_factor / 100 in current
- * implementation.
- */
- unsigned long tcd_max_pages;
-
- /*
- * preallocated pages to write trace records into. Pages from
- * ->tcd_stock_pages are moved to ->tcd_pages by
- * portals_debug_msg().
- *
- * This list is necessary, because on some platforms it's
- * impossible to perform efficient atomic page allocation in a
- * non-blockable context.
- *
- * Such platforms fill ->tcd_stock_pages "on occasion", when
- * tracing code is entered in blockable context.
- *
- * trace_get_tage_try() tries to get a page from
- * ->tcd_stock_pages first and resorts to atomic page
- * allocation only if this queue is empty. ->tcd_stock_pages
- * is replenished when tracing code is entered in blocking
- * context (darwin-tracefile.c:trace_get_tcd()). We try to
- * maintain TCD_STOCK_PAGES (40 by default) pages in this
- * queue. Atomic allocation is only required if more than
- * TCD_STOCK_PAGES pagesful are consumed by trace records all
- * emitted in non-blocking contexts. Which is quite unlikely.
- */
- struct list_head tcd_stock_pages;
- /* number of pages on ->tcd_stock_pages */
- unsigned long tcd_cur_stock_pages;
-
- unsigned short tcd_shutting_down;
- unsigned short tcd_cpu;
- unsigned short tcd_type;
- /* The percentage share of debug memory for this type. */
- unsigned short tcd_pages_factor;
- } tcd;
- char __pad[L1_CACHE_ALIGN(sizeof(struct cfs_trace_cpu_data))];
-};
-
-#define TCD_MAX_TYPES 8
-extern union cfs_trace_data_union (*cfs_trace_data[TCD_MAX_TYPES])[NR_CPUS];
-
-#define cfs_tcd_for_each(tcd, i, j) \
- for (i = 0; cfs_trace_data[i]; i++) \
- for (j = 0, ((tcd) = &(*cfs_trace_data[i])[j].tcd); \
- j < num_possible_cpus(); \
- j++, (tcd) = &(*cfs_trace_data[i])[j].tcd)
-
-#define cfs_tcd_for_each_type_lock(tcd, i, cpu) \
- for (i = 0; cfs_trace_data[i] && \
- (tcd = &(*cfs_trace_data[i])[cpu].tcd) && \
- cfs_trace_lock_tcd(tcd, 1); cfs_trace_unlock_tcd(tcd, 1), i++)
-
-void cfs_set_ptldebug_header(struct ptldebug_header *header,
- struct libcfs_debug_msg_data *m,
- unsigned long stack);
-void cfs_print_to_console(struct ptldebug_header *hdr, int mask,
- const char *buf, int len, const char *file,
- const char *fn);
-
-int cfs_trace_lock_tcd(struct cfs_trace_cpu_data *tcd, int walking);
-void cfs_trace_unlock_tcd(struct cfs_trace_cpu_data *tcd, int walking);
-
-extern char *cfs_trace_console_buffers[NR_CPUS][CFS_TCD_TYPE_MAX];
-enum cfs_trace_buf_type cfs_trace_buf_idx_get(void);
-
-static inline char *
-cfs_trace_get_console_buffer(void)
-{
- unsigned int i = get_cpu();
- unsigned int j = cfs_trace_buf_idx_get();
-
- return cfs_trace_console_buffers[i][j];
-}
-
-static inline struct cfs_trace_cpu_data *
-cfs_trace_get_tcd(void)
-{
- struct cfs_trace_cpu_data *tcd =
- &(*cfs_trace_data[cfs_trace_buf_idx_get()])[get_cpu()].tcd;
-
- cfs_trace_lock_tcd(tcd, 0);
-
- return tcd;
-}
-
-static inline void cfs_trace_put_tcd(struct cfs_trace_cpu_data *tcd)
-{
- cfs_trace_unlock_tcd(tcd, 0);
-
- put_cpu();
-}
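
cfs_trace_get_tcd()/cfs_trace_put_tcd() bracket every access to the per-CPU trace data: get_cpu() pins the caller to its CPU (disabling preemption) before the array lookup, and put_cpu() runs only after the unlock, so the tcd cannot migrate mid-use. The typical call shape, sketched:

	struct cfs_trace_cpu_data *tcd = cfs_trace_get_tcd();

	/* ... append a trace record to tcd->tcd_pages ... */

	cfs_trace_put_tcd(tcd);
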
-
-int cfs_trace_refill_stock(struct cfs_trace_cpu_data *tcd, gfp_t gfp,
- struct list_head *stock);
-
-void cfs_trace_assertion_failed(const char *str,
- struct libcfs_debug_msg_data *m);
-
-/* ASSERTION that is safe to use within the debug system */
-#define __LASSERT(cond) \
-do { \
- if (unlikely(!(cond))) { \
- LIBCFS_DEBUG_MSG_DATA_DECL(msgdata, D_EMERG, NULL); \
- cfs_trace_assertion_failed("ASSERTION("#cond") failed", \
- &msgdata); \
- } \
-} while (0)
-
-#define __LASSERT_TAGE_INVARIANT(tage) \
-do { \
- __LASSERT(tage); \
- __LASSERT(tage->page); \
- __LASSERT(tage->used <= PAGE_SIZE); \
- __LASSERT(page_count(tage->page) > 0); \
-} while (0)
-
-#endif /* LUSTRE_TRACEFILE_PRIVATE */
-
-#endif /* __LIBCFS_TRACEFILE_H__ */
diff --git a/drivers/staging/lustre/lnet/lnet/Makefile b/drivers/staging/lustre/lnet/lnet/Makefile
deleted file mode 100644
index 0a9d70924fe0..000000000000
--- a/drivers/staging/lustre/lnet/lnet/Makefile
+++ /dev/null
@@ -1,10 +0,0 @@
-# SPDX-License-Identifier: GPL-2.0
-subdir-ccflags-y += -I$(srctree)/drivers/staging/lustre/include
-subdir-ccflags-y += -I$(srctree)/drivers/staging/lustre/lustre/include
-
-obj-$(CONFIG_LNET) += lnet.o
-
-lnet-y := api-ni.o config.o nidstrings.o net_fault.o \
- lib-me.o lib-msg.o lib-eq.o lib-md.o lib-ptl.o \
- lib-socket.o lib-move.o module.o lo.o \
- router.o router_proc.o acceptor.o peer.o
diff --git a/drivers/staging/lustre/lnet/lnet/acceptor.c b/drivers/staging/lustre/lnet/lnet/acceptor.c
deleted file mode 100644
index 5648f17eddc0..000000000000
--- a/drivers/staging/lustre/lnet/lnet/acceptor.c
+++ /dev/null
@@ -1,501 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/*
- * GPL HEADER START
- *
- * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 only,
- * as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * General Public License version 2 for more details (a copy is included
- * in the LICENSE file that accompanied this code).
- *
- * You should have received a copy of the GNU General Public License
- * version 2 along with this program; If not, see
- * http://www.gnu.org/licenses/gpl-2.0.html
- *
- * GPL HEADER END
- */
-/*
- * Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved.
- * Use is subject to license terms.
- *
- * Copyright (c) 2011, 2015, Intel Corporation.
- */
-/*
- * This file is part of Lustre, http://www.lustre.org/
- * Lustre is a trademark of Sun Microsystems, Inc.
- */
-
-#define DEBUG_SUBSYSTEM S_LNET
-#include <linux/completion.h>
-#include <net/sock.h>
-#include <linux/lnet/lib-lnet.h>
-
-static int accept_port = 988;
-static int accept_backlog = 127;
-static int accept_timeout = 5;
-
-static struct {
- int pta_shutdown;
- struct socket *pta_sock;
- struct completion pta_signal;
-} lnet_acceptor_state = {
- .pta_shutdown = 1
-};
-
-int
-lnet_acceptor_port(void)
-{
- return accept_port;
-}
-EXPORT_SYMBOL(lnet_acceptor_port);
-
-static inline int
-lnet_accept_magic(__u32 magic, __u32 constant)
-{
- return (magic == constant ||
- magic == __swab32(constant));
-}
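
lnet_accept_magic() deliberately matches the constant in either byte order; that is how the acceptor detects an opposite-endian peer from the first word on the wire. A stand-alone sketch of the same check (the demo_* names are hypothetical):

#include <linux/swab.h>
#include <linux/types.h>

/* Returns 1 if wire_magic matches expected in either byte order;
 * *flip tells the caller whether later fields need swabbing. */
static int demo_check_magic(__u32 wire_magic, __u32 expected, int *flip)
{
	if (wire_magic == expected) {
		*flip = 0;		/* peer shares our endianness */
		return 1;
	}
	if (wire_magic == __swab32(expected)) {
		*flip = 1;		/* opposite-endian peer */
		return 1;
	}
	return 0;			/* not this protocol at all */
}
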
-
-static char *accept = "secure";
-
-module_param(accept, charp, 0444);
-MODULE_PARM_DESC(accept, "Accept connections (secure|all|none)");
-module_param(accept_port, int, 0444);
-MODULE_PARM_DESC(accept_port, "Acceptor's port (same on all nodes)");
-module_param(accept_backlog, int, 0444);
-MODULE_PARM_DESC(accept_backlog, "Acceptor's listen backlog");
-module_param(accept_timeout, int, 0644);
-MODULE_PARM_DESC(accept_timeout, "Acceptor's timeout (seconds)");
-
-static char *accept_type;
-
-static int
-lnet_acceptor_get_tunables(void)
-{
- /*
- * Userland acceptor uses 'accept_type' instead of 'accept', due to
- * conflict with 'accept(2)', but kernel acceptor still uses 'accept'
- * for compatibility. Hence the trick.
- */
- accept_type = accept;
- return 0;
-}
-
-int
-lnet_acceptor_timeout(void)
-{
- return accept_timeout;
-}
-EXPORT_SYMBOL(lnet_acceptor_timeout);
-
-void
-lnet_connect_console_error(int rc, lnet_nid_t peer_nid,
- __u32 peer_ip, int peer_port)
-{
- switch (rc) {
- /* "normal" errors */
- case -ECONNREFUSED:
- CNETERR("Connection to %s at host %pI4h on port %d was refused: check that Lustre is running on that node.\n",
- libcfs_nid2str(peer_nid),
- &peer_ip, peer_port);
- break;
- case -EHOSTUNREACH:
- case -ENETUNREACH:
- CNETERR("Connection to %s at host %pI4h was unreachable: the network or that node may be down, or Lustre may be misconfigured.\n",
- libcfs_nid2str(peer_nid), &peer_ip);
- break;
- case -ETIMEDOUT:
- CNETERR("Connection to %s at host %pI4h on port %d took too long: that node may be hung or experiencing high load.\n",
- libcfs_nid2str(peer_nid),
- &peer_ip, peer_port);
- break;
- case -ECONNRESET:
- LCONSOLE_ERROR_MSG(0x11b, "Connection to %s at host %pI4h on port %d was reset: is it running a compatible version of Lustre and is %s one of its NIDs?\n",
- libcfs_nid2str(peer_nid),
- &peer_ip, peer_port,
- libcfs_nid2str(peer_nid));
- break;
- case -EPROTO:
- LCONSOLE_ERROR_MSG(0x11c, "Protocol error connecting to %s at host %pI4h on port %d: is it running a compatible version of Lustre?\n",
- libcfs_nid2str(peer_nid),
- &peer_ip, peer_port);
- break;
- case -EADDRINUSE:
- LCONSOLE_ERROR_MSG(0x11d, "No privileged ports available to connect to %s at host %pI4h on port %d\n",
- libcfs_nid2str(peer_nid),
- &peer_ip, peer_port);
- break;
- default:
- LCONSOLE_ERROR_MSG(0x11e, "Unexpected error %d connecting to %s at host %pI4h on port %d\n",
- rc, libcfs_nid2str(peer_nid),
- &peer_ip, peer_port);
- break;
- }
-}
-EXPORT_SYMBOL(lnet_connect_console_error);
-
-int
-lnet_connect(struct socket **sockp, lnet_nid_t peer_nid,
- __u32 local_ip, __u32 peer_ip, int peer_port)
-{
- struct lnet_acceptor_connreq cr;
- struct socket *sock;
- int rc;
- int port;
- int fatal;
-
- BUILD_BUG_ON(sizeof(cr) > 16); /* too big to be on the stack */
-
- for (port = LNET_ACCEPTOR_MAX_RESERVED_PORT;
- port >= LNET_ACCEPTOR_MIN_RESERVED_PORT;
- --port) {
- /* Iterate through reserved ports. */
-
- rc = lnet_sock_connect(&sock, &fatal, local_ip, port, peer_ip,
- peer_port);
- if (rc) {
- if (fatal)
- goto failed;
- continue;
- }
-
- BUILD_BUG_ON(LNET_PROTO_ACCEPTOR_VERSION != 1);
-
- cr.acr_magic = LNET_PROTO_ACCEPTOR_MAGIC;
- cr.acr_version = LNET_PROTO_ACCEPTOR_VERSION;
- cr.acr_nid = peer_nid;
-
- if (the_lnet.ln_testprotocompat) {
- /* single-shot proto check */
- lnet_net_lock(LNET_LOCK_EX);
- if (the_lnet.ln_testprotocompat & 4) {
- cr.acr_version++;
- the_lnet.ln_testprotocompat &= ~4;
- }
- if (the_lnet.ln_testprotocompat & 8) {
- cr.acr_magic = LNET_PROTO_MAGIC;
- the_lnet.ln_testprotocompat &= ~8;
- }
- lnet_net_unlock(LNET_LOCK_EX);
- }
-
- rc = lnet_sock_write(sock, &cr, sizeof(cr), accept_timeout);
- if (rc)
- goto failed_sock;
-
- *sockp = sock;
- return 0;
- }
-
- rc = -EADDRINUSE;
- goto failed;
-
- failed_sock:
- sock_release(sock);
- failed:
- lnet_connect_console_error(rc, peer_nid, peer_ip, peer_port);
- return rc;
-}
-EXPORT_SYMBOL(lnet_connect);
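
The descending port loop is the connector half of the acceptor's "secure" mode: only a privileged process can bind a local port at or below LNET_ACCEPTOR_MAX_RESERVED_PORT, so the accepting side (lnet_acceptor() below) may take a low source port as evidence the peer runs as root. A non-fatal connect failure just means that port was busy and the scan moves down one. In pseudocode:

	for (port = LNET_ACCEPTOR_MAX_RESERVED_PORT;
	     port >= LNET_ACCEPTOR_MIN_RESERVED_PORT; --port) {
		if (connecting from (local_ip, port) succeeds)
			return: write the connreq, hand the socket back;
		if (the failure was fatal)
			break;
	}
	fail with -EADDRINUSE: every privileged port was busy;
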
-
-static int
-lnet_accept(struct socket *sock, __u32 magic)
-{
- struct lnet_acceptor_connreq cr;
- __u32 peer_ip;
- int peer_port;
- int rc;
- int flip;
- struct lnet_ni *ni;
- char *str;
-
- LASSERT(sizeof(cr) <= 16); /* not too big for the stack */
-
- rc = lnet_sock_getaddr(sock, 1, &peer_ip, &peer_port);
- LASSERT(!rc); /* we succeeded before */
-
- if (!lnet_accept_magic(magic, LNET_PROTO_ACCEPTOR_MAGIC)) {
- if (lnet_accept_magic(magic, LNET_PROTO_MAGIC)) {
- /*
- * future version compatibility!
- * When LNET unifies protocols over all LNDs, the first
- * thing sent will be a version query. I send back
- * LNET_PROTO_ACCEPTOR_MAGIC to tell the peer I'm "old"
- */
- memset(&cr, 0, sizeof(cr));
- cr.acr_magic = LNET_PROTO_ACCEPTOR_MAGIC;
- cr.acr_version = LNET_PROTO_ACCEPTOR_VERSION;
- rc = lnet_sock_write(sock, &cr, sizeof(cr),
- accept_timeout);
-
- if (rc)
- CERROR("Error sending magic+version in response to LNET magic from %pI4h: %d\n",
- &peer_ip, rc);
- return -EPROTO;
- }
-
- if (lnet_accept_magic(magic, LNET_PROTO_TCP_MAGIC))
- str = "'old' socknal/tcpnal";
- else
- str = "unrecognised";
-
- LCONSOLE_ERROR_MSG(0x11f, "Refusing connection from %pI4h magic %08x: %s acceptor protocol\n",
- &peer_ip, magic, str);
- return -EPROTO;
- }
-
- flip = (magic != LNET_PROTO_ACCEPTOR_MAGIC);
-
- rc = lnet_sock_read(sock, &cr.acr_version, sizeof(cr.acr_version),
- accept_timeout);
- if (rc) {
- CERROR("Error %d reading connection request version from %pI4h\n",
- rc, &peer_ip);
- return -EIO;
- }
-
- if (flip)
- __swab32s(&cr.acr_version);
-
- if (cr.acr_version != LNET_PROTO_ACCEPTOR_VERSION) {
- /*
- * future version compatibility!
- * An acceptor-specific protocol rev will first send a version
- * query. I send back my current version to tell the peer I'm
- * "old".
- */
- int peer_version = cr.acr_version;
-
- memset(&cr, 0, sizeof(cr));
- cr.acr_magic = LNET_PROTO_ACCEPTOR_MAGIC;
- cr.acr_version = LNET_PROTO_ACCEPTOR_VERSION;
-
- rc = lnet_sock_write(sock, &cr, sizeof(cr), accept_timeout);
- if (rc)
- CERROR("Error sending magic+version in response to version %d from %pI4h: %d\n",
- peer_version, &peer_ip, rc);
- return -EPROTO;
- }
-
- rc = lnet_sock_read(sock, &cr.acr_nid,
- sizeof(cr) -
- offsetof(struct lnet_acceptor_connreq, acr_nid),
- accept_timeout);
- if (rc) {
- CERROR("Error %d reading connection request from %pI4h\n",
- rc, &peer_ip);
- return -EIO;
- }
-
- if (flip)
- __swab64s(&cr.acr_nid);
-
- ni = lnet_net2ni(LNET_NIDNET(cr.acr_nid));
- if (!ni || /* no matching net */
- ni->ni_nid != cr.acr_nid) { /* right NET, wrong NID! */
- if (ni)
- lnet_ni_decref(ni);
- LCONSOLE_ERROR_MSG(0x120, "Refusing connection from %pI4h for %s: No matching NI\n",
- &peer_ip, libcfs_nid2str(cr.acr_nid));
- return -EPERM;
- }
-
- if (!ni->ni_lnd->lnd_accept) {
- /* This catches a request for the loopback LND */
- lnet_ni_decref(ni);
- LCONSOLE_ERROR_MSG(0x121, "Refusing connection from %pI4h for %s: NI does not accept IP connections\n",
- &peer_ip, libcfs_nid2str(cr.acr_nid));
- return -EPERM;
- }
-
- CDEBUG(D_NET, "Accept %s from %pI4h\n",
- libcfs_nid2str(cr.acr_nid), &peer_ip);
-
- rc = ni->ni_lnd->lnd_accept(ni, sock);
-
- lnet_ni_decref(ni);
- return rc;
-}
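
For reference, the complete handshake lnet_accept() is parsing, as implied by the reads above and the writer in lnet_connect():

/* Connector -> acceptor: one struct lnet_acceptor_connreq (16 bytes):
 *
 *	__u32 acr_magic;	LNET_PROTO_ACCEPTOR_MAGIC
 *	__u32 acr_version;	LNET_PROTO_ACCEPTOR_VERSION (1)
 *	__u64 acr_nid;		NID the connector is trying to reach
 *
 * The acceptor reads acr_magic by itself first (in lnet_acceptor()),
 * infers byte order from it, then reads acr_version, then the rest of
 * the struct, swabbing each field when 'flip' is set. A newer-protocol
 * magic (LNET_PROTO_MAGIC) or version is answered with the acceptor's
 * own magic+version, so a future peer can tell it reached an "old"
 * node; anything else is refused outright.
 */
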
-
-static int
-lnet_acceptor(void *arg)
-{
- struct socket *newsock;
- int rc;
- __u32 magic;
- __u32 peer_ip;
- int peer_port;
- int secure = (int)((long)arg);
-
- LASSERT(!lnet_acceptor_state.pta_sock);
-
- rc = lnet_sock_listen(&lnet_acceptor_state.pta_sock, 0, accept_port,
- accept_backlog);
- if (rc) {
- if (rc == -EADDRINUSE)
- LCONSOLE_ERROR_MSG(0x122, "Can't start acceptor on port %d: port already in use\n",
- accept_port);
- else
- LCONSOLE_ERROR_MSG(0x123, "Can't start acceptor on port %d: unexpected error %d\n",
- accept_port, rc);
-
- lnet_acceptor_state.pta_sock = NULL;
- } else {
- LCONSOLE(0, "Accept %s, port %d\n", accept_type, accept_port);
- }
-
- /* set init status and unblock parent */
- lnet_acceptor_state.pta_shutdown = rc;
- complete(&lnet_acceptor_state.pta_signal);
-
- if (rc)
- return rc;
-
- while (!lnet_acceptor_state.pta_shutdown) {
- rc = lnet_sock_accept(&newsock, lnet_acceptor_state.pta_sock);
- if (rc) {
- if (rc != -EAGAIN) {
- CWARN("Accept error %d: pausing...\n", rc);
- set_current_state(TASK_UNINTERRUPTIBLE);
- schedule_timeout(HZ);
- }
- continue;
- }
-
- /* the LNet acceptor thread may have been woken for shutdown */
- if (lnet_acceptor_state.pta_shutdown) {
- sock_release(newsock);
- break;
- }
-
- rc = lnet_sock_getaddr(newsock, 1, &peer_ip, &peer_port);
- if (rc) {
- CERROR("Can't determine new connection's address\n");
- goto failed;
- }
-
- if (secure && peer_port > LNET_ACCEPTOR_MAX_RESERVED_PORT) {
- CERROR("Refusing connection from %pI4h: insecure port %d\n",
- &peer_ip, peer_port);
- goto failed;
- }
-
- rc = lnet_sock_read(newsock, &magic, sizeof(magic),
- accept_timeout);
- if (rc) {
- CERROR("Error %d reading connection request from %pI4h\n",
- rc, &peer_ip);
- goto failed;
- }
-
- rc = lnet_accept(newsock, magic);
- if (rc)
- goto failed;
-
- continue;
-
-failed:
- sock_release(newsock);
- }
-
- sock_release(lnet_acceptor_state.pta_sock);
- lnet_acceptor_state.pta_sock = NULL;
-
- CDEBUG(D_NET, "Acceptor stopping\n");
-
- /* unblock lnet_acceptor_stop() */
- complete(&lnet_acceptor_state.pta_signal);
- return 0;
-}
-
-static inline int
-accept2secure(const char *acc, long *sec)
-{
- if (!strcmp(acc, "secure")) {
- *sec = 1;
- return 1;
- } else if (!strcmp(acc, "all")) {
- *sec = 0;
- return 1;
- } else if (!strcmp(acc, "none")) {
- return 0;
- }
-
- LCONSOLE_ERROR_MSG(0x124, "Can't parse 'accept=\"%s\"'\n",
- acc);
- return -EINVAL;
-}
-
-int
-lnet_acceptor_start(void)
-{
- struct task_struct *task;
- int rc;
- long rc2;
- long secure;
-
- /* if acceptor is already running return immediately */
- if (!lnet_acceptor_state.pta_shutdown)
- return 0;
-
- LASSERT(!lnet_acceptor_state.pta_sock);
-
- rc = lnet_acceptor_get_tunables();
- if (rc)
- return rc;
-
- init_completion(&lnet_acceptor_state.pta_signal);
- rc = accept2secure(accept_type, &secure);
- if (rc <= 0)
- return rc;
-
- if (!lnet_count_acceptor_nis()) /* not required */
- return 0;
-
- task = kthread_run(lnet_acceptor, (void *)(uintptr_t)secure,
- "acceptor_%03ld", secure);
- if (IS_ERR(task)) {
- rc2 = PTR_ERR(task);
- CERROR("Can't start acceptor thread: %ld\n", rc2);
-
- return -ESRCH;
- }
-
- /* wait for acceptor to startup */
- wait_for_completion(&lnet_acceptor_state.pta_signal);
-
- if (!lnet_acceptor_state.pta_shutdown) {
- /* started OK */
- LASSERT(lnet_acceptor_state.pta_sock);
- return 0;
- }
-
- LASSERT(!lnet_acceptor_state.pta_sock);
-
- return -ENETDOWN;
-}
-
-void
-lnet_acceptor_stop(void)
-{
- struct sock *sk;
-
- if (lnet_acceptor_state.pta_shutdown) /* not running */
- return;
-
- lnet_acceptor_state.pta_shutdown = 1;
-
- sk = lnet_acceptor_state.pta_sock->sk;
-
- /* wake any sleepers using a safe method */
- sk->sk_state_change(sk);
-
- /* block until acceptor signals exit */
- wait_for_completion(&lnet_acceptor_state.pta_signal);
-}
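
Note the shutdown handshake: no signal and no kthread_stop(). Setting pta_shutdown first and then invoking the listening socket's sk_state_change() callback kicks a thread blocked in lnet_sock_accept() back out, after which the accept loop re-checks the flag and completes pta_signal. Sketched as an interleaving:

/*
 *   lnet_acceptor_stop()              lnet_acceptor()
 *   --------------------              ---------------
 *   pta_shutdown = 1
 *   sk->sk_state_change(sk)  ----->   lnet_sock_accept() returns
 *                                     loop sees pta_shutdown, exits
 *   wait_for_completion()    <-----   complete(&pta_signal)
 */
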
diff --git a/drivers/staging/lustre/lnet/lnet/api-ni.c b/drivers/staging/lustre/lnet/lnet/api-ni.c
deleted file mode 100644
index 90266be0132d..000000000000
--- a/drivers/staging/lustre/lnet/lnet/api-ni.c
+++ /dev/null
@@ -1,2307 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/*
- * GPL HEADER START
- *
- * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 only,
- * as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * General Public License version 2 for more details (a copy is included
- * in the LICENSE file that accompanied this code).
- *
- * You should have received a copy of the GNU General Public License
- * version 2 along with this program; If not, see
- * http://www.gnu.org/licenses/gpl-2.0.html
- *
- * GPL HEADER END
- */
-/*
- * Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved.
- * Use is subject to license terms.
- *
- * Copyright (c) 2011, 2015, Intel Corporation.
- */
-/*
- * This file is part of Lustre, http://www.lustre.org/
- * Lustre is a trademark of Sun Microsystems, Inc.
- */
-
-#define DEBUG_SUBSYSTEM S_LNET
-#include <linux/log2.h>
-#include <linux/ktime.h>
-
-#include <linux/lnet/lib-lnet.h>
-#include <uapi/linux/lnet/lnet-dlc.h>
-
-#define D_LNI D_CONSOLE
-
-struct lnet the_lnet; /* THE state of the network */
-EXPORT_SYMBOL(the_lnet);
-
-static char *ip2nets = "";
-module_param(ip2nets, charp, 0444);
-MODULE_PARM_DESC(ip2nets, "LNET network <- IP table");
-
-static char *networks = "";
-module_param(networks, charp, 0444);
-MODULE_PARM_DESC(networks, "local networks");
-
-static char *routes = "";
-module_param(routes, charp, 0444);
-MODULE_PARM_DESC(routes, "routes to non-local networks");
-
-static int rnet_htable_size = LNET_REMOTE_NETS_HASH_DEFAULT;
-module_param(rnet_htable_size, int, 0444);
-MODULE_PARM_DESC(rnet_htable_size, "size of remote network hash table");
-
-static int lnet_ping(struct lnet_process_id id, int timeout_ms,
- struct lnet_process_id __user *ids, int n_ids);
-
-static char *
-lnet_get_routes(void)
-{
- return routes;
-}
-
-static char *
-lnet_get_networks(void)
-{
- char *nets;
- int rc;
-
- if (*networks && *ip2nets) {
- LCONSOLE_ERROR_MSG(0x101, "Please specify EITHER 'networks' or 'ip2nets' but not both at once\n");
- return NULL;
- }
-
- if (*ip2nets) {
- rc = lnet_parse_ip2nets(&nets, ip2nets);
- return !rc ? nets : NULL;
- }
-
- if (*networks)
- return networks;
-
- return "tcp";
-}
-
-static void
-lnet_init_locks(void)
-{
- spin_lock_init(&the_lnet.ln_eq_wait_lock);
- init_waitqueue_head(&the_lnet.ln_eq_waitq);
- init_waitqueue_head(&the_lnet.ln_rc_waitq);
- mutex_init(&the_lnet.ln_lnd_mutex);
- mutex_init(&the_lnet.ln_api_mutex);
-}
-
-static int
-lnet_create_remote_nets_table(void)
-{
- int i;
- struct list_head *hash;
-
- LASSERT(!the_lnet.ln_remote_nets_hash);
- LASSERT(the_lnet.ln_remote_nets_hbits > 0);
- hash = kvmalloc_array(LNET_REMOTE_NETS_HASH_SIZE, sizeof(*hash),
- GFP_KERNEL);
- if (!hash) {
- CERROR("Failed to create remote nets hash table\n");
- return -ENOMEM;
- }
-
- for (i = 0; i < LNET_REMOTE_NETS_HASH_SIZE; i++)
- INIT_LIST_HEAD(&hash[i]);
- the_lnet.ln_remote_nets_hash = hash;
- return 0;
-}
-
-static void
-lnet_destroy_remote_nets_table(void)
-{
- int i;
-
- if (!the_lnet.ln_remote_nets_hash)
- return;
-
- for (i = 0; i < LNET_REMOTE_NETS_HASH_SIZE; i++)
- LASSERT(list_empty(&the_lnet.ln_remote_nets_hash[i]));
-
- kvfree(the_lnet.ln_remote_nets_hash);
- the_lnet.ln_remote_nets_hash = NULL;
-}
-
-static void
-lnet_destroy_locks(void)
-{
- if (the_lnet.ln_res_lock) {
- cfs_percpt_lock_free(the_lnet.ln_res_lock);
- the_lnet.ln_res_lock = NULL;
- }
-
- if (the_lnet.ln_net_lock) {
- cfs_percpt_lock_free(the_lnet.ln_net_lock);
- the_lnet.ln_net_lock = NULL;
- }
-}
-
-static int
-lnet_create_locks(void)
-{
- lnet_init_locks();
-
- the_lnet.ln_res_lock = cfs_percpt_lock_alloc(lnet_cpt_table());
- if (!the_lnet.ln_res_lock)
- goto failed;
-
- the_lnet.ln_net_lock = cfs_percpt_lock_alloc(lnet_cpt_table());
- if (!the_lnet.ln_net_lock)
- goto failed;
-
- return 0;
-
- failed:
- lnet_destroy_locks();
- return -ENOMEM;
-}
-
-static void lnet_assert_wire_constants(void)
-{
- /*
- * Wire protocol assertions generated by 'wirecheck'
- * running on Linux robert.bartonsoftware.com 2.6.8-1.521
- * #1 Mon Aug 16 09:01:18 EDT 2004 i686 athlon i386 GNU/Linux
- * with gcc version 3.3.3 20040412 (Red Hat Linux 3.3.3-7)
- */
-
- /* Constants... */
- BUILD_BUG_ON(LNET_PROTO_TCP_MAGIC != 0xeebc0ded);
- BUILD_BUG_ON(LNET_PROTO_TCP_VERSION_MAJOR != 1);
- BUILD_BUG_ON(LNET_PROTO_TCP_VERSION_MINOR != 0);
- BUILD_BUG_ON(LNET_MSG_ACK != 0);
- BUILD_BUG_ON(LNET_MSG_PUT != 1);
- BUILD_BUG_ON(LNET_MSG_GET != 2);
- BUILD_BUG_ON(LNET_MSG_REPLY != 3);
- BUILD_BUG_ON(LNET_MSG_HELLO != 4);
-
- /* Checks for struct ptl_handle_wire_t */
- BUILD_BUG_ON((int)sizeof(struct lnet_handle_wire) != 16);
- BUILD_BUG_ON((int)offsetof(struct lnet_handle_wire, wh_interface_cookie) != 0);
- BUILD_BUG_ON((int)sizeof(((struct lnet_handle_wire *)0)->wh_interface_cookie) != 8);
- BUILD_BUG_ON((int)offsetof(struct lnet_handle_wire, wh_object_cookie) != 8);
- BUILD_BUG_ON((int)sizeof(((struct lnet_handle_wire *)0)->wh_object_cookie) != 8);
-
- /* Checks for struct struct lnet_magicversion */
- BUILD_BUG_ON((int)sizeof(struct lnet_magicversion) != 8);
- BUILD_BUG_ON((int)offsetof(struct lnet_magicversion, magic) != 0);
- BUILD_BUG_ON((int)sizeof(((struct lnet_magicversion *)0)->magic) != 4);
- BUILD_BUG_ON((int)offsetof(struct lnet_magicversion, version_major) != 4);
- BUILD_BUG_ON((int)sizeof(((struct lnet_magicversion *)0)->version_major) != 2);
- BUILD_BUG_ON((int)offsetof(struct lnet_magicversion, version_minor) != 6);
- BUILD_BUG_ON((int)sizeof(((struct lnet_magicversion *)0)->version_minor) != 2);
-
- /* Checks for struct struct lnet_hdr */
- BUILD_BUG_ON((int)sizeof(struct lnet_hdr) != 72);
- BUILD_BUG_ON((int)offsetof(struct lnet_hdr, dest_nid) != 0);
- BUILD_BUG_ON((int)sizeof(((struct lnet_hdr *)0)->dest_nid) != 8);
- BUILD_BUG_ON((int)offsetof(struct lnet_hdr, src_nid) != 8);
- BUILD_BUG_ON((int)sizeof(((struct lnet_hdr *)0)->src_nid) != 8);
- BUILD_BUG_ON((int)offsetof(struct lnet_hdr, dest_pid) != 16);
- BUILD_BUG_ON((int)sizeof(((struct lnet_hdr *)0)->dest_pid) != 4);
- BUILD_BUG_ON((int)offsetof(struct lnet_hdr, src_pid) != 20);
- BUILD_BUG_ON((int)sizeof(((struct lnet_hdr *)0)->src_pid) != 4);
- BUILD_BUG_ON((int)offsetof(struct lnet_hdr, type) != 24);
- BUILD_BUG_ON((int)sizeof(((struct lnet_hdr *)0)->type) != 4);
- BUILD_BUG_ON((int)offsetof(struct lnet_hdr, payload_length) != 28);
- BUILD_BUG_ON((int)sizeof(((struct lnet_hdr *)0)->payload_length) != 4);
- BUILD_BUG_ON((int)offsetof(struct lnet_hdr, msg) != 32);
- BUILD_BUG_ON((int)sizeof(((struct lnet_hdr *)0)->msg) != 40);
-
- /* Ack */
- BUILD_BUG_ON((int)offsetof(struct lnet_hdr, msg.ack.dst_wmd) != 32);
- BUILD_BUG_ON((int)sizeof(((struct lnet_hdr *)0)->msg.ack.dst_wmd) != 16);
- BUILD_BUG_ON((int)offsetof(struct lnet_hdr, msg.ack.match_bits) != 48);
- BUILD_BUG_ON((int)sizeof(((struct lnet_hdr *)0)->msg.ack.match_bits) != 8);
- BUILD_BUG_ON((int)offsetof(struct lnet_hdr, msg.ack.mlength) != 56);
- BUILD_BUG_ON((int)sizeof(((struct lnet_hdr *)0)->msg.ack.mlength) != 4);
-
- /* Put */
- BUILD_BUG_ON((int)offsetof(struct lnet_hdr, msg.put.ack_wmd) != 32);
- BUILD_BUG_ON((int)sizeof(((struct lnet_hdr *)0)->msg.put.ack_wmd) != 16);
- BUILD_BUG_ON((int)offsetof(struct lnet_hdr, msg.put.match_bits) != 48);
- BUILD_BUG_ON((int)sizeof(((struct lnet_hdr *)0)->msg.put.match_bits) != 8);
- BUILD_BUG_ON((int)offsetof(struct lnet_hdr, msg.put.hdr_data) != 56);
- BUILD_BUG_ON((int)sizeof(((struct lnet_hdr *)0)->msg.put.hdr_data) != 8);
- BUILD_BUG_ON((int)offsetof(struct lnet_hdr, msg.put.ptl_index) != 64);
- BUILD_BUG_ON((int)sizeof(((struct lnet_hdr *)0)->msg.put.ptl_index) != 4);
- BUILD_BUG_ON((int)offsetof(struct lnet_hdr, msg.put.offset) != 68);
- BUILD_BUG_ON((int)sizeof(((struct lnet_hdr *)0)->msg.put.offset) != 4);
-
- /* Get */
- BUILD_BUG_ON((int)offsetof(struct lnet_hdr, msg.get.return_wmd) != 32);
- BUILD_BUG_ON((int)sizeof(((struct lnet_hdr *)0)->msg.get.return_wmd) != 16);
- BUILD_BUG_ON((int)offsetof(struct lnet_hdr, msg.get.match_bits) != 48);
- BUILD_BUG_ON((int)sizeof(((struct lnet_hdr *)0)->msg.get.match_bits) != 8);
- BUILD_BUG_ON((int)offsetof(struct lnet_hdr, msg.get.ptl_index) != 56);
- BUILD_BUG_ON((int)sizeof(((struct lnet_hdr *)0)->msg.get.ptl_index) != 4);
- BUILD_BUG_ON((int)offsetof(struct lnet_hdr, msg.get.src_offset) != 60);
- BUILD_BUG_ON((int)sizeof(((struct lnet_hdr *)0)->msg.get.src_offset) != 4);
- BUILD_BUG_ON((int)offsetof(struct lnet_hdr, msg.get.sink_length) != 64);
- BUILD_BUG_ON((int)sizeof(((struct lnet_hdr *)0)->msg.get.sink_length) != 4);
-
- /* Reply */
- BUILD_BUG_ON((int)offsetof(struct lnet_hdr, msg.reply.dst_wmd) != 32);
- BUILD_BUG_ON((int)sizeof(((struct lnet_hdr *)0)->msg.reply.dst_wmd) != 16);
-
- /* Hello */
- BUILD_BUG_ON((int)offsetof(struct lnet_hdr, msg.hello.incarnation) != 32);
- BUILD_BUG_ON((int)sizeof(((struct lnet_hdr *)0)->msg.hello.incarnation) != 8);
- BUILD_BUG_ON((int)offsetof(struct lnet_hdr, msg.hello.type) != 40);
- BUILD_BUG_ON((int)sizeof(((struct lnet_hdr *)0)->msg.hello.type) != 4);
-}
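
These assertions cost nothing at runtime: BUILD_BUG_ON() turns any size or offset drift into a compile error, freezing the wire ABI against compiler, architecture, and header churn. The same technique applied to a hypothetical 16-byte wire header, as a sketch:

#include <linux/bug.h>
#include <linux/stddef.h>
#include <linux/types.h>

struct demo_wire_hdr {			/* hypothetical on-wire header */
	__u32 dw_magic;
	__u32 dw_version;
	__u64 dw_cookie;
} __packed;

static inline void demo_assert_wire_constants(void)
{
	BUILD_BUG_ON((int)sizeof(struct demo_wire_hdr) != 16);
	BUILD_BUG_ON((int)offsetof(struct demo_wire_hdr, dw_version) != 4);
	BUILD_BUG_ON((int)offsetof(struct demo_wire_hdr, dw_cookie) != 8);
	BUILD_BUG_ON((int)sizeof(((struct demo_wire_hdr *)0)->dw_cookie) != 8);
}
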
-
-static struct lnet_lnd *
-lnet_find_lnd_by_type(__u32 type)
-{
- struct lnet_lnd *lnd;
- struct list_head *tmp;
-
- /* holding lnd mutex */
- list_for_each(tmp, &the_lnet.ln_lnds) {
- lnd = list_entry(tmp, struct lnet_lnd, lnd_list);
-
- if (lnd->lnd_type == type)
- return lnd;
- }
-
- return NULL;
-}
-
-void
-lnet_register_lnd(struct lnet_lnd *lnd)
-{
- mutex_lock(&the_lnet.ln_lnd_mutex);
-
- LASSERT(libcfs_isknown_lnd(lnd->lnd_type));
- LASSERT(!lnet_find_lnd_by_type(lnd->lnd_type));
-
- list_add_tail(&lnd->lnd_list, &the_lnet.ln_lnds);
- lnd->lnd_refcount = 0;
-
- CDEBUG(D_NET, "%s LND registered\n", libcfs_lnd2str(lnd->lnd_type));
-
- mutex_unlock(&the_lnet.ln_lnd_mutex);
-}
-EXPORT_SYMBOL(lnet_register_lnd);
-
-void
-lnet_unregister_lnd(struct lnet_lnd *lnd)
-{
- mutex_lock(&the_lnet.ln_lnd_mutex);
-
- LASSERT(lnet_find_lnd_by_type(lnd->lnd_type) == lnd);
- LASSERT(!lnd->lnd_refcount);
-
- list_del(&lnd->lnd_list);
- CDEBUG(D_NET, "%s LND unregistered\n", libcfs_lnd2str(lnd->lnd_type));
-
- mutex_unlock(&the_lnet.ln_lnd_mutex);
-}
-EXPORT_SYMBOL(lnet_unregister_lnd);
-
-void
-lnet_counters_get(struct lnet_counters *counters)
-{
- struct lnet_counters *ctr;
- int i;
-
- memset(counters, 0, sizeof(*counters));
-
- lnet_net_lock(LNET_LOCK_EX);
-
- cfs_percpt_for_each(ctr, i, the_lnet.ln_counters) {
- counters->msgs_max += ctr->msgs_max;
- counters->msgs_alloc += ctr->msgs_alloc;
- counters->errors += ctr->errors;
- counters->send_count += ctr->send_count;
- counters->recv_count += ctr->recv_count;
- counters->route_count += ctr->route_count;
- counters->drop_count += ctr->drop_count;
- counters->send_length += ctr->send_length;
- counters->recv_length += ctr->recv_length;
- counters->route_length += ctr->route_length;
- counters->drop_length += ctr->drop_length;
- }
- lnet_net_unlock(LNET_LOCK_EX);
-}
-EXPORT_SYMBOL(lnet_counters_get);
-
-void
-lnet_counters_reset(void)
-{
- struct lnet_counters *counters;
- int i;
-
- lnet_net_lock(LNET_LOCK_EX);
-
- cfs_percpt_for_each(counters, i, the_lnet.ln_counters)
- memset(counters, 0, sizeof(struct lnet_counters));
-
- lnet_net_unlock(LNET_LOCK_EX);
-}
-
-static char *
-lnet_res_type2str(int type)
-{
- switch (type) {
- default:
- LBUG();
- case LNET_COOKIE_TYPE_MD:
- return "MD";
- case LNET_COOKIE_TYPE_ME:
- return "ME";
- case LNET_COOKIE_TYPE_EQ:
- return "EQ";
- }
-}
-
-static void
-lnet_res_container_cleanup(struct lnet_res_container *rec)
-{
- int count = 0;
-
- if (!rec->rec_type) /* not set yet, it's uninitialized */
- return;
-
- while (!list_empty(&rec->rec_active)) {
- struct list_head *e = rec->rec_active.next;
-
- list_del_init(e);
- if (rec->rec_type == LNET_COOKIE_TYPE_EQ) {
- kfree(list_entry(e, struct lnet_eq, eq_list));
-
- } else if (rec->rec_type == LNET_COOKIE_TYPE_MD) {
- kfree(list_entry(e, struct lnet_libmd, md_list));
-
- } else { /* NB: Active MEs should be attached on portals */
- LBUG();
- }
- count++;
- }
-
- if (count > 0) {
- /*
- * Found live MD/ME/EQ objects; users really should unlink/free
- * all of them before finalizing LNet, but if someone didn't,
- * we have to recycle the garbage for them
- */
- CERROR("%d active elements on exit of %s container\n",
- count, lnet_res_type2str(rec->rec_type));
- }
-
- kfree(rec->rec_lh_hash);
- rec->rec_lh_hash = NULL;
-
- rec->rec_type = 0; /* mark it as finalized */
-}
-
-static int
-lnet_res_container_setup(struct lnet_res_container *rec, int cpt, int type)
-{
- int rc = 0;
- int i;
-
- LASSERT(!rec->rec_type);
-
- rec->rec_type = type;
- INIT_LIST_HEAD(&rec->rec_active);
- rec->rec_lh_cookie = (cpt << LNET_COOKIE_TYPE_BITS) | type;
-
- /* Arbitrary choice of hash table size */
- rec->rec_lh_hash = kvmalloc_cpt(LNET_LH_HASH_SIZE * sizeof(rec->rec_lh_hash[0]),
- GFP_KERNEL, cpt);
- if (!rec->rec_lh_hash) {
- rc = -ENOMEM;
- goto out;
- }
-
- for (i = 0; i < LNET_LH_HASH_SIZE; i++)
- INIT_LIST_HEAD(&rec->rec_lh_hash[i]);
-
- return 0;
-
-out:
- CERROR("Failed to setup %s resource container\n",
- lnet_res_type2str(type));
- lnet_res_container_cleanup(rec);
- return rc;
-}
-
-static void
-lnet_res_containers_destroy(struct lnet_res_container **recs)
-{
- struct lnet_res_container *rec;
- int i;
-
- cfs_percpt_for_each(rec, i, recs)
- lnet_res_container_cleanup(rec);
-
- cfs_percpt_free(recs);
-}
-
-static struct lnet_res_container **
-lnet_res_containers_create(int type)
-{
- struct lnet_res_container **recs;
- struct lnet_res_container *rec;
- int rc;
- int i;
-
- recs = cfs_percpt_alloc(lnet_cpt_table(), sizeof(*rec));
- if (!recs) {
- CERROR("Failed to allocate %s resource containers\n",
- lnet_res_type2str(type));
- return NULL;
- }
-
- cfs_percpt_for_each(rec, i, recs) {
- rc = lnet_res_container_setup(rec, i, type);
- if (rc) {
- lnet_res_containers_destroy(recs);
- return NULL;
- }
- }
-
- return recs;
-}
-
-struct lnet_libhandle *
-lnet_res_lh_lookup(struct lnet_res_container *rec, __u64 cookie)
-{
- /* ALWAYS called with lnet_res_lock held */
- struct list_head *head;
- struct lnet_libhandle *lh;
- unsigned int hash;
-
- if ((cookie & LNET_COOKIE_MASK) != rec->rec_type)
- return NULL;
-
- hash = cookie >> (LNET_COOKIE_TYPE_BITS + LNET_CPT_BITS);
- head = &rec->rec_lh_hash[hash & LNET_LH_HASH_MASK];
-
- list_for_each_entry(lh, head, lh_hash_chain) {
- if (lh->lh_cookie == cookie)
- return lh;
- }
-
- return NULL;
-}
-
-void
-lnet_res_lh_initialize(struct lnet_res_container *rec,
- struct lnet_libhandle *lh)
-{
- /* ALWAYS called with lnet_res_lock held */
- unsigned int ibits = LNET_COOKIE_TYPE_BITS + LNET_CPT_BITS;
- unsigned int hash;
-
- lh->lh_cookie = rec->rec_lh_cookie;
- rec->rec_lh_cookie += 1 << ibits;
-
- hash = (lh->lh_cookie >> ibits) & LNET_LH_HASH_MASK;
-
- list_add(&lh->lh_hash_chain, &rec->rec_lh_hash[hash]);
-}
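
Between them, lnet_res_container_setup() and these two functions define the cookie format: the container seeds rec_lh_cookie with (cpt << LNET_COOKIE_TYPE_BITS) | type, and each allocation bumps only the bits above ibits, so type and CPT never change. The layout, with a worked example (the bit widths and type value below are hypothetical):

/* bits [0, TYPE_BITS)                  resource type (MD/ME/EQ), fixed
 * bits [TYPE_BITS, TYPE_BITS+CPT_BITS) owning CPT, fixed
 * bits [TYPE_BITS+CPT_BITS, 64)        counter, +1 per allocation
 *
 * e.g. TYPE_BITS = 2, CPT_BITS = 2, type = 1, cpt = 1:
 *   first cookie  = (1 << 2) | 1      = 0b000101
 *   second cookie = first  + (1 << 4) = 0b010101
 *   third cookie  = second + (1 << 4) = 0b100101
 *
 * lnet_res_lh_lookup() verifies the type bits against the container,
 * then hashes the counter bits (cookie >> ibits) to pick a chain in
 * rec_lh_hash.
 */
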
-
-static int lnet_unprepare(void);
-
-static int
-lnet_prepare(lnet_pid_t requested_pid)
-{
- /* Prepare to bring up the network */
- struct lnet_res_container **recs;
- int rc = 0;
-
- if (requested_pid == LNET_PID_ANY) {
- /* Don't instantiate LNET just for me */
- return -ENETDOWN;
- }
-
- LASSERT(!the_lnet.ln_refcount);
-
- the_lnet.ln_routing = 0;
-
- LASSERT(!(requested_pid & LNET_PID_USERFLAG));
- the_lnet.ln_pid = requested_pid;
-
- INIT_LIST_HEAD(&the_lnet.ln_test_peers);
- INIT_LIST_HEAD(&the_lnet.ln_nis);
- INIT_LIST_HEAD(&the_lnet.ln_nis_cpt);
- INIT_LIST_HEAD(&the_lnet.ln_nis_zombie);
- INIT_LIST_HEAD(&the_lnet.ln_routers);
- INIT_LIST_HEAD(&the_lnet.ln_drop_rules);
- INIT_LIST_HEAD(&the_lnet.ln_delay_rules);
-
- rc = lnet_create_remote_nets_table();
- if (rc)
- goto failed;
- /*
- * NB the interface cookie in wire handles guards against delayed
- * replies and ACKs appearing valid after reboot.
- */
- the_lnet.ln_interface_cookie = ktime_get_ns();
-
- the_lnet.ln_counters = cfs_percpt_alloc(lnet_cpt_table(),
- sizeof(struct lnet_counters));
- if (!the_lnet.ln_counters) {
- CERROR("Failed to allocate counters for LNet\n");
- rc = -ENOMEM;
- goto failed;
- }
-
- rc = lnet_peer_tables_create();
- if (rc)
- goto failed;
-
- rc = lnet_msg_containers_create();
- if (rc)
- goto failed;
-
- rc = lnet_res_container_setup(&the_lnet.ln_eq_container, 0,
- LNET_COOKIE_TYPE_EQ);
- if (rc)
- goto failed;
-
- recs = lnet_res_containers_create(LNET_COOKIE_TYPE_ME);
- if (!recs) {
- rc = -ENOMEM;
- goto failed;
- }
-
- the_lnet.ln_me_containers = recs;
-
- recs = lnet_res_containers_create(LNET_COOKIE_TYPE_MD);
- if (!recs) {
- rc = -ENOMEM;
- goto failed;
- }
-
- the_lnet.ln_md_containers = recs;
-
- rc = lnet_portals_create();
- if (rc) {
- CERROR("Failed to create portals for LNet: %d\n", rc);
- goto failed;
- }
-
- return 0;
-
- failed:
- lnet_unprepare();
- return rc;
-}
-
-static int
-lnet_unprepare(void)
-{
- /*
- * NB no LNET_LOCK since this is the last reference. All LND instances
- * have shut down already, so it is safe to unlink and free all
- * descriptors, even those that appear committed to a network op (eg MD
- * with non-zero pending count)
- */
- lnet_fail_nid(LNET_NID_ANY, 0);
-
- LASSERT(!the_lnet.ln_refcount);
- LASSERT(list_empty(&the_lnet.ln_test_peers));
- LASSERT(list_empty(&the_lnet.ln_nis));
- LASSERT(list_empty(&the_lnet.ln_nis_cpt));
- LASSERT(list_empty(&the_lnet.ln_nis_zombie));
-
- lnet_portals_destroy();
-
- if (the_lnet.ln_md_containers) {
- lnet_res_containers_destroy(the_lnet.ln_md_containers);
- the_lnet.ln_md_containers = NULL;
- }
-
- if (the_lnet.ln_me_containers) {
- lnet_res_containers_destroy(the_lnet.ln_me_containers);
- the_lnet.ln_me_containers = NULL;
- }
-
- lnet_res_container_cleanup(&the_lnet.ln_eq_container);
-
- lnet_msg_containers_destroy();
- lnet_peer_tables_destroy();
- lnet_rtrpools_free(0);
-
- if (the_lnet.ln_counters) {
- cfs_percpt_free(the_lnet.ln_counters);
- the_lnet.ln_counters = NULL;
- }
- lnet_destroy_remote_nets_table();
-
- return 0;
-}
-
-struct lnet_ni *
-lnet_net2ni_locked(__u32 net, int cpt)
-{
- struct list_head *tmp;
- struct lnet_ni *ni;
-
- LASSERT(cpt != LNET_LOCK_EX);
-
- list_for_each(tmp, &the_lnet.ln_nis) {
- ni = list_entry(tmp, struct lnet_ni, ni_list);
-
- if (LNET_NIDNET(ni->ni_nid) == net) {
- lnet_ni_addref_locked(ni, cpt);
- return ni;
- }
- }
-
- return NULL;
-}
-
-struct lnet_ni *
-lnet_net2ni(__u32 net)
-{
- struct lnet_ni *ni;
-
- lnet_net_lock(0);
- ni = lnet_net2ni_locked(net, 0);
- lnet_net_unlock(0);
-
- return ni;
-}
-EXPORT_SYMBOL(lnet_net2ni);
-
-static unsigned int
-lnet_nid_cpt_hash(lnet_nid_t nid, unsigned int number)
-{
- __u64 key = nid;
- unsigned int val;
-
- LASSERT(number >= 1 && number <= LNET_CPT_NUMBER);
-
- if (number == 1)
- return 0;
-
- val = hash_long(key, LNET_CPT_BITS);
- /* NB: LNET_CPT_NUMBER doesn't have to be a power of 2 */
- if (val < number)
- return val;
-
- return (unsigned int)(key + val + (val >> 1)) % number;
-}
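
The two-step hash copes with CPT counts that are not powers of two: hash_long() yields LNET_CPT_BITS bits, which is usable directly only when the value happens to fall below the real CPT count, so out-of-range values are re-mixed and reduced modulo that count. A worked example with hypothetical sizes:

/* Suppose LNET_CPT_BITS = 2 but only 3 CPTs are configured:
 *   hash_long(nid, 2) -> a value in 0..3
 *   0, 1, 2 -> returned directly (fast path)
 *   3       -> (nid + 3 + 1) % 3, folded back into range
 * With a power-of-two CPT count every hash value is in range and the
 * modulo path is never taken.
 */
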
-
-int
-lnet_cpt_of_nid_locked(lnet_nid_t nid)
-{
- struct lnet_ni *ni;
-
- /* must be called while holding lnet_net_lock */
- if (LNET_CPT_NUMBER == 1)
- return 0; /* the only one */
-
- /* take lnet_net_lock(any) would be OK */
- if (!list_empty(&the_lnet.ln_nis_cpt)) {
- list_for_each_entry(ni, &the_lnet.ln_nis_cpt, ni_cptlist) {
- if (LNET_NIDNET(ni->ni_nid) != LNET_NIDNET(nid))
- continue;
-
- LASSERT(ni->ni_cpts);
- return ni->ni_cpts[lnet_nid_cpt_hash
- (nid, ni->ni_ncpts)];
- }
- }
-
- return lnet_nid_cpt_hash(nid, LNET_CPT_NUMBER);
-}
-
-int
-lnet_cpt_of_nid(lnet_nid_t nid)
-{
- int cpt;
- int cpt2;
-
- if (LNET_CPT_NUMBER == 1)
- return 0; /* the only one */
-
- if (list_empty(&the_lnet.ln_nis_cpt))
- return lnet_nid_cpt_hash(nid, LNET_CPT_NUMBER);
-
- cpt = lnet_net_lock_current();
- cpt2 = lnet_cpt_of_nid_locked(nid);
- lnet_net_unlock(cpt);
-
- return cpt2;
-}
-EXPORT_SYMBOL(lnet_cpt_of_nid);
-
-int
-lnet_islocalnet(__u32 net)
-{
- struct lnet_ni *ni;
- int cpt;
-
- cpt = lnet_net_lock_current();
-
- ni = lnet_net2ni_locked(net, cpt);
- if (ni)
- lnet_ni_decref_locked(ni, cpt);
-
- lnet_net_unlock(cpt);
-
- return !!ni;
-}
-
-struct lnet_ni *
-lnet_nid2ni_locked(lnet_nid_t nid, int cpt)
-{
- struct lnet_ni *ni;
- struct list_head *tmp;
-
- LASSERT(cpt != LNET_LOCK_EX);
-
- list_for_each(tmp, &the_lnet.ln_nis) {
- ni = list_entry(tmp, struct lnet_ni, ni_list);
-
- if (ni->ni_nid == nid) {
- lnet_ni_addref_locked(ni, cpt);
- return ni;
- }
- }
-
- return NULL;
-}
-
-int
-lnet_islocalnid(lnet_nid_t nid)
-{
- struct lnet_ni *ni;
- int cpt;
-
- cpt = lnet_net_lock_current();
- ni = lnet_nid2ni_locked(nid, cpt);
- if (ni)
- lnet_ni_decref_locked(ni, cpt);
- lnet_net_unlock(cpt);
-
- return !!ni;
-}
-
-int
-lnet_count_acceptor_nis(void)
-{
- /* Return the # of NIs that need the acceptor. */
- int count = 0;
- struct list_head *tmp;
- struct lnet_ni *ni;
- int cpt;
-
- cpt = lnet_net_lock_current();
- list_for_each(tmp, &the_lnet.ln_nis) {
- ni = list_entry(tmp, struct lnet_ni, ni_list);
-
- if (ni->ni_lnd->lnd_accept)
- count++;
- }
-
- lnet_net_unlock(cpt);
-
- return count;
-}
-
-static struct lnet_ping_info *
-lnet_ping_info_create(int num_ni)
-{
- struct lnet_ping_info *ping_info;
- unsigned int infosz;
-
- infosz = offsetof(struct lnet_ping_info, pi_ni[num_ni]);
- ping_info = kvzalloc(infosz, GFP_KERNEL);
- if (!ping_info) {
- CERROR("Can't allocate ping info[%d]\n", num_ni);
- return NULL;
- }
-
- ping_info->pi_nnis = num_ni;
- ping_info->pi_pid = the_lnet.ln_pid;
- ping_info->pi_magic = LNET_PROTO_PING_MAGIC;
- ping_info->pi_features = LNET_PING_FEAT_NI_STATUS;
-
- return ping_info;
-}
-
-static inline int
-lnet_get_ni_count(void)
-{
- struct lnet_ni *ni;
- int count = 0;
-
- lnet_net_lock(0);
-
- list_for_each_entry(ni, &the_lnet.ln_nis, ni_list)
- count++;
-
- lnet_net_unlock(0);
-
- return count;
-}
-
-static inline void
-lnet_ping_info_free(struct lnet_ping_info *pinfo)
-{
- kvfree(pinfo);
-}
-
-static void
-lnet_ping_info_destroy(void)
-{
- struct lnet_ni *ni;
-
- lnet_net_lock(LNET_LOCK_EX);
-
- list_for_each_entry(ni, &the_lnet.ln_nis, ni_list) {
- lnet_ni_lock(ni);
- ni->ni_status = NULL;
- lnet_ni_unlock(ni);
- }
-
- lnet_ping_info_free(the_lnet.ln_ping_info);
- the_lnet.ln_ping_info = NULL;
-
- lnet_net_unlock(LNET_LOCK_EX);
-}
-
-static void
-lnet_ping_event_handler(struct lnet_event *event)
-{
- struct lnet_ping_info *pinfo = event->md.user_ptr;
-
- if (event->unlinked)
- pinfo->pi_features = LNET_PING_FEAT_INVAL;
-}
-
-static int
-lnet_ping_info_setup(struct lnet_ping_info **ppinfo,
- struct lnet_handle_md *md_handle,
- int ni_count, bool set_eq)
-{
- struct lnet_process_id id = {LNET_NID_ANY, LNET_PID_ANY};
- struct lnet_handle_me me_handle;
- struct lnet_md md = { NULL };
- int rc, rc2;
-
- if (set_eq) {
- rc = LNetEQAlloc(0, lnet_ping_event_handler,
- &the_lnet.ln_ping_target_eq);
- if (rc) {
- CERROR("Can't allocate ping EQ: %d\n", rc);
- return rc;
- }
- }
-
- *ppinfo = lnet_ping_info_create(ni_count);
- if (!*ppinfo) {
- rc = -ENOMEM;
- goto failed_0;
- }
-
- rc = LNetMEAttach(LNET_RESERVED_PORTAL, id,
- LNET_PROTO_PING_MATCHBITS, 0,
- LNET_UNLINK, LNET_INS_AFTER,
- &me_handle);
- if (rc) {
- CERROR("Can't create ping ME: %d\n", rc);
- goto failed_1;
- }
-
- /* initialize md content */
- md.start = *ppinfo;
- md.length = offsetof(struct lnet_ping_info,
- pi_ni[(*ppinfo)->pi_nnis]);
- md.threshold = LNET_MD_THRESH_INF;
- md.max_size = 0;
- md.options = LNET_MD_OP_GET | LNET_MD_TRUNCATE |
- LNET_MD_MANAGE_REMOTE;
- md.eq_handle = the_lnet.ln_ping_target_eq;
- md.user_ptr = *ppinfo;
-
- rc = LNetMDAttach(me_handle, md, LNET_RETAIN, md_handle);
- if (rc) {
- CERROR("Can't attach ping MD: %d\n", rc);
- goto failed_2;
- }
-
- return 0;
-
-failed_2:
- rc2 = LNetMEUnlink(me_handle);
- LASSERT(!rc2);
-failed_1:
- lnet_ping_info_free(*ppinfo);
- *ppinfo = NULL;
-failed_0:
- if (set_eq)
- LNetEQFree(the_lnet.ln_ping_target_eq);
- return rc;
-}
-
-static void
-lnet_ping_md_unlink(struct lnet_ping_info *pinfo,
- struct lnet_handle_md *md_handle)
-{
- LNetMDUnlink(*md_handle);
- LNetInvalidateMDHandle(md_handle);
-
- /* NB md could be busy; this just starts the unlink */
- while (pinfo->pi_features != LNET_PING_FEAT_INVAL) {
- CDEBUG(D_NET, "Still waiting for ping MD to unlink\n");
- set_current_state(TASK_NOLOAD);
- schedule_timeout(HZ);
- }
-}
-
-static void
-lnet_ping_info_install_locked(struct lnet_ping_info *ping_info)
-{
- struct lnet_ni_status *ns;
- struct lnet_ni *ni;
- int i = 0;
-
- list_for_each_entry(ni, &the_lnet.ln_nis, ni_list) {
- LASSERT(i < ping_info->pi_nnis);
-
- ns = &ping_info->pi_ni[i];
-
- ns->ns_nid = ni->ni_nid;
-
- lnet_ni_lock(ni);
- ns->ns_status = (ni->ni_status) ?
- ni->ni_status->ns_status : LNET_NI_STATUS_UP;
- ni->ni_status = ns;
- lnet_ni_unlock(ni);
-
- i++;
- }
-}
-
-static void
-lnet_ping_target_update(struct lnet_ping_info *pinfo,
- struct lnet_handle_md md_handle)
-{
- struct lnet_ping_info *old_pinfo = NULL;
- struct lnet_handle_md old_md;
-
- /* switch the NIs to point to the new ping info created */
- lnet_net_lock(LNET_LOCK_EX);
-
- if (!the_lnet.ln_routing)
- pinfo->pi_features |= LNET_PING_FEAT_RTE_DISABLED;
- lnet_ping_info_install_locked(pinfo);
-
- if (the_lnet.ln_ping_info) {
- old_pinfo = the_lnet.ln_ping_info;
- old_md = the_lnet.ln_ping_target_md;
- }
- the_lnet.ln_ping_target_md = md_handle;
- the_lnet.ln_ping_info = pinfo;
-
- lnet_net_unlock(LNET_LOCK_EX);
-
- if (old_pinfo) {
- /* unlink the old ping info */
- lnet_ping_md_unlink(old_pinfo, &old_md);
- lnet_ping_info_free(old_pinfo);
- }
-}
-
-static void
-lnet_ping_target_fini(void)
-{
- int rc;
-
- lnet_ping_md_unlink(the_lnet.ln_ping_info,
- &the_lnet.ln_ping_target_md);
-
- rc = LNetEQFree(the_lnet.ln_ping_target_eq);
- LASSERT(!rc);
-
- lnet_ping_info_destroy();
-}
-
-static int
-lnet_ni_tq_credits(struct lnet_ni *ni)
-{
- int credits;
-
- LASSERT(ni->ni_ncpts >= 1);
-
- if (ni->ni_ncpts == 1)
- return ni->ni_maxtxcredits;
-
- credits = ni->ni_maxtxcredits / ni->ni_ncpts;
- credits = max(credits, 8 * ni->ni_peertxcredits);
- credits = min(credits, ni->ni_maxtxcredits);
-
- return credits;
-}
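
The clamp sequence splits the NI's transmit credits across its per-CPT queues without letting any queue fall below eight peers' worth. Worked numbers (all tunables hypothetical):

/* ni_maxtxcredits = 256, ni_ncpts = 4, ni_peertxcredits = 16:
 *   256 / 4          = 64     even split across the queues
 *   max(64, 8 * 16)  = 128    floor: >= 8 peers' credits per queue
 *   min(128, 256)    = 128    ceiling: never above the NI-wide total
 * Each of the 4 queues gets 128 credits; because of the floor, the
 * queues may deliberately oversubscribe ni_maxtxcredits in aggregate.
 */
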
-
-static void
-lnet_ni_unlink_locked(struct lnet_ni *ni)
-{
- if (!list_empty(&ni->ni_cptlist)) {
- list_del_init(&ni->ni_cptlist);
- lnet_ni_decref_locked(ni, 0);
- }
-
- /* move it to the zombie list so nobody can find it anymore */
- LASSERT(!list_empty(&ni->ni_list));
- list_move(&ni->ni_list, &the_lnet.ln_nis_zombie);
- lnet_ni_decref_locked(ni, 0); /* drop ln_nis' ref */
-}
-
-static void
-lnet_clear_zombies_nis_locked(void)
-{
- int i;
- int islo;
- struct lnet_ni *ni;
- struct lnet_ni *temp;
-
- /*
- * Now wait for the NIs I just nuked to show up on ln_nis_zombie
- * and shut them down in guaranteed thread context
- */
- i = 2;
- list_for_each_entry_safe(ni, temp, &the_lnet.ln_nis_zombie, ni_list) {
- int *ref;
- int j;
-
- list_del_init(&ni->ni_list);
- cfs_percpt_for_each(ref, j, ni->ni_refs) {
- if (!*ref)
- continue;
- /* still busy, add it back to zombie list */
- list_add(&ni->ni_list, &the_lnet.ln_nis_zombie);
- break;
- }
-
- if (!list_empty(&ni->ni_list)) {
- lnet_net_unlock(LNET_LOCK_EX);
- ++i;
- if ((i & (-i)) == i) {
- CDEBUG(D_WARNING, "Waiting for zombie LNI %s\n",
- libcfs_nid2str(ni->ni_nid));
- }
- set_current_state(TASK_UNINTERRUPTIBLE);
- schedule_timeout(HZ);
- lnet_net_lock(LNET_LOCK_EX);
- continue;
- }
-
- ni->ni_lnd->lnd_refcount--;
- lnet_net_unlock(LNET_LOCK_EX);
-
- islo = ni->ni_lnd->lnd_type == LOLND;
-
- LASSERT(!in_interrupt());
- ni->ni_lnd->lnd_shutdown(ni);
-
- /*
- * can't deref lnd anymore now; it might have unregistered
- * itself...
- */
- if (!islo)
- CDEBUG(D_LNI, "Removed LNI %s\n",
- libcfs_nid2str(ni->ni_nid));
-
- lnet_ni_free(ni);
- i = 2;
-
- lnet_net_lock(LNET_LOCK_EX);
- }
-}
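
The odd-looking (i & (-i)) == i condition is a power-of-two test: i & -i isolates the lowest set bit, which equals i only when i has a single bit set. Since i counts retries and resets to 2 whenever an NI is actually freed, the "Waiting for zombie LNI" message is emitted on retries 4, 8, 16, ... instead of once per second: an exponential log throttle. The test in isolation (the kernel's is_power_of_2() in <linux/log2.h> is equivalent):

static inline bool demo_is_pow2(unsigned int i)	/* hypothetical helper */
{
	return i && (i & (-i)) == i;	/* lowest set bit == whole value */
}
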
-
-static void
-lnet_shutdown_lndnis(void)
-{
- struct lnet_ni *ni;
- struct lnet_ni *temp;
- int i;
-
- /* NB called holding the global mutex */
-
- /* All quiet on the API front */
- LASSERT(!the_lnet.ln_shutdown);
- LASSERT(!the_lnet.ln_refcount);
- LASSERT(list_empty(&the_lnet.ln_nis_zombie));
-
- lnet_net_lock(LNET_LOCK_EX);
- the_lnet.ln_shutdown = 1; /* flag shutdown */
-
- /* Unlink NIs from the global table */
- list_for_each_entry_safe(ni, temp, &the_lnet.ln_nis, ni_list) {
- lnet_ni_unlink_locked(ni);
- }
-
- /* Drop the cached loopback NI. */
- if (the_lnet.ln_loni) {
- lnet_ni_decref_locked(the_lnet.ln_loni, 0);
- the_lnet.ln_loni = NULL;
- }
-
- lnet_net_unlock(LNET_LOCK_EX);
-
- /*
- * Clear lazy portals and drop delayed messages which hold refs
- * on their lnet_msg::msg_rxpeer
- */
- for (i = 0; i < the_lnet.ln_nportals; i++)
- LNetClearLazyPortal(i);
-
- /*
- * Clear the peer table and wait for all peers to go (they hold refs on
- * their NIs)
- */
- lnet_peer_tables_cleanup(NULL);
-
- lnet_net_lock(LNET_LOCK_EX);
-
- lnet_clear_zombies_nis_locked();
- the_lnet.ln_shutdown = 0;
- lnet_net_unlock(LNET_LOCK_EX);
-}
-
-/* shut down the NI and release the refcount */
-static void
-lnet_shutdown_lndni(struct lnet_ni *ni)
-{
- int i;
-
- lnet_net_lock(LNET_LOCK_EX);
- lnet_ni_unlink_locked(ni);
- lnet_net_unlock(LNET_LOCK_EX);
-
- /* clear messages for this NI on the lazy portal */
- for (i = 0; i < the_lnet.ln_nportals; i++)
- lnet_clear_lazy_portal(ni, i, "Shutting down NI");
-
- /* Do peer table cleanup for this ni */
- lnet_peer_tables_cleanup(ni);
-
- lnet_net_lock(LNET_LOCK_EX);
- lnet_clear_zombies_nis_locked();
- lnet_net_unlock(LNET_LOCK_EX);
-}
-
-static int
-lnet_startup_lndni(struct lnet_ni *ni, struct lnet_ioctl_config_data *conf)
-{
- struct lnet_ioctl_config_lnd_tunables *lnd_tunables = NULL;
- int rc = -EINVAL;
- int lnd_type;
- struct lnet_lnd *lnd;
- struct lnet_tx_queue *tq;
- int i;
- u32 seed;
-
- lnd_type = LNET_NETTYP(LNET_NIDNET(ni->ni_nid));
-
- LASSERT(libcfs_isknown_lnd(lnd_type));
-
- if (lnd_type == CIBLND || lnd_type == OPENIBLND ||
- lnd_type == IIBLND || lnd_type == VIBLND) {
- CERROR("LND %s obsoleted\n", libcfs_lnd2str(lnd_type));
- goto failed0;
- }
-
- /* Make sure this new NI is unique. */
- lnet_net_lock(LNET_LOCK_EX);
- rc = lnet_net_unique(LNET_NIDNET(ni->ni_nid), &the_lnet.ln_nis);
- lnet_net_unlock(LNET_LOCK_EX);
- if (!rc) {
- if (lnd_type == LOLND) {
- lnet_ni_free(ni);
- return 0;
- }
-
- CERROR("Net %s is not unique\n",
- libcfs_net2str(LNET_NIDNET(ni->ni_nid)));
- rc = -EEXIST;
- goto failed0;
- }
-
- mutex_lock(&the_lnet.ln_lnd_mutex);
- lnd = lnet_find_lnd_by_type(lnd_type);
-
- if (!lnd) {
- mutex_unlock(&the_lnet.ln_lnd_mutex);
- rc = request_module("%s", libcfs_lnd2modname(lnd_type));
- mutex_lock(&the_lnet.ln_lnd_mutex);
-
- lnd = lnet_find_lnd_by_type(lnd_type);
- if (!lnd) {
- mutex_unlock(&the_lnet.ln_lnd_mutex);
- CERROR("Can't load LND %s, module %s, rc=%d\n",
- libcfs_lnd2str(lnd_type),
- libcfs_lnd2modname(lnd_type), rc);
- rc = -EINVAL;
- goto failed0;
- }
- }
-
- lnet_net_lock(LNET_LOCK_EX);
- lnd->lnd_refcount++;
- lnet_net_unlock(LNET_LOCK_EX);
-
- ni->ni_lnd = lnd;
-
- if (conf && conf->cfg_hdr.ioc_len > sizeof(*conf))
- lnd_tunables = (struct lnet_ioctl_config_lnd_tunables *)conf->cfg_bulk;
-
- if (lnd_tunables) {
- ni->ni_lnd_tunables = kzalloc(sizeof(*ni->ni_lnd_tunables),
- GFP_NOFS);
- if (!ni->ni_lnd_tunables) {
- mutex_unlock(&the_lnet.ln_lnd_mutex);
- rc = -ENOMEM;
- goto failed0;
- }
- memcpy(ni->ni_lnd_tunables, lnd_tunables,
- sizeof(*ni->ni_lnd_tunables));
- }
-
- /*
- * If given some LND tunable parameters, parse those now to
- * override the values in the NI structure.
- */
- if (conf) {
- if (conf->cfg_config_u.cfg_net.net_peer_rtr_credits >= 0)
- ni->ni_peerrtrcredits =
- conf->cfg_config_u.cfg_net.net_peer_rtr_credits;
- if (conf->cfg_config_u.cfg_net.net_peer_timeout >= 0)
- ni->ni_peertimeout =
- conf->cfg_config_u.cfg_net.net_peer_timeout;
- if (conf->cfg_config_u.cfg_net.net_peer_tx_credits != -1)
- ni->ni_peertxcredits =
- conf->cfg_config_u.cfg_net.net_peer_tx_credits;
- if (conf->cfg_config_u.cfg_net.net_max_tx_credits >= 0)
- ni->ni_maxtxcredits =
- conf->cfg_config_u.cfg_net.net_max_tx_credits;
- }
-
- rc = lnd->lnd_startup(ni);
-
- mutex_unlock(&the_lnet.ln_lnd_mutex);
-
- if (rc) {
- LCONSOLE_ERROR_MSG(0x105, "Error %d starting up LNI %s\n",
- rc, libcfs_lnd2str(lnd->lnd_type));
- lnet_net_lock(LNET_LOCK_EX);
- lnd->lnd_refcount--;
- lnet_net_unlock(LNET_LOCK_EX);
- goto failed0;
- }
-
- LASSERT(ni->ni_peertimeout <= 0 || lnd->lnd_query);
-
- lnet_net_lock(LNET_LOCK_EX);
- /* refcount for ln_nis */
- lnet_ni_addref_locked(ni, 0);
- list_add_tail(&ni->ni_list, &the_lnet.ln_nis);
- if (ni->ni_cpts) {
- lnet_ni_addref_locked(ni, 0);
- list_add_tail(&ni->ni_cptlist, &the_lnet.ln_nis_cpt);
- }
-
- lnet_net_unlock(LNET_LOCK_EX);
-
- if (lnd->lnd_type == LOLND) {
- lnet_ni_addref(ni);
- LASSERT(!the_lnet.ln_loni);
- the_lnet.ln_loni = ni;
- return 0;
- }
-
- if (!ni->ni_peertxcredits || !ni->ni_maxtxcredits) {
- LCONSOLE_ERROR_MSG(0x107, "LNI %s has no %scredits\n",
- libcfs_lnd2str(lnd->lnd_type),
- !ni->ni_peertxcredits ?
- "" : "per-peer ");
- /*
- * shut down the NI, since if we get here it must already
- * have been started
- */
- lnet_shutdown_lndni(ni);
- return -EINVAL;
- }
-
- cfs_percpt_for_each(tq, i, ni->ni_tx_queues) {
- tq->tq_credits_min =
- tq->tq_credits_max =
- tq->tq_credits = lnet_ni_tq_credits(ni);
- }
-
- /* Nodes with small feet have little entropy. The NID for this
- * node gives the most entropy in the low bits.
- */
- seed = LNET_NIDADDR(ni->ni_nid);
- add_device_randomness(&seed, sizeof(seed));
-
- CDEBUG(D_LNI, "Added LNI %s [%d/%d/%d/%d]\n",
- libcfs_nid2str(ni->ni_nid), ni->ni_peertxcredits,
- lnet_ni_tq_credits(ni) * LNET_CPT_NUMBER,
- ni->ni_peerrtrcredits, ni->ni_peertimeout);
-
- return 0;
-failed0:
- lnet_ni_free(ni);
- return rc;
-}
-
-static int
-lnet_startup_lndnis(struct list_head *nilist)
-{
- struct lnet_ni *ni;
- int rc;
- int ni_count = 0;
-
- while (!list_empty(nilist)) {
- ni = list_entry(nilist->next, struct lnet_ni, ni_list);
- list_del(&ni->ni_list);
- rc = lnet_startup_lndni(ni, NULL);
-
- if (rc < 0)
- goto failed;
-
- ni_count++;
- }
-
- return ni_count;
-failed:
- lnet_shutdown_lndnis();
-
- return rc;
-}
-
-/**
- * Initialize LNet library.
- *
- * Automatically called at module loading time. Caller has to call
- * lnet_lib_exit() after a call to lnet_lib_init(), if and only if the
- * latter returned 0. It must be called exactly once.
- *
- * \retval 0 on success
- * \retval -ve on failures.
- */
-int lnet_lib_init(void)
-{
- int rc;
-
- lnet_assert_wire_constants();
-
- memset(&the_lnet, 0, sizeof(the_lnet));
-
- /* refer to global cfs_cpt_table for now */
- the_lnet.ln_cpt_table = cfs_cpt_table;
- the_lnet.ln_cpt_number = cfs_cpt_number(cfs_cpt_table);
-
- LASSERT(the_lnet.ln_cpt_number > 0);
- if (the_lnet.ln_cpt_number > LNET_CPT_MAX) {
- /* we are under risk of consuming all lh_cookie */
- CERROR("Can't have %d CPTs for LNet (max allowed is %d), please change setting of CPT-table and retry\n",
- the_lnet.ln_cpt_number, LNET_CPT_MAX);
- return -E2BIG;
- }
-
- while ((1 << the_lnet.ln_cpt_bits) < the_lnet.ln_cpt_number)
- the_lnet.ln_cpt_bits++;
-
- rc = lnet_create_locks();
- if (rc) {
- CERROR("Can't create LNet global locks: %d\n", rc);
- return rc;
- }
-
- the_lnet.ln_refcount = 0;
- LNetInvalidateEQHandle(&the_lnet.ln_rc_eqh);
- INIT_LIST_HEAD(&the_lnet.ln_lnds);
- INIT_LIST_HEAD(&the_lnet.ln_rcd_zombie);
- INIT_LIST_HEAD(&the_lnet.ln_rcd_deathrow);
-
- /*
-	 * The hash table size is the number of bits it takes to express
-	 * the set ln_num_routes, minus 1 (better to underestimate than
-	 * overestimate so we don't waste memory).
- */
- if (rnet_htable_size <= 0)
- rnet_htable_size = LNET_REMOTE_NETS_HASH_DEFAULT;
- else if (rnet_htable_size > LNET_REMOTE_NETS_HASH_MAX)
- rnet_htable_size = LNET_REMOTE_NETS_HASH_MAX;
- the_lnet.ln_remote_nets_hbits = max_t(int, 1,
- order_base_2(rnet_htable_size) - 1);
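A minimal user-space sketch of the sizing above, where order_base_2() below is a stand-in for the kernel helper (ceil(log2(n))) and the table sizes are illustrative:

#include <stdio.h>

/* stand-in for the kernel's order_base_2(): ceil(log2(n)) */
static int order_base_2(unsigned int n)
{
	int bits = 0;

	while ((1u << bits) < n)
		bits++;
	return bits;
}

int main(void)
{
	unsigned int sizes[] = { 1, 2, 128, 1000 };
	int i;

	for (i = 0; i < 4; i++) {
		int hbits = order_base_2(sizes[i]) - 1;

		if (hbits < 1)	/* the max_t(int, 1, ...) clamp above */
			hbits = 1;
		printf("rnet_htable_size=%u -> %d bits (%d buckets)\n",
		       sizes[i], hbits, 1 << hbits);
	}
	return 0;
}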
-
- /*
- * All LNDs apart from the LOLND are in separate modules. They
- * register themselves when their module loads, and unregister
- * themselves when their module is unloaded.
- */
- lnet_register_lnd(&the_lolnd);
- return 0;
-}
-
-/**
- * Finalize LNet library.
- *
- * \pre lnet_lib_init() called with success.
- * \pre All LNet users called LNetNIFini() for matching LNetNIInit() calls.
- */
-void lnet_lib_exit(void)
-{
- LASSERT(!the_lnet.ln_refcount);
-
- while (!list_empty(&the_lnet.ln_lnds))
- lnet_unregister_lnd(list_entry(the_lnet.ln_lnds.next,
- struct lnet_lnd, lnd_list));
- lnet_destroy_locks();
-}
-
-/**
- * Set LNet PID and start LNet interfaces, routing, and forwarding.
- *
- * Users must call this function at least once before any other functions.
- * For each successful call there must be a corresponding call to
- * LNetNIFini(). For subsequent calls to LNetNIInit(), \a requested_pid is
- * ignored.
- *
- * The PID used by LNet may be different from the one requested.
- * See LNetGetId().
- *
- * \param requested_pid PID requested by the caller.
- *
- * \return >= 0 on success, and < 0 error code on failures.
- */
-int
-LNetNIInit(lnet_pid_t requested_pid)
-{
- int im_a_router = 0;
- int rc;
- int ni_count;
- struct lnet_ping_info *pinfo;
- struct lnet_handle_md md_handle;
- struct list_head net_head;
-
- INIT_LIST_HEAD(&net_head);
-
- mutex_lock(&the_lnet.ln_api_mutex);
-
- CDEBUG(D_OTHER, "refs %d\n", the_lnet.ln_refcount);
-
- if (the_lnet.ln_refcount > 0) {
- rc = the_lnet.ln_refcount++;
- mutex_unlock(&the_lnet.ln_api_mutex);
- return rc;
- }
-
- rc = lnet_prepare(requested_pid);
- if (rc) {
- mutex_unlock(&the_lnet.ln_api_mutex);
- return rc;
- }
-
- /* Add in the loopback network */
- if (!lnet_ni_alloc(LNET_MKNET(LOLND, 0), NULL, &net_head)) {
- rc = -ENOMEM;
- goto err_empty_list;
- }
-
- /*
- * If LNet is being initialized via DLC it is possible
- * that the user requests not to load module parameters (ones which
- * are supported by DLC) on initialization. Therefore, make sure not
- * to load networks, routes and forwarding from module parameters
-	 * in this case. On cleanup after a failure, only clean up the
-	 * routes if they have been loaded.
- */
- if (!the_lnet.ln_nis_from_mod_params) {
- rc = lnet_parse_networks(&net_head, lnet_get_networks());
- if (rc < 0)
- goto err_empty_list;
- }
-
- ni_count = lnet_startup_lndnis(&net_head);
- if (ni_count < 0) {
- rc = ni_count;
- goto err_empty_list;
- }
-
- if (!the_lnet.ln_nis_from_mod_params) {
- rc = lnet_parse_routes(lnet_get_routes(), &im_a_router);
- if (rc)
- goto err_shutdown_lndnis;
-
- rc = lnet_check_routes();
- if (rc)
- goto err_destroy_routes;
-
- rc = lnet_rtrpools_alloc(im_a_router);
- if (rc)
- goto err_destroy_routes;
- }
-
- rc = lnet_acceptor_start();
- if (rc)
- goto err_destroy_routes;
-
- the_lnet.ln_refcount = 1;
- /* Now I may use my own API functions... */
-
- rc = lnet_ping_info_setup(&pinfo, &md_handle, ni_count, true);
- if (rc)
- goto err_acceptor_stop;
-
- lnet_ping_target_update(pinfo, md_handle);
-
- rc = lnet_router_checker_start();
- if (rc)
- goto err_stop_ping;
-
- lnet_fault_init();
- lnet_router_debugfs_init();
-
- mutex_unlock(&the_lnet.ln_api_mutex);
-
- return 0;
-
-err_stop_ping:
- lnet_ping_target_fini();
-err_acceptor_stop:
- the_lnet.ln_refcount = 0;
- lnet_acceptor_stop();
-err_destroy_routes:
- if (!the_lnet.ln_nis_from_mod_params)
- lnet_destroy_routes();
-err_shutdown_lndnis:
- lnet_shutdown_lndnis();
-err_empty_list:
- lnet_unprepare();
- LASSERT(rc < 0);
- mutex_unlock(&the_lnet.ln_api_mutex);
- while (!list_empty(&net_head)) {
- struct lnet_ni *ni;
-
- ni = list_entry(net_head.next, struct lnet_ni, ni_list);
- list_del_init(&ni->ni_list);
- lnet_ni_free(ni);
- }
- return rc;
-}
-EXPORT_SYMBOL(LNetNIInit);
-
-/**
- * Stop LNet interfaces, routing, and forwarding.
- *
- * Users must call this function once for each successful call to LNetNIInit().
- * Once the LNetNIFini() operation has been started, the results of pending
- * API operations are undefined.
- *
- * \return always 0 for current implementation.
- */
-int
-LNetNIFini(void)
-{
- mutex_lock(&the_lnet.ln_api_mutex);
-
- LASSERT(the_lnet.ln_refcount > 0);
-
- if (the_lnet.ln_refcount != 1) {
- the_lnet.ln_refcount--;
- } else {
- LASSERT(!the_lnet.ln_niinit_self);
-
- lnet_fault_fini();
- lnet_router_debugfs_fini();
- lnet_router_checker_stop();
- lnet_ping_target_fini();
-
- /* Teardown fns that use my own API functions BEFORE here */
- the_lnet.ln_refcount = 0;
-
- lnet_acceptor_stop();
- lnet_destroy_routes();
- lnet_shutdown_lndnis();
- lnet_unprepare();
- }
-
- mutex_unlock(&the_lnet.ln_api_mutex);
- return 0;
-}
-EXPORT_SYMBOL(LNetNIFini);
-
-/**
- * Grab the NI data from the NI structure and fill in the output
- * parameters.
- *
- * \param[in] ni network interface structure
- * \param[out] config NI configuration
- */
-static void
-lnet_fill_ni_info(struct lnet_ni *ni, struct lnet_ioctl_config_data *config)
-{
- struct lnet_ioctl_config_lnd_tunables *lnd_cfg = NULL;
- struct lnet_ioctl_net_config *net_config;
- size_t min_size, tunable_size = 0;
- int i;
-
- if (!ni || !config)
- return;
-
- net_config = (struct lnet_ioctl_net_config *)config->cfg_bulk;
- if (!net_config)
- return;
-
- BUILD_BUG_ON(ARRAY_SIZE(ni->ni_interfaces) !=
- ARRAY_SIZE(net_config->ni_interfaces));
-
- for (i = 0; i < ARRAY_SIZE(ni->ni_interfaces); i++) {
- if (!ni->ni_interfaces[i])
- break;
-
- strncpy(net_config->ni_interfaces[i],
- ni->ni_interfaces[i],
- sizeof(net_config->ni_interfaces[i]));
- }
-
- config->cfg_nid = ni->ni_nid;
- config->cfg_config_u.cfg_net.net_peer_timeout = ni->ni_peertimeout;
- config->cfg_config_u.cfg_net.net_max_tx_credits = ni->ni_maxtxcredits;
- config->cfg_config_u.cfg_net.net_peer_tx_credits = ni->ni_peertxcredits;
- config->cfg_config_u.cfg_net.net_peer_rtr_credits = ni->ni_peerrtrcredits;
-
- net_config->ni_status = ni->ni_status->ns_status;
-
- if (ni->ni_cpts) {
- int num_cpts = min(ni->ni_ncpts, LNET_MAX_SHOW_NUM_CPT);
-
- for (i = 0; i < num_cpts; i++)
- net_config->ni_cpts[i] = ni->ni_cpts[i];
-
- config->cfg_ncpts = num_cpts;
- }
-
- /*
-	 * See if the userland tools sent in a newer and larger version
-	 * of struct lnet_tunables than the kernel uses.
- */
- min_size = sizeof(*config) + sizeof(*net_config);
-
- if (config->cfg_hdr.ioc_len > min_size)
- tunable_size = config->cfg_hdr.ioc_len - min_size;
-
-	/* Don't copy too much data to user space */
- min_size = min(tunable_size, sizeof(*ni->ni_lnd_tunables));
- lnd_cfg = (struct lnet_ioctl_config_lnd_tunables *)net_config->cfg_bulk;
-
- if (ni->ni_lnd_tunables && lnd_cfg && min_size) {
- memcpy(lnd_cfg, ni->ni_lnd_tunables, min_size);
- config->cfg_config_u.cfg_net.net_interface_count = 1;
-
-		/* Tell userland that the kernel side has less data */
- if (tunable_size > sizeof(*ni->ni_lnd_tunables)) {
-			min_size = tunable_size - sizeof(*ni->ni_lnd_tunables);
- config->cfg_hdr.ioc_len -= min_size;
- }
- }
-}
-
-static int
-lnet_get_net_config(struct lnet_ioctl_config_data *config)
-{
- struct lnet_ni *ni;
- struct list_head *tmp;
- int idx = config->cfg_count;
- int cpt, i = 0;
- int rc = -ENOENT;
-
- cpt = lnet_net_lock_current();
-
- list_for_each(tmp, &the_lnet.ln_nis) {
- if (i++ != idx)
- continue;
-
- ni = list_entry(tmp, struct lnet_ni, ni_list);
- lnet_ni_lock(ni);
- lnet_fill_ni_info(ni, config);
- lnet_ni_unlock(ni);
- rc = 0;
- break;
- }
-
- lnet_net_unlock(cpt);
- return rc;
-}
-
-int
-lnet_dyn_add_ni(lnet_pid_t requested_pid, struct lnet_ioctl_config_data *conf)
-{
- char *nets = conf->cfg_config_u.cfg_net.net_intf;
- struct lnet_ping_info *pinfo;
- struct lnet_handle_md md_handle;
- struct lnet_ni *ni;
- struct list_head net_head;
- struct lnet_remotenet *rnet;
- int rc;
-
- INIT_LIST_HEAD(&net_head);
-
- /* Create a ni structure for the network string */
- rc = lnet_parse_networks(&net_head, nets);
- if (rc <= 0)
- return !rc ? -EINVAL : rc;
-
- mutex_lock(&the_lnet.ln_api_mutex);
-
- if (rc > 1) {
- rc = -EINVAL; /* only add one interface per call */
- goto failed0;
- }
-
- ni = list_entry(net_head.next, struct lnet_ni, ni_list);
-
- lnet_net_lock(LNET_LOCK_EX);
- rnet = lnet_find_net_locked(LNET_NIDNET(ni->ni_nid));
- lnet_net_unlock(LNET_LOCK_EX);
- /*
-	 * make sure that the added net doesn't invalidate the current
-	 * routing configuration LNet is keeping
- */
- if (rnet) {
- CERROR("Adding net %s will invalidate routing configuration\n",
- nets);
- rc = -EUSERS;
- goto failed0;
- }
-
- rc = lnet_ping_info_setup(&pinfo, &md_handle, 1 + lnet_get_ni_count(),
- false);
- if (rc)
- goto failed0;
-
- list_del_init(&ni->ni_list);
-
- rc = lnet_startup_lndni(ni, conf);
- if (rc)
- goto failed1;
-
- if (ni->ni_lnd->lnd_accept) {
- rc = lnet_acceptor_start();
- if (rc < 0) {
- /* shutdown the ni that we just started */
- CERROR("Failed to start up acceptor thread\n");
- lnet_shutdown_lndni(ni);
- goto failed1;
- }
- }
-
- lnet_ping_target_update(pinfo, md_handle);
- mutex_unlock(&the_lnet.ln_api_mutex);
-
- return 0;
-
-failed1:
- lnet_ping_md_unlink(pinfo, &md_handle);
- lnet_ping_info_free(pinfo);
-failed0:
- mutex_unlock(&the_lnet.ln_api_mutex);
- while (!list_empty(&net_head)) {
- ni = list_entry(net_head.next, struct lnet_ni, ni_list);
- list_del_init(&ni->ni_list);
- lnet_ni_free(ni);
- }
- return rc;
-}
-
-int
-lnet_dyn_del_ni(__u32 net)
-{
- struct lnet_ni *ni;
- struct lnet_ping_info *pinfo;
- struct lnet_handle_md md_handle;
- int rc;
-
- /* don't allow userspace to shutdown the LOLND */
- if (LNET_NETTYP(net) == LOLND)
- return -EINVAL;
-
- mutex_lock(&the_lnet.ln_api_mutex);
- /* create and link a new ping info, before removing the old one */
- rc = lnet_ping_info_setup(&pinfo, &md_handle,
- lnet_get_ni_count() - 1, false);
- if (rc)
- goto out;
-
- ni = lnet_net2ni(net);
- if (!ni) {
- rc = -EINVAL;
- goto failed;
- }
-
- /* decrement the reference counter taken by lnet_net2ni() */
- lnet_ni_decref_locked(ni, 0);
-
- lnet_shutdown_lndni(ni);
-
- if (!lnet_count_acceptor_nis())
- lnet_acceptor_stop();
-
- lnet_ping_target_update(pinfo, md_handle);
- goto out;
-failed:
- lnet_ping_md_unlink(pinfo, &md_handle);
- lnet_ping_info_free(pinfo);
-out:
- mutex_unlock(&the_lnet.ln_api_mutex);
-
- return rc;
-}
-
-/**
- * LNet ioctl handler.
- */
-int
-LNetCtl(unsigned int cmd, void *arg)
-{
- struct libcfs_ioctl_data *data = arg;
- struct lnet_ioctl_config_data *config;
- struct lnet_process_id id = {0};
- struct lnet_ni *ni;
- int rc;
- unsigned long secs_passed;
-
- BUILD_BUG_ON(LIBCFS_IOC_DATA_MAX <
- sizeof(struct lnet_ioctl_net_config) +
- sizeof(struct lnet_ioctl_config_data));
-
- switch (cmd) {
- case IOC_LIBCFS_GET_NI:
- rc = LNetGetId(data->ioc_count, &id);
- data->ioc_nid = id.nid;
- return rc;
-
- case IOC_LIBCFS_FAIL_NID:
- return lnet_fail_nid(data->ioc_nid, data->ioc_count);
-
- case IOC_LIBCFS_ADD_ROUTE:
- config = arg;
-
- if (config->cfg_hdr.ioc_len < sizeof(*config))
- return -EINVAL;
-
- mutex_lock(&the_lnet.ln_api_mutex);
- rc = lnet_add_route(config->cfg_net,
- config->cfg_config_u.cfg_route.rtr_hop,
- config->cfg_nid,
- config->cfg_config_u.cfg_route.rtr_priority);
- if (!rc) {
- rc = lnet_check_routes();
- if (rc)
- lnet_del_route(config->cfg_net,
- config->cfg_nid);
- }
- mutex_unlock(&the_lnet.ln_api_mutex);
- return rc;
-
- case IOC_LIBCFS_DEL_ROUTE:
- config = arg;
-
- if (config->cfg_hdr.ioc_len < sizeof(*config))
- return -EINVAL;
-
- mutex_lock(&the_lnet.ln_api_mutex);
- rc = lnet_del_route(config->cfg_net, config->cfg_nid);
- mutex_unlock(&the_lnet.ln_api_mutex);
- return rc;
-
- case IOC_LIBCFS_GET_ROUTE:
- config = arg;
-
- if (config->cfg_hdr.ioc_len < sizeof(*config))
- return -EINVAL;
-
- return lnet_get_route(config->cfg_count,
- &config->cfg_net,
- &config->cfg_config_u.cfg_route.rtr_hop,
- &config->cfg_nid,
- &config->cfg_config_u.cfg_route.rtr_flags,
- &config->cfg_config_u.cfg_route.rtr_priority);
-
- case IOC_LIBCFS_GET_NET: {
- size_t total = sizeof(*config) +
- sizeof(struct lnet_ioctl_net_config);
- config = arg;
-
- if (config->cfg_hdr.ioc_len < total)
- return -EINVAL;
-
- return lnet_get_net_config(config);
- }
-
- case IOC_LIBCFS_GET_LNET_STATS: {
- struct lnet_ioctl_lnet_stats *lnet_stats = arg;
-
- if (lnet_stats->st_hdr.ioc_len < sizeof(*lnet_stats))
- return -EINVAL;
-
- lnet_counters_get(&lnet_stats->st_cntrs);
- return 0;
- }
-
- case IOC_LIBCFS_CONFIG_RTR:
- config = arg;
-
- if (config->cfg_hdr.ioc_len < sizeof(*config))
- return -EINVAL;
-
- mutex_lock(&the_lnet.ln_api_mutex);
- if (config->cfg_config_u.cfg_buffers.buf_enable) {
- rc = lnet_rtrpools_enable();
- mutex_unlock(&the_lnet.ln_api_mutex);
- return rc;
- }
- lnet_rtrpools_disable();
- mutex_unlock(&the_lnet.ln_api_mutex);
- return 0;
-
- case IOC_LIBCFS_ADD_BUF:
- config = arg;
-
- if (config->cfg_hdr.ioc_len < sizeof(*config))
- return -EINVAL;
-
- mutex_lock(&the_lnet.ln_api_mutex);
- rc = lnet_rtrpools_adjust(config->cfg_config_u.cfg_buffers.buf_tiny,
- config->cfg_config_u.cfg_buffers.buf_small,
- config->cfg_config_u.cfg_buffers.buf_large);
- mutex_unlock(&the_lnet.ln_api_mutex);
- return rc;
-
- case IOC_LIBCFS_GET_BUF: {
- struct lnet_ioctl_pool_cfg *pool_cfg;
- size_t total = sizeof(*config) + sizeof(*pool_cfg);
-
- config = arg;
-
- if (config->cfg_hdr.ioc_len < total)
- return -EINVAL;
-
- pool_cfg = (struct lnet_ioctl_pool_cfg *)config->cfg_bulk;
- return lnet_get_rtr_pool_cfg(config->cfg_count, pool_cfg);
- }
-
- case IOC_LIBCFS_GET_PEER_INFO: {
- struct lnet_ioctl_peer *peer_info = arg;
-
- if (peer_info->pr_hdr.ioc_len < sizeof(*peer_info))
- return -EINVAL;
-
- return lnet_get_peer_info(peer_info->pr_count,
- &peer_info->pr_nid,
- peer_info->pr_lnd_u.pr_peer_credits.cr_aliveness,
- &peer_info->pr_lnd_u.pr_peer_credits.cr_ncpt,
- &peer_info->pr_lnd_u.pr_peer_credits.cr_refcount,
- &peer_info->pr_lnd_u.pr_peer_credits.cr_ni_peer_tx_credits,
- &peer_info->pr_lnd_u.pr_peer_credits.cr_peer_tx_credits,
- &peer_info->pr_lnd_u.pr_peer_credits.cr_peer_rtr_credits,
- &peer_info->pr_lnd_u.pr_peer_credits.cr_peer_min_rtr_credits,
- &peer_info->pr_lnd_u.pr_peer_credits.cr_peer_tx_qnob);
- }
-
- case IOC_LIBCFS_NOTIFY_ROUTER:
- secs_passed = (ktime_get_real_seconds() - data->ioc_u64[0]);
- secs_passed *= msecs_to_jiffies(MSEC_PER_SEC);
-
- return lnet_notify(NULL, data->ioc_nid, data->ioc_flags,
- jiffies - secs_passed);
-
- case IOC_LIBCFS_LNET_DIST:
- rc = LNetDist(data->ioc_nid, &data->ioc_nid, &data->ioc_u32[1]);
- if (rc < 0 && rc != -EHOSTUNREACH)
- return rc;
-
- data->ioc_u32[0] = rc;
- return 0;
-
- case IOC_LIBCFS_TESTPROTOCOMPAT:
- lnet_net_lock(LNET_LOCK_EX);
- the_lnet.ln_testprotocompat = data->ioc_flags;
- lnet_net_unlock(LNET_LOCK_EX);
- return 0;
-
- case IOC_LIBCFS_LNET_FAULT:
- return lnet_fault_ctl(data->ioc_flags, data);
-
- case IOC_LIBCFS_PING:
- id.nid = data->ioc_nid;
- id.pid = data->ioc_u32[0];
- rc = lnet_ping(id, data->ioc_u32[1], /* timeout */
- data->ioc_pbuf1,
- data->ioc_plen1 / sizeof(struct lnet_process_id));
- if (rc < 0)
- return rc;
- data->ioc_count = rc;
- return 0;
-
- default:
- ni = lnet_net2ni(data->ioc_net);
- if (!ni)
- return -EINVAL;
-
- if (!ni->ni_lnd->lnd_ctl)
- rc = -EINVAL;
- else
- rc = ni->ni_lnd->lnd_ctl(ni, cmd, arg);
-
- lnet_ni_decref(ni);
- return rc;
- }
- /* not reached */
-}
-EXPORT_SYMBOL(LNetCtl);
-
-void LNetDebugPeer(struct lnet_process_id id)
-{
- lnet_debug_peer(id.nid);
-}
-EXPORT_SYMBOL(LNetDebugPeer);
-
-/**
- * Retrieve the lnet_process_id ID of the LNet interface at \a index. Note
- * that all interfaces share the same PID, as requested by LNetNIInit().
- *
- * \param index Index of the interface to look up.
- * \param id On successful return, this location will hold the
- * lnet_process_id ID of the interface.
- *
- * \retval 0 If an interface exists at \a index.
- * \retval -ENOENT If no interface has been found.
- */
-int
-LNetGetId(unsigned int index, struct lnet_process_id *id)
-{
- struct lnet_ni *ni;
- struct list_head *tmp;
- int cpt;
- int rc = -ENOENT;
-
- LASSERT(the_lnet.ln_refcount > 0);
-
- cpt = lnet_net_lock_current();
-
- list_for_each(tmp, &the_lnet.ln_nis) {
- if (index--)
- continue;
-
- ni = list_entry(tmp, struct lnet_ni, ni_list);
-
- id->nid = ni->ni_nid;
- id->pid = the_lnet.ln_pid;
- rc = 0;
- break;
- }
-
- lnet_net_unlock(cpt);
- return rc;
-}
-EXPORT_SYMBOL(LNetGetId);
-
-static int lnet_ping(struct lnet_process_id id, int timeout_ms,
- struct lnet_process_id __user *ids, int n_ids)
-{
- struct lnet_handle_eq eqh;
- struct lnet_handle_md mdh;
- struct lnet_event event;
- struct lnet_md md = { NULL };
- int which;
- int unlinked = 0;
- int replied = 0;
-	const int a_long_time = 60000; /* ms */
- int infosz;
- struct lnet_ping_info *info;
- struct lnet_process_id tmpid;
- int i;
- int nob;
- int rc;
- int rc2;
-
- infosz = offsetof(struct lnet_ping_info, pi_ni[n_ids]);
-
- if (n_ids <= 0 ||
- id.nid == LNET_NID_ANY ||
- timeout_ms > 500000 || /* arbitrary limit! */
- n_ids > 20) /* arbitrary limit! */
- return -EINVAL;
-
- if (id.pid == LNET_PID_ANY)
- id.pid = LNET_PID_LUSTRE;
-
- info = kzalloc(infosz, GFP_KERNEL);
- if (!info)
- return -ENOMEM;
-
- /* NB 2 events max (including any unlink event) */
- rc = LNetEQAlloc(2, LNET_EQ_HANDLER_NONE, &eqh);
- if (rc) {
- CERROR("Can't allocate EQ: %d\n", rc);
- goto out_0;
- }
-
- /* initialize md content */
- md.start = info;
- md.length = infosz;
-	md.threshold = 2; /* GET/REPLY */
- md.max_size = 0;
- md.options = LNET_MD_TRUNCATE;
- md.user_ptr = NULL;
- md.eq_handle = eqh;
-
- rc = LNetMDBind(md, LNET_UNLINK, &mdh);
- if (rc) {
- CERROR("Can't bind MD: %d\n", rc);
- goto out_1;
- }
-
- rc = LNetGet(LNET_NID_ANY, mdh, id,
- LNET_RESERVED_PORTAL,
- LNET_PROTO_PING_MATCHBITS, 0);
-
- if (rc) {
- /* Don't CERROR; this could be deliberate! */
-
- rc2 = LNetMDUnlink(mdh);
- LASSERT(!rc2);
-
- /* NB must wait for the UNLINK event below... */
- unlinked = 1;
- timeout_ms = a_long_time;
- }
-
- do {
- /* MUST block for unlink to complete */
-
- rc2 = LNetEQPoll(&eqh, 1, timeout_ms, !unlinked,
- &event, &which);
-
- CDEBUG(D_NET, "poll %d(%d %d)%s\n", rc2,
- (rc2 <= 0) ? -1 : event.type,
- (rc2 <= 0) ? -1 : event.status,
- (rc2 > 0 && event.unlinked) ? " unlinked" : "");
-
- LASSERT(rc2 != -EOVERFLOW); /* can't miss anything */
-
- if (rc2 <= 0 || event.status) {
- /* timeout or error */
- if (!replied && !rc)
- rc = (rc2 < 0) ? rc2 :
- !rc2 ? -ETIMEDOUT :
- event.status;
-
- if (!unlinked) {
- /* Ensure completion in finite time... */
- LNetMDUnlink(mdh);
- /* No assertion (racing with network) */
- unlinked = 1;
- timeout_ms = a_long_time;
- } else if (!rc2) {
- /* timed out waiting for unlink */
- CWARN("ping %s: late network completion\n",
- libcfs_id2str(id));
- }
- } else if (event.type == LNET_EVENT_REPLY) {
- replied = 1;
- rc = event.mlength;
- }
-
- } while (rc2 <= 0 || !event.unlinked);
-
- if (!replied) {
- if (rc >= 0)
- CWARN("%s: Unexpected rc >= 0 but no reply!\n",
- libcfs_id2str(id));
- rc = -EIO;
- goto out_1;
- }
-
- nob = rc;
- LASSERT(nob >= 0 && nob <= infosz);
-
- rc = -EPROTO; /* if I can't parse... */
-
- if (nob < 8) {
- /* can't check magic/version */
- CERROR("%s: ping info too short %d\n",
- libcfs_id2str(id), nob);
- goto out_1;
- }
-
- if (info->pi_magic == __swab32(LNET_PROTO_PING_MAGIC)) {
- lnet_swap_pinginfo(info);
- } else if (info->pi_magic != LNET_PROTO_PING_MAGIC) {
- CERROR("%s: Unexpected magic %08x\n",
- libcfs_id2str(id), info->pi_magic);
- goto out_1;
- }
-
- if (!(info->pi_features & LNET_PING_FEAT_NI_STATUS)) {
- CERROR("%s: ping w/o NI status: 0x%x\n",
- libcfs_id2str(id), info->pi_features);
- goto out_1;
- }
-
- if (nob < offsetof(struct lnet_ping_info, pi_ni[0])) {
- CERROR("%s: Short reply %d(%d min)\n", libcfs_id2str(id),
- nob, (int)offsetof(struct lnet_ping_info, pi_ni[0]));
- goto out_1;
- }
-
- if (info->pi_nnis < n_ids)
- n_ids = info->pi_nnis;
-
- if (nob < offsetof(struct lnet_ping_info, pi_ni[n_ids])) {
- CERROR("%s: Short reply %d(%d expected)\n", libcfs_id2str(id),
- nob, (int)offsetof(struct lnet_ping_info, pi_ni[n_ids]));
- goto out_1;
- }
-
- rc = -EFAULT; /* If I SEGV... */
-
- memset(&tmpid, 0, sizeof(tmpid));
- for (i = 0; i < n_ids; i++) {
- tmpid.pid = info->pi_pid;
- tmpid.nid = info->pi_ni[i].ns_nid;
- if (copy_to_user(&ids[i], &tmpid, sizeof(tmpid)))
- goto out_1;
- }
- rc = info->pi_nnis;
-
- out_1:
- rc2 = LNetEQFree(eqh);
- if (rc2)
- CERROR("rc2 %d\n", rc2);
- LASSERT(!rc2);
-
- out_0:
- kfree(info);
- return rc;
-}
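The function above is easier to follow as the underlying GET/REPLY pattern. Here is a minimal sketch, stripped of the error paths, timeout adjustment, and the deliberate-failure unlink handling; ping_sketch() and its arguments are hypothetical, while the LNet calls mirror those used in the deleted code:

static int ping_sketch(struct lnet_process_id id, void *buf, int len)
{
	struct lnet_handle_eq eqh;
	struct lnet_handle_md mdh;
	struct lnet_event ev;
	struct lnet_md md = { NULL };
	int which, rc, replied = 0, nob = 0;

	/* room for 2 events: the REPLY and any unlink event */
	LNetEQAlloc(2, LNET_EQ_HANDLER_NONE, &eqh);

	md.start = buf;
	md.length = len;
	md.threshold = 2;
	md.options = LNET_MD_TRUNCATE;
	md.eq_handle = eqh;
	LNetMDBind(md, LNET_UNLINK, &mdh);

	/* GET from the ping portal; the peer replies with its ping info */
	LNetGet(LNET_NID_ANY, mdh, id, LNET_RESERVED_PORTAL,
		LNET_PROTO_PING_MATCHBITS, 0);

	/* consume events until the MD is unlinked */
	do {
		rc = LNetEQPoll(&eqh, 1, 60000, 1, &ev, &which);
		if (rc > 0 && ev.type == LNET_EVENT_REPLY) {
			replied = 1;
			nob = ev.mlength;	/* bytes of ping info */
		}
	} while (rc <= 0 || !ev.unlinked);

	LNetEQFree(eqh);
	return replied ? nob : -EIO;
}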
diff --git a/drivers/staging/lustre/lnet/lnet/config.c b/drivers/staging/lustre/lnet/lnet/config.c
deleted file mode 100644
index 0aea268a4f1c..000000000000
--- a/drivers/staging/lustre/lnet/lnet/config.c
+++ /dev/null
@@ -1,1234 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/*
- * GPL HEADER START
- *
- * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 only,
- * as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * General Public License version 2 for more details (a copy is included
- * in the LICENSE file that accompanied this code).
- *
- * You should have received a copy of the GNU General Public License
- * version 2 along with this program; If not, see
- * http://www.gnu.org/licenses/gpl-2.0.html
- *
- * GPL HEADER END
- */
-/*
- * Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved.
- * Use is subject to license terms.
- *
- * Copyright (c) 2012, 2015, Intel Corporation.
- */
-/*
- * This file is part of Lustre, http://www.lustre.org/
- * Lustre is a trademark of Sun Microsystems, Inc.
- */
-
-#define DEBUG_SUBSYSTEM S_LNET
-#include <linux/nsproxy.h>
-#include <net/net_namespace.h>
-#include <linux/lnet/lib-lnet.h>
-
-struct lnet_text_buf { /* tmp struct for parsing routes */
- struct list_head ltb_list; /* stash on lists */
- int ltb_size; /* allocated size */
- char ltb_text[0]; /* text buffer */
-};
-
-static int lnet_tbnob; /* track text buf allocation */
-#define LNET_MAX_TEXTBUF_NOB (64 << 10) /* bound allocation */
-#define LNET_SINGLE_TEXTBUF_NOB (4 << 10)
-
-static void
-lnet_syntax(char *name, char *str, int offset, int width)
-{
- static char dots[LNET_SINGLE_TEXTBUF_NOB];
- static char dashes[LNET_SINGLE_TEXTBUF_NOB];
-
- memset(dots, '.', sizeof(dots));
- dots[sizeof(dots) - 1] = 0;
- memset(dashes, '-', sizeof(dashes));
- dashes[sizeof(dashes) - 1] = 0;
-
- LCONSOLE_ERROR_MSG(0x10f, "Error parsing '%s=\"%s\"'\n", name, str);
- LCONSOLE_ERROR_MSG(0x110, "here...........%.*s..%.*s|%.*s|\n",
- (int)strlen(name), dots, offset, dots,
- (width < 1) ? 0 : width - 1, dashes);
-}
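For a hypothetical call lnet_syntax("networks", "tcp(eth0", 4, 4), the two console lines line up so that the |---| marker underlines the offending token:

    Error parsing 'networks="tcp(eth0"'
    here.........................|---|

The literal "here..........." pads to the width of "Error parsing '", the next run of dots pads past the name and '="', and the offset dots position the marker within the string.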
-
-static int
-lnet_issep(char c)
-{
- switch (c) {
- case '\n':
- case '\r':
- case ';':
- return 1;
- default:
- return 0;
- }
-}
-
-int
-lnet_net_unique(__u32 net, struct list_head *nilist)
-{
- struct list_head *tmp;
- struct lnet_ni *ni;
-
- list_for_each(tmp, nilist) {
- ni = list_entry(tmp, struct lnet_ni, ni_list);
-
- if (LNET_NIDNET(ni->ni_nid) == net)
- return 0;
- }
-
- return 1;
-}
-
-void
-lnet_ni_free(struct lnet_ni *ni)
-{
- int i;
-
- if (ni->ni_refs)
- cfs_percpt_free(ni->ni_refs);
-
- if (ni->ni_tx_queues)
- cfs_percpt_free(ni->ni_tx_queues);
-
- if (ni->ni_cpts)
- cfs_expr_list_values_free(ni->ni_cpts, ni->ni_ncpts);
-
- kfree(ni->ni_lnd_tunables);
-
- for (i = 0; i < LNET_MAX_INTERFACES && ni->ni_interfaces[i]; i++)
- kfree(ni->ni_interfaces[i]);
-
- /* release reference to net namespace */
- if (ni->ni_net_ns)
- put_net(ni->ni_net_ns);
-
- kfree(ni);
-}
-
-struct lnet_ni *
-lnet_ni_alloc(__u32 net, struct cfs_expr_list *el, struct list_head *nilist)
-{
- struct lnet_tx_queue *tq;
- struct lnet_ni *ni;
- int rc;
- int i;
-
- if (!lnet_net_unique(net, nilist)) {
- LCONSOLE_ERROR_MSG(0x111, "Duplicate network specified: %s\n",
- libcfs_net2str(net));
- return NULL;
- }
-
- ni = kzalloc(sizeof(*ni), GFP_NOFS);
- if (!ni) {
- CERROR("Out of memory creating network %s\n",
- libcfs_net2str(net));
- return NULL;
- }
-
- spin_lock_init(&ni->ni_lock);
- INIT_LIST_HEAD(&ni->ni_cptlist);
- ni->ni_refs = cfs_percpt_alloc(lnet_cpt_table(),
- sizeof(*ni->ni_refs[0]));
- if (!ni->ni_refs)
- goto failed;
-
- ni->ni_tx_queues = cfs_percpt_alloc(lnet_cpt_table(),
- sizeof(*ni->ni_tx_queues[0]));
- if (!ni->ni_tx_queues)
- goto failed;
-
- cfs_percpt_for_each(tq, i, ni->ni_tx_queues)
- INIT_LIST_HEAD(&tq->tq_delayed);
-
- if (!el) {
- ni->ni_cpts = NULL;
- ni->ni_ncpts = LNET_CPT_NUMBER;
- } else {
- rc = cfs_expr_list_values(el, LNET_CPT_NUMBER, &ni->ni_cpts);
- if (rc <= 0) {
- CERROR("Failed to set CPTs for NI %s: %d\n",
- libcfs_net2str(net), rc);
- goto failed;
- }
-
- LASSERT(rc <= LNET_CPT_NUMBER);
- if (rc == LNET_CPT_NUMBER) {
- cfs_expr_list_values_free(ni->ni_cpts, LNET_CPT_NUMBER);
- ni->ni_cpts = NULL;
- }
-
- ni->ni_ncpts = rc;
- }
-
- /* LND will fill in the address part of the NID */
- ni->ni_nid = LNET_MKNID(net, 0);
-
- /* Store net namespace in which current ni is being created */
- if (current->nsproxy->net_ns)
- ni->ni_net_ns = get_net(current->nsproxy->net_ns);
- else
- ni->ni_net_ns = NULL;
-
- ni->ni_last_alive = ktime_get_real_seconds();
- list_add_tail(&ni->ni_list, nilist);
- return ni;
- failed:
- lnet_ni_free(ni);
- return NULL;
-}
-
-int
-lnet_parse_networks(struct list_head *nilist, char *networks)
-{
- struct cfs_expr_list *el = NULL;
- char *tokens;
- char *str;
- char *tmp;
- struct lnet_ni *ni;
- __u32 net;
- int nnets = 0;
- struct list_head *temp_node;
-
- if (!networks) {
- CERROR("networks string is undefined\n");
- return -EINVAL;
- }
-
- if (strlen(networks) > LNET_SINGLE_TEXTBUF_NOB) {
- /* _WAY_ conservative */
- LCONSOLE_ERROR_MSG(0x112,
- "Can't parse networks: string too long\n");
- return -EINVAL;
- }
-
- tokens = kstrdup(networks, GFP_KERNEL);
- if (!tokens) {
- CERROR("Can't allocate net tokens\n");
- return -ENOMEM;
- }
-
- tmp = tokens;
- str = tokens;
-
- while (str && *str) {
- char *comma = strchr(str, ',');
- char *bracket = strchr(str, '(');
- char *square = strchr(str, '[');
- char *iface;
- int niface;
- int rc;
-
- /*
-		 * NB we don't check interface conflicts here; it's the LND's
-		 * responsibility (if it cares at all)
- */
- if (square && (!comma || square < comma)) {
- /*
-			 * e.g. o2ib0(ib0)[1,2]: the numbers between square
-			 * brackets are the CPTs this NI needs to be bound to
- */
- if (bracket && bracket > square) {
- tmp = square;
- goto failed_syntax;
- }
-
- tmp = strchr(square, ']');
- if (!tmp) {
- tmp = square;
- goto failed_syntax;
- }
-
- rc = cfs_expr_list_parse(square, tmp - square + 1,
- 0, LNET_CPT_NUMBER - 1, &el);
- if (rc) {
- tmp = square;
- goto failed_syntax;
- }
-
- while (square <= tmp)
- *square++ = ' ';
- }
-
- if (!bracket || (comma && comma < bracket)) {
- /* no interface list specified */
-
- if (comma)
- *comma++ = 0;
- net = libcfs_str2net(strim(str));
-
- if (net == LNET_NIDNET(LNET_NID_ANY)) {
- LCONSOLE_ERROR_MSG(0x113,
- "Unrecognised network type\n");
- tmp = str;
- goto failed_syntax;
- }
-
- if (LNET_NETTYP(net) != LOLND && /* LO is implicit */
- !lnet_ni_alloc(net, el, nilist))
- goto failed;
-
- if (el) {
- cfs_expr_list_free(el);
- el = NULL;
- }
-
- str = comma;
- continue;
- }
-
- *bracket = 0;
- net = libcfs_str2net(strim(str));
- if (net == LNET_NIDNET(LNET_NID_ANY)) {
- tmp = str;
- goto failed_syntax;
- }
-
- ni = lnet_ni_alloc(net, el, nilist);
- if (!ni)
- goto failed;
-
- if (el) {
- cfs_expr_list_free(el);
- el = NULL;
- }
-
- niface = 0;
- iface = bracket + 1;
-
- bracket = strchr(iface, ')');
- if (!bracket) {
- tmp = iface;
- goto failed_syntax;
- }
-
- *bracket = 0;
- do {
- comma = strchr(iface, ',');
- if (comma)
- *comma++ = 0;
-
- iface = strim(iface);
- if (!*iface) {
- tmp = iface;
- goto failed_syntax;
- }
-
- if (niface == LNET_MAX_INTERFACES) {
- LCONSOLE_ERROR_MSG(0x115,
- "Too many interfaces for net %s\n",
- libcfs_net2str(net));
- goto failed;
- }
-
-			/*
-			 * Allocate a separate piece of memory and copy the
-			 * string into it, so we don't have a dependency on
-			 * the tokens string. This way we can free the tokens
-			 * at the end of the function. The newly allocated
-			 * ni_interfaces[] can be freed when freeing the NI.
-			 */
- ni->ni_interfaces[niface] = kstrdup(iface, GFP_KERNEL);
- if (!ni->ni_interfaces[niface]) {
- CERROR("Can't allocate net interface name\n");
- goto failed;
- }
- niface++;
- iface = comma;
- } while (iface);
-
- str = bracket + 1;
- comma = strchr(bracket + 1, ',');
- if (comma) {
- *comma = 0;
- str = strim(str);
- if (*str) {
- tmp = str;
- goto failed_syntax;
- }
- str = comma + 1;
- continue;
- }
-
- str = strim(str);
- if (*str) {
- tmp = str;
- goto failed_syntax;
- }
- }
-
- list_for_each(temp_node, nilist)
- nnets++;
-
- kfree(tokens);
- return nnets;
-
- failed_syntax:
- lnet_syntax("networks", networks, (int)(tmp - tokens), strlen(tmp));
- failed:
- while (!list_empty(nilist)) {
- ni = list_entry(nilist->next, struct lnet_ni, ni_list);
-
- list_del(&ni->ni_list);
- lnet_ni_free(ni);
- }
-
- if (el)
- cfs_expr_list_free(el);
-
- kfree(tokens);
-
- return -EINVAL;
-}
-
-static struct lnet_text_buf *
-lnet_new_text_buf(int str_len)
-{
- struct lnet_text_buf *ltb;
- int nob;
-
- /* NB allocate space for the terminating 0 */
- nob = offsetof(struct lnet_text_buf, ltb_text[str_len + 1]);
- if (nob > LNET_SINGLE_TEXTBUF_NOB) {
- /* _way_ conservative for "route net gateway..." */
- CERROR("text buffer too big\n");
- return NULL;
- }
-
- if (lnet_tbnob + nob > LNET_MAX_TEXTBUF_NOB) {
- CERROR("Too many text buffers\n");
- return NULL;
- }
-
- ltb = kzalloc(nob, GFP_KERNEL);
- if (!ltb)
- return NULL;
-
- ltb->ltb_size = nob;
- ltb->ltb_text[0] = 0;
- lnet_tbnob += nob;
- return ltb;
-}
-
-static void
-lnet_free_text_buf(struct lnet_text_buf *ltb)
-{
- lnet_tbnob -= ltb->ltb_size;
- kfree(ltb);
-}
-
-static void
-lnet_free_text_bufs(struct list_head *tbs)
-{
- struct lnet_text_buf *ltb;
-
- while (!list_empty(tbs)) {
- ltb = list_entry(tbs->next, struct lnet_text_buf, ltb_list);
-
- list_del(&ltb->ltb_list);
- lnet_free_text_buf(ltb);
- }
-}
-
-static int
-lnet_str2tbs_sep(struct list_head *tbs, char *str)
-{
- struct list_head pending;
- char *sep;
- int nob;
- int i;
- struct lnet_text_buf *ltb;
-
- INIT_LIST_HEAD(&pending);
-
- /* Split 'str' into separate commands */
- for (;;) {
- /* skip leading whitespace */
- while (isspace(*str))
- str++;
-
- /* scan for separator or comment */
- for (sep = str; *sep; sep++)
- if (lnet_issep(*sep) || *sep == '#')
- break;
-
- nob = (int)(sep - str);
- if (nob > 0) {
- ltb = lnet_new_text_buf(nob);
- if (!ltb) {
- lnet_free_text_bufs(&pending);
- return -ENOMEM;
- }
-
- for (i = 0; i < nob; i++)
- if (isspace(str[i]))
- ltb->ltb_text[i] = ' ';
- else
- ltb->ltb_text[i] = str[i];
-
- ltb->ltb_text[nob] = 0;
-
- list_add_tail(&ltb->ltb_list, &pending);
- }
-
- if (*sep == '#') {
- /* scan for separator */
- do {
- sep++;
- } while (*sep && !lnet_issep(*sep));
- }
-
- if (!*sep)
- break;
-
- str = sep + 1;
- }
-
- list_splice(&pending, tbs->prev);
- return 0;
-}
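As a worked example (the route strings are illustrative), the input

    tcp0 192.168.0.10@tcp; # backup path
    o2ib1 10.10.0.1@o2ib

is split into two text buffers, "tcp0 192.168.0.10@tcp" and "o2ib1 10.10.0.1@o2ib": both ';' and newline act as command separators, each whitespace character is normalized to a plain space, and everything from '#' up to the next separator is dropped.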
-
-static int
-lnet_expand1tb(struct list_head *list,
- char *str, char *sep1, char *sep2,
- char *item, int itemlen)
-{
- int len1 = (int)(sep1 - str);
- int len2 = strlen(sep2 + 1);
- struct lnet_text_buf *ltb;
-
- LASSERT(*sep1 == '[');
- LASSERT(*sep2 == ']');
-
- ltb = lnet_new_text_buf(len1 + itemlen + len2);
- if (!ltb)
- return -ENOMEM;
-
- memcpy(ltb->ltb_text, str, len1);
- memcpy(&ltb->ltb_text[len1], item, itemlen);
- memcpy(&ltb->ltb_text[len1 + itemlen], sep2 + 1, len2);
- ltb->ltb_text[len1 + itemlen + len2] = 0;
-
- list_add_tail(&ltb->ltb_list, list);
- return 0;
-}
-
-static int
-lnet_str2tbs_expand(struct list_head *tbs, char *str)
-{
- char num[16];
- struct list_head pending;
- char *sep;
- char *sep2;
- char *parsed;
- char *enditem;
- int lo;
- int hi;
- int stride;
- int i;
- int nob;
- int scanned;
-
- INIT_LIST_HEAD(&pending);
-
- sep = strchr(str, '[');
- if (!sep) /* nothing to expand */
- return 0;
-
- sep2 = strchr(sep, ']');
- if (!sep2)
- goto failed;
-
- for (parsed = sep; parsed < sep2; parsed = enditem) {
- enditem = ++parsed;
- while (enditem < sep2 && *enditem != ',')
- enditem++;
-
- if (enditem == parsed) /* no empty items */
- goto failed;
-
- if (sscanf(parsed, "%d-%d/%d%n", &lo, &hi,
- &stride, &scanned) < 3) {
- if (sscanf(parsed, "%d-%d%n", &lo, &hi, &scanned) < 2) {
- /* simple string enumeration */
- if (lnet_expand1tb(&pending, str, sep, sep2,
- parsed,
- (int)(enditem - parsed))) {
- goto failed;
- }
- continue;
- }
-
- stride = 1;
- }
-
- /* range expansion */
-
- if (enditem != parsed + scanned) /* no trailing junk */
- goto failed;
-
- if (hi < 0 || lo < 0 || stride < 0 || hi < lo ||
- (hi - lo) % stride)
- goto failed;
-
- for (i = lo; i <= hi; i += stride) {
- snprintf(num, sizeof(num), "%d", i);
- nob = strlen(num);
- if (nob + 1 == sizeof(num))
- goto failed;
-
- if (lnet_expand1tb(&pending, str, sep, sep2,
- num, nob))
- goto failed;
- }
- }
-
- list_splice(&pending, tbs->prev);
- return 1;
-
- failed:
- lnet_free_text_bufs(&pending);
- return -EINVAL;
-}
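To make the range syntax concrete, here is a small user-space rendering of the expansion logic above; the "tcp" prefix and bracket contents are illustrative:

#include <stdio.h>

int main(void)
{
	const char *item = "0-6/2";	/* contents of "tcp[0-6/2]" */
	int lo, hi, stride = 1, scanned, i;

	/* try "lo-hi/stride" first, falling back to "lo-hi" with stride 1 */
	if (sscanf(item, "%d-%d/%d%n", &lo, &hi, &stride, &scanned) < 3)
		sscanf(item, "%d-%d%n", &lo, &hi, &scanned);

	for (i = lo; i <= hi; i += stride)
		printf("tcp%d\n", i);	/* tcp0 tcp2 tcp4 tcp6 */
	return 0;
}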
-
-static int
-lnet_parse_hops(char *str, unsigned int *hops)
-{
- int len = strlen(str);
- int nob = len;
-
- return (sscanf(str, "%u%n", hops, &nob) >= 1 &&
- nob == len &&
- *hops > 0 && *hops < 256);
-}
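The %n conversion above records how many characters sscanf() consumed, so comparing it against strlen() rejects trailing junk. A user-space check with illustrative inputs:

#include <stdio.h>
#include <string.h>

static int parse_hops(const char *str, unsigned int *hops)
{
	int len = strlen(str);
	int nob = len;

	return sscanf(str, "%u%n", hops, &nob) >= 1 &&
	       nob == len && *hops > 0 && *hops < 256;
}

int main(void)
{
	unsigned int hops;

	printf("%d\n", parse_hops("3", &hops));		/* 1: valid */
	printf("%d\n", parse_hops("3x", &hops));	/* 0: trailing junk */
	printf("%d\n", parse_hops("300", &hops));	/* 0: out of range */
	return 0;
}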
-
-#define LNET_PRIORITY_SEPARATOR (':')
-
-static int
-lnet_parse_priority(char *str, unsigned int *priority, char **token)
-{
- int nob;
- char *sep;
- int len;
-
- sep = strchr(str, LNET_PRIORITY_SEPARATOR);
- if (!sep) {
- *priority = 0;
- return 0;
- }
- len = strlen(sep + 1);
-
- if ((sscanf((sep + 1), "%u%n", priority, &nob) < 1) || (len != nob)) {
- /*
- * Update the caller's token pointer so it treats the found
- * priority as the token to report in the error message.
- */
- *token += sep - str + 1;
- return -EINVAL;
- }
-
- CDEBUG(D_NET, "gateway %s, priority %d, nob %d\n", str, *priority, nob);
-
- /*
- * Change priority separator to \0 to be able to parse NID
- */
- *sep = '\0';
- return 0;
-}
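A user-space illustration of the split performed above; the NID and priority value are illustrative:

#include <stdio.h>
#include <string.h>

int main(void)
{
	char str[] = "192.168.0.1@tcp:5";
	char *sep = strchr(str, ':');
	unsigned int priority = 0;
	int nob, len;

	if (sep) {
		len = strlen(sep + 1);
		if (sscanf(sep + 1, "%u%n", &priority, &nob) < 1 ||
		    len != nob)
			return 1;	/* malformed priority suffix */
		*sep = '\0';		/* str now holds just the NID */
	}
	printf("nid=%s priority=%u\n", str, priority);
	/* prints: nid=192.168.0.1@tcp priority=5 */
	return 0;
}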
-
-static int
-lnet_parse_route(char *str, int *im_a_router)
-{
- /* static scratch buffer OK (single threaded) */
- static char cmd[LNET_SINGLE_TEXTBUF_NOB];
-
- struct list_head nets;
- struct list_head gateways;
- struct list_head *tmp1;
- struct list_head *tmp2;
- __u32 net;
- lnet_nid_t nid;
- struct lnet_text_buf *ltb;
- int rc;
- char *sep;
- char *token = str;
- int ntokens = 0;
- int myrc = -1;
- __u32 hops;
- int got_hops = 0;
- unsigned int priority = 0;
-
- INIT_LIST_HEAD(&gateways);
- INIT_LIST_HEAD(&nets);
-
- /* save a copy of the string for error messages */
- strncpy(cmd, str, sizeof(cmd));
- cmd[sizeof(cmd) - 1] = '\0';
-
- sep = str;
- for (;;) {
- /* scan for token start */
- while (isspace(*sep))
- sep++;
- if (!*sep) {
- if (ntokens < (got_hops ? 3 : 2))
- goto token_error;
- break;
- }
-
- ntokens++;
- token = sep++;
-
- /* scan for token end */
- while (*sep && !isspace(*sep))
- sep++;
- if (*sep)
- *sep++ = 0;
-
- if (ntokens == 1) {
- tmp2 = &nets; /* expanding nets */
- } else if (ntokens == 2 &&
- lnet_parse_hops(token, &hops)) {
- got_hops = 1; /* got a hop count */
- continue;
- } else {
- tmp2 = &gateways; /* expanding gateways */
- }
-
- ltb = lnet_new_text_buf(strlen(token));
- if (!ltb)
- goto out;
-
- strcpy(ltb->ltb_text, token);
- tmp1 = &ltb->ltb_list;
- list_add_tail(tmp1, tmp2);
-
- while (tmp1 != tmp2) {
- ltb = list_entry(tmp1, struct lnet_text_buf, ltb_list);
-
- rc = lnet_str2tbs_expand(tmp1->next, ltb->ltb_text);
- if (rc < 0)
- goto token_error;
-
- tmp1 = tmp1->next;
-
- if (rc > 0) { /* expanded! */
- list_del(&ltb->ltb_list);
- lnet_free_text_buf(ltb);
- continue;
- }
-
- if (ntokens == 1) {
- net = libcfs_str2net(ltb->ltb_text);
- if (net == LNET_NIDNET(LNET_NID_ANY) ||
- LNET_NETTYP(net) == LOLND)
- goto token_error;
- } else {
- rc = lnet_parse_priority(ltb->ltb_text,
- &priority, &token);
- if (rc < 0)
- goto token_error;
-
- nid = libcfs_str2nid(ltb->ltb_text);
- if (nid == LNET_NID_ANY ||
- LNET_NETTYP(LNET_NIDNET(nid)) == LOLND)
- goto token_error;
- }
- }
- }
-
-	/*
-	 * If no hop count was set then flag the value as unset, since
-	 * hops is an optional parameter.
-	 */
- if (!got_hops)
- hops = LNET_UNDEFINED_HOPS;
-
- LASSERT(!list_empty(&nets));
- LASSERT(!list_empty(&gateways));
-
- list_for_each(tmp1, &nets) {
- ltb = list_entry(tmp1, struct lnet_text_buf, ltb_list);
- net = libcfs_str2net(ltb->ltb_text);
- LASSERT(net != LNET_NIDNET(LNET_NID_ANY));
-
- list_for_each(tmp2, &gateways) {
- ltb = list_entry(tmp2, struct lnet_text_buf, ltb_list);
- nid = libcfs_str2nid(ltb->ltb_text);
- LASSERT(nid != LNET_NID_ANY);
-
- if (lnet_islocalnid(nid)) {
- *im_a_router = 1;
- continue;
- }
-
- rc = lnet_add_route(net, hops, nid, priority);
- if (rc && rc != -EEXIST && rc != -EHOSTUNREACH) {
- CERROR("Can't create route to %s via %s\n",
- libcfs_net2str(net),
- libcfs_nid2str(nid));
- goto out;
- }
- }
- }
-
- myrc = 0;
- goto out;
-
- token_error:
- lnet_syntax("routes", cmd, (int)(token - str), strlen(token));
- out:
- lnet_free_text_bufs(&nets);
- lnet_free_text_bufs(&gateways);
- return myrc;
-}
-
-static int
-lnet_parse_route_tbs(struct list_head *tbs, int *im_a_router)
-{
- struct lnet_text_buf *ltb;
-
- while (!list_empty(tbs)) {
- ltb = list_entry(tbs->next, struct lnet_text_buf, ltb_list);
-
- if (lnet_parse_route(ltb->ltb_text, im_a_router) < 0) {
- lnet_free_text_bufs(tbs);
- return -EINVAL;
- }
-
- list_del(&ltb->ltb_list);
- lnet_free_text_buf(ltb);
- }
-
- return 0;
-}
-
-int
-lnet_parse_routes(char *routes, int *im_a_router)
-{
- struct list_head tbs;
- int rc = 0;
-
- *im_a_router = 0;
-
- INIT_LIST_HEAD(&tbs);
-
- if (lnet_str2tbs_sep(&tbs, routes) < 0) {
- CERROR("Error parsing routes\n");
- rc = -EINVAL;
- } else {
- rc = lnet_parse_route_tbs(&tbs, im_a_router);
- }
-
- LASSERT(!lnet_tbnob);
- return rc;
-}
-
-static int
-lnet_match_network_token(char *token, int len, __u32 *ipaddrs, int nip)
-{
- LIST_HEAD(list);
- int rc;
- int i;
-
- rc = cfs_ip_addr_parse(token, len, &list);
- if (rc)
- return rc;
-
- for (rc = i = 0; !rc && i < nip; i++)
- rc = cfs_ip_addr_match(ipaddrs[i], &list);
-
- cfs_expr_list_free_list(&list);
-
- return rc;
-}
-
-static int
-lnet_match_network_tokens(char *net_entry, __u32 *ipaddrs, int nip)
-{
- static char tokens[LNET_SINGLE_TEXTBUF_NOB];
-
- int matched = 0;
- int ntokens = 0;
- int len;
- char *net = NULL;
- char *sep;
- char *token;
- int rc;
-
- LASSERT(strlen(net_entry) < sizeof(tokens));
-
- /* work on a copy of the string */
- strcpy(tokens, net_entry);
- sep = tokens;
- for (;;) {
- /* scan for token start */
- while (isspace(*sep))
- sep++;
- if (!*sep)
- break;
-
- token = sep++;
-
- /* scan for token end */
- while (*sep && !isspace(*sep))
- sep++;
- if (*sep)
- *sep++ = 0;
-
- if (!ntokens++) {
- net = token;
- continue;
- }
-
- len = strlen(token);
-
- rc = lnet_match_network_token(token, len, ipaddrs, nip);
- if (rc < 0) {
- lnet_syntax("ip2nets", net_entry,
- (int)(token - tokens), len);
- return rc;
- }
-
- if (rc)
- matched |= 1;
- }
-
- if (!matched)
- return 0;
-
- strcpy(net_entry, net); /* replace with matched net */
- return 1;
-}
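As a worked example (addresses illustrative): for the ip2nets entry "tcp0 192.168.0.*" on a node whose only up interface is 192.168.0.7, the pattern token matches, so the entry is rewritten in place to just "tcp0". On a node with only 10.0.0.0/24 addresses nothing matches, 0 is returned, and the caller discards the entry.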
-
-static __u32
-lnet_netspec2net(char *netspec)
-{
- char *bracket = strchr(netspec, '(');
- __u32 net;
-
- if (bracket)
- *bracket = 0;
-
- net = libcfs_str2net(netspec);
-
- if (bracket)
- *bracket = '(';
-
- return net;
-}
-
-static int
-lnet_splitnets(char *source, struct list_head *nets)
-{
- int offset = 0;
- int offset2;
- int len;
- struct lnet_text_buf *tb;
- struct lnet_text_buf *tb2;
- struct list_head *t;
- char *sep;
- char *bracket;
- __u32 net;
-
- LASSERT(!list_empty(nets));
- LASSERT(nets->next == nets->prev); /* single entry */
-
- tb = list_entry(nets->next, struct lnet_text_buf, ltb_list);
-
- for (;;) {
- sep = strchr(tb->ltb_text, ',');
- bracket = strchr(tb->ltb_text, '(');
-
- if (sep && bracket && bracket < sep) {
- /* netspec lists interfaces... */
-
- offset2 = offset + (int)(bracket - tb->ltb_text);
- len = strlen(bracket);
-
- bracket = strchr(bracket + 1, ')');
-
- if (!bracket ||
- !(bracket[1] == ',' || !bracket[1])) {
- lnet_syntax("ip2nets", source, offset2, len);
- return -EINVAL;
- }
-
- sep = !bracket[1] ? NULL : bracket + 1;
- }
-
- if (sep)
- *sep++ = 0;
-
- net = lnet_netspec2net(tb->ltb_text);
- if (net == LNET_NIDNET(LNET_NID_ANY)) {
- lnet_syntax("ip2nets", source, offset,
- strlen(tb->ltb_text));
- return -EINVAL;
- }
-
- list_for_each(t, nets) {
- tb2 = list_entry(t, struct lnet_text_buf, ltb_list);
-
- if (tb2 == tb)
- continue;
-
- if (net == lnet_netspec2net(tb2->ltb_text)) {
- /* duplicate network */
- lnet_syntax("ip2nets", source, offset,
- strlen(tb->ltb_text));
- return -EINVAL;
- }
- }
-
- if (!sep)
- return 0;
-
- offset += (int)(sep - tb->ltb_text);
- len = strlen(sep);
- tb2 = lnet_new_text_buf(len);
- if (!tb2)
- return -ENOMEM;
-
- strncpy(tb2->ltb_text, sep, len);
- tb2->ltb_text[len] = '\0';
- list_add_tail(&tb2->ltb_list, nets);
-
- tb = tb2;
- }
-}
-
-static int
-lnet_match_networks(char **networksp, char *ip2nets, __u32 *ipaddrs, int nip)
-{
- static char networks[LNET_SINGLE_TEXTBUF_NOB];
- static char source[LNET_SINGLE_TEXTBUF_NOB];
-
- struct list_head raw_entries;
- struct list_head matched_nets;
- struct list_head current_nets;
- struct list_head *t;
- struct list_head *t2;
- struct lnet_text_buf *tb;
- struct lnet_text_buf *temp;
- struct lnet_text_buf *tb2;
- __u32 net1;
- __u32 net2;
- int len;
- int count;
- int dup;
- int rc;
-
- INIT_LIST_HEAD(&raw_entries);
- if (lnet_str2tbs_sep(&raw_entries, ip2nets) < 0) {
- CERROR("Error parsing ip2nets\n");
- LASSERT(!lnet_tbnob);
- return -EINVAL;
- }
-
- INIT_LIST_HEAD(&matched_nets);
- INIT_LIST_HEAD(&current_nets);
- networks[0] = 0;
- count = 0;
- len = 0;
- rc = 0;
-
- list_for_each_entry_safe(tb, temp, &raw_entries, ltb_list) {
- strncpy(source, tb->ltb_text, sizeof(source));
- source[sizeof(source) - 1] = '\0';
-
-		/* on a match, replace ltb_text with the matched network(s) */
- rc = lnet_match_network_tokens(tb->ltb_text, ipaddrs, nip);
- if (rc < 0)
- break;
-
- list_del(&tb->ltb_list);
-
- if (!rc) { /* no match */
- lnet_free_text_buf(tb);
- continue;
- }
-
- /* split into separate networks */
- INIT_LIST_HEAD(&current_nets);
- list_add(&tb->ltb_list, &current_nets);
- rc = lnet_splitnets(source, &current_nets);
- if (rc < 0)
- break;
-
- dup = 0;
- list_for_each(t, &current_nets) {
- tb = list_entry(t, struct lnet_text_buf, ltb_list);
- net1 = lnet_netspec2net(tb->ltb_text);
- LASSERT(net1 != LNET_NIDNET(LNET_NID_ANY));
-
- list_for_each(t2, &matched_nets) {
- tb2 = list_entry(t2, struct lnet_text_buf,
- ltb_list);
- net2 = lnet_netspec2net(tb2->ltb_text);
- LASSERT(net2 != LNET_NIDNET(LNET_NID_ANY));
-
- if (net1 == net2) {
- dup = 1;
- break;
- }
- }
-
- if (dup)
- break;
- }
-
- if (dup) {
- lnet_free_text_bufs(&current_nets);
- continue;
- }
-
- list_for_each_safe(t, t2, &current_nets) {
- tb = list_entry(t, struct lnet_text_buf, ltb_list);
-
- list_del(&tb->ltb_list);
- list_add_tail(&tb->ltb_list, &matched_nets);
-
- len += snprintf(networks + len, sizeof(networks) - len,
- "%s%s", !len ? "" : ",",
- tb->ltb_text);
-
- if (len >= sizeof(networks)) {
- CERROR("Too many matched networks\n");
- rc = -E2BIG;
- goto out;
- }
- }
-
- count++;
- }
-
- out:
- lnet_free_text_bufs(&raw_entries);
- lnet_free_text_bufs(&matched_nets);
- lnet_free_text_bufs(&current_nets);
- LASSERT(!lnet_tbnob);
-
- if (rc < 0)
- return rc;
-
- *networksp = networks;
- return count;
-}
-
-static int
-lnet_ipaddr_enumerate(__u32 **ipaddrsp)
-{
- int up;
- __u32 netmask;
- __u32 *ipaddrs;
- __u32 *ipaddrs2;
- int nip;
- char **ifnames;
- int nif = lnet_ipif_enumerate(&ifnames);
- int i;
- int rc;
-
- if (nif <= 0)
- return nif;
-
- ipaddrs = kcalloc(nif, sizeof(*ipaddrs), GFP_KERNEL);
- if (!ipaddrs) {
- CERROR("Can't allocate ipaddrs[%d]\n", nif);
- lnet_ipif_free_enumeration(ifnames, nif);
- return -ENOMEM;
- }
-
- for (i = nip = 0; i < nif; i++) {
- if (!strcmp(ifnames[i], "lo"))
- continue;
-
- rc = lnet_ipif_query(ifnames[i], &up, &ipaddrs[nip], &netmask);
- if (rc) {
- CWARN("Can't query interface %s: %d\n",
- ifnames[i], rc);
- continue;
- }
-
- if (!up) {
- CWARN("Ignoring interface %s: it's down\n",
- ifnames[i]);
- continue;
- }
-
- nip++;
- }
-
- lnet_ipif_free_enumeration(ifnames, nif);
-
- if (nip == nif) {
- *ipaddrsp = ipaddrs;
- } else {
- if (nip > 0) {
- ipaddrs2 = kcalloc(nip, sizeof(*ipaddrs2),
- GFP_KERNEL);
- if (!ipaddrs2) {
- CERROR("Can't allocate ipaddrs[%d]\n", nip);
- nip = -ENOMEM;
- } else {
- memcpy(ipaddrs2, ipaddrs,
- nip * sizeof(*ipaddrs));
- *ipaddrsp = ipaddrs2;
- rc = nip;
- }
- }
- kfree(ipaddrs);
- }
- return nip;
-}
-
-int
-lnet_parse_ip2nets(char **networksp, char *ip2nets)
-{
- __u32 *ipaddrs = NULL;
- int nip = lnet_ipaddr_enumerate(&ipaddrs);
- int rc;
-
- if (nip < 0) {
- LCONSOLE_ERROR_MSG(0x117,
- "Error %d enumerating local IP interfaces for ip2nets to match\n",
- nip);
- return nip;
- }
-
- if (!nip) {
- LCONSOLE_ERROR_MSG(0x118,
- "No local IP interfaces for ip2nets to match\n");
- return -ENOENT;
- }
-
- rc = lnet_match_networks(networksp, ip2nets, ipaddrs, nip);
- kfree(ipaddrs);
-
- if (rc < 0) {
- LCONSOLE_ERROR_MSG(0x119, "Error %d parsing ip2nets\n", rc);
- return rc;
- }
-
- if (!rc) {
- LCONSOLE_ERROR_MSG(0x11a,
- "ip2nets does not match any local IP interfaces\n");
- return -ENOENT;
- }
-
- return 0;
-}
diff --git a/drivers/staging/lustre/lnet/lnet/lib-eq.c b/drivers/staging/lustre/lnet/lnet/lib-eq.c
deleted file mode 100644
index ea53b5cb3f72..000000000000
--- a/drivers/staging/lustre/lnet/lnet/lib-eq.c
+++ /dev/null
@@ -1,426 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/*
- * GPL HEADER START
- *
- * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 only,
- * as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * General Public License version 2 for more details (a copy is included
- * in the LICENSE file that accompanied this code).
- *
- * You should have received a copy of the GNU General Public License
- * version 2 along with this program; If not, see
- * http://www.gnu.org/licenses/gpl-2.0.html
- *
- * GPL HEADER END
- */
-/*
- * Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved.
- * Use is subject to license terms.
- *
- * Copyright (c) 2012, Intel Corporation.
- */
-/*
- * This file is part of Lustre, http://www.lustre.org/
- * Lustre is a trademark of Sun Microsystems, Inc.
- *
- * lnet/lnet/lib-eq.c
- *
- * Library level Event queue management routines
- */
-
-#define DEBUG_SUBSYSTEM S_LNET
-
-#include <linux/lnet/lib-lnet.h>
-
-/**
- * Create an event queue that has room for \a count number of events.
- *
- * The event queue is circular and older events will be overwritten by new
- * ones if they are not removed in time by the user using the functions
- * LNetEQGet(), LNetEQWait(), or LNetEQPoll(). It is up to the user to
- * determine the appropriate size of the event queue to prevent this loss
- * of events. Note that when EQ handler is specified in \a callback, no
- * event loss can happen, since the handler is run for each event deposited
- * into the EQ.
- *
- * \param count The number of events to be stored in the event queue. It
- * will be rounded up to the next power of two.
- * \param callback A handler function that runs when an event is deposited
- * into the EQ. The constant value LNET_EQ_HANDLER_NONE can be used to
- * indicate that no event handler is desired.
- * \param handle On successful return, this location will hold a handle for
- * the newly created EQ.
- *
- * \retval 0 On success.
- * \retval -EINVAL If a parameter is not valid.
- * \retval -ENOMEM If memory for the EQ can't be allocated.
- *
- * \see lnet_eq_handler_t for the discussion on EQ handler semantics.
- */
-int
-LNetEQAlloc(unsigned int count, lnet_eq_handler_t callback,
- struct lnet_handle_eq *handle)
-{
- struct lnet_eq *eq;
-
- LASSERT(the_lnet.ln_refcount > 0);
-
- /*
- * We need count to be a power of 2 so that when eq_{enq,deq}_seq
- * overflow, they don't skip entries, so the queue has the same
- * apparent capacity at all times
- */
- if (count)
- count = roundup_pow_of_two(count);
-
- if (callback != LNET_EQ_HANDLER_NONE && count)
- CWARN("EQ callback is guaranteed to get every event, do you still want to set eqcount %d for polling event which will have locking overhead? Please contact with developer to confirm\n", count);
-
- /*
-	 * count can be 0 if the caller only needs the callback; this
-	 * eliminates the overhead of enqueuing events
- */
- if (!count && callback == LNET_EQ_HANDLER_NONE)
- return -EINVAL;
-
- eq = kzalloc(sizeof(*eq), GFP_NOFS);
- if (!eq)
- return -ENOMEM;
-
- if (count) {
- eq->eq_events = kvmalloc_array(count, sizeof(struct lnet_event),
- GFP_KERNEL | __GFP_ZERO);
- if (!eq->eq_events)
- goto failed;
- /*
- * NB allocator has set all event sequence numbers to 0,
-	 * so all of them should be earlier than eq_deq_seq
- */
- }
-
- eq->eq_deq_seq = 1;
- eq->eq_enq_seq = 1;
- eq->eq_size = count;
- eq->eq_callback = callback;
-
- eq->eq_refs = cfs_percpt_alloc(lnet_cpt_table(),
- sizeof(*eq->eq_refs[0]));
- if (!eq->eq_refs)
- goto failed;
-
-	/* MUST hold the lnet_res_lock exclusively */
- lnet_res_lock(LNET_LOCK_EX);
- /*
- * NB: hold lnet_eq_wait_lock for EQ link/unlink, so we can do
- * both EQ lookup and poll event with only lnet_eq_wait_lock
- */
- lnet_eq_wait_lock();
-
- lnet_res_lh_initialize(&the_lnet.ln_eq_container, &eq->eq_lh);
- list_add(&eq->eq_list, &the_lnet.ln_eq_container.rec_active);
-
- lnet_eq_wait_unlock();
- lnet_res_unlock(LNET_LOCK_EX);
-
- lnet_eq2handle(handle, eq);
- return 0;
-
-failed:
- kvfree(eq->eq_events);
-
- if (eq->eq_refs)
- cfs_percpt_free(eq->eq_refs);
-
- kfree(eq);
- return -ENOMEM;
-}
-EXPORT_SYMBOL(LNetEQAlloc);
-
-/**
- * Release the resources associated with an event queue if it's idle;
- * otherwise do nothing and it's up to the user to try again.
- *
- * \param eqh A handle for the event queue to be released.
- *
- * \retval 0 If the EQ is not in use and freed.
- * \retval -ENOENT If \a eqh does not point to a valid EQ.
- * \retval -EBUSY If the EQ is still in use by some MDs.
- */
-int
-LNetEQFree(struct lnet_handle_eq eqh)
-{
- struct lnet_eq *eq;
- struct lnet_event *events = NULL;
- int **refs = NULL;
- int *ref;
- int rc = 0;
- int size = 0;
- int i;
-
- LASSERT(the_lnet.ln_refcount > 0);
-
- lnet_res_lock(LNET_LOCK_EX);
- /*
- * NB: hold lnet_eq_wait_lock for EQ link/unlink, so we can do
- * both EQ lookup and poll event with only lnet_eq_wait_lock
- */
- lnet_eq_wait_lock();
-
- eq = lnet_handle2eq(&eqh);
- if (!eq) {
- rc = -ENOENT;
- goto out;
- }
-
- cfs_percpt_for_each(ref, i, eq->eq_refs) {
- LASSERT(*ref >= 0);
- if (!*ref)
- continue;
-
- CDEBUG(D_NET, "Event equeue (%d: %d) busy on destroy.\n",
- i, *ref);
- rc = -EBUSY;
- goto out;
- }
-
- /* stash for free after lock dropped */
- events = eq->eq_events;
- size = eq->eq_size;
- refs = eq->eq_refs;
-
- lnet_res_lh_invalidate(&eq->eq_lh);
- list_del(&eq->eq_list);
- kfree(eq);
- out:
- lnet_eq_wait_unlock();
- lnet_res_unlock(LNET_LOCK_EX);
-
- kvfree(events);
- if (refs)
- cfs_percpt_free(refs);
-
- return rc;
-}
-EXPORT_SYMBOL(LNetEQFree);
-
-void
-lnet_eq_enqueue_event(struct lnet_eq *eq, struct lnet_event *ev)
-{
-	/* MUST be called with the resource lock held, but without lnet_eq_wait_lock */
- int index;
-
- if (!eq->eq_size) {
- LASSERT(eq->eq_callback != LNET_EQ_HANDLER_NONE);
- eq->eq_callback(ev);
- return;
- }
-
- lnet_eq_wait_lock();
- ev->sequence = eq->eq_enq_seq++;
-
- LASSERT(eq->eq_size == LOWEST_BIT_SET(eq->eq_size));
- index = ev->sequence & (eq->eq_size - 1);
-
- eq->eq_events[index] = *ev;
-
- if (eq->eq_callback != LNET_EQ_HANDLER_NONE)
- eq->eq_callback(ev);
-
- /* Wake anyone waiting in LNetEQPoll() */
- if (waitqueue_active(&the_lnet.ln_eq_waitq))
- wake_up_all(&the_lnet.ln_eq_waitq);
- lnet_eq_wait_unlock();
-}
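Because eq_size is a power of two (rounded up in LNetEQAlloc()), masking the monotonically increasing sequence number yields a wrap-around ring index. A user-space illustration with a four-slot queue:

#include <stdio.h>

int main(void)
{
	unsigned int eq_size = 4;	/* must be a power of two */
	unsigned long seq;

	for (seq = 1; seq <= 6; seq++)
		printf("sequence %lu -> slot %lu\n",
		       seq, seq & (eq_size - 1));
	/* slots: 1 2 3 0 1 2 -- sequence 5 overwrites slot 1 */
	return 0;
}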
-
-static int
-lnet_eq_dequeue_event(struct lnet_eq *eq, struct lnet_event *ev)
-{
- int new_index = eq->eq_deq_seq & (eq->eq_size - 1);
- struct lnet_event *new_event = &eq->eq_events[new_index];
- int rc;
-
-	/* must be called with lnet_eq_wait_lock held */
- if (LNET_SEQ_GT(eq->eq_deq_seq, new_event->sequence))
- return 0;
-
- /* We've got a new event... */
- *ev = *new_event;
-
- CDEBUG(D_INFO, "event: %p, sequence: %lu, eq->size: %u\n",
- new_event, eq->eq_deq_seq, eq->eq_size);
-
- /* ...but did it overwrite an event we've not seen yet? */
- if (eq->eq_deq_seq == new_event->sequence) {
- rc = 1;
- } else {
- /*
- * don't complain with CERROR: some EQs are sized small
- * anyway; if it's important, the caller should complain
- */
- CDEBUG(D_NET, "Event Queue Overflow: eq seq %lu ev seq %lu\n",
- eq->eq_deq_seq, new_event->sequence);
- rc = -EOVERFLOW;
- }
-
- eq->eq_deq_seq = new_event->sequence + 1;
- return rc;
-}
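Continuing the four-slot example above: if the consumer is about to dequeue sequence 3 (eq_deq_seq == 3) but slot 3 now holds sequence 7, the queue has lapped the reader. Event 7 is still delivered, the return value is -EOVERFLOW to signal the loss, and eq_deq_seq jumps to 8.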
-
-/**
- * A nonblocking function that can be used to get the next event in an EQ.
- * If an event handler is associated with the EQ, the handler will run before
- * this function returns successfully. The event is removed from the queue.
- *
- * \param eventq A handle for the event queue.
- * \param event On successful return (1 or -EOVERFLOW), this location will
- * hold the next event in the EQ.
- *
- * \retval 0 No pending event in the EQ.
- * \retval 1 Indicates success.
- * \retval -ENOENT If \a eventq does not point to a valid EQ.
- * \retval -EOVERFLOW Indicates success (i.e., an event is returned) and that
- * at least one event between this event and the last event obtained from the
- * EQ has been dropped due to limited space in the EQ.
- */
-
-/**
- * Block the calling process until there is an event in the EQ.
- * If an event handler is associated with the EQ, the handler will run before
- * this function returns successfully. This function returns the next event
- * in the EQ and removes it from the EQ.
- *
- * \param eventq A handle for the event queue.
- * \param event On successful return (1 or -EOVERFLOW), this location will
- * hold the next event in the EQ.
- *
- * \retval 1 Indicates success.
- * \retval -ENOENT If \a eventq does not point to a valid EQ.
- * \retval -EOVERFLOW Indicates success (i.e., an event is returned) and that
- * at least one event between this event and the last event obtained from the
- * EQ has been dropped due to limited space in the EQ.
- */
-
-static int
-lnet_eq_wait_locked(int *timeout_ms, long state)
-__must_hold(&the_lnet.ln_eq_wait_lock)
-{
- int tms = *timeout_ms;
- int wait;
- wait_queue_entry_t wl;
- unsigned long now;
-
- if (!tms)
- return -ENXIO; /* don't want to wait and no new event */
-
- init_waitqueue_entry(&wl, current);
- set_current_state(state);
- add_wait_queue(&the_lnet.ln_eq_waitq, &wl);
-
- lnet_eq_wait_unlock();
-
- if (tms < 0) {
- schedule();
- } else {
- now = jiffies;
- schedule_timeout(msecs_to_jiffies(tms));
- tms -= jiffies_to_msecs(jiffies - now);
- if (tms < 0) /* no more wait but may have new event */
- tms = 0;
- }
-
- wait = tms; /* might need to call here again */
- *timeout_ms = tms;
-
- lnet_eq_wait_lock();
- remove_wait_queue(&the_lnet.ln_eq_waitq, &wl);
-
- return wait;
-}
-
-/**
- * Block the calling process until there's an event from a set of EQs or
- * timeout happens.
- *
- * If an event handler is associated with the EQ, the handler will run before
- * this function returns successfully, in which case the corresponding event
- * is consumed.
- *
- * LNetEQPoll() provides a timeout to allow applications to poll, block for a
- * fixed period, or block indefinitely.
- *
- * \param eventqs,neq An array of EQ handles, and size of the array.
- * \param timeout_ms Time in milliseconds to wait for an event to occur on
- * one of the EQs. The constant LNET_TIME_FOREVER can be used to indicate an
- * infinite timeout.
- * \param interruptible If true, sleep in TASK_INTERRUPTIBLE state;
- * otherwise sleep in TASK_NOLOAD state.
- * \param event,which On successful return (1 or -EOVERFLOW), \a event will
- * hold the next event in the EQs, and \a which will contain the index of the
- * EQ from which the event was taken.
- *
- * \retval 0 No pending event in the EQs after timeout.
- * \retval 1 Indicates success.
- * \retval -EOVERFLOW Indicates success (i.e., an event is returned) and that
- * at least one event between this event and the last event obtained from the
- * EQ indicated by \a which has been dropped due to limited space in the EQ.
- * \retval -ENOENT If there's an invalid handle in \a eventqs.
- */
-int
-LNetEQPoll(struct lnet_handle_eq *eventqs, int neq, int timeout_ms,
- int interruptible,
- struct lnet_event *event, int *which)
-{
- int wait = 1;
- int rc;
- int i;
-
- LASSERT(the_lnet.ln_refcount > 0);
-
- if (neq < 1)
- return -ENOENT;
-
- lnet_eq_wait_lock();
-
- for (;;) {
- for (i = 0; i < neq; i++) {
- struct lnet_eq *eq = lnet_handle2eq(&eventqs[i]);
-
- if (!eq) {
- lnet_eq_wait_unlock();
- return -ENOENT;
- }
-
- rc = lnet_eq_dequeue_event(eq, event);
- if (rc) {
- lnet_eq_wait_unlock();
- *which = i;
- return rc;
- }
- }
-
- if (!wait)
- break;
-
-		/*
-		 * return value of lnet_eq_wait_locked:
-		 * -1 : did nothing and it is certain there is no new event
-		 *  1 : slept, waiting for a new event to arrive
-		 *  0 : don't want to wait any more, but a new event may have
-		 *      arrived, so dequeue must be called again
-		 */
- wait = lnet_eq_wait_locked(&timeout_ms,
- interruptible ? TASK_INTERRUPTIBLE
- : TASK_NOLOAD);
- if (wait < 0) /* no new event */
- break;
- }
-
- lnet_eq_wait_unlock();
- return 0;
-}
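-
-/*
- * Usage sketch (editorial, not part of the original file): poll a single
- * EQ for up to five seconds.  "eqh" is assumed to be a handle created
- * earlier with LNetEQAlloc(); the portal/EQ setup is not shown.
- */
-static void eq_poll_example(struct lnet_handle_eq eqh)
-{
-	struct lnet_event ev;
-	int which;
-	int rc;
-
-	rc = LNetEQPoll(&eqh, 1, 5000, 0, &ev, &which);
-	if (rc == 1 || rc == -EOVERFLOW) /* event valid; -EOVERFLOW = loss */
-		CDEBUG(D_NET, "event type %d from EQ %d\n", ev.type, which);
-	else if (!rc)
-		CDEBUG(D_NET, "no event within 5s\n");
-}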
diff --git a/drivers/staging/lustre/lnet/lnet/lib-md.c b/drivers/staging/lustre/lnet/lnet/lib-md.c
deleted file mode 100644
index 8a22514aaf71..000000000000
--- a/drivers/staging/lustre/lnet/lnet/lib-md.c
+++ /dev/null
@@ -1,463 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/*
- * GPL HEADER START
- *
- * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 only,
- * as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * General Public License version 2 for more details (a copy is included
- * in the LICENSE file that accompanied this code).
- *
- * You should have received a copy of the GNU General Public License
- * version 2 along with this program; If not, see
- * http://www.gnu.org/licenses/gpl-2.0.html
- *
- * GPL HEADER END
- */
-/*
- * Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved.
- * Use is subject to license terms.
- *
- * Copyright (c) 2012, Intel Corporation.
- */
-/*
- * This file is part of Lustre, http://www.lustre.org/
- * Lustre is a trademark of Sun Microsystems, Inc.
- *
- * lnet/lnet/lib-md.c
- *
- * Memory Descriptor management routines
- */
-
-#define DEBUG_SUBSYSTEM S_LNET
-
-#include <linux/lnet/lib-lnet.h>
-
-/* must be called with lnet_res_lock held */
-void
-lnet_md_unlink(struct lnet_libmd *md)
-{
- if (!(md->md_flags & LNET_MD_FLAG_ZOMBIE)) {
- /* first unlink attempt... */
- struct lnet_me *me = md->md_me;
-
- md->md_flags |= LNET_MD_FLAG_ZOMBIE;
-
- /*
- * Disassociate from ME (if any),
- * and unlink it if it was created
- * with LNET_UNLINK
- */
- if (me) {
- /* detach MD from portal */
- lnet_ptl_detach_md(me, md);
- if (me->me_unlink == LNET_UNLINK)
- lnet_me_unlink(me);
- }
-
- /* ensure all future handle lookups fail */
- lnet_res_lh_invalidate(&md->md_lh);
- }
-
- if (md->md_refcount) {
- CDEBUG(D_NET, "Queueing unlink of md %p\n", md);
- return;
- }
-
- CDEBUG(D_NET, "Unlinking md %p\n", md);
-
- if (md->md_eq) {
- int cpt = lnet_cpt_of_cookie(md->md_lh.lh_cookie);
-
- LASSERT(*md->md_eq->eq_refs[cpt] > 0);
- (*md->md_eq->eq_refs[cpt])--;
- }
-
- LASSERT(!list_empty(&md->md_list));
- list_del_init(&md->md_list);
- kfree(md);
-}
-
-static int
-lnet_md_build(struct lnet_libmd *lmd, struct lnet_md *umd, int unlink)
-{
- int i;
- unsigned int niov;
- int total_length = 0;
-
- lmd->md_me = NULL;
- lmd->md_start = umd->start;
- lmd->md_offset = 0;
- lmd->md_max_size = umd->max_size;
- lmd->md_options = umd->options;
- lmd->md_user_ptr = umd->user_ptr;
- lmd->md_eq = NULL;
- lmd->md_threshold = umd->threshold;
- lmd->md_refcount = 0;
- lmd->md_flags = (unlink == LNET_UNLINK) ? LNET_MD_FLAG_AUTO_UNLINK : 0;
-
- if (umd->options & LNET_MD_IOVEC) {
- if (umd->options & LNET_MD_KIOV) /* Can't specify both */
- return -EINVAL;
-
- niov = umd->length;
- lmd->md_niov = umd->length;
- memcpy(lmd->md_iov.iov, umd->start,
- niov * sizeof(lmd->md_iov.iov[0]));
-
- for (i = 0; i < (int)niov; i++) {
- /* We take the base address on trust */
- /* invalid length */
- if (lmd->md_iov.iov[i].iov_len <= 0)
- return -EINVAL;
-
- total_length += lmd->md_iov.iov[i].iov_len;
- }
-
- lmd->md_length = total_length;
-
- if ((umd->options & LNET_MD_MAX_SIZE) && /* use max size */
- (umd->max_size < 0 ||
- umd->max_size > total_length)) /* illegal max_size */
- return -EINVAL;
-
- } else if (umd->options & LNET_MD_KIOV) {
- niov = umd->length;
- lmd->md_niov = umd->length;
- memcpy(lmd->md_iov.kiov, umd->start,
- niov * sizeof(lmd->md_iov.kiov[0]));
-
- for (i = 0; i < (int)niov; i++) {
- /* We take the page pointer on trust */
- if (lmd->md_iov.kiov[i].bv_offset +
- lmd->md_iov.kiov[i].bv_len > PAGE_SIZE)
- return -EINVAL; /* invalid length */
-
- total_length += lmd->md_iov.kiov[i].bv_len;
- }
-
- lmd->md_length = total_length;
-
- if ((umd->options & LNET_MD_MAX_SIZE) && /* max size used */
- (umd->max_size < 0 ||
- umd->max_size > total_length)) /* illegal max_size */
- return -EINVAL;
- } else { /* contiguous */
- lmd->md_length = umd->length;
- niov = 1;
- lmd->md_niov = 1;
- lmd->md_iov.iov[0].iov_base = umd->start;
- lmd->md_iov.iov[0].iov_len = umd->length;
-
- if ((umd->options & LNET_MD_MAX_SIZE) && /* max size used */
- (umd->max_size < 0 ||
- umd->max_size > (int)umd->length)) /* illegal max_size */
- return -EINVAL;
- }
-
- return 0;
-}
-
-/* must be called with resource lock held */
-static int
-lnet_md_link(struct lnet_libmd *md, struct lnet_handle_eq eq_handle, int cpt)
-{
- struct lnet_res_container *container = the_lnet.ln_md_containers[cpt];
-
- /*
- * NB we are passed an allocated, but inactive md.
- * if we return success, caller may lnet_md_unlink() it.
- * otherwise caller may only kfree() it.
- */
- /*
- * This implementation doesn't know how to create START events or
- * disable END events. Best to LASSERT our caller is compliant so
- * we find out quickly...
- */
-	/*
-	 * TODO - re-evaluate what should be here in light of the removal
-	 * of the start and end events; maybe we shouldn't even allow
-	 * LNET_EQ_NONE!
-	 * LASSERT(!eq);
-	 */
- if (!LNetEQHandleIsInvalid(eq_handle)) {
- md->md_eq = lnet_handle2eq(&eq_handle);
-
- if (!md->md_eq)
- return -ENOENT;
-
- (*md->md_eq->eq_refs[cpt])++;
- }
-
- lnet_res_lh_initialize(container, &md->md_lh);
-
- LASSERT(list_empty(&md->md_list));
- list_add(&md->md_list, &container->rec_active);
-
- return 0;
-}
-
-/* must be called with lnet_res_lock held */
-void
-lnet_md_deconstruct(struct lnet_libmd *lmd, struct lnet_md *umd)
-{
- /* NB this doesn't copy out all the iov entries so when a
- * discontiguous MD is copied out, the target gets to know the
- * original iov pointer (in start) and the number of entries it had
- * and that's all.
- */
- umd->start = lmd->md_start;
- umd->length = !(lmd->md_options &
- (LNET_MD_IOVEC | LNET_MD_KIOV)) ?
- lmd->md_length : lmd->md_niov;
- umd->threshold = lmd->md_threshold;
- umd->max_size = lmd->md_max_size;
- umd->options = lmd->md_options;
- umd->user_ptr = lmd->md_user_ptr;
- lnet_eq2handle(&umd->eq_handle, lmd->md_eq);
-}
-
-static int
-lnet_md_validate(struct lnet_md *umd)
-{
- if (!umd->start && umd->length) {
- CERROR("MD start pointer can not be NULL with length %u\n",
- umd->length);
- return -EINVAL;
- }
-
- if ((umd->options & (LNET_MD_KIOV | LNET_MD_IOVEC)) &&
- umd->length > LNET_MAX_IOV) {
- CERROR("Invalid option: too many fragments %u, %d max\n",
- umd->length, LNET_MAX_IOV);
- return -EINVAL;
- }
-
- return 0;
-}
-
-/**
- * Create a memory descriptor and attach it to a ME
- *
- * \param meh A handle for a ME to associate the new MD with.
- * \param umd Provides initial values for the user-visible parts of a MD.
- * Other than its use for initialization, there is no linkage between this
- * structure and the MD maintained by the LNet.
- * \param unlink A flag to indicate whether the MD is automatically unlinked
- * when it becomes inactive, either because the operation threshold drops to
- * zero or because the available memory becomes less than \a umd.max_size.
- * (Note that the check for unlinking a MD only occurs after the completion
- * of a successful operation on the MD.) The value LNET_UNLINK enables auto
- * unlinking; the value LNET_RETAIN disables it.
- * \param handle On successful returns, a handle to the newly created MD is
- * saved here. This handle can be used later in LNetMDUnlink().
- *
- * \retval 0 On success.
- * \retval -EINVAL If \a umd is not valid.
- * \retval -ENOMEM If new MD cannot be allocated.
- * \retval -ENOENT Either \a meh or \a umd.eq_handle does not point to a
- * valid object. Note that it's OK to supply a NULL \a umd.eq_handle by
- * calling LNetInvalidateHandle() on it.
- * \retval -EBUSY If the ME pointed to by \a meh is already associated with
- * a MD.
- */
-int
-LNetMDAttach(struct lnet_handle_me meh, struct lnet_md umd,
- enum lnet_unlink unlink, struct lnet_handle_md *handle)
-{
- LIST_HEAD(matches);
- LIST_HEAD(drops);
- struct lnet_me *me;
- struct lnet_libmd *md;
- int cpt;
- int rc;
-
- LASSERT(the_lnet.ln_refcount > 0);
-
- if (lnet_md_validate(&umd))
- return -EINVAL;
-
- if (!(umd.options & (LNET_MD_OP_GET | LNET_MD_OP_PUT))) {
- CERROR("Invalid option: no MD_OP set\n");
- return -EINVAL;
- }
-
- md = lnet_md_alloc(&umd);
- if (!md)
- return -ENOMEM;
-
- rc = lnet_md_build(md, &umd, unlink);
- if (rc)
- goto out_free;
-
- cpt = lnet_cpt_of_cookie(meh.cookie);
-
- lnet_res_lock(cpt);
-
- me = lnet_handle2me(&meh);
- if (!me)
- rc = -ENOENT;
- else if (me->me_md)
- rc = -EBUSY;
- else
- rc = lnet_md_link(md, umd.eq_handle, cpt);
-
- if (rc)
- goto out_unlock;
-
- /*
- * attach this MD to portal of ME and check if it matches any
- * blocked msgs on this portal
- */
- lnet_ptl_attach_md(me, md, &matches, &drops);
-
- lnet_md2handle(handle, md);
-
- lnet_res_unlock(cpt);
-
- lnet_drop_delayed_msg_list(&drops, "Bad match");
- lnet_recv_delayed_msg_list(&matches);
-
- return 0;
-
-out_unlock:
- lnet_res_unlock(cpt);
-out_free:
- kfree(md);
- return rc;
-}
-EXPORT_SYMBOL(LNetMDAttach);
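-
-/*
- * Usage sketch (editorial, not part of the original file): post a 4KB
- * PUT sink on a portal.  EXAMPLE_PORTAL, "buf" and "eqh" are
- * hypothetical, and error handling is minimal; LNetMEAttach() itself
- * lives in lib-me.c.
- */
-static int md_attach_example(struct lnet_handle_eq eqh, void *buf)
-{
-	struct lnet_process_id anyone = {
-		.nid = LNET_NID_ANY,
-		.pid = LNET_PID_ANY,
-	};
-	struct lnet_md umd = {
-		.start = buf,
-		.length = 4096,
-		.threshold = 1, /* inactive after one operation */
-		.options = LNET_MD_OP_PUT,
-		.eq_handle = eqh,
-	};
-	struct lnet_handle_me meh;
-	struct lnet_handle_md mdh;
-	int rc;
-
-	rc = LNetMEAttach(EXAMPLE_PORTAL, anyone, 0x17ULL, 0, LNET_UNLINK,
-			  LNET_INS_AFTER, &meh);
-	if (rc)
-		return rc;
-	return LNetMDAttach(meh, umd, LNET_UNLINK, &mdh);
-}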
-
-/**
- * Create a "free floating" memory descriptor - a MD that is not associated
- * with a ME. Such MDs are usually used in LNetPut() and LNetGet() operations.
- *
- * \param umd,unlink See the discussion for LNetMDAttach().
- * \param handle On successful returns, a handle to the newly created MD is
- * saved here. This handle can be used later in LNetMDUnlink(), LNetPut(),
- * and LNetGet() operations.
- *
- * \retval 0 On success.
- * \retval -EINVAL If \a umd is not valid.
- * \retval -ENOMEM If new MD cannot be allocated.
- * \retval -ENOENT \a umd.eq_handle does not point to a valid EQ. Note that
- * it's OK to supply a NULL \a umd.eq_handle by calling
- * LNetInvalidateHandle() on it.
- */
-int
-LNetMDBind(struct lnet_md umd, enum lnet_unlink unlink,
- struct lnet_handle_md *handle)
-{
- struct lnet_libmd *md;
- int cpt;
- int rc;
-
- LASSERT(the_lnet.ln_refcount > 0);
-
- if (lnet_md_validate(&umd))
- return -EINVAL;
-
- if ((umd.options & (LNET_MD_OP_GET | LNET_MD_OP_PUT))) {
- CERROR("Invalid option: GET|PUT illegal on active MDs\n");
- return -EINVAL;
- }
-
- md = lnet_md_alloc(&umd);
- if (!md)
- return -ENOMEM;
-
- rc = lnet_md_build(md, &umd, unlink);
- if (rc)
- goto out_free;
-
- cpt = lnet_res_lock_current();
-
- rc = lnet_md_link(md, umd.eq_handle, cpt);
- if (rc)
- goto out_unlock;
-
- lnet_md2handle(handle, md);
-
- lnet_res_unlock(cpt);
- return 0;
-
-out_unlock:
- lnet_res_unlock(cpt);
-out_free:
- kfree(md);
-
- return rc;
-}
-EXPORT_SYMBOL(LNetMDBind);
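-
-/*
- * Usage sketch (editorial, not part of the original file): bind a
- * free-floating source MD, e.g. for a later LNetPut().  Note the
- * inverted option rule checked above: OP_GET/OP_PUT must NOT be set on
- * an active-side MD.  "eqh", "buf" and "len" are hypothetical.
- */
-static int md_bind_example(struct lnet_handle_eq eqh, void *buf,
-			   unsigned int len, struct lnet_handle_md *mdh)
-{
-	struct lnet_md umd = {
-		.start = buf,
-		.length = len,
-		.threshold = 2, /* e.g. one SEND plus one ACK event */
-		.options = 0,
-		.eq_handle = eqh,
-	};
-
-	return LNetMDBind(umd, LNET_UNLINK, mdh);
-}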
-
-/**
- * Unlink the memory descriptor from any ME it may be linked to and release
- * the internal resources associated with it. As a result, active messages
- * associated with the MD may get aborted.
- *
- * This function does not free the memory region associated with the MD;
- * i.e., the memory the user allocated for this MD. If the ME associated with
- * this MD is not NULL and was created with auto unlink enabled, the ME is
- * unlinked as well (see LNetMEAttach()).
- *
- * Explicitly unlinking a MD via this function call has the same behavior as
- * a MD that has been automatically unlinked, except that no LNET_EVENT_UNLINK
- * is generated in the latter case.
- *
- * An unlinked event can be reported in two ways:
- * - If there's no pending operations on the MD, it's unlinked immediately
- * and an LNET_EVENT_UNLINK event is logged before this function returns.
- * - Otherwise, the MD is only marked for deletion when this function
- * returns, and the unlinked event will be piggybacked on the event of
- * the completion of the last operation by setting the unlinked field of
- * the event. No dedicated LNET_EVENT_UNLINK event is generated.
- *
- * Note that in both cases the unlinked field of the event is always set; no
- * more event will happen on the MD after such an event is logged.
- *
- * \param mdh A handle for the MD to be unlinked.
- *
- * \retval 0 On success.
- * \retval -ENOENT If \a mdh does not point to a valid MD object.
- */
-int
-LNetMDUnlink(struct lnet_handle_md mdh)
-{
- struct lnet_event ev;
- struct lnet_libmd *md;
- int cpt;
-
- LASSERT(the_lnet.ln_refcount > 0);
-
- cpt = lnet_cpt_of_cookie(mdh.cookie);
- lnet_res_lock(cpt);
-
- md = lnet_handle2md(&mdh);
- if (!md) {
- lnet_res_unlock(cpt);
- return -ENOENT;
- }
-
- md->md_flags |= LNET_MD_FLAG_ABORTED;
- /*
- * If the MD is busy, lnet_md_unlink just marks it for deletion, and
- * when the LND is done, the completion event flags that the MD was
- * unlinked. Otherwise, we enqueue an event now...
- */
- if (md->md_eq && !md->md_refcount) {
- lnet_build_unlink_event(md, &ev);
- lnet_eq_enqueue_event(md->md_eq, &ev);
- }
-
- lnet_md_unlink(md);
-
- lnet_res_unlock(cpt);
- return 0;
-}
-EXPORT_SYMBOL(LNetMDUnlink);
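-
-/*
- * Usage sketch (editorial, not part of the original file): explicit
- * unlink.  Per the comment above, an idle MD gets an immediate
- * LNET_EVENT_UNLINK, while a busy MD is only marked and reports the
- * unlink via the "unlinked" field of its final event.
- */
-static void md_unlink_example(struct lnet_handle_md mdh)
-{
-	if (LNetMDUnlink(mdh) == -ENOENT)
-		CDEBUG(D_NET, "MD was already unlinked\n");
-}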
diff --git a/drivers/staging/lustre/lnet/lnet/lib-me.c b/drivers/staging/lustre/lnet/lnet/lib-me.c
deleted file mode 100644
index 672e37bdd045..000000000000
--- a/drivers/staging/lustre/lnet/lnet/lib-me.c
+++ /dev/null
@@ -1,274 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/*
- * GPL HEADER START
- *
- * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 only,
- * as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * General Public License version 2 for more details (a copy is included
- * in the LICENSE file that accompanied this code).
- *
- * You should have received a copy of the GNU General Public License
- * version 2 along with this program; If not, see
- * http://www.gnu.org/licenses/gpl-2.0.html
- *
- * GPL HEADER END
- */
-/*
- * Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved.
- * Use is subject to license terms.
- *
- * Copyright (c) 2012, Intel Corporation.
- */
-/*
- * This file is part of Lustre, http://www.lustre.org/
- * Lustre is a trademark of Sun Microsystems, Inc.
- *
- * lnet/lnet/lib-me.c
- *
- * Match Entry management routines
- */
-
-#define DEBUG_SUBSYSTEM S_LNET
-
-#include <linux/lnet/lib-lnet.h>
-
-/**
- * Create and attach a match entry to the match list of \a portal. The new
- * ME is empty, i.e. not associated with a memory descriptor. LNetMDAttach()
- * can be used to attach a MD to an empty ME.
- *
- * \param portal The portal table index where the ME should be attached.
- * \param match_id Specifies the match criteria for the process ID of
- * the requester. The constants LNET_PID_ANY and LNET_NID_ANY can be
- * used to wildcard either of the identifiers in the lnet_process_id
- * structure.
- * \param match_bits,ignore_bits Specify the match criteria to apply
- * to the match bits in the incoming request. The ignore bits are used
- * to mask out insignificant bits in the incoming match bits. The resulting
- * bits are then compared to the ME's match bits to determine if the
- * incoming request meets the match criteria.
- * \param unlink Indicates whether the ME should be unlinked when the memory
- * descriptor associated with it is unlinked (Note that the check for
- * unlinking a ME only occurs when the memory descriptor is unlinked.).
- * Valid values are LNET_RETAIN and LNET_UNLINK.
- * \param pos Indicates whether the new ME should be prepended or
- * appended to the match list. Allowed constants: LNET_INS_BEFORE,
- * LNET_INS_AFTER.
- * \param handle On successful returns, a handle to the newly created ME
- * object is saved here. This handle can be used later in LNetMEInsert(),
- * LNetMEUnlink(), or LNetMDAttach() functions.
- *
- * \retval 0 On success.
- * \retval -EINVAL If \a portal is invalid.
- * \retval -ENOMEM If new ME object cannot be allocated.
- */
-int
-LNetMEAttach(unsigned int portal,
- struct lnet_process_id match_id,
- __u64 match_bits, __u64 ignore_bits,
- enum lnet_unlink unlink, enum lnet_ins_pos pos,
- struct lnet_handle_me *handle)
-{
- struct lnet_match_table *mtable;
- struct lnet_me *me;
- struct list_head *head;
-
- LASSERT(the_lnet.ln_refcount > 0);
-
- if ((int)portal >= the_lnet.ln_nportals)
- return -EINVAL;
-
- mtable = lnet_mt_of_attach(portal, match_id,
- match_bits, ignore_bits, pos);
- if (!mtable) /* can't match portal type */
- return -EPERM;
-
- me = kzalloc(sizeof(*me), GFP_NOFS);
- if (!me)
- return -ENOMEM;
-
- lnet_res_lock(mtable->mt_cpt);
-
- me->me_portal = portal;
- me->me_match_id = match_id;
- me->me_match_bits = match_bits;
- me->me_ignore_bits = ignore_bits;
- me->me_unlink = unlink;
- me->me_md = NULL;
-
- lnet_res_lh_initialize(the_lnet.ln_me_containers[mtable->mt_cpt],
- &me->me_lh);
- if (ignore_bits)
- head = &mtable->mt_mhash[LNET_MT_HASH_IGNORE];
- else
- head = lnet_mt_match_head(mtable, match_id, match_bits);
-
- me->me_pos = head - &mtable->mt_mhash[0];
- if (pos == LNET_INS_AFTER || pos == LNET_INS_LOCAL)
- list_add_tail(&me->me_list, head);
- else
- list_add(&me->me_list, head);
-
- lnet_me2handle(handle, me);
-
- lnet_res_unlock(mtable->mt_cpt);
- return 0;
-}
-EXPORT_SYMBOL(LNetMEAttach);
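-
-/*
- * Worked example (editorial, not part of the original file): with
- * match_bits 0x1200 and ignore_bits 0x00ff, incoming match bits 0x1234
- * and 0x12ff both match (the masked low byte is ignored) while 0x1334
- * does not.  As the code above shows, any ME with non-zero ignore_bits
- * is kept on the LNET_MT_HASH_IGNORE list.
- */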
-
-/**
- * Create a match entry and insert it before or after the ME pointed to by
- * \a current_meh. The new ME is empty, i.e. not associated with a memory
- * descriptor. LNetMDAttach() can be used to attach a MD to an empty ME.
- *
- * This function is identical to LNetMEAttach() except for the position
- * where the new ME is inserted.
- *
- * \param current_meh A handle for a ME. The new ME will be inserted
- * immediately before or immediately after this ME.
- * \param match_id,match_bits,ignore_bits,unlink,pos,handle See the discussion
- * for LNetMEAttach().
- *
- * \retval 0 On success.
- * \retval -ENOMEM If new ME object cannot be allocated.
- * \retval -ENOENT If \a current_meh does not point to a valid match entry.
- */
-int
-LNetMEInsert(struct lnet_handle_me current_meh,
- struct lnet_process_id match_id,
- __u64 match_bits, __u64 ignore_bits,
- enum lnet_unlink unlink, enum lnet_ins_pos pos,
- struct lnet_handle_me *handle)
-{
- struct lnet_me *current_me;
- struct lnet_me *new_me;
- struct lnet_portal *ptl;
- int cpt;
-
- LASSERT(the_lnet.ln_refcount > 0);
-
- if (pos == LNET_INS_LOCAL)
- return -EPERM;
-
- new_me = kzalloc(sizeof(*new_me), GFP_NOFS);
- if (!new_me)
- return -ENOMEM;
-
- cpt = lnet_cpt_of_cookie(current_meh.cookie);
-
- lnet_res_lock(cpt);
-
- current_me = lnet_handle2me(&current_meh);
- if (!current_me) {
- kfree(new_me);
-
- lnet_res_unlock(cpt);
- return -ENOENT;
- }
-
- LASSERT(current_me->me_portal < the_lnet.ln_nportals);
-
- ptl = the_lnet.ln_portals[current_me->me_portal];
- if (lnet_ptl_is_unique(ptl)) {
-		/* insertion makes no sense on a unique portal */
- kfree(new_me);
- lnet_res_unlock(cpt);
- return -EPERM;
- }
-
- new_me->me_pos = current_me->me_pos;
- new_me->me_portal = current_me->me_portal;
- new_me->me_match_id = match_id;
- new_me->me_match_bits = match_bits;
- new_me->me_ignore_bits = ignore_bits;
- new_me->me_unlink = unlink;
- new_me->me_md = NULL;
-
- lnet_res_lh_initialize(the_lnet.ln_me_containers[cpt], &new_me->me_lh);
-
- if (pos == LNET_INS_AFTER)
- list_add(&new_me->me_list, &current_me->me_list);
- else
- list_add_tail(&new_me->me_list, &current_me->me_list);
-
- lnet_me2handle(handle, new_me);
-
- lnet_res_unlock(cpt);
-
- return 0;
-}
-EXPORT_SYMBOL(LNetMEInsert);
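-
-/*
- * Editorial note (not in the original file): after
- * LNetMEInsert(meh_a, ..., LNET_INS_AFTER, &meh_b) the match list reads
- * A then B; with LNET_INS_BEFORE it reads B then A.  LNET_INS_LOCAL and
- * insertion on a unique portal are both rejected with -EPERM, as above.
- */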
-
-/**
- * Unlink a match entry from its match list.
- *
- * This operation also releases any resources associated with the ME. If a
- * memory descriptor is attached to the ME, then it will be unlinked as well
- * and an unlink event will be generated. It is an error to use the ME handle
- * after calling LNetMEUnlink().
- *
- * \param meh A handle for the ME to be unlinked.
- *
- * \retval 0 On success.
- * \retval -ENOENT If \a meh does not point to a valid ME.
- * \see LNetMDUnlink() for the discussion on delivering unlink event.
- */
-int
-LNetMEUnlink(struct lnet_handle_me meh)
-{
- struct lnet_me *me;
- struct lnet_libmd *md;
- struct lnet_event ev;
- int cpt;
-
- LASSERT(the_lnet.ln_refcount > 0);
-
- cpt = lnet_cpt_of_cookie(meh.cookie);
- lnet_res_lock(cpt);
-
- me = lnet_handle2me(&meh);
- if (!me) {
- lnet_res_unlock(cpt);
- return -ENOENT;
- }
-
- md = me->me_md;
- if (md) {
- md->md_flags |= LNET_MD_FLAG_ABORTED;
- if (md->md_eq && !md->md_refcount) {
- lnet_build_unlink_event(md, &ev);
- lnet_eq_enqueue_event(md->md_eq, &ev);
- }
- }
-
- lnet_me_unlink(me);
-
- lnet_res_unlock(cpt);
- return 0;
-}
-EXPORT_SYMBOL(LNetMEUnlink);
-
-/* call with lnet_res_lock please */
-void
-lnet_me_unlink(struct lnet_me *me)
-{
- list_del(&me->me_list);
-
- if (me->me_md) {
- struct lnet_libmd *md = me->me_md;
-
- /* detach MD from portal of this ME */
- lnet_ptl_detach_md(me, md);
- lnet_md_unlink(md);
- }
-
- lnet_res_lh_invalidate(&me->me_lh);
- kfree(me);
-}
diff --git a/drivers/staging/lustre/lnet/lnet/lib-move.c b/drivers/staging/lustre/lnet/lnet/lib-move.c
deleted file mode 100644
index ed43b3f4b114..000000000000
--- a/drivers/staging/lustre/lnet/lnet/lib-move.c
+++ /dev/null
@@ -1,2388 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/*
- * GPL HEADER START
- *
- * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 only,
- * as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * General Public License version 2 for more details (a copy is included
- * in the LICENSE file that accompanied this code).
- *
- * You should have received a copy of the GNU General Public License
- * version 2 along with this program; If not, see
- * http://www.gnu.org/licenses/gpl-2.0.html
- *
- * GPL HEADER END
- */
-/*
- * Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved.
- * Use is subject to license terms.
- *
- * Copyright (c) 2011, 2015, Intel Corporation.
- */
-/*
- * This file is part of Lustre, http://www.lustre.org/
- * Lustre is a trademark of Sun Microsystems, Inc.
- *
- * lnet/lnet/lib-move.c
- *
- * Data movement routines
- */
-
-#define DEBUG_SUBSYSTEM S_LNET
-
-#include <linux/lnet/lib-lnet.h>
-#include <linux/nsproxy.h>
-#include <net/net_namespace.h>
-
-static int local_nid_dist_zero = 1;
-module_param(local_nid_dist_zero, int, 0444);
-MODULE_PARM_DESC(local_nid_dist_zero, "Reserved");
-
-int
-lnet_fail_nid(lnet_nid_t nid, unsigned int threshold)
-{
- struct lnet_test_peer *tp;
- struct lnet_test_peer *temp;
- struct list_head *el;
- struct list_head *next;
- struct list_head cull;
-
- /* NB: use lnet_net_lock(0) to serialize operations on test peers */
- if (threshold) {
- /* Adding a new entry */
- tp = kzalloc(sizeof(*tp), GFP_NOFS);
- if (!tp)
- return -ENOMEM;
-
- tp->tp_nid = nid;
- tp->tp_threshold = threshold;
-
- lnet_net_lock(0);
- list_add_tail(&tp->tp_list, &the_lnet.ln_test_peers);
- lnet_net_unlock(0);
- return 0;
- }
-
- /* removing entries */
- INIT_LIST_HEAD(&cull);
-
- lnet_net_lock(0);
-
- list_for_each_safe(el, next, &the_lnet.ln_test_peers) {
- tp = list_entry(el, struct lnet_test_peer, tp_list);
-
- if (!tp->tp_threshold || /* needs culling anyway */
- nid == LNET_NID_ANY || /* removing all entries */
- tp->tp_nid == nid) { /* matched this one */
- list_del(&tp->tp_list);
- list_add(&tp->tp_list, &cull);
- }
- }
-
- lnet_net_unlock(0);
-
- list_for_each_entry_safe(tp, temp, &cull, tp_list) {
- list_del(&tp->tp_list);
- kfree(tp);
- }
- return 0;
-}
-
-static int
-fail_peer(lnet_nid_t nid, int outgoing)
-{
- struct lnet_test_peer *tp;
- struct lnet_test_peer *temp;
- struct list_head *el;
- struct list_head *next;
- struct list_head cull;
- int fail = 0;
-
- INIT_LIST_HEAD(&cull);
-
- /* NB: use lnet_net_lock(0) to serialize operations on test peers */
- lnet_net_lock(0);
-
- list_for_each_safe(el, next, &the_lnet.ln_test_peers) {
- tp = list_entry(el, struct lnet_test_peer, tp_list);
-
- if (!tp->tp_threshold) {
- /* zombie entry */
- if (outgoing) {
- /*
- * only cull zombies on outgoing tests,
- * since we may be at interrupt priority on
- * incoming messages.
- */
- list_del(&tp->tp_list);
- list_add(&tp->tp_list, &cull);
- }
- continue;
- }
-
- if (tp->tp_nid == LNET_NID_ANY || /* fail every peer */
- nid == tp->tp_nid) { /* fail this peer */
- fail = 1;
-
- if (tp->tp_threshold != LNET_MD_THRESH_INF) {
- tp->tp_threshold--;
- if (outgoing &&
- !tp->tp_threshold) {
- /* see above */
- list_del(&tp->tp_list);
- list_add(&tp->tp_list, &cull);
- }
- }
- break;
- }
- }
-
- lnet_net_unlock(0);
-
- list_for_each_entry_safe(tp, temp, &cull, tp_list) {
- list_del(&tp->tp_list);
-
- kfree(tp);
- }
-
- return fail;
-}
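-
-/*
- * Editorial example (not in the original file): lnet_fail_nid(nid, 3)
- * installs a test entry so that fail_peer() fails the next three
- * messages involving "nid" (LNET_MD_THRESH_INF entries never count
- * down), while lnet_fail_nid(LNET_NID_ANY, 0) culls every entry.
- */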
-
-unsigned int
-lnet_iov_nob(unsigned int niov, struct kvec *iov)
-{
- unsigned int nob = 0;
-
- LASSERT(!niov || iov);
- while (niov-- > 0)
- nob += (iov++)->iov_len;
-
- return nob;
-}
-EXPORT_SYMBOL(lnet_iov_nob);
-
-void
-lnet_copy_iov2iter(struct iov_iter *to,
- unsigned int nsiov, const struct kvec *siov,
- unsigned int soffset, unsigned int nob)
-{
- /* NB diov, siov are READ-ONLY */
- const char *s;
- size_t left;
-
- if (!nob)
- return;
-
- /* skip complete frags before 'soffset' */
- LASSERT(nsiov > 0);
- while (soffset >= siov->iov_len) {
- soffset -= siov->iov_len;
- siov++;
- nsiov--;
- LASSERT(nsiov > 0);
- }
-
- s = (char *)siov->iov_base + soffset;
- left = siov->iov_len - soffset;
- do {
- size_t n, copy = left;
-
- LASSERT(nsiov > 0);
-
- if (copy > nob)
- copy = nob;
- n = copy_to_iter(s, copy, to);
- if (n != copy)
- return;
- nob -= n;
-
- siov++;
- s = (char *)siov->iov_base;
- left = siov->iov_len;
- nsiov--;
- } while (nob > 0);
-}
-EXPORT_SYMBOL(lnet_copy_iov2iter);
-
-void
-lnet_copy_kiov2iter(struct iov_iter *to,
- unsigned int nsiov, const struct bio_vec *siov,
- unsigned int soffset, unsigned int nob)
-{
- if (!nob)
- return;
-
- LASSERT(!in_interrupt());
-
- LASSERT(nsiov > 0);
- while (soffset >= siov->bv_len) {
- soffset -= siov->bv_len;
- siov++;
- nsiov--;
- LASSERT(nsiov > 0);
- }
-
- do {
- size_t copy = siov->bv_len - soffset, n;
-
- LASSERT(nsiov > 0);
-
- if (copy > nob)
- copy = nob;
- n = copy_page_to_iter(siov->bv_page,
- siov->bv_offset + soffset,
- copy, to);
- if (n != copy)
- return;
- nob -= n;
- siov++;
- nsiov--;
- soffset = 0;
- } while (nob > 0);
-}
-EXPORT_SYMBOL(lnet_copy_kiov2iter);
-
-int
-lnet_extract_iov(int dst_niov, struct kvec *dst,
- int src_niov, const struct kvec *src,
- unsigned int offset, unsigned int len)
-{
- /*
- * Initialise 'dst' to the subset of 'src' starting at 'offset',
- * for exactly 'len' bytes, and return the number of entries.
- * NB not destructive to 'src'
- */
- unsigned int frag_len;
- unsigned int niov;
-
- if (!len) /* no data => */
- return 0; /* no frags */
-
- LASSERT(src_niov > 0);
- while (offset >= src->iov_len) { /* skip initial frags */
- offset -= src->iov_len;
- src_niov--;
- src++;
- LASSERT(src_niov > 0);
- }
-
- niov = 1;
- for (;;) {
- LASSERT(src_niov > 0);
- LASSERT((int)niov <= dst_niov);
-
- frag_len = src->iov_len - offset;
- dst->iov_base = ((char *)src->iov_base) + offset;
-
- if (len <= frag_len) {
- dst->iov_len = len;
- return niov;
- }
-
- dst->iov_len = frag_len;
-
- len -= frag_len;
- dst++;
- src++;
- niov++;
- src_niov--;
- offset = 0;
- }
-}
-EXPORT_SYMBOL(lnet_extract_iov);
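-
-/*
- * Worked example (editorial, not part of the original file): with src
- * frags of 1024 and 2048 bytes, offset 1536 and len 1024, the first
- * frag is skipped entirely, leaving offset 512 into the second; dst
- * gets a single 1024-byte entry starting 512 bytes into src[1], and
- * the function returns 1.
- */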
-
-unsigned int
-lnet_kiov_nob(unsigned int niov, struct bio_vec *kiov)
-{
- unsigned int nob = 0;
-
- LASSERT(!niov || kiov);
- while (niov-- > 0)
- nob += (kiov++)->bv_len;
-
- return nob;
-}
-EXPORT_SYMBOL(lnet_kiov_nob);
-
-int
-lnet_extract_kiov(int dst_niov, struct bio_vec *dst,
- int src_niov, const struct bio_vec *src,
- unsigned int offset, unsigned int len)
-{
- /*
- * Initialise 'dst' to the subset of 'src' starting at 'offset',
- * for exactly 'len' bytes, and return the number of entries.
- * NB not destructive to 'src'
- */
- unsigned int frag_len;
- unsigned int niov;
-
- if (!len) /* no data => */
- return 0; /* no frags */
-
- LASSERT(src_niov > 0);
- while (offset >= src->bv_len) { /* skip initial frags */
- offset -= src->bv_len;
- src_niov--;
- src++;
- LASSERT(src_niov > 0);
- }
-
- niov = 1;
- for (;;) {
- LASSERT(src_niov > 0);
- LASSERT((int)niov <= dst_niov);
-
- frag_len = src->bv_len - offset;
- dst->bv_page = src->bv_page;
- dst->bv_offset = src->bv_offset + offset;
-
- if (len <= frag_len) {
- dst->bv_len = len;
- LASSERT(dst->bv_offset + dst->bv_len
- <= PAGE_SIZE);
- return niov;
- }
-
- dst->bv_len = frag_len;
- LASSERT(dst->bv_offset + dst->bv_len <= PAGE_SIZE);
-
- len -= frag_len;
- dst++;
- src++;
- niov++;
- src_niov--;
- offset = 0;
- }
-}
-EXPORT_SYMBOL(lnet_extract_kiov);
-
-void
-lnet_ni_recv(struct lnet_ni *ni, void *private, struct lnet_msg *msg,
- int delayed, unsigned int offset, unsigned int mlen,
- unsigned int rlen)
-{
- unsigned int niov = 0;
- struct kvec *iov = NULL;
- struct bio_vec *kiov = NULL;
- struct iov_iter to;
- int rc;
-
- LASSERT(!in_interrupt());
- LASSERT(!mlen || msg);
-
- if (msg) {
- LASSERT(msg->msg_receiving);
- LASSERT(!msg->msg_sending);
- LASSERT(rlen == msg->msg_len);
- LASSERT(mlen <= msg->msg_len);
- LASSERT(msg->msg_offset == offset);
- LASSERT(msg->msg_wanted == mlen);
-
- msg->msg_receiving = 0;
-
- if (mlen) {
- niov = msg->msg_niov;
- iov = msg->msg_iov;
- kiov = msg->msg_kiov;
-
- LASSERT(niov > 0);
- LASSERT(!iov != !kiov);
- }
- }
-
- if (iov) {
- iov_iter_kvec(&to, ITER_KVEC | READ, iov, niov, mlen + offset);
- iov_iter_advance(&to, offset);
- } else {
- iov_iter_bvec(&to, ITER_BVEC | READ, kiov, niov, mlen + offset);
- iov_iter_advance(&to, offset);
- }
- rc = ni->ni_lnd->lnd_recv(ni, private, msg, delayed, &to, rlen);
- if (rc < 0)
- lnet_finalize(ni, msg, rc);
-}
-
-static void
-lnet_setpayloadbuffer(struct lnet_msg *msg)
-{
- struct lnet_libmd *md = msg->msg_md;
-
- LASSERT(msg->msg_len > 0);
- LASSERT(!msg->msg_routing);
- LASSERT(md);
- LASSERT(!msg->msg_niov);
- LASSERT(!msg->msg_iov);
- LASSERT(!msg->msg_kiov);
-
- msg->msg_niov = md->md_niov;
- if (md->md_options & LNET_MD_KIOV)
- msg->msg_kiov = md->md_iov.kiov;
- else
- msg->msg_iov = md->md_iov.iov;
-}
-
-void
-lnet_prep_send(struct lnet_msg *msg, int type, struct lnet_process_id target,
- unsigned int offset, unsigned int len)
-{
- msg->msg_type = type;
- msg->msg_target = target;
- msg->msg_len = len;
- msg->msg_offset = offset;
-
- if (len)
- lnet_setpayloadbuffer(msg);
-
- memset(&msg->msg_hdr, 0, sizeof(msg->msg_hdr));
- msg->msg_hdr.type = cpu_to_le32(type);
- msg->msg_hdr.dest_nid = cpu_to_le64(target.nid);
- msg->msg_hdr.dest_pid = cpu_to_le32(target.pid);
- /* src_nid will be set later */
- msg->msg_hdr.src_pid = cpu_to_le32(the_lnet.ln_pid);
- msg->msg_hdr.payload_length = cpu_to_le32(len);
-}
-
-static void
-lnet_ni_send(struct lnet_ni *ni, struct lnet_msg *msg)
-{
- void *priv = msg->msg_private;
- int rc;
-
- LASSERT(!in_interrupt());
- LASSERT(LNET_NETTYP(LNET_NIDNET(ni->ni_nid)) == LOLND ||
- (msg->msg_txcredit && msg->msg_peertxcredit));
-
- rc = ni->ni_lnd->lnd_send(ni, priv, msg);
- if (rc < 0)
- lnet_finalize(ni, msg, rc);
-}
-
-static int
-lnet_ni_eager_recv(struct lnet_ni *ni, struct lnet_msg *msg)
-{
- int rc;
-
- LASSERT(!msg->msg_sending);
- LASSERT(msg->msg_receiving);
- LASSERT(!msg->msg_rx_ready_delay);
- LASSERT(ni->ni_lnd->lnd_eager_recv);
-
- msg->msg_rx_ready_delay = 1;
- rc = ni->ni_lnd->lnd_eager_recv(ni, msg->msg_private, msg,
- &msg->msg_private);
-		CERROR("MD start pointer cannot be NULL with length %u\n",
- CERROR("recv from %s / send to %s aborted: eager_recv failed %d\n",
- libcfs_nid2str(msg->msg_rxpeer->lp_nid),
- libcfs_id2str(msg->msg_target), rc);
- LASSERT(rc < 0); /* required by my callers */
- }
-
- return rc;
-}
-
-/* NB: caller shall hold a ref on 'lp' as I'd drop lnet_net_lock */
-static void
-lnet_ni_query_locked(struct lnet_ni *ni, struct lnet_peer *lp)
-{
- unsigned long last_alive = 0;
-
- LASSERT(lnet_peer_aliveness_enabled(lp));
- LASSERT(ni->ni_lnd->lnd_query);
-
- lnet_net_unlock(lp->lp_cpt);
- ni->ni_lnd->lnd_query(ni, lp->lp_nid, &last_alive);
- lnet_net_lock(lp->lp_cpt);
-
- lp->lp_last_query = cfs_time_current();
-
- if (last_alive) /* NI has updated timestamp */
- lp->lp_last_alive = last_alive;
-}
-
-/* NB: always called with lnet_net_lock held */
-static inline int
-lnet_peer_is_alive(struct lnet_peer *lp, unsigned long now)
-{
- int alive;
- unsigned long deadline;
-
- LASSERT(lnet_peer_aliveness_enabled(lp));
-
- /* Trust lnet_notify() if it has more recent aliveness news, but
- * ignore the initial assumed death (see lnet_peers_start_down()).
- */
- if (!lp->lp_alive && lp->lp_alive_count > 0 &&
- cfs_time_aftereq(lp->lp_timestamp, lp->lp_last_alive))
- return 0;
-
- deadline = cfs_time_add(lp->lp_last_alive,
- lp->lp_ni->ni_peertimeout * HZ);
- alive = cfs_time_after(deadline, now);
-
-	/* Update obsolete lp_alive, except for routers assumed to be dead
-	 * initially: the router checker will update their aliveness, and
-	 * their lp_last_alive set at peer creation is only an assumption.
-	 */
- if (alive && !lp->lp_alive &&
- !(lnet_isrouter(lp) && !lp->lp_alive_count))
- lnet_notify_locked(lp, 0, 1, lp->lp_last_alive);
-
- return alive;
-}
-
-/*
- * NB: returns 1 when alive, 0 when dead, negative when error;
- * may drop the lnet_net_lock
- */
-static int
-lnet_peer_alive_locked(struct lnet_peer *lp)
-{
- unsigned long now = cfs_time_current();
-
- if (!lnet_peer_aliveness_enabled(lp))
- return -ENODEV;
-
- if (lnet_peer_is_alive(lp, now))
- return 1;
-
- /*
- * Peer appears dead, but we should avoid frequent NI queries (at
- * most once per lnet_queryinterval seconds).
- */
- if (lp->lp_last_query) {
- static const int lnet_queryinterval = 1;
-
- unsigned long next_query =
- cfs_time_add(lp->lp_last_query,
- lnet_queryinterval * HZ);
-
- if (time_before(now, next_query)) {
- if (lp->lp_alive)
- CWARN("Unexpected aliveness of peer %s: %d < %d (%d/%d)\n",
- libcfs_nid2str(lp->lp_nid),
- (int)now, (int)next_query,
- lnet_queryinterval,
- lp->lp_ni->ni_peertimeout);
- return 0;
- }
- }
-
- /* query NI for latest aliveness news */
- lnet_ni_query_locked(lp->lp_ni, lp);
-
- if (lnet_peer_is_alive(lp, now))
- return 1;
-
- lnet_notify_locked(lp, 0, 0, lp->lp_last_alive);
- return 0;
-}
-
-/**
- * \param msg The message to be sent.
- * \param do_send True if lnet_ni_send() should be called in this function.
- * lnet_send() is going to lnet_net_unlock() immediately after this call, so
- * it sets \a do_send FALSE and skips the unlock/send/lock sequence here.
- *
- * \retval LNET_CREDIT_OK If \a msg sent or OK to send.
- * \retval LNET_CREDIT_WAIT If \a msg blocked for credit.
- * \retval -EHOSTUNREACH If the next hop of the message appears dead.
- * \retval -ECANCELED If the MD of the message has been unlinked.
- */
-static int
-lnet_post_send_locked(struct lnet_msg *msg, int do_send)
-{
- struct lnet_peer *lp = msg->msg_txpeer;
- struct lnet_ni *ni = lp->lp_ni;
- int cpt = msg->msg_tx_cpt;
- struct lnet_tx_queue *tq = ni->ni_tx_queues[cpt];
-
- /* non-lnet_send() callers have checked before */
- LASSERT(!do_send || msg->msg_tx_delayed);
- LASSERT(!msg->msg_receiving);
- LASSERT(msg->msg_tx_committed);
-
- /* NB 'lp' is always the next hop */
- if (!(msg->msg_target.pid & LNET_PID_USERFLAG) &&
- !lnet_peer_alive_locked(lp)) {
- the_lnet.ln_counters[cpt]->drop_count++;
- the_lnet.ln_counters[cpt]->drop_length += msg->msg_len;
- lnet_net_unlock(cpt);
-
- CNETERR("Dropping message for %s: peer not alive\n",
- libcfs_id2str(msg->msg_target));
- if (do_send)
- lnet_finalize(ni, msg, -EHOSTUNREACH);
-
- lnet_net_lock(cpt);
- return -EHOSTUNREACH;
- }
-
- if (msg->msg_md &&
- (msg->msg_md->md_flags & LNET_MD_FLAG_ABORTED)) {
- lnet_net_unlock(cpt);
-
- CNETERR("Aborting message for %s: LNetM[DE]Unlink() already called on the MD/ME.\n",
- libcfs_id2str(msg->msg_target));
- if (do_send)
- lnet_finalize(ni, msg, -ECANCELED);
-
- lnet_net_lock(cpt);
- return -ECANCELED;
- }
-
- if (!msg->msg_peertxcredit) {
- LASSERT((lp->lp_txcredits < 0) ==
- !list_empty(&lp->lp_txq));
-
- msg->msg_peertxcredit = 1;
- lp->lp_txqnob += msg->msg_len + sizeof(struct lnet_hdr);
- lp->lp_txcredits--;
-
- if (lp->lp_txcredits < lp->lp_mintxcredits)
- lp->lp_mintxcredits = lp->lp_txcredits;
-
- if (lp->lp_txcredits < 0) {
- msg->msg_tx_delayed = 1;
- list_add_tail(&msg->msg_list, &lp->lp_txq);
- return LNET_CREDIT_WAIT;
- }
- }
-
- if (!msg->msg_txcredit) {
- LASSERT((tq->tq_credits < 0) ==
- !list_empty(&tq->tq_delayed));
-
- msg->msg_txcredit = 1;
- tq->tq_credits--;
-
- if (tq->tq_credits < tq->tq_credits_min)
- tq->tq_credits_min = tq->tq_credits;
-
- if (tq->tq_credits < 0) {
- msg->msg_tx_delayed = 1;
- list_add_tail(&msg->msg_list, &tq->tq_delayed);
- return LNET_CREDIT_WAIT;
- }
- }
-
- if (do_send) {
- lnet_net_unlock(cpt);
- lnet_ni_send(ni, msg);
- lnet_net_lock(cpt);
- }
- return LNET_CREDIT_OK;
-}
-
-static struct lnet_rtrbufpool *
-lnet_msg2bufpool(struct lnet_msg *msg)
-{
- struct lnet_rtrbufpool *rbp;
- int cpt;
-
- LASSERT(msg->msg_rx_committed);
-
- cpt = msg->msg_rx_cpt;
- rbp = &the_lnet.ln_rtrpools[cpt][0];
-
- LASSERT(msg->msg_len <= LNET_MTU);
- while (msg->msg_len > (unsigned int)rbp->rbp_npages * PAGE_SIZE) {
- rbp++;
- LASSERT(rbp < &the_lnet.ln_rtrpools[cpt][LNET_NRBPOOLS]);
- }
-
- return rbp;
-}
-
-static int
-lnet_post_routed_recv_locked(struct lnet_msg *msg, int do_recv)
-{
-	/*
-	 * lnet_parse() is going to lnet_net_unlock() immediately after this,
-	 * so it sets do_recv FALSE and the unlock/recv/lock sequence is
-	 * skipped here.  This returns LNET_CREDIT_WAIT if the msg blocked,
-	 * and LNET_CREDIT_OK if it was received or is OK to receive.
-	 */
- struct lnet_peer *lp = msg->msg_rxpeer;
- struct lnet_rtrbufpool *rbp;
- struct lnet_rtrbuf *rb;
-
- LASSERT(!msg->msg_iov);
- LASSERT(!msg->msg_kiov);
- LASSERT(!msg->msg_niov);
- LASSERT(msg->msg_routing);
- LASSERT(msg->msg_receiving);
- LASSERT(!msg->msg_sending);
-
- /* non-lnet_parse callers only receive delayed messages */
- LASSERT(!do_recv || msg->msg_rx_delayed);
-
- if (!msg->msg_peerrtrcredit) {
- LASSERT((lp->lp_rtrcredits < 0) ==
- !list_empty(&lp->lp_rtrq));
-
- msg->msg_peerrtrcredit = 1;
- lp->lp_rtrcredits--;
- if (lp->lp_rtrcredits < lp->lp_minrtrcredits)
- lp->lp_minrtrcredits = lp->lp_rtrcredits;
-
- if (lp->lp_rtrcredits < 0) {
- /* must have checked eager_recv before here */
- LASSERT(msg->msg_rx_ready_delay);
- msg->msg_rx_delayed = 1;
- list_add_tail(&msg->msg_list, &lp->lp_rtrq);
- return LNET_CREDIT_WAIT;
- }
- }
-
- rbp = lnet_msg2bufpool(msg);
-
- if (!msg->msg_rtrcredit) {
- msg->msg_rtrcredit = 1;
- rbp->rbp_credits--;
- if (rbp->rbp_credits < rbp->rbp_mincredits)
- rbp->rbp_mincredits = rbp->rbp_credits;
-
- if (rbp->rbp_credits < 0) {
- /* must have checked eager_recv before here */
- LASSERT(msg->msg_rx_ready_delay);
- msg->msg_rx_delayed = 1;
- list_add_tail(&msg->msg_list, &rbp->rbp_msgs);
- return LNET_CREDIT_WAIT;
- }
- }
-
- LASSERT(!list_empty(&rbp->rbp_bufs));
- rb = list_entry(rbp->rbp_bufs.next, struct lnet_rtrbuf, rb_list);
- list_del(&rb->rb_list);
-
- msg->msg_niov = rbp->rbp_npages;
- msg->msg_kiov = &rb->rb_kiov[0];
-
- if (do_recv) {
- int cpt = msg->msg_rx_cpt;
-
- lnet_net_unlock(cpt);
- lnet_ni_recv(lp->lp_ni, msg->msg_private, msg, 1,
- 0, msg->msg_len, msg->msg_len);
- lnet_net_lock(cpt);
- }
- return LNET_CREDIT_OK;
-}
-
-void
-lnet_return_tx_credits_locked(struct lnet_msg *msg)
-{
- struct lnet_peer *txpeer = msg->msg_txpeer;
- struct lnet_msg *msg2;
-
- if (msg->msg_txcredit) {
- struct lnet_ni *ni = txpeer->lp_ni;
- struct lnet_tx_queue *tq = ni->ni_tx_queues[msg->msg_tx_cpt];
-
- /* give back NI txcredits */
- msg->msg_txcredit = 0;
-
- LASSERT((tq->tq_credits < 0) ==
- !list_empty(&tq->tq_delayed));
-
- tq->tq_credits++;
- if (tq->tq_credits <= 0) {
- msg2 = list_entry(tq->tq_delayed.next,
- struct lnet_msg, msg_list);
- list_del(&msg2->msg_list);
-
- LASSERT(msg2->msg_txpeer->lp_ni == ni);
- LASSERT(msg2->msg_tx_delayed);
-
- (void)lnet_post_send_locked(msg2, 1);
- }
- }
-
- if (msg->msg_peertxcredit) {
- /* give back peer txcredits */
- msg->msg_peertxcredit = 0;
-
- LASSERT((txpeer->lp_txcredits < 0) ==
- !list_empty(&txpeer->lp_txq));
-
- txpeer->lp_txqnob -= msg->msg_len + sizeof(struct lnet_hdr);
- LASSERT(txpeer->lp_txqnob >= 0);
-
- txpeer->lp_txcredits++;
- if (txpeer->lp_txcredits <= 0) {
- msg2 = list_entry(txpeer->lp_txq.next,
- struct lnet_msg, msg_list);
- list_del(&msg2->msg_list);
-
- LASSERT(msg2->msg_txpeer == txpeer);
- LASSERT(msg2->msg_tx_delayed);
-
- (void)lnet_post_send_locked(msg2, 1);
- }
- }
-
- if (txpeer) {
- msg->msg_txpeer = NULL;
- lnet_peer_decref_locked(txpeer);
- }
-}
-
-void
-lnet_schedule_blocked_locked(struct lnet_rtrbufpool *rbp)
-{
- struct lnet_msg *msg;
-
- if (list_empty(&rbp->rbp_msgs))
- return;
- msg = list_entry(rbp->rbp_msgs.next,
- struct lnet_msg, msg_list);
- list_del(&msg->msg_list);
-
- (void)lnet_post_routed_recv_locked(msg, 1);
-}
-
-void
-lnet_drop_routed_msgs_locked(struct list_head *list, int cpt)
-{
- struct list_head drop;
- struct lnet_msg *msg;
- struct lnet_msg *tmp;
-
- INIT_LIST_HEAD(&drop);
-
- list_splice_init(list, &drop);
-
- lnet_net_unlock(cpt);
-
- list_for_each_entry_safe(msg, tmp, &drop, msg_list) {
- lnet_ni_recv(msg->msg_rxpeer->lp_ni, msg->msg_private, NULL,
- 0, 0, 0, msg->msg_hdr.payload_length);
- list_del_init(&msg->msg_list);
- lnet_finalize(NULL, msg, -ECANCELED);
- }
-
- lnet_net_lock(cpt);
-}
-
-void
-lnet_return_rx_credits_locked(struct lnet_msg *msg)
-{
- struct lnet_peer *rxpeer = msg->msg_rxpeer;
- struct lnet_msg *msg2;
-
- if (msg->msg_rtrcredit) {
- /* give back global router credits */
- struct lnet_rtrbuf *rb;
- struct lnet_rtrbufpool *rbp;
-
- /*
- * NB If a msg ever blocks for a buffer in rbp_msgs, it stays
- * there until it gets one allocated, or aborts the wait
- * itself
- */
- LASSERT(msg->msg_kiov);
-
- rb = container_of(msg->msg_kiov, struct lnet_rtrbuf, rb_kiov[0]);
- rbp = rb->rb_pool;
-
- msg->msg_kiov = NULL;
- msg->msg_rtrcredit = 0;
-
- LASSERT(rbp == lnet_msg2bufpool(msg));
-
- LASSERT((rbp->rbp_credits > 0) ==
- !list_empty(&rbp->rbp_bufs));
-
- /*
- * If routing is now turned off, we just drop this buffer and
- * don't bother trying to return credits.
- */
- if (!the_lnet.ln_routing) {
- lnet_destroy_rtrbuf(rb, rbp->rbp_npages);
- goto routing_off;
- }
-
- /*
- * It is possible that a user has lowered the desired number of
- * buffers in this pool. Make sure we never put back
- * more buffers than the stated number.
- */
- if (unlikely(rbp->rbp_credits >= rbp->rbp_req_nbuffers)) {
- /* Discard this buffer so we don't have too many. */
- lnet_destroy_rtrbuf(rb, rbp->rbp_npages);
- rbp->rbp_nbuffers--;
- } else {
- list_add(&rb->rb_list, &rbp->rbp_bufs);
- rbp->rbp_credits++;
- if (rbp->rbp_credits <= 0)
- lnet_schedule_blocked_locked(rbp);
- }
- }
-
-routing_off:
- if (msg->msg_peerrtrcredit) {
- /* give back peer router credits */
- msg->msg_peerrtrcredit = 0;
-
- LASSERT((rxpeer->lp_rtrcredits < 0) ==
- !list_empty(&rxpeer->lp_rtrq));
-
- rxpeer->lp_rtrcredits++;
- /*
- * drop all messages which are queued to be routed on that
- * peer.
- */
- if (!the_lnet.ln_routing) {
- lnet_drop_routed_msgs_locked(&rxpeer->lp_rtrq,
- msg->msg_rx_cpt);
- } else if (rxpeer->lp_rtrcredits <= 0) {
- msg2 = list_entry(rxpeer->lp_rtrq.next,
- struct lnet_msg, msg_list);
- list_del(&msg2->msg_list);
-
- (void)lnet_post_routed_recv_locked(msg2, 1);
- }
- }
- if (rxpeer) {
- msg->msg_rxpeer = NULL;
- lnet_peer_decref_locked(rxpeer);
- }
-}
-
-static int
-lnet_compare_routes(struct lnet_route *r1, struct lnet_route *r2)
-{
- struct lnet_peer *p1 = r1->lr_gateway;
- struct lnet_peer *p2 = r2->lr_gateway;
- int r1_hops = (r1->lr_hops == LNET_UNDEFINED_HOPS) ? 1 : r1->lr_hops;
- int r2_hops = (r2->lr_hops == LNET_UNDEFINED_HOPS) ? 1 : r2->lr_hops;
-
- if (r1->lr_priority < r2->lr_priority)
- return 1;
-
- if (r1->lr_priority > r2->lr_priority)
- return -ERANGE;
-
- if (r1_hops < r2_hops)
- return 1;
-
- if (r1_hops > r2_hops)
- return -ERANGE;
-
- if (p1->lp_txqnob < p2->lp_txqnob)
- return 1;
-
- if (p1->lp_txqnob > p2->lp_txqnob)
- return -ERANGE;
-
- if (p1->lp_txcredits > p2->lp_txcredits)
- return 1;
-
- if (p1->lp_txcredits < p2->lp_txcredits)
- return -ERANGE;
-
- if (r1->lr_seq - r2->lr_seq <= 0)
- return 1;
-
- return -ERANGE;
-}
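-
-/*
- * Worked example (editorial, not part of the original file): with equal
- * priorities, a 1-hop route beats a 2-hop route outright (return 1); on
- * a full tie the comparison falls through to the smaller lp_txqnob,
- * then the larger lp_txcredits, and finally the round-robin lr_seq.
- */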
-
-static struct lnet_peer *
-lnet_find_route_locked(struct lnet_ni *ni, lnet_nid_t target,
- lnet_nid_t rtr_nid)
-{
- struct lnet_remotenet *rnet;
- struct lnet_route *route;
- struct lnet_route *best_route;
- struct lnet_route *last_route;
- struct lnet_peer *lp_best;
- struct lnet_peer *lp;
- int rc;
-
- /*
-	/*
-	 * If @rtr_nid is not LNET_NID_ANY, return the gateway with
-	 * NID rtr_nid; otherwise find the best gateway available
-	 */
- rnet = lnet_find_net_locked(LNET_NIDNET(target));
- if (!rnet)
- return NULL;
-
- lp_best = NULL;
- best_route = NULL;
- last_route = NULL;
- list_for_each_entry(route, &rnet->lrn_routes, lr_list) {
- lp = route->lr_gateway;
-
- if (!lnet_is_route_alive(route))
- continue;
-
- if (ni && lp->lp_ni != ni)
- continue;
-
- if (lp->lp_nid == rtr_nid) /* it's pre-determined router */
- return lp;
-
- if (!lp_best) {
- best_route = route;
- last_route = route;
- lp_best = lp;
- continue;
- }
-
- /* no protection on below fields, but it's harmless */
- if (last_route->lr_seq - route->lr_seq < 0)
- last_route = route;
-
- rc = lnet_compare_routes(route, best_route);
- if (rc < 0)
- continue;
-
- best_route = route;
- lp_best = lp;
- }
-
-	/*
-	 * set the sequence number on the best router to the latest
-	 * sequence + 1 so we can round-robin all routers; it's racy and
-	 * inaccurate, but harmless and functional
-	 */
- if (best_route)
- best_route->lr_seq = last_route->lr_seq + 1;
- return lp_best;
-}
-
-int
-lnet_send(lnet_nid_t src_nid, struct lnet_msg *msg, lnet_nid_t rtr_nid)
-{
- lnet_nid_t dst_nid = msg->msg_target.nid;
- struct lnet_ni *src_ni;
- struct lnet_ni *local_ni;
- struct lnet_peer *lp;
- int cpt;
- int cpt2;
- int rc;
-
- /*
- * NB: rtr_nid is set to LNET_NID_ANY for all current use-cases,
- * but we might want to use pre-determined router for ACK/REPLY
- * in the future
- */
- /* NB: ni == interface pre-determined (ACK/REPLY) */
- LASSERT(!msg->msg_txpeer);
- LASSERT(!msg->msg_sending);
- LASSERT(!msg->msg_target_is_router);
- LASSERT(!msg->msg_receiving);
-
- msg->msg_sending = 1;
-
- LASSERT(!msg->msg_tx_committed);
- cpt = lnet_cpt_of_nid(rtr_nid == LNET_NID_ANY ? dst_nid : rtr_nid);
- again:
- lnet_net_lock(cpt);
-
- if (the_lnet.ln_shutdown) {
- lnet_net_unlock(cpt);
- return -ESHUTDOWN;
- }
-
- if (src_nid == LNET_NID_ANY) {
- src_ni = NULL;
- } else {
- src_ni = lnet_nid2ni_locked(src_nid, cpt);
- if (!src_ni) {
- lnet_net_unlock(cpt);
- LCONSOLE_WARN("Can't send to %s: src %s is not a local nid\n",
- libcfs_nid2str(dst_nid),
- libcfs_nid2str(src_nid));
- return -EINVAL;
- }
- LASSERT(!msg->msg_routing);
- }
-
- /* Is this for someone on a local network? */
- local_ni = lnet_net2ni_locked(LNET_NIDNET(dst_nid), cpt);
-
- if (local_ni) {
- if (!src_ni) {
- src_ni = local_ni;
- src_nid = src_ni->ni_nid;
- } else if (src_ni == local_ni) {
- lnet_ni_decref_locked(local_ni, cpt);
- } else {
- lnet_ni_decref_locked(local_ni, cpt);
- lnet_ni_decref_locked(src_ni, cpt);
- lnet_net_unlock(cpt);
-			LCONSOLE_WARN("No route to %s from %s\n",
- libcfs_nid2str(dst_nid),
- libcfs_nid2str(src_nid));
- return -EINVAL;
- }
-
- LASSERT(src_nid != LNET_NID_ANY);
- lnet_msg_commit(msg, cpt);
-
- if (!msg->msg_routing)
- msg->msg_hdr.src_nid = cpu_to_le64(src_nid);
-
- if (src_ni == the_lnet.ln_loni) {
- /* No send credit hassles with LOLND */
- lnet_net_unlock(cpt);
- lnet_ni_send(src_ni, msg);
-
- lnet_net_lock(cpt);
- lnet_ni_decref_locked(src_ni, cpt);
- lnet_net_unlock(cpt);
- return 0;
- }
-
- rc = lnet_nid2peer_locked(&lp, dst_nid, cpt);
- /* lp has ref on src_ni; lose mine */
- lnet_ni_decref_locked(src_ni, cpt);
- if (rc) {
- lnet_net_unlock(cpt);
- LCONSOLE_WARN("Error %d finding peer %s\n", rc,
- libcfs_nid2str(dst_nid));
- /* ENOMEM or shutting down */
- return rc;
- }
- LASSERT(lp->lp_ni == src_ni);
- } else {
- /* sending to a remote network */
- lp = lnet_find_route_locked(src_ni, dst_nid, rtr_nid);
- if (!lp) {
- if (src_ni)
- lnet_ni_decref_locked(src_ni, cpt);
- lnet_net_unlock(cpt);
-
- LCONSOLE_WARN("No route to %s via %s (all routers down)\n",
- libcfs_id2str(msg->msg_target),
- libcfs_nid2str(src_nid));
- return -EHOSTUNREACH;
- }
-
-		/*
-		 * rtr_nid is LNET_NID_ANY or the NID of a pre-determined
-		 * router.  It's possible that rtr_nid isn't LNET_NID_ANY
-		 * and yet lp isn't the pre-determined router: this can
-		 * happen if the routing table changed while the lock was
-		 * released
-		 */
- if (rtr_nid != lp->lp_nid) {
- cpt2 = lnet_cpt_of_nid_locked(lp->lp_nid);
- if (cpt2 != cpt) {
- if (src_ni)
- lnet_ni_decref_locked(src_ni, cpt);
- lnet_net_unlock(cpt);
-
- rtr_nid = lp->lp_nid;
- cpt = cpt2;
- goto again;
- }
- }
-
- CDEBUG(D_NET, "Best route to %s via %s for %s %d\n",
- libcfs_nid2str(dst_nid), libcfs_nid2str(lp->lp_nid),
- lnet_msgtyp2str(msg->msg_type), msg->msg_len);
-
- if (!src_ni) {
- src_ni = lp->lp_ni;
- src_nid = src_ni->ni_nid;
- } else {
- LASSERT(src_ni == lp->lp_ni);
- lnet_ni_decref_locked(src_ni, cpt);
- }
-
- lnet_peer_addref_locked(lp);
-
- LASSERT(src_nid != LNET_NID_ANY);
- lnet_msg_commit(msg, cpt);
-
- if (!msg->msg_routing) {
- /* I'm the source and now I know which NI to send on */
- msg->msg_hdr.src_nid = cpu_to_le64(src_nid);
- }
-
- msg->msg_target_is_router = 1;
- msg->msg_target.nid = lp->lp_nid;
- msg->msg_target.pid = LNET_PID_LUSTRE;
- }
-
- /* 'lp' is our best choice of peer */
-
- LASSERT(!msg->msg_peertxcredit);
- LASSERT(!msg->msg_txcredit);
- LASSERT(!msg->msg_txpeer);
-
- msg->msg_txpeer = lp; /* msg takes my ref on lp */
-
- rc = lnet_post_send_locked(msg, 0);
- lnet_net_unlock(cpt);
-
- if (rc < 0)
- return rc;
-
- if (rc == LNET_CREDIT_OK)
- lnet_ni_send(src_ni, msg);
-
- return 0; /* rc == LNET_CREDIT_OK or LNET_CREDIT_WAIT */
-}
-
-void
-lnet_drop_message(struct lnet_ni *ni, int cpt, void *private, unsigned int nob)
-{
- lnet_net_lock(cpt);
- the_lnet.ln_counters[cpt]->drop_count++;
- the_lnet.ln_counters[cpt]->drop_length += nob;
- lnet_net_unlock(cpt);
-
- lnet_ni_recv(ni, private, NULL, 0, 0, 0, nob);
-}
-
-static void
-lnet_recv_put(struct lnet_ni *ni, struct lnet_msg *msg)
-{
- struct lnet_hdr *hdr = &msg->msg_hdr;
-
- if (msg->msg_wanted)
- lnet_setpayloadbuffer(msg);
-
- lnet_build_msg_event(msg, LNET_EVENT_PUT);
-
- /*
- * Must I ACK? If so I'll grab the ack_wmd out of the header and put
- * it back into the ACK during lnet_finalize()
- */
- msg->msg_ack = !lnet_is_wire_handle_none(&hdr->msg.put.ack_wmd) &&
- !(msg->msg_md->md_options & LNET_MD_ACK_DISABLE);
-
- lnet_ni_recv(ni, msg->msg_private, msg, msg->msg_rx_delayed,
- msg->msg_offset, msg->msg_wanted, hdr->payload_length);
-}
-
-static int
-lnet_parse_put(struct lnet_ni *ni, struct lnet_msg *msg)
-{
- struct lnet_hdr *hdr = &msg->msg_hdr;
- struct lnet_match_info info;
- bool ready_delay;
- int rc;
-
- /* Convert put fields to host byte order */
- le64_to_cpus(&hdr->msg.put.match_bits);
- le32_to_cpus(&hdr->msg.put.ptl_index);
- le32_to_cpus(&hdr->msg.put.offset);
-
- info.mi_id.nid = hdr->src_nid;
- info.mi_id.pid = hdr->src_pid;
- info.mi_opc = LNET_MD_OP_PUT;
- info.mi_portal = hdr->msg.put.ptl_index;
- info.mi_rlength = hdr->payload_length;
- info.mi_roffset = hdr->msg.put.offset;
- info.mi_mbits = hdr->msg.put.match_bits;
-
- msg->msg_rx_ready_delay = !ni->ni_lnd->lnd_eager_recv;
- ready_delay = msg->msg_rx_ready_delay;
-
- again:
- rc = lnet_ptl_match_md(&info, msg);
- switch (rc) {
- default:
- LBUG();
-
- case LNET_MATCHMD_OK:
- lnet_recv_put(ni, msg);
- return 0;
-
- case LNET_MATCHMD_NONE:
-		/*
-		 * no eager_recv, or it has already been called; the message
-		 * should have been attached to the delayed list
-		 */
- if (ready_delay)
- return 0;
-
- rc = lnet_ni_eager_recv(ni, msg);
- if (!rc) {
- ready_delay = true;
- goto again;
- }
- /* fall through */
-
- case LNET_MATCHMD_DROP:
- CNETERR("Dropping PUT from %s portal %d match %llu offset %d length %d: %d\n",
- libcfs_id2str(info.mi_id), info.mi_portal,
- info.mi_mbits, info.mi_roffset, info.mi_rlength, rc);
-
- return -ENOENT; /* -ve: OK but no match */
- }
-}
-
-static int
-lnet_parse_get(struct lnet_ni *ni, struct lnet_msg *msg, int rdma_get)
-{
- struct lnet_match_info info;
- struct lnet_hdr *hdr = &msg->msg_hdr;
- struct lnet_handle_wire reply_wmd;
- int rc;
-
- /* Convert get fields to host byte order */
- le64_to_cpus(&hdr->msg.get.match_bits);
- le32_to_cpus(&hdr->msg.get.ptl_index);
- le32_to_cpus(&hdr->msg.get.sink_length);
- le32_to_cpus(&hdr->msg.get.src_offset);
-
- info.mi_id.nid = hdr->src_nid;
- info.mi_id.pid = hdr->src_pid;
- info.mi_opc = LNET_MD_OP_GET;
- info.mi_portal = hdr->msg.get.ptl_index;
- info.mi_rlength = hdr->msg.get.sink_length;
- info.mi_roffset = hdr->msg.get.src_offset;
- info.mi_mbits = hdr->msg.get.match_bits;
-
- rc = lnet_ptl_match_md(&info, msg);
- if (rc == LNET_MATCHMD_DROP) {
- CNETERR("Dropping GET from %s portal %d match %llu offset %d length %d\n",
- libcfs_id2str(info.mi_id), info.mi_portal,
- info.mi_mbits, info.mi_roffset, info.mi_rlength);
- return -ENOENT; /* -ve: OK but no match */
- }
-
- LASSERT(rc == LNET_MATCHMD_OK);
-
- lnet_build_msg_event(msg, LNET_EVENT_GET);
-
- reply_wmd = hdr->msg.get.return_wmd;
-
- lnet_prep_send(msg, LNET_MSG_REPLY, info.mi_id,
- msg->msg_offset, msg->msg_wanted);
-
- msg->msg_hdr.msg.reply.dst_wmd = reply_wmd;
-
- if (rdma_get) {
-		/* The LND completes the REPLY from its recv procedure */
- lnet_ni_recv(ni, msg->msg_private, msg, 0,
- msg->msg_offset, msg->msg_len, msg->msg_len);
- return 0;
- }
-
- lnet_ni_recv(ni, msg->msg_private, NULL, 0, 0, 0, 0);
- msg->msg_receiving = 0;
-
- rc = lnet_send(ni->ni_nid, msg, LNET_NID_ANY);
- if (rc < 0) {
- /* didn't get as far as lnet_ni_send() */
- CERROR("%s: Unable to send REPLY for GET from %s: %d\n",
- libcfs_nid2str(ni->ni_nid),
- libcfs_id2str(info.mi_id), rc);
-
- lnet_finalize(ni, msg, rc);
- }
-
- return 0;
-}
-
-static int
-lnet_parse_reply(struct lnet_ni *ni, struct lnet_msg *msg)
-{
- void *private = msg->msg_private;
- struct lnet_hdr *hdr = &msg->msg_hdr;
- struct lnet_process_id src = {0};
- struct lnet_libmd *md;
- int rlength;
- int mlength;
- int cpt;
-
- cpt = lnet_cpt_of_cookie(hdr->msg.reply.dst_wmd.wh_object_cookie);
- lnet_res_lock(cpt);
-
- src.nid = hdr->src_nid;
- src.pid = hdr->src_pid;
-
- /* NB handles only looked up by creator (no flips) */
- md = lnet_wire_handle2md(&hdr->msg.reply.dst_wmd);
- if (!md || !md->md_threshold || md->md_me) {
- CNETERR("%s: Dropping REPLY from %s for %s MD %#llx.%#llx\n",
- libcfs_nid2str(ni->ni_nid), libcfs_id2str(src),
- !md ? "invalid" : "inactive",
- hdr->msg.reply.dst_wmd.wh_interface_cookie,
- hdr->msg.reply.dst_wmd.wh_object_cookie);
- if (md && md->md_me)
- CERROR("REPLY MD also attached to portal %d\n",
- md->md_me->me_portal);
-
- lnet_res_unlock(cpt);
- return -ENOENT; /* -ve: OK but no match */
- }
-
- LASSERT(!md->md_offset);
-
- rlength = hdr->payload_length;
- mlength = min_t(uint, rlength, md->md_length);
-
- if (mlength < rlength &&
- !(md->md_options & LNET_MD_TRUNCATE)) {
- CNETERR("%s: Dropping REPLY from %s length %d for MD %#llx would overflow (%d)\n",
- libcfs_nid2str(ni->ni_nid), libcfs_id2str(src),
- rlength, hdr->msg.reply.dst_wmd.wh_object_cookie,
- mlength);
- lnet_res_unlock(cpt);
- return -ENOENT; /* -ve: OK but no match */
- }
-
- CDEBUG(D_NET, "%s: Reply from %s of length %d/%d into md %#llx\n",
- libcfs_nid2str(ni->ni_nid), libcfs_id2str(src),
- mlength, rlength, hdr->msg.reply.dst_wmd.wh_object_cookie);
-
- lnet_msg_attach_md(msg, md, 0, mlength);
-
- if (mlength)
- lnet_setpayloadbuffer(msg);
-
- lnet_res_unlock(cpt);
-
- lnet_build_msg_event(msg, LNET_EVENT_REPLY);
-
- lnet_ni_recv(ni, private, msg, 0, 0, mlength, rlength);
- return 0;
-}
-
-static int
-lnet_parse_ack(struct lnet_ni *ni, struct lnet_msg *msg)
-{
- struct lnet_hdr *hdr = &msg->msg_hdr;
- struct lnet_process_id src = {0};
- struct lnet_libmd *md;
- int cpt;
-
- src.nid = hdr->src_nid;
- src.pid = hdr->src_pid;
-
- /* Convert ack fields to host byte order */
- le64_to_cpus(&hdr->msg.ack.match_bits);
- le32_to_cpus(&hdr->msg.ack.mlength);
-
- cpt = lnet_cpt_of_cookie(hdr->msg.ack.dst_wmd.wh_object_cookie);
- lnet_res_lock(cpt);
-
- /* NB handles only looked up by creator (no flips) */
- md = lnet_wire_handle2md(&hdr->msg.ack.dst_wmd);
- if (!md || !md->md_threshold || md->md_me) {
- /* Don't moan; this is expected */
- CDEBUG(D_NET,
- "%s: Dropping ACK from %s to %s MD %#llx.%#llx\n",
- libcfs_nid2str(ni->ni_nid), libcfs_id2str(src),
- !md ? "invalid" : "inactive",
- hdr->msg.ack.dst_wmd.wh_interface_cookie,
- hdr->msg.ack.dst_wmd.wh_object_cookie);
- if (md && md->md_me)
- CERROR("Source MD also attached to portal %d\n",
- md->md_me->me_portal);
-
- lnet_res_unlock(cpt);
- return -ENOENT; /* -ve! */
- }
-
- CDEBUG(D_NET, "%s: ACK from %s into md %#llx\n",
- libcfs_nid2str(ni->ni_nid), libcfs_id2str(src),
- hdr->msg.ack.dst_wmd.wh_object_cookie);
-
- lnet_msg_attach_md(msg, md, 0, 0);
-
- lnet_res_unlock(cpt);
-
- lnet_build_msg_event(msg, LNET_EVENT_ACK);
-
- lnet_ni_recv(ni, msg->msg_private, msg, 0, 0, 0, msg->msg_len);
- return 0;
-}
-
-/**
- * \retval LNET_CREDIT_OK If \a msg is forwarded
- * \retval LNET_CREDIT_WAIT If \a msg is blocked for lack of a router buffer
- * \retval -ve error code
- */
-int
-lnet_parse_forward_locked(struct lnet_ni *ni, struct lnet_msg *msg)
-{
- int rc = 0;
-
- if (!the_lnet.ln_routing)
- return -ECANCELED;
-
- if (msg->msg_rxpeer->lp_rtrcredits <= 0 ||
- lnet_msg2bufpool(msg)->rbp_credits <= 0) {
- if (!ni->ni_lnd->lnd_eager_recv) {
- msg->msg_rx_ready_delay = 1;
- } else {
- lnet_net_unlock(msg->msg_rx_cpt);
- rc = lnet_ni_eager_recv(ni, msg);
- lnet_net_lock(msg->msg_rx_cpt);
- }
- }
-
- if (!rc)
- rc = lnet_post_routed_recv_locked(msg, 0);
- return rc;
-}
-
-int
-lnet_parse_local(struct lnet_ni *ni, struct lnet_msg *msg)
-{
- int rc;
-
- switch (msg->msg_type) {
- case LNET_MSG_ACK:
- rc = lnet_parse_ack(ni, msg);
- break;
- case LNET_MSG_PUT:
- rc = lnet_parse_put(ni, msg);
- break;
- case LNET_MSG_GET:
- rc = lnet_parse_get(ni, msg, msg->msg_rdma_get);
- break;
- case LNET_MSG_REPLY:
- rc = lnet_parse_reply(ni, msg);
- break;
- default: /* prevent an unused label if !kernel */
- LASSERT(0);
- return -EPROTO;
- }
-
- LASSERT(!rc || rc == -ENOENT);
- return rc;
-}
-
-char *
-lnet_msgtyp2str(int type)
-{
- switch (type) {
- case LNET_MSG_ACK:
- return "ACK";
- case LNET_MSG_PUT:
- return "PUT";
- case LNET_MSG_GET:
- return "GET";
- case LNET_MSG_REPLY:
- return "REPLY";
- case LNET_MSG_HELLO:
- return "HELLO";
- default:
- return "<UNKNOWN>";
- }
-}
-
-void
-lnet_print_hdr(struct lnet_hdr *hdr)
-{
- struct lnet_process_id src = {0};
- struct lnet_process_id dst = {0};
- char *type_str = lnet_msgtyp2str(hdr->type);
-
- src.nid = hdr->src_nid;
- src.pid = hdr->src_pid;
-
- dst.nid = hdr->dest_nid;
- dst.pid = hdr->dest_pid;
-
- CWARN("P3 Header at %p of type %s\n", hdr, type_str);
- CWARN(" From %s\n", libcfs_id2str(src));
- CWARN(" To %s\n", libcfs_id2str(dst));
-
- switch (hdr->type) {
- default:
- break;
-
- case LNET_MSG_PUT:
- CWARN(" Ptl index %d, ack md %#llx.%#llx, match bits %llu\n",
- hdr->msg.put.ptl_index,
- hdr->msg.put.ack_wmd.wh_interface_cookie,
- hdr->msg.put.ack_wmd.wh_object_cookie,
- hdr->msg.put.match_bits);
- CWARN(" Length %d, offset %d, hdr data %#llx\n",
- hdr->payload_length, hdr->msg.put.offset,
- hdr->msg.put.hdr_data);
- break;
-
- case LNET_MSG_GET:
- CWARN(" Ptl index %d, return md %#llx.%#llx, match bits %llu\n",
- hdr->msg.get.ptl_index,
- hdr->msg.get.return_wmd.wh_interface_cookie,
- hdr->msg.get.return_wmd.wh_object_cookie,
- hdr->msg.get.match_bits);
- CWARN(" Length %d, src offset %d\n",
- hdr->msg.get.sink_length,
- hdr->msg.get.src_offset);
- break;
-
- case LNET_MSG_ACK:
- CWARN(" dst md %#llx.%#llx, manipulated length %d\n",
- hdr->msg.ack.dst_wmd.wh_interface_cookie,
- hdr->msg.ack.dst_wmd.wh_object_cookie,
- hdr->msg.ack.mlength);
- break;
-
- case LNET_MSG_REPLY:
- CWARN(" dst md %#llx.%#llx, length %d\n",
- hdr->msg.reply.dst_wmd.wh_interface_cookie,
- hdr->msg.reply.dst_wmd.wh_object_cookie,
- hdr->payload_length);
- }
-}
-
-int
-lnet_parse(struct lnet_ni *ni, struct lnet_hdr *hdr, lnet_nid_t from_nid,
- void *private, int rdma_req)
-{
- int rc = 0;
- int cpt;
- int for_me;
- struct lnet_msg *msg;
- lnet_pid_t dest_pid;
- lnet_nid_t dest_nid;
- lnet_nid_t src_nid;
- __u32 payload_length;
- __u32 type;
-
- LASSERT(!in_interrupt());
-
- type = le32_to_cpu(hdr->type);
- src_nid = le64_to_cpu(hdr->src_nid);
- dest_nid = le64_to_cpu(hdr->dest_nid);
- dest_pid = le32_to_cpu(hdr->dest_pid);
- payload_length = le32_to_cpu(hdr->payload_length);
-
- for_me = (ni->ni_nid == dest_nid);
- cpt = lnet_cpt_of_nid(from_nid);
-
- switch (type) {
- case LNET_MSG_ACK:
- case LNET_MSG_GET:
- if (payload_length > 0) {
- CERROR("%s, src %s: bad %s payload %d (0 expected)\n",
- libcfs_nid2str(from_nid),
- libcfs_nid2str(src_nid),
- lnet_msgtyp2str(type), payload_length);
- return -EPROTO;
- }
- break;
-
- case LNET_MSG_PUT:
- case LNET_MSG_REPLY:
- if (payload_length >
- (__u32)(for_me ? LNET_MAX_PAYLOAD : LNET_MTU)) {
- CERROR("%s, src %s: bad %s payload %d (%d max expected)\n",
- libcfs_nid2str(from_nid),
- libcfs_nid2str(src_nid),
- lnet_msgtyp2str(type),
- payload_length,
- for_me ? LNET_MAX_PAYLOAD : LNET_MTU);
- return -EPROTO;
- }
- break;
-
- default:
- CERROR("%s, src %s: Bad message type 0x%x\n",
- libcfs_nid2str(from_nid),
- libcfs_nid2str(src_nid), type);
- return -EPROTO;
- }
-
- if (the_lnet.ln_routing &&
- ni->ni_last_alive != ktime_get_real_seconds()) {
- /* NB: so far this is the only place NI status is set to "up" */
- lnet_ni_lock(ni);
- ni->ni_last_alive = ktime_get_real_seconds();
- if (ni->ni_status &&
- ni->ni_status->ns_status == LNET_NI_STATUS_DOWN)
- ni->ni_status->ns_status = LNET_NI_STATUS_UP;
- lnet_ni_unlock(ni);
- }
-
- /*
- * Regard a bad destination NID as a protocol error. Senders should
- * know what they're doing; if they don't they're misconfigured, buggy
- * or malicious so we chop them off at the knees :)
- */
- if (!for_me) {
- if (LNET_NIDNET(dest_nid) == LNET_NIDNET(ni->ni_nid)) {
- /* should have gone direct */
- CERROR("%s, src %s: Bad dest nid %s (should have been sent direct)\n",
- libcfs_nid2str(from_nid),
- libcfs_nid2str(src_nid),
- libcfs_nid2str(dest_nid));
- return -EPROTO;
- }
-
- if (lnet_islocalnid(dest_nid)) {
- /*
- * dest is another local NI; sender should have used
- * this node's NID on its own network
- */
- CERROR("%s, src %s: Bad dest nid %s (it's my nid but on a different network)\n",
- libcfs_nid2str(from_nid),
- libcfs_nid2str(src_nid),
- libcfs_nid2str(dest_nid));
- return -EPROTO;
- }
-
- if (rdma_req && type == LNET_MSG_GET) {
- CERROR("%s, src %s: Bad optimized GET for %s (final destination must be me)\n",
- libcfs_nid2str(from_nid),
- libcfs_nid2str(src_nid),
- libcfs_nid2str(dest_nid));
- return -EPROTO;
- }
-
- if (!the_lnet.ln_routing) {
- CERROR("%s, src %s: Dropping message for %s (routing not enabled)\n",
- libcfs_nid2str(from_nid),
- libcfs_nid2str(src_nid),
- libcfs_nid2str(dest_nid));
- goto drop;
- }
- }
-
- /*
- * Message looks OK; we're not going to return an error, so we MUST
- * call back lnd_recv() come what may...
- */
- if (!list_empty(&the_lnet.ln_test_peers) && /* normally we don't */
- fail_peer(src_nid, 0)) { /* shall we now? */
- CERROR("%s, src %s: Dropping %s to simulate failure\n",
- libcfs_nid2str(from_nid), libcfs_nid2str(src_nid),
- lnet_msgtyp2str(type));
- goto drop;
- }
-
- if (!list_empty(&the_lnet.ln_drop_rules) &&
- lnet_drop_rule_match(hdr)) {
- CDEBUG(D_NET, "%s, src %s, dst %s: Dropping %s to simulate silent message loss\n",
- libcfs_nid2str(from_nid), libcfs_nid2str(src_nid),
- libcfs_nid2str(dest_nid), lnet_msgtyp2str(type));
- goto drop;
- }
-
- msg = kzalloc(sizeof(*msg), GFP_NOFS);
- if (!msg) {
- CERROR("%s, src %s: Dropping %s (out of memory)\n",
- libcfs_nid2str(from_nid), libcfs_nid2str(src_nid),
- lnet_msgtyp2str(type));
- goto drop;
- }
-
- /* msg zeroed by kzalloc()
- * i.e. flags all clear, pointers NULL etc
- */
- msg->msg_type = type;
- msg->msg_private = private;
- msg->msg_receiving = 1;
- msg->msg_rdma_get = rdma_req;
- msg->msg_wanted = payload_length;
- msg->msg_len = payload_length;
- msg->msg_offset = 0;
- msg->msg_hdr = *hdr;
- /* for building message event */
- msg->msg_from = from_nid;
- if (!for_me) {
- msg->msg_target.pid = dest_pid;
- msg->msg_target.nid = dest_nid;
- msg->msg_routing = 1;
-
- } else {
- /* convert common msg->hdr fields to host byteorder */
- msg->msg_hdr.type = type;
- msg->msg_hdr.src_nid = src_nid;
- le32_to_cpus(&msg->msg_hdr.src_pid);
- msg->msg_hdr.dest_nid = dest_nid;
- msg->msg_hdr.dest_pid = dest_pid;
- msg->msg_hdr.payload_length = payload_length;
- }
-
- lnet_net_lock(cpt);
- rc = lnet_nid2peer_locked(&msg->msg_rxpeer, from_nid, cpt);
- if (rc) {
- lnet_net_unlock(cpt);
- CERROR("%s, src %s: Dropping %s (error %d looking up sender)\n",
- libcfs_nid2str(from_nid), libcfs_nid2str(src_nid),
- lnet_msgtyp2str(type), rc);
- kfree(msg);
- if (rc == -ESHUTDOWN)
- /* We are shutting down. Don't do anything more */
- return 0;
- goto drop;
- }
-
- if (lnet_isrouter(msg->msg_rxpeer)) {
- lnet_peer_set_alive(msg->msg_rxpeer);
- if (avoid_asym_router_failure &&
- LNET_NIDNET(src_nid) != LNET_NIDNET(from_nid)) {
- /* received a remote message from a router; update
- * the remote NI status on this router.
- * NB: multi-hop routed messages are ignored.
- */
- lnet_router_ni_update_locked(msg->msg_rxpeer,
- LNET_NIDNET(src_nid));
- }
- }
-
- lnet_msg_commit(msg, cpt);
-
- /* message delay simulation */
- if (unlikely(!list_empty(&the_lnet.ln_delay_rules) &&
- lnet_delay_rule_match_locked(hdr, msg))) {
- lnet_net_unlock(cpt);
- return 0;
- }
-
- if (!for_me) {
- rc = lnet_parse_forward_locked(ni, msg);
- lnet_net_unlock(cpt);
-
- if (rc < 0)
- goto free_drop;
-
- if (rc == LNET_CREDIT_OK) {
- lnet_ni_recv(ni, msg->msg_private, msg, 0,
- 0, payload_length, payload_length);
- }
- return 0;
- }
-
- lnet_net_unlock(cpt);
-
- rc = lnet_parse_local(ni, msg);
- if (rc)
- goto free_drop;
- return 0;
-
- free_drop:
- LASSERT(!msg->msg_md);
- lnet_finalize(ni, msg, rc);
-
- drop:
- lnet_drop_message(ni, cpt, private, payload_length);
- return 0;
-}
-EXPORT_SYMBOL(lnet_parse);
-
-void
-lnet_drop_delayed_msg_list(struct list_head *head, char *reason)
-{
- while (!list_empty(head)) {
- struct lnet_process_id id = {0};
- struct lnet_msg *msg;
-
- msg = list_entry(head->next, struct lnet_msg, msg_list);
- list_del(&msg->msg_list);
-
- id.nid = msg->msg_hdr.src_nid;
- id.pid = msg->msg_hdr.src_pid;
-
- LASSERT(!msg->msg_md);
- LASSERT(msg->msg_rx_delayed);
- LASSERT(msg->msg_rxpeer);
- LASSERT(msg->msg_hdr.type == LNET_MSG_PUT);
-
- CWARN("Dropping delayed PUT from %s portal %d match %llu offset %d length %d: %s\n",
- libcfs_id2str(id),
- msg->msg_hdr.msg.put.ptl_index,
- msg->msg_hdr.msg.put.match_bits,
- msg->msg_hdr.msg.put.offset,
- msg->msg_hdr.payload_length, reason);
-
- /*
- * NB I can't drop msg's ref on msg_rxpeer until after I've
- * called lnet_drop_message(), so I just hang onto msg as well
- * until that's done
- */
- lnet_drop_message(msg->msg_rxpeer->lp_ni,
- msg->msg_rxpeer->lp_cpt,
- msg->msg_private, msg->msg_len);
- /*
- * NB: the message will not generate an event because it has no
- * attached MD, but we still pass an error code so
- * lnet_msg_decommit() can skip counter updates and other checks.
- */
- lnet_finalize(msg->msg_rxpeer->lp_ni, msg, -ENOENT);
- }
-}
-
-void
-lnet_recv_delayed_msg_list(struct list_head *head)
-{
- while (!list_empty(head)) {
- struct lnet_msg *msg;
- struct lnet_process_id id;
-
- msg = list_entry(head->next, struct lnet_msg, msg_list);
- list_del(&msg->msg_list);
-
- /*
- * md won't disappear under me, since each msg
- * holds a ref on it
- */
- id.nid = msg->msg_hdr.src_nid;
- id.pid = msg->msg_hdr.src_pid;
-
- LASSERT(msg->msg_rx_delayed);
- LASSERT(msg->msg_md);
- LASSERT(msg->msg_rxpeer);
- LASSERT(msg->msg_hdr.type == LNET_MSG_PUT);
-
- CDEBUG(D_NET, "Resuming delayed PUT from %s portal %d match %llu offset %d length %d.\n",
- libcfs_id2str(id), msg->msg_hdr.msg.put.ptl_index,
- msg->msg_hdr.msg.put.match_bits,
- msg->msg_hdr.msg.put.offset,
- msg->msg_hdr.payload_length);
-
- lnet_recv_put(msg->msg_rxpeer->lp_ni, msg);
- }
-}
-
-/**
- * Initiate an asynchronous PUT operation.
- *
- * There are several events associated with a PUT: completion of the send on
- * the initiator node (LNET_EVENT_SEND), and when the send completes
- * successfully, the receipt of an acknowledgment (LNET_EVENT_ACK) indicating
- * that the operation was accepted by the target. The event LNET_EVENT_PUT is
- * used at the target node to indicate the completion of incoming data
- * delivery.
- *
- * The local events will be logged in the EQ associated with the MD pointed to
- * by \a mdh handle. Using a MD without an associated EQ results in these
- * events being discarded. In this case, the caller must have another
- * mechanism (e.g., a higher level protocol) for determining when it is safe
- * to modify the memory region associated with the MD.
- *
- * Note that LNet does not guarantee the order of LNET_EVENT_SEND and
- * LNET_EVENT_ACK, though intuitively ACK should happen after SEND.
- *
- * \param self Indicates the NID of a local interface through which to send
- * the PUT request. Use LNET_NID_ANY to let LNet choose one by itself.
- * \param mdh A handle for the MD that describes the memory to be sent. The MD
- * must be "free floating" (See LNetMDBind()).
- * \param ack Controls whether an acknowledgment is requested.
- * Acknowledgments are only sent when they are requested by the initiating
- * process and the target MD enables them.
- * \param target A process identifier for the target process.
- * \param portal The index in the \a target's portal table.
- * \param match_bits The match bits to use for MD selection at the target
- * process.
- * \param offset The offset into the target MD (only used when the target
- * MD has the LNET_MD_MANAGE_REMOTE option set).
- * \param hdr_data 64 bits of user data that can be included in the message
- * header. This data is written to an event queue entry at the target if an
- * EQ is present on the matching MD.
- *
- * \retval 0 Success, and only in this case events will be generated
- * and logged to EQ (if it exists).
- * \retval -EIO Simulated failure.
- * \retval -ENOMEM Memory allocation failure.
- * \retval -ENOENT Invalid MD object.
- *
- * \see lnet_event::hdr_data and lnet_event_kind.
- */
-int
-LNetPut(lnet_nid_t self, struct lnet_handle_md mdh, enum lnet_ack_req ack,
- struct lnet_process_id target, unsigned int portal,
- __u64 match_bits, unsigned int offset,
- __u64 hdr_data)
-{
- struct lnet_msg *msg;
- struct lnet_libmd *md;
- int cpt;
- int rc;
-
- LASSERT(the_lnet.ln_refcount > 0);
-
- if (!list_empty(&the_lnet.ln_test_peers) && /* normally we don't */
- fail_peer(target.nid, 1)) { /* shall we now? */
- CERROR("Dropping PUT to %s: simulated failure\n",
- libcfs_id2str(target));
- return -EIO;
- }
-
- msg = kzalloc(sizeof(*msg), GFP_NOFS);
- if (!msg) {
- CERROR("Dropping PUT to %s: ENOMEM on struct lnet_msg\n",
- libcfs_id2str(target));
- return -ENOMEM;
- }
- msg->msg_vmflush = !!memory_pressure_get();
-
- cpt = lnet_cpt_of_cookie(mdh.cookie);
- lnet_res_lock(cpt);
-
- md = lnet_handle2md(&mdh);
- if (!md || !md->md_threshold || md->md_me) {
- CERROR("Dropping PUT (%llu:%d:%s): MD (%d) invalid\n",
- match_bits, portal, libcfs_id2str(target),
- !md ? -1 : md->md_threshold);
- if (md && md->md_me)
- CERROR("Source MD also attached to portal %d\n",
- md->md_me->me_portal);
- lnet_res_unlock(cpt);
-
- kfree(msg);
- return -ENOENT;
- }
-
- CDEBUG(D_NET, "%s -> %s\n", __func__, libcfs_id2str(target));
-
- lnet_msg_attach_md(msg, md, 0, 0);
-
- lnet_prep_send(msg, LNET_MSG_PUT, target, 0, md->md_length);
-
- msg->msg_hdr.msg.put.match_bits = cpu_to_le64(match_bits);
- msg->msg_hdr.msg.put.ptl_index = cpu_to_le32(portal);
- msg->msg_hdr.msg.put.offset = cpu_to_le32(offset);
- msg->msg_hdr.msg.put.hdr_data = hdr_data;
-
- /* NB handles only looked up by creator (no flips) */
- if (ack == LNET_ACK_REQ) {
- msg->msg_hdr.msg.put.ack_wmd.wh_interface_cookie =
- the_lnet.ln_interface_cookie;
- msg->msg_hdr.msg.put.ack_wmd.wh_object_cookie =
- md->md_lh.lh_cookie;
- } else {
- msg->msg_hdr.msg.put.ack_wmd.wh_interface_cookie =
- LNET_WIRE_HANDLE_COOKIE_NONE;
- msg->msg_hdr.msg.put.ack_wmd.wh_object_cookie =
- LNET_WIRE_HANDLE_COOKIE_NONE;
- }
-
- lnet_res_unlock(cpt);
-
- lnet_build_msg_event(msg, LNET_EVENT_SEND);
-
- rc = lnet_send(self, msg, LNET_NID_ANY);
- if (rc) {
- CNETERR("Error sending PUT to %s: %d\n",
- libcfs_id2str(target), rc);
- lnet_finalize(NULL, msg, rc);
- }
-
- /* completion will be signalled by an event */
- return 0;
-}
-EXPORT_SYMBOL(LNetPut);
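-
-/*
- * Illustrative only (not part of the original file): a minimal sketch of
- * issuing an acknowledged PUT through any local NI. It assumes 'mdh' is a
- * free-floating MD handle from LNetMDBind(); EXAMPLE_PORTAL and
- * EXAMPLE_MATCH_BITS are hypothetical placeholders. A 0 return only means
- * the send was initiated; completion is signalled by LNET_EVENT_SEND
- * (and LNET_EVENT_ACK) on the MD's EQ.
- */
-static int example_put(struct lnet_handle_md mdh,
- struct lnet_process_id peer)
-{
- /* hdr_data (last argument) is echoed in the target's PUT event */
- return LNetPut(LNET_NID_ANY, mdh, LNET_ACK_REQ, peer,
- EXAMPLE_PORTAL, EXAMPLE_MATCH_BITS, 0, 0);
-}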
-
-struct lnet_msg *
-lnet_create_reply_msg(struct lnet_ni *ni, struct lnet_msg *getmsg)
-{
- /*
- * The LND can DMA direct to the GET md (i.e. no REPLY msg). This
- * returns a msg for the LND to pass to lnet_finalize() when the sink
- * data has been received.
- *
- * CAVEAT EMPTOR: 'getmsg' is the original GET, which is freed when
- * lnet_finalize() is called on it, so the LND must call this first
- */
- struct lnet_msg *msg = kzalloc(sizeof(*msg), GFP_NOFS);
- struct lnet_libmd *getmd = getmsg->msg_md;
- struct lnet_process_id peer_id = getmsg->msg_target;
- int cpt;
-
- LASSERT(!getmsg->msg_target_is_router);
- LASSERT(!getmsg->msg_routing);
-
- if (!msg) {
- CERROR("%s: Dropping REPLY from %s: can't allocate msg\n",
- libcfs_nid2str(ni->ni_nid), libcfs_id2str(peer_id));
- goto drop;
- }
-
- cpt = lnet_cpt_of_cookie(getmd->md_lh.lh_cookie);
- lnet_res_lock(cpt);
-
- LASSERT(getmd->md_refcount > 0);
-
- if (!getmd->md_threshold) {
- CERROR("%s: Dropping REPLY from %s for inactive MD %p\n",
- libcfs_nid2str(ni->ni_nid), libcfs_id2str(peer_id),
- getmd);
- lnet_res_unlock(cpt);
- goto drop;
- }
-
- LASSERT(!getmd->md_offset);
-
- CDEBUG(D_NET, "%s: Reply from %s md %p\n",
- libcfs_nid2str(ni->ni_nid), libcfs_id2str(peer_id), getmd);
-
- /* setup information for lnet_build_msg_event */
- msg->msg_from = peer_id.nid;
- msg->msg_type = LNET_MSG_GET; /* flag this msg as an "optimized" GET */
- msg->msg_hdr.src_nid = peer_id.nid;
- msg->msg_hdr.payload_length = getmd->md_length;
- msg->msg_receiving = 1; /* required by lnet_msg_attach_md */
-
- lnet_msg_attach_md(msg, getmd, getmd->md_offset, getmd->md_length);
- lnet_res_unlock(cpt);
-
- cpt = lnet_cpt_of_nid(peer_id.nid);
-
- lnet_net_lock(cpt);
- lnet_msg_commit(msg, cpt);
- lnet_net_unlock(cpt);
-
- lnet_build_msg_event(msg, LNET_EVENT_REPLY);
-
- return msg;
-
- drop:
- cpt = lnet_cpt_of_nid(peer_id.nid);
-
- lnet_net_lock(cpt);
- the_lnet.ln_counters[cpt]->drop_count++;
- the_lnet.ln_counters[cpt]->drop_length += getmd->md_length;
- lnet_net_unlock(cpt);
-
- kfree(msg);
-
- return NULL;
-}
-EXPORT_SYMBOL(lnet_create_reply_msg);
-
-void
-lnet_set_reply_msg_len(struct lnet_ni *ni, struct lnet_msg *reply,
- unsigned int len)
-{
- /*
- * Set the REPLY length now that the RDMA which elides the REPLY
- * message has completed and the length is known.
- */
- LASSERT(reply);
- LASSERT(reply->msg_type == LNET_MSG_GET);
- LASSERT(reply->msg_ev.type == LNET_EVENT_REPLY);
-
- /*
- * NB I trusted my peer to RDMA. If she tells me she's written beyond
- * the end of my buffer, I might as well be dead.
- */
- LASSERT(len <= reply->msg_ev.mlength);
-
- reply->msg_ev.mlength = len;
-}
-EXPORT_SYMBOL(lnet_set_reply_msg_len);
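-
-/*
- * Illustrative only (not part of the original file): a sketch of the
- * calling pattern an LND might follow when it RDMAs GET data directly
- * and elides the REPLY. 'getmsg', 'nob' and the function name are
- * hypothetical; per the caveat above, the reply msg must be created
- * before the original GET is finalized.
- */
-static void example_lnd_get_done(struct lnet_ni *ni, struct lnet_msg *getmsg,
- unsigned int nob, int status)
-{
- struct lnet_msg *reply = lnet_create_reply_msg(ni, getmsg);
-
- if (reply && !status)
- lnet_set_reply_msg_len(ni, reply, nob);
-
- lnet_finalize(ni, getmsg, status); /* completes (and frees) the GET */
- lnet_finalize(ni, reply, status); /* a NULL reply is tolerated */
-}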
-
-/**
- * Initiate an asynchronous GET operation.
- *
- * On the initiator node, an LNET_EVENT_SEND is logged when the GET request
- * is sent, and an LNET_EVENT_REPLY is logged when the data returned from
- * the target node in the REPLY has been written to local MD.
- *
- * On the target node, an LNET_EVENT_GET is logged when the GET request
- * arrives and is accepted into a MD.
- *
- * \param self,target,portal,match_bits,offset See the discussion in LNetPut().
- * \param mdh A handle for the MD that describes the memory into which the
- * requested data will be received. The MD must be "free floating"
- * (See LNetMDBind()).
- *
- * \retval 0 Success, and only in this case events will be generated
- * and logged to EQ (if it exists) of the MD.
- * \retval -EIO Simulated failure.
- * \retval -ENOMEM Memory allocation failure.
- * \retval -ENOENT Invalid MD object.
- */
-int
-LNetGet(lnet_nid_t self, struct lnet_handle_md mdh,
- struct lnet_process_id target, unsigned int portal,
- __u64 match_bits, unsigned int offset)
-{
- struct lnet_msg *msg;
- struct lnet_libmd *md;
- int cpt;
- int rc;
-
- LASSERT(the_lnet.ln_refcount > 0);
-
- if (!list_empty(&the_lnet.ln_test_peers) && /* normally we don't */
- fail_peer(target.nid, 1)) { /* shall we now? */
- CERROR("Dropping GET to %s: simulated failure\n",
- libcfs_id2str(target));
- return -EIO;
- }
-
- msg = kzalloc(sizeof(*msg), GFP_NOFS);
- if (!msg) {
- CERROR("Dropping GET to %s: ENOMEM on struct lnet_msg\n",
- libcfs_id2str(target));
- return -ENOMEM;
- }
-
- cpt = lnet_cpt_of_cookie(mdh.cookie);
- lnet_res_lock(cpt);
-
- md = lnet_handle2md(&mdh);
- if (!md || !md->md_threshold || md->md_me) {
- CERROR("Dropping GET (%llu:%d:%s): MD (%d) invalid\n",
- match_bits, portal, libcfs_id2str(target),
- !md ? -1 : md->md_threshold);
- if (md && md->md_me)
- CERROR("REPLY MD also attached to portal %d\n",
- md->md_me->me_portal);
-
- lnet_res_unlock(cpt);
-
- kfree(msg);
- return -ENOENT;
- }
-
- CDEBUG(D_NET, "%s -> %s\n", __func__, libcfs_id2str(target));
-
- lnet_msg_attach_md(msg, md, 0, 0);
-
- lnet_prep_send(msg, LNET_MSG_GET, target, 0, 0);
-
- msg->msg_hdr.msg.get.match_bits = cpu_to_le64(match_bits);
- msg->msg_hdr.msg.get.ptl_index = cpu_to_le32(portal);
- msg->msg_hdr.msg.get.src_offset = cpu_to_le32(offset);
- msg->msg_hdr.msg.get.sink_length = cpu_to_le32(md->md_length);
-
- /* NB handles only looked up by creator (no flips) */
- msg->msg_hdr.msg.get.return_wmd.wh_interface_cookie =
- the_lnet.ln_interface_cookie;
- msg->msg_hdr.msg.get.return_wmd.wh_object_cookie =
- md->md_lh.lh_cookie;
-
- lnet_res_unlock(cpt);
-
- lnet_build_msg_event(msg, LNET_EVENT_SEND);
-
- rc = lnet_send(self, msg, LNET_NID_ANY);
- if (rc < 0) {
- CNETERR("Error sending GET to %s: %d\n",
- libcfs_id2str(target), rc);
- lnet_finalize(NULL, msg, rc);
- }
-
- /* completion will be signalled by an event */
- return 0;
-}
-EXPORT_SYMBOL(LNetGet);
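-
-/*
- * Illustrative only (not part of the original file): a minimal GET
- * sketch. 'mdh' must describe the sink buffer and be free-floating
- * (LNetMDBind()); EXAMPLE_PORTAL and EXAMPLE_MATCH_BITS are hypothetical.
- * Arrival of the data is signalled by LNET_EVENT_REPLY on the MD's EQ.
- */
-static int example_get(struct lnet_handle_md mdh,
- struct lnet_process_id peer)
-{
- return LNetGet(LNET_NID_ANY, mdh, peer,
- EXAMPLE_PORTAL, EXAMPLE_MATCH_BITS, 0);
-}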
-
-/**
- * Calculate distance to node at \a dstnid.
- *
- * \param dstnid Target NID.
- * \param srcnidp If not NULL, NID of the local interface to reach \a dstnid
- * is saved here.
- * \param orderp If not NULL, order of the route to reach \a dstnid is saved
- * here.
- *
- * \retval 0 If \a dstnid belongs to a local interface, and reserved option
- * local_nid_dist_zero is set, which is the default.
- * \retval positive Distance to target NID, i.e. the number of hops plus one.
- * \retval -EHOSTUNREACH If \a dstnid is not reachable.
- */
-int
-LNetDist(lnet_nid_t dstnid, lnet_nid_t *srcnidp, __u32 *orderp)
-{
- struct list_head *e;
- struct lnet_ni *ni;
- struct lnet_remotenet *rnet;
- __u32 dstnet = LNET_NIDNET(dstnid);
- int hops;
- int cpt;
- __u32 order = 2;
- struct list_head *rn_list;
-
- /*
- * if !local_nid_dist_zero, I don't return a distance of 0 ever
- * (when lustre sees a distance of 0, it substitutes 0@lo), so I
- * keep order 0 free for 0@lo and order 1 free for a local NID
- * match
- */
- LASSERT(the_lnet.ln_refcount > 0);
-
- cpt = lnet_net_lock_current();
-
- list_for_each(e, &the_lnet.ln_nis) {
- ni = list_entry(e, struct lnet_ni, ni_list);
-
- if (ni->ni_nid == dstnid) {
- if (srcnidp)
- *srcnidp = dstnid;
- if (orderp) {
- if (LNET_NETTYP(LNET_NIDNET(dstnid)) == LOLND)
- *orderp = 0;
- else
- *orderp = 1;
- }
- lnet_net_unlock(cpt);
-
- return local_nid_dist_zero ? 0 : 1;
- }
-
- if (LNET_NIDNET(ni->ni_nid) == dstnet) {
- /*
- * Check if ni was originally created in
- * the current net namespace.
- * If not, assign an order above 0xffff0000
- * to de-prioritize this ni.
- */
- if (!net_eq(ni->ni_net_ns, current->nsproxy->net_ns))
- order += 0xffff0000;
-
- if (srcnidp)
- *srcnidp = ni->ni_nid;
- if (orderp)
- *orderp = order;
- lnet_net_unlock(cpt);
- return 1;
- }
-
- order++;
- }
-
- rn_list = lnet_net2rnethash(dstnet);
- list_for_each(e, rn_list) {
- rnet = list_entry(e, struct lnet_remotenet, lrn_list);
-
- if (rnet->lrn_net == dstnet) {
- struct lnet_route *route;
- struct lnet_route *shortest = NULL;
- __u32 shortest_hops = LNET_UNDEFINED_HOPS;
- __u32 route_hops;
-
- LASSERT(!list_empty(&rnet->lrn_routes));
-
- list_for_each_entry(route, &rnet->lrn_routes,
- lr_list) {
- route_hops = route->lr_hops;
- if (route_hops == LNET_UNDEFINED_HOPS)
- route_hops = 1;
- if (!shortest ||
- route_hops < shortest_hops) {
- shortest = route;
- shortest_hops = route_hops;
- }
- }
-
- LASSERT(shortest);
- hops = shortest_hops;
- if (srcnidp)
- *srcnidp = shortest->lr_gateway->lp_ni->ni_nid;
- if (orderp)
- *orderp = order;
- lnet_net_unlock(cpt);
- return hops + 1;
- }
- order++;
- }
-
- lnet_net_unlock(cpt);
- return -EHOSTUNREACH;
-}
-EXPORT_SYMBOL(LNetDist);
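-
-/*
- * Illustrative only (not part of the original file): probing whether a
- * peer NID is reachable at all with LNetDist(); the helper name is
- * hypothetical. A negative return means unreachable; 0 or a positive
- * hop count means a local interface or a route exists.
- */
-static bool example_peer_reachable(lnet_nid_t nid)
-{
- lnet_nid_t src;
- __u32 order;
-
- return LNetDist(nid, &src, &order) >= 0;
-}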
diff --git a/drivers/staging/lustre/lnet/lnet/lib-msg.c b/drivers/staging/lustre/lnet/lnet/lib-msg.c
deleted file mode 100644
index 0091273c04b9..000000000000
--- a/drivers/staging/lustre/lnet/lnet/lib-msg.c
+++ /dev/null
@@ -1,625 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/*
- * GPL HEADER START
- *
- * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 only,
- * as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * General Public License version 2 for more details (a copy is included
- * in the LICENSE file that accompanied this code).
- *
- * You should have received a copy of the GNU General Public License
- * version 2 along with this program; If not, see
- * http://www.gnu.org/licenses/gpl-2.0.html
- *
- * GPL HEADER END
- */
-/*
- * Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved.
- * Use is subject to license terms.
- *
- * Copyright (c) 2012, Intel Corporation.
- */
-/*
- * This file is part of Lustre, http://www.lustre.org/
- * Lustre is a trademark of Sun Microsystems, Inc.
- *
- * lnet/lnet/lib-msg.c
- *
- * Message decoding, parsing and finalizing routines
- */
-
-#define DEBUG_SUBSYSTEM S_LNET
-
-#include <linux/lnet/lib-lnet.h>
-
-void
-lnet_build_unlink_event(struct lnet_libmd *md, struct lnet_event *ev)
-{
- memset(ev, 0, sizeof(*ev));
-
- ev->status = 0;
- ev->unlinked = 1;
- ev->type = LNET_EVENT_UNLINK;
- lnet_md_deconstruct(md, &ev->md);
- lnet_md2handle(&ev->md_handle, md);
-}
-
-/*
- * Don't need any lock, must be called after lnet_commit_md
- */
-void
-lnet_build_msg_event(struct lnet_msg *msg, enum lnet_event_kind ev_type)
-{
- struct lnet_hdr *hdr = &msg->msg_hdr;
- struct lnet_event *ev = &msg->msg_ev;
-
- LASSERT(!msg->msg_routing);
-
- ev->type = ev_type;
-
- if (ev_type == LNET_EVENT_SEND) {
- /* event for active message */
- ev->target.nid = le64_to_cpu(hdr->dest_nid);
- ev->target.pid = le32_to_cpu(hdr->dest_pid);
- ev->initiator.nid = LNET_NID_ANY;
- ev->initiator.pid = the_lnet.ln_pid;
- ev->sender = LNET_NID_ANY;
- } else {
- /* event for passive message */
- ev->target.pid = hdr->dest_pid;
- ev->target.nid = hdr->dest_nid;
- ev->initiator.pid = hdr->src_pid;
- ev->initiator.nid = hdr->src_nid;
- ev->rlength = hdr->payload_length;
- ev->sender = msg->msg_from;
- ev->mlength = msg->msg_wanted;
- ev->offset = msg->msg_offset;
- }
-
- switch (ev_type) {
- default:
- LBUG();
-
- case LNET_EVENT_PUT: /* passive PUT */
- ev->pt_index = hdr->msg.put.ptl_index;
- ev->match_bits = hdr->msg.put.match_bits;
- ev->hdr_data = hdr->msg.put.hdr_data;
- return;
-
- case LNET_EVENT_GET: /* passive GET */
- ev->pt_index = hdr->msg.get.ptl_index;
- ev->match_bits = hdr->msg.get.match_bits;
- ev->hdr_data = 0;
- return;
-
- case LNET_EVENT_ACK: /* ACK */
- ev->match_bits = hdr->msg.ack.match_bits;
- ev->mlength = hdr->msg.ack.mlength;
- return;
-
- case LNET_EVENT_REPLY: /* REPLY */
- return;
-
- case LNET_EVENT_SEND: /* active message */
- if (msg->msg_type == LNET_MSG_PUT) {
- ev->pt_index = le32_to_cpu(hdr->msg.put.ptl_index);
- ev->match_bits = le64_to_cpu(hdr->msg.put.match_bits);
- ev->offset = le32_to_cpu(hdr->msg.put.offset);
- ev->mlength =
- ev->rlength = le32_to_cpu(hdr->payload_length);
- ev->hdr_data = le64_to_cpu(hdr->msg.put.hdr_data);
-
- } else {
- LASSERT(msg->msg_type == LNET_MSG_GET);
- ev->pt_index = le32_to_cpu(hdr->msg.get.ptl_index);
- ev->match_bits = le64_to_cpu(hdr->msg.get.match_bits);
- ev->mlength =
- ev->rlength = le32_to_cpu(hdr->msg.get.sink_length);
- ev->offset = le32_to_cpu(hdr->msg.get.src_offset);
- ev->hdr_data = 0;
- }
- return;
- }
-}
-
-void
-lnet_msg_commit(struct lnet_msg *msg, int cpt)
-{
- struct lnet_msg_container *container = the_lnet.ln_msg_containers[cpt];
- struct lnet_counters *counters = the_lnet.ln_counters[cpt];
-
- /* routed message can be committed for both receiving and sending */
- LASSERT(!msg->msg_tx_committed);
-
- if (msg->msg_sending) {
- LASSERT(!msg->msg_receiving);
-
- msg->msg_tx_cpt = cpt;
- msg->msg_tx_committed = 1;
- if (msg->msg_rx_committed) { /* routed message REPLY */
- LASSERT(msg->msg_onactivelist);
- return;
- }
- } else {
- LASSERT(!msg->msg_sending);
- msg->msg_rx_cpt = cpt;
- msg->msg_rx_committed = 1;
- }
-
- LASSERT(!msg->msg_onactivelist);
- msg->msg_onactivelist = 1;
- list_add(&msg->msg_activelist, &container->msc_active);
-
- counters->msgs_alloc++;
- if (counters->msgs_alloc > counters->msgs_max)
- counters->msgs_max = counters->msgs_alloc;
-}
-
-static void
-lnet_msg_decommit_tx(struct lnet_msg *msg, int status)
-{
- struct lnet_counters *counters;
- struct lnet_event *ev = &msg->msg_ev;
-
- LASSERT(msg->msg_tx_committed);
- if (status)
- goto out;
-
- counters = the_lnet.ln_counters[msg->msg_tx_cpt];
- switch (ev->type) {
- default: /* routed message */
- LASSERT(msg->msg_routing);
- LASSERT(msg->msg_rx_committed);
- LASSERT(!ev->type);
-
- counters->route_length += msg->msg_len;
- counters->route_count++;
- goto out;
-
- case LNET_EVENT_PUT:
- /* should have been decommitted */
- LASSERT(!msg->msg_rx_committed);
- /* overwritten while sending ACK */
- LASSERT(msg->msg_type == LNET_MSG_ACK);
- msg->msg_type = LNET_MSG_PUT; /* fix type */
- break;
-
- case LNET_EVENT_SEND:
- LASSERT(!msg->msg_rx_committed);
- if (msg->msg_type == LNET_MSG_PUT)
- counters->send_length += msg->msg_len;
- break;
-
- case LNET_EVENT_GET:
- LASSERT(msg->msg_rx_committed);
- /*
- * type was overwritten while sending the REPLY; we should
- * never get here for an optimized GET
- */
- LASSERT(msg->msg_type == LNET_MSG_REPLY);
- msg->msg_type = LNET_MSG_GET; /* fix type */
- break;
- }
-
- counters->send_count++;
- out:
- lnet_return_tx_credits_locked(msg);
- msg->msg_tx_committed = 0;
-}
-
-static void
-lnet_msg_decommit_rx(struct lnet_msg *msg, int status)
-{
- struct lnet_counters *counters;
- struct lnet_event *ev = &msg->msg_ev;
-
- LASSERT(!msg->msg_tx_committed); /* decommitted or never committed */
- LASSERT(msg->msg_rx_committed);
-
- if (status)
- goto out;
-
- counters = the_lnet.ln_counters[msg->msg_rx_cpt];
- switch (ev->type) {
- default:
- LASSERT(!ev->type);
- LASSERT(msg->msg_routing);
- goto out;
-
- case LNET_EVENT_ACK:
- LASSERT(msg->msg_type == LNET_MSG_ACK);
- break;
-
- case LNET_EVENT_GET:
- /*
- * type is "REPLY" if it's an optimized GET on passive side,
- * because optimized GET will never be committed for sending,
- * so message type wouldn't be changed back to "GET" by
- * lnet_msg_decommit_tx(), see details in lnet_parse_get()
- */
- LASSERT(msg->msg_type == LNET_MSG_REPLY ||
- msg->msg_type == LNET_MSG_GET);
- counters->send_length += msg->msg_wanted;
- break;
-
- case LNET_EVENT_PUT:
- LASSERT(msg->msg_type == LNET_MSG_PUT);
- break;
-
- case LNET_EVENT_REPLY:
- /*
- * type is "GET" if it's an optimized GET on active side,
- * see details in lnet_create_reply_msg()
- */
- LASSERT(msg->msg_type == LNET_MSG_GET ||
- msg->msg_type == LNET_MSG_REPLY);
- break;
- }
-
- counters->recv_count++;
- if (ev->type == LNET_EVENT_PUT || ev->type == LNET_EVENT_REPLY)
- counters->recv_length += msg->msg_wanted;
-
- out:
- lnet_return_rx_credits_locked(msg);
- msg->msg_rx_committed = 0;
-}
-
-void
-lnet_msg_decommit(struct lnet_msg *msg, int cpt, int status)
-{
- int cpt2 = cpt;
-
- LASSERT(msg->msg_tx_committed || msg->msg_rx_committed);
- LASSERT(msg->msg_onactivelist);
-
- if (msg->msg_tx_committed) { /* always decommit for sending first */
- LASSERT(cpt == msg->msg_tx_cpt);
- lnet_msg_decommit_tx(msg, status);
- }
-
- if (msg->msg_rx_committed) {
- /* forwarding msg committed for both receiving and sending */
- if (cpt != msg->msg_rx_cpt) {
- lnet_net_unlock(cpt);
- cpt2 = msg->msg_rx_cpt;
- lnet_net_lock(cpt2);
- }
- lnet_msg_decommit_rx(msg, status);
- }
-
- list_del(&msg->msg_activelist);
- msg->msg_onactivelist = 0;
-
- the_lnet.ln_counters[cpt2]->msgs_alloc--;
-
- if (cpt2 != cpt) {
- lnet_net_unlock(cpt2);
- lnet_net_lock(cpt);
- }
-}
-
-void
-lnet_msg_attach_md(struct lnet_msg *msg, struct lnet_libmd *md,
- unsigned int offset, unsigned int mlen)
-{
- /* NB: @offset and @mlen are only useful for receiving */
- /*
- * Here, we attach the MD to the lnet_msg, mark it busy and
- * decrement its threshold. Come what may, the lnet_msg "owns"
- * the MD until a call to lnet_msg_detach_md() or lnet_finalize()
- * signals completion.
- */
- LASSERT(!msg->msg_routing);
-
- msg->msg_md = md;
- if (msg->msg_receiving) { /* committed for receiving */
- msg->msg_offset = offset;
- msg->msg_wanted = mlen;
- }
-
- md->md_refcount++;
- if (md->md_threshold != LNET_MD_THRESH_INF) {
- LASSERT(md->md_threshold > 0);
- md->md_threshold--;
- }
-
- /* build umd in event */
- lnet_md2handle(&msg->msg_ev.md_handle, md);
- lnet_md_deconstruct(md, &msg->msg_ev.md);
-}
-
-void
-lnet_msg_detach_md(struct lnet_msg *msg, int status)
-{
- struct lnet_libmd *md = msg->msg_md;
- int unlink;
-
- /* Now it's safe to drop my caller's ref */
- md->md_refcount--;
- LASSERT(md->md_refcount >= 0);
-
- unlink = lnet_md_unlinkable(md);
- if (md->md_eq) {
- msg->msg_ev.status = status;
- msg->msg_ev.unlinked = unlink;
- lnet_eq_enqueue_event(md->md_eq, &msg->msg_ev);
- }
-
- if (unlink)
- lnet_md_unlink(md);
-
- msg->msg_md = NULL;
-}
-
-static int
-lnet_complete_msg_locked(struct lnet_msg *msg, int cpt)
-{
- struct lnet_handle_wire ack_wmd;
- int rc;
- int status = msg->msg_ev.status;
-
- LASSERT(msg->msg_onactivelist);
-
- if (!status && msg->msg_ack) {
- /* Only send an ACK if the PUT completed successfully */
-
- lnet_msg_decommit(msg, cpt, 0);
-
- msg->msg_ack = 0;
- lnet_net_unlock(cpt);
-
- LASSERT(msg->msg_ev.type == LNET_EVENT_PUT);
- LASSERT(!msg->msg_routing);
-
- ack_wmd = msg->msg_hdr.msg.put.ack_wmd;
-
- lnet_prep_send(msg, LNET_MSG_ACK, msg->msg_ev.initiator, 0, 0);
-
- msg->msg_hdr.msg.ack.dst_wmd = ack_wmd;
- msg->msg_hdr.msg.ack.match_bits = msg->msg_ev.match_bits;
- msg->msg_hdr.msg.ack.mlength = cpu_to_le32(msg->msg_ev.mlength);
-
- /*
- * NB: we probably want to use NID of msg::msg_from as 3rd
- * parameter (router NID) if it's a routed message
- */
- rc = lnet_send(msg->msg_ev.target.nid, msg, LNET_NID_ANY);
-
- lnet_net_lock(cpt);
- /*
- * NB: the message is committed for sending; on success we return
- * because the LND will finalize it later.
- *
- * There is also a possibility that the message was committed for
- * sending but failed before delivery to the LND (e.g. ENOMEM);
- * in that case we can't fall through either, because the CPT for
- * sending can differ from the CPT for receiving, so we must
- * return to lnet_finalize() to make sure we are locking the
- * correct partition.
- */
- return rc;
-
- } else if (!status && /* OK so far */
- (msg->msg_routing && !msg->msg_sending)) {
- /* not forwarded */
- LASSERT(!msg->msg_receiving); /* called back recv already */
- lnet_net_unlock(cpt);
-
- rc = lnet_send(LNET_NID_ANY, msg, LNET_NID_ANY);
-
- lnet_net_lock(cpt);
- /*
- * NB: the message is committed for sending; on success we return
- * because the LND will finalize it later.
- *
- * There is also a possibility that the message was committed for
- * sending but failed before delivery to the LND (e.g. ENOMEM);
- * in that case we can't fall through either:
- * - the rule is that a message must decommit for sending first
- * if it's committed for both sending and receiving
- * - the CPT for sending can differ from the CPT for receiving,
- * so we must return to lnet_finalize() to make sure we are
- * locking the correct partition.
- */
- return rc;
- }
-
- lnet_msg_decommit(msg, cpt, status);
- kfree(msg);
- return 0;
-}
-
-void
-lnet_finalize(struct lnet_ni *ni, struct lnet_msg *msg, int status)
-{
- struct lnet_msg_container *container;
- int my_slot;
- int cpt;
- int rc;
- int i;
-
- LASSERT(!in_interrupt());
-
- if (!msg)
- return;
-
- msg->msg_ev.status = status;
-
- if (msg->msg_md) {
- cpt = lnet_cpt_of_cookie(msg->msg_md->md_lh.lh_cookie);
-
- lnet_res_lock(cpt);
- lnet_msg_detach_md(msg, status);
- lnet_res_unlock(cpt);
- }
-
- again:
- rc = 0;
- if (!msg->msg_tx_committed && !msg->msg_rx_committed) {
- /* not committed to network yet */
- LASSERT(!msg->msg_onactivelist);
- kfree(msg);
- return;
- }
-
- /*
- * NB: routed message can be committed for both receiving and sending,
- * we should finalize in LIFO order and keep counters correct.
- * (finalize sending first then finalize receiving)
- */
- cpt = msg->msg_tx_committed ? msg->msg_tx_cpt : msg->msg_rx_cpt;
- lnet_net_lock(cpt);
-
- container = the_lnet.ln_msg_containers[cpt];
- list_add_tail(&msg->msg_list, &container->msc_finalizing);
-
- /*
- * Recursion breaker. Don't complete the message here if I am (or
- * enough other threads are) already completing messages
- */
- my_slot = -1;
- for (i = 0; i < container->msc_nfinalizers; i++) {
- if (container->msc_finalizers[i] == current)
- break;
-
- if (my_slot < 0 && !container->msc_finalizers[i])
- my_slot = i;
- }
-
- if (i < container->msc_nfinalizers || my_slot < 0) {
- lnet_net_unlock(cpt);
- return;
- }
-
- container->msc_finalizers[my_slot] = current;
-
- while (!list_empty(&container->msc_finalizing)) {
- msg = list_entry(container->msc_finalizing.next,
- struct lnet_msg, msg_list);
-
- list_del(&msg->msg_list);
-
- /*
- * NB drops and regains the lnet lock if it actually does
- * anything, so my finalizing friends can chomp along too
- */
- rc = lnet_complete_msg_locked(msg, cpt);
- if (rc)
- break;
- }
-
- if (unlikely(!list_empty(&the_lnet.ln_delay_rules))) {
- lnet_net_unlock(cpt);
- lnet_delay_rule_check();
- lnet_net_lock(cpt);
- }
-
- container->msc_finalizers[my_slot] = NULL;
- lnet_net_unlock(cpt);
-
- if (rc)
- goto again;
-}
-EXPORT_SYMBOL(lnet_finalize);
-
-void
-lnet_msg_container_cleanup(struct lnet_msg_container *container)
-{
- int count = 0;
-
- if (!container->msc_init)
- return;
-
- while (!list_empty(&container->msc_active)) {
- struct lnet_msg *msg;
-
- msg = list_entry(container->msc_active.next,
- struct lnet_msg, msg_activelist);
- LASSERT(msg->msg_onactivelist);
- msg->msg_onactivelist = 0;
- list_del(&msg->msg_activelist);
- kfree(msg);
- count++;
- }
-
- if (count > 0)
- CERROR("%d active msg on exit\n", count);
-
- kvfree(container->msc_finalizers);
- container->msc_finalizers = NULL;
- container->msc_init = 0;
-}
-
-int
-lnet_msg_container_setup(struct lnet_msg_container *container, int cpt)
-{
- container->msc_init = 1;
-
- INIT_LIST_HEAD(&container->msc_active);
- INIT_LIST_HEAD(&container->msc_finalizing);
-
- /* number of CPUs in this partition */
- container->msc_nfinalizers = cfs_cpt_weight(lnet_cpt_table(), cpt);
-
- container->msc_finalizers = kvzalloc_cpt(container->msc_nfinalizers *
- sizeof(*container->msc_finalizers),
- GFP_KERNEL, cpt);
-
- if (!container->msc_finalizers) {
- CERROR("Failed to allocate message finalizers\n");
- lnet_msg_container_cleanup(container);
- return -ENOMEM;
- }
-
- return 0;
-}
-
-void
-lnet_msg_containers_destroy(void)
-{
- struct lnet_msg_container *container;
- int i;
-
- if (!the_lnet.ln_msg_containers)
- return;
-
- cfs_percpt_for_each(container, i, the_lnet.ln_msg_containers)
- lnet_msg_container_cleanup(container);
-
- cfs_percpt_free(the_lnet.ln_msg_containers);
- the_lnet.ln_msg_containers = NULL;
-}
-
-int
-lnet_msg_containers_create(void)
-{
- struct lnet_msg_container *container;
- int rc;
- int i;
-
- the_lnet.ln_msg_containers = cfs_percpt_alloc(lnet_cpt_table(),
- sizeof(*container));
-
- if (!the_lnet.ln_msg_containers) {
- CERROR("Failed to allocate cpu-partition data for network\n");
- return -ENOMEM;
- }
-
- cfs_percpt_for_each(container, i, the_lnet.ln_msg_containers) {
- rc = lnet_msg_container_setup(container, i);
- if (rc) {
- lnet_msg_containers_destroy();
- return rc;
- }
- }
-
- return 0;
-}
diff --git a/drivers/staging/lustre/lnet/lnet/lib-ptl.c b/drivers/staging/lustre/lnet/lnet/lib-ptl.c
deleted file mode 100644
index fc47379c5938..000000000000
--- a/drivers/staging/lustre/lnet/lnet/lib-ptl.c
+++ /dev/null
@@ -1,987 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/*
- * GPL HEADER START
- *
- * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 only,
- * as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * General Public License version 2 for more details (a copy is included
- * in the LICENSE file that accompanied this code).
- *
- * GPL HEADER END
- */
-/*
- * Copyright (c) 2012, 2015, Intel Corporation.
- */
-/*
- * This file is part of Lustre, http://www.lustre.org/
- * Lustre is a trademark of Sun Microsystems, Inc.
- *
- * lnet/lnet/lib-ptl.c
- *
- * portal & match routines
- *
- * Author: liang@whamcloud.com
- */
-
-#define DEBUG_SUBSYSTEM S_LNET
-
-#include <linux/lnet/lib-lnet.h>
-
-/* NB: add /proc interfaces in upcoming patches */
-int portal_rotor = LNET_PTL_ROTOR_HASH_RT;
-module_param(portal_rotor, int, 0644);
-MODULE_PARM_DESC(portal_rotor, "redirect PUTs to different cpu-partitions");
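-
-/*
- * As lnet_mt_of_match() below implements it: LNET_PTL_ROTOR_OFF keeps
- * PUTs on the current CPT, LNET_PTL_ROTOR_ON round-robins them across
- * CPTs, and the default LNET_PTL_ROTOR_HASH_RT keeps local messages on
- * the current CPT but spreads routed messages by hashing the source
- * NID, so one peer's messages stay on one partition.
- */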
-
-static int
-lnet_ptl_match_type(unsigned int index, struct lnet_process_id match_id,
- __u64 mbits, __u64 ignore_bits)
-{
- struct lnet_portal *ptl = the_lnet.ln_portals[index];
- int unique;
-
- unique = !ignore_bits &&
- match_id.nid != LNET_NID_ANY &&
- match_id.pid != LNET_PID_ANY;
-
- LASSERT(!lnet_ptl_is_unique(ptl) || !lnet_ptl_is_wildcard(ptl));
-
- /* prefer to check w/o any lock */
- if (likely(lnet_ptl_is_unique(ptl) || lnet_ptl_is_wildcard(ptl)))
- goto match;
-
- /* unset, new portal */
- lnet_ptl_lock(ptl);
- /* check again with lock */
- if (unlikely(lnet_ptl_is_unique(ptl) || lnet_ptl_is_wildcard(ptl))) {
- lnet_ptl_unlock(ptl);
- goto match;
- }
-
- /* still not set */
- if (unique)
- lnet_ptl_setopt(ptl, LNET_PTL_MATCH_UNIQUE);
- else
- lnet_ptl_setopt(ptl, LNET_PTL_MATCH_WILDCARD);
-
- lnet_ptl_unlock(ptl);
-
- return 1;
-
- match:
- if ((lnet_ptl_is_unique(ptl) && !unique) ||
- (lnet_ptl_is_wildcard(ptl) && unique))
- return 0;
- return 1;
-}
-
-static void
-lnet_ptl_enable_mt(struct lnet_portal *ptl, int cpt)
-{
- struct lnet_match_table *mtable = ptl->ptl_mtables[cpt];
- int i;
-
- /* called with both lnet_res_lock(cpt) and lnet_ptl_lock held */
- LASSERT(lnet_ptl_is_wildcard(ptl));
-
- mtable->mt_enabled = 1;
-
- ptl->ptl_mt_maps[ptl->ptl_mt_nmaps] = cpt;
- for (i = ptl->ptl_mt_nmaps - 1; i >= 0; i--) {
- LASSERT(ptl->ptl_mt_maps[i] != cpt);
- if (ptl->ptl_mt_maps[i] < cpt)
- break;
-
- /* shift entries to keep the map sorted */
- ptl->ptl_mt_maps[i + 1] = ptl->ptl_mt_maps[i];
- ptl->ptl_mt_maps[i] = cpt;
- }
-
- ptl->ptl_mt_nmaps++;
-}
-
-static void
-lnet_ptl_disable_mt(struct lnet_portal *ptl, int cpt)
-{
- struct lnet_match_table *mtable = ptl->ptl_mtables[cpt];
- int i;
-
- /* called with both lnet_res_lock(cpt) and lnet_ptl_lock held */
- LASSERT(lnet_ptl_is_wildcard(ptl));
-
- if (LNET_CPT_NUMBER == 1)
- return; /* never disable the only match-table */
-
- mtable->mt_enabled = 0;
-
- LASSERT(ptl->ptl_mt_nmaps > 0 &&
- ptl->ptl_mt_nmaps <= LNET_CPT_NUMBER);
-
- /* remove it from mt_maps */
- ptl->ptl_mt_nmaps--;
- for (i = 0; i < ptl->ptl_mt_nmaps; i++) {
- if (ptl->ptl_mt_maps[i] >= cpt) /* overwrite it */
- ptl->ptl_mt_maps[i] = ptl->ptl_mt_maps[i + 1];
- }
-}
-
-static int
-lnet_try_match_md(struct lnet_libmd *md,
- struct lnet_match_info *info, struct lnet_msg *msg)
-{
- /*
- * ALWAYS called holding the lnet_res_lock, which must not be
- * dropped; lnet_match_blocked_msg() relies on this to avoid races
- */
- unsigned int offset;
- unsigned int mlength;
- struct lnet_me *me = md->md_me;
-
- /* MD exhausted */
- if (lnet_md_exhausted(md))
- return LNET_MATCHMD_NONE | LNET_MATCHMD_EXHAUSTED;
-
- /* mismatched MD op */
- if (!(md->md_options & info->mi_opc))
- return LNET_MATCHMD_NONE;
-
- /* mismatched ME nid/pid? */
- if (me->me_match_id.nid != LNET_NID_ANY &&
- me->me_match_id.nid != info->mi_id.nid)
- return LNET_MATCHMD_NONE;
-
- if (me->me_match_id.pid != LNET_PID_ANY &&
- me->me_match_id.pid != info->mi_id.pid)
- return LNET_MATCHMD_NONE;
-
- /* mismatched ME matchbits? */
- if ((me->me_match_bits ^ info->mi_mbits) & ~me->me_ignore_bits)
- return LNET_MATCHMD_NONE;
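- /*
- * (illustrative: with match_bits 0xC0FFEE00 and ignore_bits 0xFF,
- * any incoming mbits 0xC0FFEExx match, since differences in the
- * ignored low byte are masked off by ~me_ignore_bits)
- */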
-
- /* Hurrah! This _is_ a match; check it out... */
-
- if (!(md->md_options & LNET_MD_MANAGE_REMOTE))
- offset = md->md_offset;
- else
- offset = info->mi_roffset;
-
- if (md->md_options & LNET_MD_MAX_SIZE) {
- mlength = md->md_max_size;
- LASSERT(md->md_offset + mlength <= md->md_length);
- } else {
- mlength = md->md_length - offset;
- }
-
- if (info->mi_rlength <= mlength) { /* fits in allowed space */
- mlength = info->mi_rlength;
- } else if (!(md->md_options & LNET_MD_TRUNCATE)) {
- /* this packet _really_ is too big */
- CERROR("Matching packet from %s, match %llu length %d too big: %d left, %d allowed\n",
- libcfs_id2str(info->mi_id), info->mi_mbits,
- info->mi_rlength, md->md_length - offset, mlength);
-
- return LNET_MATCHMD_DROP;
- }
-
- /* Commit to this ME/MD */
- CDEBUG(D_NET, "Incoming %s index %x from %s of length %d/%d into md %#llx [%d] + %d\n",
- (info->mi_opc == LNET_MD_OP_PUT) ? "put" : "get",
- info->mi_portal, libcfs_id2str(info->mi_id), mlength,
- info->mi_rlength, md->md_lh.lh_cookie, md->md_niov, offset);
-
- lnet_msg_attach_md(msg, md, offset, mlength);
- md->md_offset = offset + mlength;
-
- if (!lnet_md_exhausted(md))
- return LNET_MATCHMD_OK;
-
- /*
- * Auto-unlink NOW, so the ME gets unlinked if required.
- * We bumped md->md_refcount above so the MD just gets flagged
- * for unlink when it is finalized.
- */
- if (md->md_flags & LNET_MD_FLAG_AUTO_UNLINK)
- lnet_md_unlink(md);
-
- return LNET_MATCHMD_OK | LNET_MATCHMD_EXHAUSTED;
-}
-
-static struct lnet_match_table *
-lnet_match2mt(struct lnet_portal *ptl, struct lnet_process_id id, __u64 mbits)
-{
- if (LNET_CPT_NUMBER == 1)
- return ptl->ptl_mtables[0]; /* the only one */
-
- /* if it's a unique portal, return match-table hashed by NID */
- return lnet_ptl_is_unique(ptl) ?
- ptl->ptl_mtables[lnet_cpt_of_nid(id.nid)] : NULL;
-}
-
-struct lnet_match_table *
-lnet_mt_of_attach(unsigned int index, struct lnet_process_id id,
- __u64 mbits, __u64 ignore_bits, enum lnet_ins_pos pos)
-{
- struct lnet_portal *ptl;
- struct lnet_match_table *mtable;
-
- /* NB: called w/o lock */
- LASSERT(index < the_lnet.ln_nportals);
-
- if (!lnet_ptl_match_type(index, id, mbits, ignore_bits))
- return NULL;
-
- ptl = the_lnet.ln_portals[index];
-
- mtable = lnet_match2mt(ptl, id, mbits);
- if (mtable) /* unique portal or only one match-table */
- return mtable;
-
- /* it's a wildcard portal */
- switch (pos) {
- default:
- return NULL;
- case LNET_INS_BEFORE:
- case LNET_INS_AFTER:
- /*
- * posted by a thread with no CPT affinity; always hash to a
- * specific match-table to avoid buffer stealing, which is heavy
- */
- return ptl->ptl_mtables[ptl->ptl_index % LNET_CPT_NUMBER];
- case LNET_INS_LOCAL:
- /* posted by cpu-affinity thread */
- return ptl->ptl_mtables[lnet_cpt_current()];
- }
-}
-
-static struct lnet_match_table *
-lnet_mt_of_match(struct lnet_match_info *info, struct lnet_msg *msg)
-{
- struct lnet_match_table *mtable;
- struct lnet_portal *ptl;
- unsigned int nmaps;
- unsigned int rotor;
- unsigned int cpt;
- bool routed;
-
- /* NB: called w/o lock */
- LASSERT(info->mi_portal < the_lnet.ln_nportals);
- ptl = the_lnet.ln_portals[info->mi_portal];
-
- LASSERT(lnet_ptl_is_wildcard(ptl) || lnet_ptl_is_unique(ptl));
-
- mtable = lnet_match2mt(ptl, info->mi_id, info->mi_mbits);
- if (mtable)
- return mtable;
-
- /* it's a wildcard portal */
- routed = LNET_NIDNET(msg->msg_hdr.src_nid) !=
- LNET_NIDNET(msg->msg_hdr.dest_nid);
-
- if (portal_rotor == LNET_PTL_ROTOR_OFF ||
- (portal_rotor != LNET_PTL_ROTOR_ON && !routed)) {
- cpt = lnet_cpt_current();
- if (ptl->ptl_mtables[cpt]->mt_enabled)
- return ptl->ptl_mtables[cpt];
- }
-
- rotor = ptl->ptl_rotor++; /* get round-robin factor */
- if (portal_rotor == LNET_PTL_ROTOR_HASH_RT && routed)
- cpt = lnet_cpt_of_nid(msg->msg_hdr.src_nid);
- else
- cpt = rotor % LNET_CPT_NUMBER;
-
- if (!ptl->ptl_mtables[cpt]->mt_enabled) {
- /* is there any active entry for this portal? */
- nmaps = ptl->ptl_mt_nmaps;
- /* map to an active mtable to avoid heavy "stealing" */
- if (nmaps) {
- /*
- * NB: ptl_mt_maps may change under us since we don't
- * hold lnet_ptl_lock, but that shouldn't hurt anything
- */
- cpt = ptl->ptl_mt_maps[rotor % nmaps];
- }
- }
-
- return ptl->ptl_mtables[cpt];
-}
-
-static int
-lnet_mt_test_exhausted(struct lnet_match_table *mtable, int pos)
-{
- __u64 *bmap;
- int i;
-
- if (!lnet_ptl_is_wildcard(the_lnet.ln_portals[mtable->mt_portal]))
- return 0;
-
- if (pos < 0) { /* check all bits */
- for (i = 0; i < LNET_MT_EXHAUSTED_BMAP; i++) {
- if (mtable->mt_exhausted[i] != (__u64)(-1))
- return 0;
- }
- return 1;
- }
-
- LASSERT(pos <= LNET_MT_HASH_IGNORE);
- /* test whether mtable::mt_mhash[pos] is marked exhausted */
- bmap = &mtable->mt_exhausted[pos >> LNET_MT_BITS_U64];
- pos &= (1 << LNET_MT_BITS_U64) - 1;
-
- return (*bmap & BIT(pos));
-}
-
-static void
-lnet_mt_set_exhausted(struct lnet_match_table *mtable, int pos, int exhausted)
-{
- __u64 *bmap;
-
- LASSERT(lnet_ptl_is_wildcard(the_lnet.ln_portals[mtable->mt_portal]));
- LASSERT(pos <= LNET_MT_HASH_IGNORE);
-
- /* set mtable::mt_mhash[pos] as exhausted/non-exhausted */
- bmap = &mtable->mt_exhausted[pos >> LNET_MT_BITS_U64];
- pos &= (1 << LNET_MT_BITS_U64) - 1;
-
- if (!exhausted)
- *bmap &= ~(1ULL << pos);
- else
- *bmap |= 1ULL << pos;
-}
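-
-/*
- * Illustrative only: assuming LNET_MT_BITS_U64 is 6 (64-bit map words,
- * as the __u64 bitmap implies), pos 70 selects word 70 >> 6 == 1 and
- * bit 70 & 63 == 6, i.e. hash bucket 70 is tracked by bit 6 of
- * mt_exhausted[1].
- */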
-
-struct list_head *
-lnet_mt_match_head(struct lnet_match_table *mtable,
- struct lnet_process_id id, __u64 mbits)
-{
- struct lnet_portal *ptl = the_lnet.ln_portals[mtable->mt_portal];
- unsigned long hash = mbits;
-
- if (!lnet_ptl_is_wildcard(ptl)) {
- hash += id.nid + id.pid;
-
- LASSERT(lnet_ptl_is_unique(ptl));
- hash = hash_long(hash, LNET_MT_HASH_BITS);
- }
- return &mtable->mt_mhash[hash & LNET_MT_HASH_MASK];
-}
-
-int
-lnet_mt_match_md(struct lnet_match_table *mtable,
- struct lnet_match_info *info, struct lnet_msg *msg)
-{
- struct list_head *head;
- struct lnet_me *me;
- struct lnet_me *tmp;
- int exhausted = 0;
- int rc;
-
- /* any ME with ignore bits? */
- if (!list_empty(&mtable->mt_mhash[LNET_MT_HASH_IGNORE]))
- head = &mtable->mt_mhash[LNET_MT_HASH_IGNORE];
- else
- head = lnet_mt_match_head(mtable, info->mi_id, info->mi_mbits);
- again:
- /* NB: only wildcard portal needs to return LNET_MATCHMD_EXHAUSTED */
- if (lnet_ptl_is_wildcard(the_lnet.ln_portals[mtable->mt_portal]))
- exhausted = LNET_MATCHMD_EXHAUSTED;
-
- list_for_each_entry_safe(me, tmp, head, me_list) {
- /* ME attached but MD not attached yet */
- if (!me->me_md)
- continue;
-
- LASSERT(me == me->me_md->md_me);
-
- rc = lnet_try_match_md(me->me_md, info, msg);
- if (!(rc & LNET_MATCHMD_EXHAUSTED))
- exhausted = 0; /* mlist is not empty */
-
- if (rc & LNET_MATCHMD_FINISH) {
- /*
- * don't return EXHAUSTED bit because we don't know
- * whether the mlist is empty or not
- */
- return rc & ~LNET_MATCHMD_EXHAUSTED;
- }
- }
-
- if (exhausted == LNET_MATCHMD_EXHAUSTED) { /* @head is exhausted */
- lnet_mt_set_exhausted(mtable, head - mtable->mt_mhash, 1);
- if (!lnet_mt_test_exhausted(mtable, -1))
- exhausted = 0;
- }
-
- if (!exhausted && head == &mtable->mt_mhash[LNET_MT_HASH_IGNORE]) {
- head = lnet_mt_match_head(mtable, info->mi_id, info->mi_mbits);
- goto again; /* re-check MEs w/o ignore-bits */
- }
-
- if (info->mi_opc == LNET_MD_OP_GET ||
- !lnet_ptl_is_lazy(the_lnet.ln_portals[info->mi_portal]))
- return exhausted | LNET_MATCHMD_DROP;
-
- return exhausted | LNET_MATCHMD_NONE;
-}
-
-static int
-lnet_ptl_match_early(struct lnet_portal *ptl, struct lnet_msg *msg)
-{
- int rc;
-
- /*
- * message arrived before any buffer was posted on this portal;
- * simply delay or drop it
- */
- if (likely(lnet_ptl_is_wildcard(ptl) || lnet_ptl_is_unique(ptl)))
- return 0;
-
- lnet_ptl_lock(ptl);
- /* check it again with the lock held */
- if (lnet_ptl_is_wildcard(ptl) || lnet_ptl_is_unique(ptl)) {
- lnet_ptl_unlock(ptl);
- return 0;
- }
-
- if (lnet_ptl_is_lazy(ptl)) {
- if (msg->msg_rx_ready_delay) {
- msg->msg_rx_delayed = 1;
- list_add_tail(&msg->msg_list,
- &ptl->ptl_msg_delayed);
- }
- rc = LNET_MATCHMD_NONE;
- } else {
- rc = LNET_MATCHMD_DROP;
- }
-
- lnet_ptl_unlock(ptl);
- return rc;
-}
-
-static int
-lnet_ptl_match_delay(struct lnet_portal *ptl,
- struct lnet_match_info *info, struct lnet_msg *msg)
-{
- int first = ptl->ptl_mt_maps[0]; /* read w/o lock */
- int rc = 0;
- int i;
-
- /*
- * Steal a buffer from other CPTs, and delay the msg if there is
- * nothing to steal. This function is more expensive than a
- * regular match, but we don't expect it to happen often. The
- * return code contains one of LNET_MATCHMD_OK, LNET_MATCHMD_DROP,
- * or LNET_MATCHMD_NONE.
- */
- LASSERT(lnet_ptl_is_wildcard(ptl));
-
- for (i = 0; i < LNET_CPT_NUMBER; i++) {
- struct lnet_match_table *mtable;
- int cpt;
-
- cpt = (first + i) % LNET_CPT_NUMBER;
- mtable = ptl->ptl_mtables[cpt];
- if (i && i != LNET_CPT_NUMBER - 1 && !mtable->mt_enabled)
- continue;
-
- lnet_res_lock(cpt);
- lnet_ptl_lock(ptl);
-
- if (!i) {
- /* The first try, add to stealing list. */
- list_add_tail(&msg->msg_list,
- &ptl->ptl_msg_stealing);
- }
-
- if (!list_empty(&msg->msg_list)) {
- /* On stealing list. */
- rc = lnet_mt_match_md(mtable, info, msg);
-
- if ((rc & LNET_MATCHMD_EXHAUSTED) &&
- mtable->mt_enabled)
- lnet_ptl_disable_mt(ptl, cpt);
-
- if (rc & LNET_MATCHMD_FINISH) {
- /* Match found, remove from stealing list. */
- list_del_init(&msg->msg_list);
- } else if (i == LNET_CPT_NUMBER - 1 || /* (1) */
- !ptl->ptl_mt_nmaps || /* (2) */
- (ptl->ptl_mt_nmaps == 1 && /* (3) */
- ptl->ptl_mt_maps[0] == cpt)) {
- /**
- * No match found, and this is either
- * (1) the last cpt to check, or
- * (2) there is no active cpt, or
- * (3) this is the only active cpt.
- * There is nothing to steal: delay or
- * drop the message.
- */
- list_del_init(&msg->msg_list);
-
- if (lnet_ptl_is_lazy(ptl)) {
- msg->msg_rx_delayed = 1;
- list_add_tail(&msg->msg_list,
- &ptl->ptl_msg_delayed);
- rc = LNET_MATCHMD_NONE;
- } else {
- rc = LNET_MATCHMD_DROP;
- }
- } else {
- /* Do another iteration. */
- rc = 0;
- }
- } else {
- /**
- * No longer on stealing list: another thread
- * matched the message in lnet_ptl_attach_md().
- * We are now expected to handle the message.
- */
- rc = !msg->msg_md ?
- LNET_MATCHMD_DROP : LNET_MATCHMD_OK;
- }
-
- lnet_ptl_unlock(ptl);
- lnet_res_unlock(cpt);
-
- /**
- * Note that test (1) above ensures that we always
- * exit the loop through this break statement.
- *
- * LNET_MATCHMD_NONE means msg was added to the
- * delayed queue, and we may no longer reference it
- * after lnet_ptl_unlock() and lnet_res_unlock().
- */
- if (rc & (LNET_MATCHMD_FINISH | LNET_MATCHMD_NONE))
- break;
- }
-
- return rc;
-}
-
-int
-lnet_ptl_match_md(struct lnet_match_info *info, struct lnet_msg *msg)
-{
- struct lnet_match_table *mtable;
- struct lnet_portal *ptl;
- int rc;
-
- CDEBUG(D_NET, "Request from %s of length %d into portal %d MB=%#llx\n",
- libcfs_id2str(info->mi_id), info->mi_rlength, info->mi_portal,
- info->mi_mbits);
-
- if (info->mi_portal >= the_lnet.ln_nportals) {
- CERROR("Invalid portal %d not in [0-%d]\n",
- info->mi_portal, the_lnet.ln_nportals);
- return LNET_MATCHMD_DROP;
- }
-
- ptl = the_lnet.ln_portals[info->mi_portal];
- rc = lnet_ptl_match_early(ptl, msg);
- if (rc) /* delayed or dropped the early message */
- return rc;
-
- mtable = lnet_mt_of_match(info, msg);
- lnet_res_lock(mtable->mt_cpt);
-
- if (the_lnet.ln_shutdown) {
- rc = LNET_MATCHMD_DROP;
- goto out1;
- }
-
- rc = lnet_mt_match_md(mtable, info, msg);
- if ((rc & LNET_MATCHMD_EXHAUSTED) && mtable->mt_enabled) {
- lnet_ptl_lock(ptl);
- lnet_ptl_disable_mt(ptl, mtable->mt_cpt);
- lnet_ptl_unlock(ptl);
- }
-
- if (rc & LNET_MATCHMD_FINISH) /* matched or dropping */
- goto out1;
-
- if (!msg->msg_rx_ready_delay)
- goto out1;
-
- LASSERT(lnet_ptl_is_lazy(ptl));
- LASSERT(!msg->msg_rx_delayed);
-
- /* NB: we don't expect "delay" to happen often */
- if (lnet_ptl_is_unique(ptl) || LNET_CPT_NUMBER == 1) {
- lnet_ptl_lock(ptl);
-
- msg->msg_rx_delayed = 1;
- list_add_tail(&msg->msg_list, &ptl->ptl_msg_delayed);
-
- lnet_ptl_unlock(ptl);
- lnet_res_unlock(mtable->mt_cpt);
- rc = LNET_MATCHMD_NONE;
- } else {
- lnet_res_unlock(mtable->mt_cpt);
- rc = lnet_ptl_match_delay(ptl, info, msg);
- }
-
- /* LNET_MATCHMD_NONE means msg was added to the delay queue */
- if (rc & LNET_MATCHMD_NONE) {
- CDEBUG(D_NET,
- "Delaying %s from %s ptl %d MB %#llx off %d len %d\n",
- info->mi_opc == LNET_MD_OP_PUT ? "PUT" : "GET",
- libcfs_id2str(info->mi_id), info->mi_portal,
- info->mi_mbits, info->mi_roffset, info->mi_rlength);
- }
- goto out0;
- out1:
- lnet_res_unlock(mtable->mt_cpt);
- out0:
- /* EXHAUSTED bit is only meaningful for internal functions */
- return rc & ~LNET_MATCHMD_EXHAUSTED;
-}
-
-void
-lnet_ptl_detach_md(struct lnet_me *me, struct lnet_libmd *md)
-{
- LASSERT(me->me_md == md && md->md_me == me);
-
- me->me_md = NULL;
- md->md_me = NULL;
-}
-
-/* called with lnet_res_lock held */
-void
-lnet_ptl_attach_md(struct lnet_me *me, struct lnet_libmd *md,
- struct list_head *matches, struct list_head *drops)
-{
- struct lnet_portal *ptl = the_lnet.ln_portals[me->me_portal];
- struct lnet_match_table *mtable;
- struct list_head *head;
- struct lnet_msg *tmp;
- struct lnet_msg *msg;
- int exhausted = 0;
- int cpt;
-
- LASSERT(!md->md_refcount); /* a brand new MD */
-
- me->me_md = md;
- md->md_me = me;
-
- cpt = lnet_cpt_of_cookie(md->md_lh.lh_cookie);
- mtable = ptl->ptl_mtables[cpt];
-
- if (list_empty(&ptl->ptl_msg_stealing) &&
- list_empty(&ptl->ptl_msg_delayed) &&
- !lnet_mt_test_exhausted(mtable, me->me_pos))
- return;
-
- lnet_ptl_lock(ptl);
- head = &ptl->ptl_msg_stealing;
- again:
- list_for_each_entry_safe(msg, tmp, head, msg_list) {
- struct lnet_match_info info;
- struct lnet_hdr *hdr;
- int rc;
-
- LASSERT(msg->msg_rx_delayed || head == &ptl->ptl_msg_stealing);
-
- hdr = &msg->msg_hdr;
- info.mi_id.nid = hdr->src_nid;
- info.mi_id.pid = hdr->src_pid;
- info.mi_opc = LNET_MD_OP_PUT;
- info.mi_portal = hdr->msg.put.ptl_index;
- info.mi_rlength = hdr->payload_length;
- info.mi_roffset = hdr->msg.put.offset;
- info.mi_mbits = hdr->msg.put.match_bits;
-
- rc = lnet_try_match_md(md, &info, msg);
-
- exhausted = (rc & LNET_MATCHMD_EXHAUSTED);
- if (rc & LNET_MATCHMD_NONE) {
- if (exhausted)
- break;
- continue;
- }
-
- /* Hurrah! This _is_ a match */
- LASSERT(rc & LNET_MATCHMD_FINISH);
- list_del_init(&msg->msg_list);
-
- if (head == &ptl->ptl_msg_stealing) {
- if (exhausted)
- break;
- /* stealing thread will handle the message */
- continue;
- }
-
- if (rc & LNET_MATCHMD_OK) {
- list_add_tail(&msg->msg_list, matches);
-
- CDEBUG(D_NET, "Resuming delayed PUT from %s portal %d match %llu offset %d length %d.\n",
- libcfs_id2str(info.mi_id),
- info.mi_portal, info.mi_mbits,
- info.mi_roffset, info.mi_rlength);
- } else {
- list_add_tail(&msg->msg_list, drops);
- }
-
- if (exhausted)
- break;
- }
-
- if (!exhausted && head == &ptl->ptl_msg_stealing) {
- head = &ptl->ptl_msg_delayed;
- goto again;
- }
-
- if (lnet_ptl_is_wildcard(ptl) && !exhausted) {
- lnet_mt_set_exhausted(mtable, me->me_pos, 0);
- if (!mtable->mt_enabled)
- lnet_ptl_enable_mt(ptl, cpt);
- }
-
- lnet_ptl_unlock(ptl);
-}
-
-static void
-lnet_ptl_cleanup(struct lnet_portal *ptl)
-{
- struct lnet_match_table *mtable;
- int i;
-
- if (!ptl->ptl_mtables) /* uninitialized portal */
- return;
-
- LASSERT(list_empty(&ptl->ptl_msg_delayed));
- LASSERT(list_empty(&ptl->ptl_msg_stealing));
- cfs_percpt_for_each(mtable, i, ptl->ptl_mtables) {
- struct list_head *mhash;
- struct lnet_me *me;
- int j;
-
- if (!mtable->mt_mhash) /* uninitialized match-table */
- continue;
-
- mhash = mtable->mt_mhash;
- /* cleanup ME */
- for (j = 0; j < LNET_MT_HASH_SIZE + 1; j++) {
- while (!list_empty(&mhash[j])) {
- me = list_entry(mhash[j].next,
- struct lnet_me, me_list);
- CERROR("Active ME %p on exit\n", me);
- list_del(&me->me_list);
- kfree(me);
- }
- }
- /* the extra entry is for MEs with ignore bits */
- kvfree(mhash);
- }
-
- cfs_percpt_free(ptl->ptl_mtables);
- ptl->ptl_mtables = NULL;
-}
-
-static int
-lnet_ptl_setup(struct lnet_portal *ptl, int index)
-{
- struct lnet_match_table *mtable;
- struct list_head *mhash;
- int i;
- int j;
-
- ptl->ptl_mtables = cfs_percpt_alloc(lnet_cpt_table(),
- sizeof(struct lnet_match_table));
- if (!ptl->ptl_mtables) {
- CERROR("Failed to create match table for portal %d\n", index);
- return -ENOMEM;
- }
-
- ptl->ptl_index = index;
- INIT_LIST_HEAD(&ptl->ptl_msg_delayed);
- INIT_LIST_HEAD(&ptl->ptl_msg_stealing);
- spin_lock_init(&ptl->ptl_lock);
- cfs_percpt_for_each(mtable, i, ptl->ptl_mtables) {
- /* the extra entry is for MEs with ignore bits */
- mhash = kvzalloc_cpt(sizeof(*mhash) * (LNET_MT_HASH_SIZE + 1),
- GFP_KERNEL, i);
- if (!mhash) {
- CERROR("Failed to create match hash for portal %d\n",
- index);
- goto failed;
- }
-
- memset(&mtable->mt_exhausted[0], -1,
- sizeof(mtable->mt_exhausted[0]) *
- LNET_MT_EXHAUSTED_BMAP);
- mtable->mt_mhash = mhash;
- for (j = 0; j < LNET_MT_HASH_SIZE + 1; j++)
- INIT_LIST_HEAD(&mhash[j]);
-
- mtable->mt_portal = index;
- mtable->mt_cpt = i;
- }
-
- return 0;
- failed:
- lnet_ptl_cleanup(ptl);
- return -ENOMEM;
-}
-
-void
-lnet_portals_destroy(void)
-{
- int i;
-
- if (!the_lnet.ln_portals)
- return;
-
- for (i = 0; i < the_lnet.ln_nportals; i++)
- lnet_ptl_cleanup(the_lnet.ln_portals[i]);
-
- cfs_array_free(the_lnet.ln_portals);
- the_lnet.ln_portals = NULL;
- the_lnet.ln_nportals = 0;
-}
-
-int
-lnet_portals_create(void)
-{
- int size;
- int i;
-
- size = offsetof(struct lnet_portal, ptl_mt_maps[LNET_CPT_NUMBER]);
-
- the_lnet.ln_portals = cfs_array_alloc(MAX_PORTALS, size);
- if (!the_lnet.ln_portals) {
- CERROR("Failed to allocate portals table\n");
- return -ENOMEM;
- }
- the_lnet.ln_nportals = MAX_PORTALS;
-
- for (i = 0; i < the_lnet.ln_nportals; i++) {
- if (lnet_ptl_setup(the_lnet.ln_portals[i], i)) {
- lnet_portals_destroy();
- return -ENOMEM;
- }
- }
-
- return 0;
-}
-
-/**
- * Turn on the lazy portal attribute. Use with caution!
- *
- * This portal attribute only affects incoming PUT requests to the portal,
- * and is off by default. By default, if there's no matching MD for an
- * incoming PUT request, it is simply dropped. With the lazy attribute on,
- * such requests are queued indefinitely until either a matching MD is
- * posted to the portal or the lazy attribute is turned off.
- *
- * While this prevents dropped requests, it should be regarded as the last
- * line of defense - i.e. users must keep a close watch on the active
- * buffers of a lazy portal and, once the count becomes too low, post more
- * buffers as soon as possible. This is because delayed requests usually
- * have detrimental effects on underlying network connections: a few
- * delayed requests often suffice to bring an underlying connection to a
- * complete halt, due to flow control mechanisms.
- *
- * There's also a DoS attack risk. If users don't post match-all MDs on a
- * lazy portal, a malicious peer can easily stop a service by sending some
- * PUT requests with match bits that won't match any MD. A routed server is
- * especially vulnerable since the connections to its neighbor routers are
- * shared among all clients.
- *
- * \param portal Index of the portal to enable the lazy attribute on.
- *
- * \retval 0 On success.
- * \retval -EINVAL If \a portal is not a valid index.
- */
-int
-LNetSetLazyPortal(int portal)
-{
- struct lnet_portal *ptl;
-
- if (portal < 0 || portal >= the_lnet.ln_nportals)
- return -EINVAL;
-
- CDEBUG(D_NET, "Setting portal %d lazy\n", portal);
- ptl = the_lnet.ln_portals[portal];
-
- lnet_res_lock(LNET_LOCK_EX);
- lnet_ptl_lock(ptl);
-
- lnet_ptl_setopt(ptl, LNET_PTL_LAZY);
-
- lnet_ptl_unlock(ptl);
- lnet_res_unlock(LNET_LOCK_EX);
-
- return 0;
-}
-EXPORT_SYMBOL(LNetSetLazyPortal);
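
A minimal usage sketch of the lazy-portal API, assuming a hypothetical
service that posts its own request buffers; my_post_request_buffers() and
the error policy are invented for illustration:

	static int my_service_start(int srv_portal)
	{
		/* queue early PUTs instead of dropping them until the
		 * request buffers below have been posted
		 */
		int rc = LNetSetLazyPortal(srv_portal);

		if (rc)	/* -EINVAL: bad portal index */
			return rc;

		rc = my_post_request_buffers(srv_portal); /* made-up helper */
		if (rc)	/* drop anything queued while we failed */
			LNetClearLazyPortal(srv_portal);
		return rc;
	}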
-
-int
-lnet_clear_lazy_portal(struct lnet_ni *ni, int portal, char *reason)
-{
- struct lnet_portal *ptl;
- LIST_HEAD(zombies);
-
- if (portal < 0 || portal >= the_lnet.ln_nportals)
- return -EINVAL;
-
- ptl = the_lnet.ln_portals[portal];
-
- lnet_res_lock(LNET_LOCK_EX);
- lnet_ptl_lock(ptl);
-
- if (!lnet_ptl_is_lazy(ptl)) {
- lnet_ptl_unlock(ptl);
- lnet_res_unlock(LNET_LOCK_EX);
- return 0;
- }
-
- if (ni) {
- struct lnet_msg *msg, *tmp;
-
- /* grab all messages which are on the NI passed in */
- list_for_each_entry_safe(msg, tmp, &ptl->ptl_msg_delayed,
- msg_list) {
- if (msg->msg_rxpeer->lp_ni == ni)
- list_move(&msg->msg_list, &zombies);
- }
- } else {
- if (the_lnet.ln_shutdown)
- CWARN("Active lazy portal %d on exit\n", portal);
- else
- CDEBUG(D_NET, "clearing portal %d lazy\n", portal);
-
- /* grab all the blocked messages atomically */
- list_splice_init(&ptl->ptl_msg_delayed, &zombies);
-
- lnet_ptl_unsetopt(ptl, LNET_PTL_LAZY);
- }
-
- lnet_ptl_unlock(ptl);
- lnet_res_unlock(LNET_LOCK_EX);
-
- lnet_drop_delayed_msg_list(&zombies, reason);
-
- return 0;
-}
-
-/**
- * Turn off the lazy portal attribute. Any delayed requests on the portal
- * will be dropped when this function returns.
- *
- * \param portal Index of the portal to disable the lazy attribute on.
- *
- * \retval 0 On success.
- * \retval -EINVAL If \a portal is not a valid index.
- */
-int
-LNetClearLazyPortal(int portal)
-{
- return lnet_clear_lazy_portal(NULL, portal,
- "Clearing lazy portal attr");
-}
-EXPORT_SYMBOL(LNetClearLazyPortal);
diff --git a/drivers/staging/lustre/lnet/lnet/lib-socket.c b/drivers/staging/lustre/lnet/lnet/lib-socket.c
deleted file mode 100644
index 1bee667802b0..000000000000
--- a/drivers/staging/lustre/lnet/lnet/lib-socket.c
+++ /dev/null
@@ -1,586 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/*
- * GPL HEADER START
- *
- * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 only,
- * as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * General Public License version 2 for more details (a copy is included
- * in the LICENSE file that accompanied this code).
- *
- * You should have received a copy of the GNU General Public License
- * version 2 along with this program; If not, see
- * http://www.gnu.org/licenses/gpl-2.0.html
- *
- * GPL HEADER END
- */
-/*
- * Copyright (c) 2008, 2010, Oracle and/or its affiliates. All rights reserved.
- * Use is subject to license terms.
- *
- * Copyright (c) 2012, 2015, Intel Corporation.
- */
-/*
- * This file is part of Lustre, http://www.lustre.org/
- * Lustre is a trademark of Seagate, Inc.
- */
-#define DEBUG_SUBSYSTEM S_LNET
-
-#include <linux/if.h>
-#include <linux/in.h>
-#include <linux/net.h>
-#include <linux/file.h>
-#include <linux/pagemap.h>
-/* For sys_open & sys_close */
-#include <linux/syscalls.h>
-#include <net/sock.h>
-
-#include <linux/libcfs/libcfs.h>
-#include <linux/lnet/lib-lnet.h>
-
-static int
-kernel_sock_unlocked_ioctl(struct file *filp, int cmd, unsigned long arg)
-{
- mm_segment_t oldfs = get_fs();
- int err;
-
- set_fs(KERNEL_DS);
- err = filp->f_op->unlocked_ioctl(filp, cmd, arg);
- set_fs(oldfs);
-
- return err;
-}
-
-static int
-lnet_sock_ioctl(int cmd, unsigned long arg)
-{
- struct file *sock_filp;
- struct socket *sock;
- int rc;
-
- rc = sock_create(PF_INET, SOCK_STREAM, 0, &sock);
- if (rc) {
- CERROR("Can't create socket: %d\n", rc);
- return rc;
- }
-
- sock_filp = sock_alloc_file(sock, 0, NULL);
- if (IS_ERR(sock_filp))
- return PTR_ERR(sock_filp);
-
- rc = kernel_sock_unlocked_ioctl(sock_filp, cmd, arg);
-
- fput(sock_filp);
- return rc;
-}
-
-int
-lnet_ipif_query(char *name, int *up, __u32 *ip, __u32 *mask)
-{
- struct ifreq ifr;
- int nob;
- int rc;
- __be32 val;
-
- nob = strnlen(name, IFNAMSIZ);
- if (nob == IFNAMSIZ) {
- CERROR("Interface name %s too long\n", name);
- return -EINVAL;
- }
-
- BUILD_BUG_ON(sizeof(ifr.ifr_name) < IFNAMSIZ);
-
- if (strlen(name) > sizeof(ifr.ifr_name) - 1)
- return -E2BIG;
- strncpy(ifr.ifr_name, name, sizeof(ifr.ifr_name));
-
- rc = lnet_sock_ioctl(SIOCGIFFLAGS, (unsigned long)&ifr);
- if (rc) {
- CERROR("Can't get flags for interface %s\n", name);
- return rc;
- }
-
- if (!(ifr.ifr_flags & IFF_UP)) {
- CDEBUG(D_NET, "Interface %s down\n", name);
- *up = 0;
- *ip = *mask = 0;
- return 0;
- }
- *up = 1;
-
- if (strlen(name) > sizeof(ifr.ifr_name) - 1)
- return -E2BIG;
- strncpy(ifr.ifr_name, name, sizeof(ifr.ifr_name));
-
- ifr.ifr_addr.sa_family = AF_INET;
- rc = lnet_sock_ioctl(SIOCGIFADDR, (unsigned long)&ifr);
- if (rc) {
- CERROR("Can't get IP address for interface %s\n", name);
- return rc;
- }
-
- val = ((struct sockaddr_in *)&ifr.ifr_addr)->sin_addr.s_addr;
- *ip = ntohl(val);
-
- if (strlen(name) > sizeof(ifr.ifr_name) - 1)
- return -E2BIG;
- strncpy(ifr.ifr_name, name, sizeof(ifr.ifr_name));
-
- ifr.ifr_addr.sa_family = AF_INET;
- rc = lnet_sock_ioctl(SIOCGIFNETMASK, (unsigned long)&ifr);
- if (rc) {
- CERROR("Can't get netmask for interface %s\n", name);
- return rc;
- }
-
- val = ((struct sockaddr_in *)&ifr.ifr_netmask)->sin_addr.s_addr;
- *mask = ntohl(val);
-
- return 0;
-}
-EXPORT_SYMBOL(lnet_ipif_query);
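
A hedged example of calling lnet_ipif_query(); the interface name is a
placeholder, and note that the address and mask are returned in host byte
order (hence the %pI4h format used later in this file):

	int up;
	__u32 ip;
	__u32 mask;
	int rc = lnet_ipif_query("eth0", &up, &ip, &mask);

	if (!rc && up)
		CDEBUG(D_NET, "eth0: %pI4h/%pI4h\n", &ip, &mask);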
-
-int
-lnet_ipif_enumerate(char ***namesp)
-{
- /* Allocate and fill in 'names', returning # interfaces/error */
- char **names;
- int toobig;
- int nalloc;
- int nfound;
- struct ifreq *ifr;
- struct ifconf ifc;
- int rc;
- int nob;
- int i;
-
- nalloc = 16; /* first guess at max interfaces */
- toobig = 0;
- for (;;) {
- if (nalloc * sizeof(*ifr) > PAGE_SIZE) {
- toobig = 1;
- nalloc = PAGE_SIZE / sizeof(*ifr);
- CWARN("Too many interfaces: only enumerating first %d\n",
- nalloc);
- }
-
- ifr = kzalloc(nalloc * sizeof(*ifr), GFP_KERNEL);
- if (!ifr) {
- CERROR("ENOMEM enumerating up to %d interfaces\n",
- nalloc);
- rc = -ENOMEM;
- goto out0;
- }
-
- ifc.ifc_buf = (char *)ifr;
- ifc.ifc_len = nalloc * sizeof(*ifr);
-
- rc = lnet_sock_ioctl(SIOCGIFCONF, (unsigned long)&ifc);
- if (rc < 0) {
- CERROR("Error %d enumerating interfaces\n", rc);
- goto out1;
- }
-
- LASSERT(!rc);
-
- nfound = ifc.ifc_len / sizeof(*ifr);
- LASSERT(nfound <= nalloc);
-
- if (nfound < nalloc || toobig)
- break;
-
- kfree(ifr);
- nalloc *= 2;
- }
-
- if (!nfound)
- goto out1;
-
- names = kzalloc(nfound * sizeof(*names), GFP_KERNEL);
- if (!names) {
- rc = -ENOMEM;
- goto out1;
- }
-
- for (i = 0; i < nfound; i++) {
- nob = strnlen(ifr[i].ifr_name, IFNAMSIZ);
- if (nob == IFNAMSIZ) {
- /* no space for the terminating NUL */
- CERROR("interface name %.*s too long (%d max)\n",
- nob, ifr[i].ifr_name, IFNAMSIZ);
- rc = -ENAMETOOLONG;
- goto out2;
- }
-
- names[i] = kmalloc(IFNAMSIZ, GFP_KERNEL);
- if (!names[i]) {
- rc = -ENOMEM;
- goto out2;
- }
-
- memcpy(names[i], ifr[i].ifr_name, nob);
- names[i][nob] = 0;
- }
-
- *namesp = names;
- rc = nfound;
-
-out2:
- if (rc < 0)
- lnet_ipif_free_enumeration(names, nfound);
-out1:
- kfree(ifr);
-out0:
- return rc;
-}
-EXPORT_SYMBOL(lnet_ipif_enumerate);
-
-void
-lnet_ipif_free_enumeration(char **names, int n)
-{
- int i;
-
- LASSERT(n > 0);
-
- for (i = 0; i < n && names[i]; i++)
- kfree(names[i]);
-
- kfree(names);
-}
-EXPORT_SYMBOL(lnet_ipif_free_enumeration);
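
The enumerate/free helpers are meant to be paired; a sketch under the
assumption that the caller only wants to log the interface names:

	char **ifnames;
	int n = lnet_ipif_enumerate(&ifnames);
	int i;

	if (n > 0) {	/* negative n is -errno; 0 means nothing found */
		for (i = 0; i < n; i++)
			CDEBUG(D_NET, "found interface %s\n", ifnames[i]);
		lnet_ipif_free_enumeration(ifnames, n);
	}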
-
-int
-lnet_sock_write(struct socket *sock, void *buffer, int nob, int timeout)
-{
- int rc;
- long jiffies_left = timeout * msecs_to_jiffies(MSEC_PER_SEC);
- unsigned long then;
- struct timeval tv;
- struct kvec iov = { .iov_base = buffer, .iov_len = nob };
- struct msghdr msg = {NULL,};
-
- LASSERT(nob > 0);
- /*
- * Caller may pass a zero timeout if she thinks the socket buffer is
- * empty enough to take the whole message immediately
- */
- iov_iter_kvec(&msg.msg_iter, WRITE | ITER_KVEC, &iov, 1, nob);
- for (;;) {
- msg.msg_flags = !timeout ? MSG_DONTWAIT : 0;
- if (timeout) {
- /* Set send timeout to remaining time */
- jiffies_to_timeval(jiffies_left, &tv);
- rc = kernel_setsockopt(sock, SOL_SOCKET, SO_SNDTIMEO,
- (char *)&tv, sizeof(tv));
- if (rc) {
- CERROR("Can't set socket send timeout %ld.%06d: %d\n",
- (long)tv.tv_sec, (int)tv.tv_usec, rc);
- return rc;
- }
- }
-
- then = jiffies;
- rc = kernel_sendmsg(sock, &msg, &iov, 1, nob);
- jiffies_left -= jiffies - then;
-
- if (rc < 0)
- return rc;
-
- if (!rc) {
- CERROR("Unexpected zero rc\n");
- return -ECONNABORTED;
- }
-
- if (!msg_data_left(&msg))
- break;
-
- if (jiffies_left <= 0)
- return -EAGAIN;
- }
- return 0;
-}
-EXPORT_SYMBOL(lnet_sock_write);
-
-int
-lnet_sock_read(struct socket *sock, void *buffer, int nob, int timeout)
-{
- int rc;
- long jiffies_left = timeout * msecs_to_jiffies(MSEC_PER_SEC);
- unsigned long then;
- struct timeval tv;
- struct kvec iov = {
- .iov_base = buffer,
- .iov_len = nob
- };
- struct msghdr msg = {
- .msg_flags = 0
- };
-
- LASSERT(nob > 0);
- LASSERT(jiffies_left > 0);
-
- iov_iter_kvec(&msg.msg_iter, READ | ITER_KVEC, &iov, 1, nob);
-
- for (;;) {
- /* Set receive timeout to remaining time */
- jiffies_to_timeval(jiffies_left, &tv);
- rc = kernel_setsockopt(sock, SOL_SOCKET, SO_RCVTIMEO,
- (char *)&tv, sizeof(tv));
- if (rc) {
- CERROR("Can't set socket recv timeout %ld.%06d: %d\n",
- (long)tv.tv_sec, (int)tv.tv_usec, rc);
- return rc;
- }
-
- then = jiffies;
- rc = sock_recvmsg(sock, &msg, 0);
- jiffies_left -= jiffies - then;
-
- if (rc < 0)
- return rc;
-
- if (!rc)
- return -ECONNRESET;
-
- if (!msg_data_left(&msg))
- return 0;
-
- if (jiffies_left <= 0)
- return -ETIMEDOUT;
- }
-}
-EXPORT_SYMBOL(lnet_sock_read);
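
Both helpers either transfer exactly nob bytes or fail, so a zero return
means a complete transfer. A sketch of reading a fixed-size header with a
10-second timeout (struct my_hdr is made up; sock is an established
struct socket *):

	struct my_hdr hdr;
	int rc = lnet_sock_read(sock, &hdr, sizeof(hdr), 10);

	if (rc)	/* -ETIMEDOUT, -ECONNRESET or another -errno */
		return rc;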
-
-static int
-lnet_sock_create(struct socket **sockp, int *fatal, __u32 local_ip,
- int local_port)
-{
- struct sockaddr_in locaddr;
- struct socket *sock;
- int rc;
- int option;
-
- /* All errors are fatal except bind failure if the port is in use */
- *fatal = 1;
-
- rc = sock_create(PF_INET, SOCK_STREAM, 0, &sock);
- *sockp = sock;
- if (rc) {
- CERROR("Can't create socket: %d\n", rc);
- return rc;
- }
-
- option = 1;
- rc = kernel_setsockopt(sock, SOL_SOCKET, SO_REUSEADDR,
- (char *)&option, sizeof(option));
- if (rc) {
- CERROR("Can't set SO_REUSEADDR for socket: %d\n", rc);
- goto failed;
- }
-
- if (local_ip || local_port) {
- memset(&locaddr, 0, sizeof(locaddr));
- locaddr.sin_family = AF_INET;
- locaddr.sin_port = htons(local_port);
- if (!local_ip)
- locaddr.sin_addr.s_addr = htonl(INADDR_ANY);
- else
- locaddr.sin_addr.s_addr = htonl(local_ip);
-
- rc = kernel_bind(sock, (struct sockaddr *)&locaddr,
- sizeof(locaddr));
- if (rc == -EADDRINUSE) {
- CDEBUG(D_NET, "Port %d already in use\n", local_port);
- *fatal = 0;
- goto failed;
- }
- if (rc) {
- CERROR("Error trying to bind to port %d: %d\n",
- local_port, rc);
- goto failed;
- }
- }
- return 0;
-
-failed:
- sock_release(sock);
- return rc;
-}
-
-int
-lnet_sock_setbuf(struct socket *sock, int txbufsize, int rxbufsize)
-{
- int option;
- int rc;
-
- if (txbufsize) {
- option = txbufsize;
- rc = kernel_setsockopt(sock, SOL_SOCKET, SO_SNDBUF,
- (char *)&option, sizeof(option));
- if (rc) {
- CERROR("Can't set send buffer %d: %d\n",
- option, rc);
- return rc;
- }
- }
-
- if (rxbufsize) {
- option = rxbufsize;
- rc = kernel_setsockopt(sock, SOL_SOCKET, SO_RCVBUF,
- (char *)&option, sizeof(option));
- if (rc) {
- CERROR("Can't set receive buffer %d: %d\n",
- option, rc);
- return rc;
- }
- }
- return 0;
-}
-EXPORT_SYMBOL(lnet_sock_setbuf);
-
-int
-lnet_sock_getaddr(struct socket *sock, bool remote, __u32 *ip, int *port)
-{
- struct sockaddr_in sin;
- int rc;
-
- if (remote)
- rc = kernel_getpeername(sock, (struct sockaddr *)&sin);
- else
- rc = kernel_getsockname(sock, (struct sockaddr *)&sin);
- if (rc < 0) {
- CERROR("Error %d getting sock %s IP/port\n",
- rc, remote ? "peer" : "local");
- return rc;
- }
-
- if (ip)
- *ip = ntohl(sin.sin_addr.s_addr);
-
- if (port)
- *port = ntohs(sin.sin_port);
-
- return 0;
-}
-EXPORT_SYMBOL(lnet_sock_getaddr);
-
-int
-lnet_sock_getbuf(struct socket *sock, int *txbufsize, int *rxbufsize)
-{
- if (txbufsize)
- *txbufsize = sock->sk->sk_sndbuf;
-
- if (rxbufsize)
- *rxbufsize = sock->sk->sk_rcvbuf;
-
- return 0;
-}
-EXPORT_SYMBOL(lnet_sock_getbuf);
-
-int
-lnet_sock_listen(struct socket **sockp, __u32 local_ip, int local_port,
- int backlog)
-{
- int fatal;
- int rc;
-
- rc = lnet_sock_create(sockp, &fatal, local_ip, local_port);
- if (rc) {
- if (!fatal)
- CERROR("Can't create socket: port %d already in use\n",
- local_port);
- return rc;
- }
-
- rc = kernel_listen(*sockp, backlog);
- if (!rc)
- return 0;
-
- CERROR("Can't set listen backlog %d: %d\n", backlog, rc);
- sock_release(*sockp);
- return rc;
-}
-
-int
-lnet_sock_accept(struct socket **newsockp, struct socket *sock)
-{
- wait_queue_entry_t wait;
- struct socket *newsock;
- int rc;
-
- /*
- * XXX this should add a ref to sock->ops->owner, if
- * TCP could be a module
- */
- rc = sock_create_lite(PF_PACKET, sock->type, IPPROTO_TCP, &newsock);
- if (rc) {
- CERROR("Can't allocate socket\n");
- return rc;
- }
-
- newsock->ops = sock->ops;
-
- rc = sock->ops->accept(sock, newsock, O_NONBLOCK, false);
- if (rc == -EAGAIN) {
- /* Nothing ready, so wait for activity */
- init_waitqueue_entry(&wait, current);
- add_wait_queue(sk_sleep(sock->sk), &wait);
- set_current_state(TASK_INTERRUPTIBLE);
- schedule();
- remove_wait_queue(sk_sleep(sock->sk), &wait);
- rc = sock->ops->accept(sock, newsock, O_NONBLOCK, false);
- }
-
- if (rc)
- goto failed;
-
- *newsockp = newsock;
- return 0;
-
-failed:
- sock_release(newsock);
- return rc;
-}
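
A minimal accept-loop sketch built from lnet_sock_listen() and
lnet_sock_accept(); the port, backlog, shutdown flag and connection
handler are all invented for illustration:

	static int my_acceptor(bool *shutting_down)
	{
		struct socket *listener;
		struct socket *conn;
		int rc = lnet_sock_listen(&listener, 0 /* INADDR_ANY */,
					  988, 127);

		if (rc)
			return rc;

		while (!*shutting_down) {
			rc = lnet_sock_accept(&conn, listener);
			if (!rc)
				handle_connection(conn); /* made-up helper */
		}
		sock_release(listener);
		return 0;
	}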
-
-int
-lnet_sock_connect(struct socket **sockp, int *fatal, __u32 local_ip,
- int local_port, __u32 peer_ip, int peer_port)
-{
- struct sockaddr_in srvaddr;
- int rc;
-
- rc = lnet_sock_create(sockp, fatal, local_ip, local_port);
- if (rc)
- return rc;
-
- memset(&srvaddr, 0, sizeof(srvaddr));
- srvaddr.sin_family = AF_INET;
- srvaddr.sin_port = htons(peer_port);
- srvaddr.sin_addr.s_addr = htonl(peer_ip);
-
- rc = kernel_connect(*sockp, (struct sockaddr *)&srvaddr,
- sizeof(srvaddr), 0);
- if (!rc)
- return 0;
-
- /*
- * EADDRNOTAVAIL probably means we're already connected to the same
- * peer/port on the same local port on a differently typed
- * connection. Let our caller retry with a different local
- * port...
- */
- *fatal = !(rc == -EADDRNOTAVAIL);
-
- CDEBUG_LIMIT(*fatal ? D_NETERROR : D_NET,
- "Error %d connecting %pI4h/%d -> %pI4h/%d\n", rc,
- &local_ip, local_port, &peer_ip, peer_port);
-
- sock_release(*sockp);
- return rc;
-}
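
On the active side, a non-fatal error means only that the chosen local
port was unavailable, so one common pattern is to walk a range of local
ports. A sketch (the peer address and the port range are arbitrary
illustrations):

	struct socket *sock;
	__u32 peer_ip = (192 << 24) | (168 << 16) | 1;	/* 192.168.0.1 */
	int fatal = 0;
	int rc = -EADDRINUSE;
	int port;

	for (port = 1023; port > 512 && rc && !fatal; port--)
		rc = lnet_sock_connect(&sock, &fatal, 0 /* any local IP */,
				       port, peer_ip, 988);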
diff --git a/drivers/staging/lustre/lnet/lnet/lo.c b/drivers/staging/lustre/lnet/lnet/lo.c
deleted file mode 100644
index 7456b989e451..000000000000
--- a/drivers/staging/lustre/lnet/lnet/lo.c
+++ /dev/null
@@ -1,105 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/*
- * GPL HEADER START
- *
- * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 only,
- * as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * General Public License version 2 for more details (a copy is included
- * in the LICENSE file that accompanied this code).
- *
- * You should have received a copy of the GNU General Public License
- * version 2 along with this program; If not, see
- * http://www.gnu.org/licenses/gpl-2.0.html
- *
- * GPL HEADER END
- */
-/*
- * Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved.
- * Use is subject to license terms.
- */
-/*
- * This file is part of Lustre, http://www.lustre.org/
- * Lustre is a trademark of Sun Microsystems, Inc.
- */
-
-#define DEBUG_SUBSYSTEM S_LNET
-
-#include <linux/lnet/lib-lnet.h>
-
-static int
-lolnd_send(struct lnet_ni *ni, void *private, struct lnet_msg *lntmsg)
-{
- LASSERT(!lntmsg->msg_routing);
- LASSERT(!lntmsg->msg_target_is_router);
-
- return lnet_parse(ni, &lntmsg->msg_hdr, ni->ni_nid, lntmsg, 0);
-}
-
-static int
-lolnd_recv(struct lnet_ni *ni, void *private, struct lnet_msg *lntmsg,
- int delayed, struct iov_iter *to, unsigned int rlen)
-{
- struct lnet_msg *sendmsg = private;
-
- if (lntmsg) { /* not discarding */
- if (sendmsg->msg_iov)
- lnet_copy_iov2iter(to,
- sendmsg->msg_niov,
- sendmsg->msg_iov,
- sendmsg->msg_offset,
- iov_iter_count(to));
- else
- lnet_copy_kiov2iter(to,
- sendmsg->msg_niov,
- sendmsg->msg_kiov,
- sendmsg->msg_offset,
- iov_iter_count(to));
-
- lnet_finalize(ni, lntmsg, 0);
- }
-
- lnet_finalize(ni, sendmsg, 0);
- return 0;
-}
-
-static int lolnd_instanced;
-
-static void
-lolnd_shutdown(struct lnet_ni *ni)
-{
- CDEBUG(D_NET, "shutdown\n");
- LASSERT(lolnd_instanced);
-
- lolnd_instanced = 0;
-}
-
-static int
-lolnd_startup(struct lnet_ni *ni)
-{
- LASSERT(ni->ni_lnd == &the_lolnd);
- LASSERT(!lolnd_instanced);
- lolnd_instanced = 1;
-
- return 0;
-}
-
-struct lnet_lnd the_lolnd = {
- /* .lnd_list = */ {&the_lolnd.lnd_list, &the_lolnd.lnd_list},
- /* .lnd_refcount = */ 0,
- /* .lnd_type = */ LOLND,
- /* .lnd_startup = */ lolnd_startup,
- /* .lnd_shutdown = */ lolnd_shutdown,
- /* .lnd_ctl = */ NULL,
- /* .lnd_send = */ lolnd_send,
- /* .lnd_recv = */ lolnd_recv,
- /* .lnd_eager_recv = */ NULL,
- /* .lnd_notify = */ NULL,
- /* .lnd_accept = */ NULL
-};
diff --git a/drivers/staging/lustre/lnet/lnet/module.c b/drivers/staging/lustre/lnet/lnet/module.c
deleted file mode 100644
index c0c4723f72fd..000000000000
--- a/drivers/staging/lustre/lnet/lnet/module.c
+++ /dev/null
@@ -1,223 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/*
- * GPL HEADER START
- *
- * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 only,
- * as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * General Public License version 2 for more details (a copy is included
- * in the LICENSE file that accompanied this code).
- *
- * You should have received a copy of the GNU General Public License
- * version 2 along with this program; If not, see
- * http://www.gnu.org/licenses/gpl-2.0.html
- *
- * GPL HEADER END
- */
-/*
- * Copyright (c) 2004, 2010, Oracle and/or its affiliates. All rights reserved.
- * Use is subject to license terms.
- *
- * Copyright (c) 2012, 2015, Intel Corporation.
- */
-/*
- * This file is part of Lustre, http://www.lustre.org/
- * Lustre is a trademark of Sun Microsystems, Inc.
- */
-
-#define DEBUG_SUBSYSTEM S_LNET
-
-#include <linux/lnet/lib-lnet.h>
-#include <uapi/linux/lnet/lnet-dlc.h>
-
-static int config_on_load;
-module_param(config_on_load, int, 0444);
-MODULE_PARM_DESC(config_on_load, "configure network at module load");
-
-static struct mutex lnet_config_mutex;
-
-static int
-lnet_configure(void *arg)
-{
- /* 'arg' is only there so this function can be passed to kthread_run() */
- int rc = 0;
-
- mutex_lock(&lnet_config_mutex);
-
- if (!the_lnet.ln_niinit_self) {
- rc = try_module_get(THIS_MODULE);
-
- if (rc != 1)
- goto out;
-
- rc = LNetNIInit(LNET_PID_LUSTRE);
- if (rc >= 0) {
- the_lnet.ln_niinit_self = 1;
- rc = 0;
- } else {
- module_put(THIS_MODULE);
- }
- }
-
-out:
- mutex_unlock(&lnet_config_mutex);
- return rc;
-}
-
-static int
-lnet_unconfigure(void)
-{
- int refcount;
-
- mutex_lock(&lnet_config_mutex);
-
- if (the_lnet.ln_niinit_self) {
- the_lnet.ln_niinit_self = 0;
- LNetNIFini();
- module_put(THIS_MODULE);
- }
-
- mutex_lock(&the_lnet.ln_api_mutex);
- refcount = the_lnet.ln_refcount;
- mutex_unlock(&the_lnet.ln_api_mutex);
-
- mutex_unlock(&lnet_config_mutex);
- return !refcount ? 0 : -EBUSY;
-}
-
-static int
-lnet_dyn_configure(struct libcfs_ioctl_hdr *hdr)
-{
- struct lnet_ioctl_config_data *conf =
- (struct lnet_ioctl_config_data *)hdr;
- int rc;
-
- if (conf->cfg_hdr.ioc_len < sizeof(*conf))
- return -EINVAL;
-
- mutex_lock(&lnet_config_mutex);
- if (!the_lnet.ln_niinit_self) {
- rc = -EINVAL;
- goto out_unlock;
- }
- rc = lnet_dyn_add_ni(LNET_PID_LUSTRE, conf);
-out_unlock:
- mutex_unlock(&lnet_config_mutex);
-
- return rc;
-}
-
-static int
-lnet_dyn_unconfigure(struct libcfs_ioctl_hdr *hdr)
-{
- struct lnet_ioctl_config_data *conf =
- (struct lnet_ioctl_config_data *)hdr;
- int rc;
-
- if (conf->cfg_hdr.ioc_len < sizeof(*conf))
- return -EINVAL;
-
- mutex_lock(&lnet_config_mutex);
- if (!the_lnet.ln_niinit_self) {
- rc = -EINVAL;
- goto out_unlock;
- }
- rc = lnet_dyn_del_ni(conf->cfg_net);
-out_unlock:
- mutex_unlock(&lnet_config_mutex);
-
- return rc;
-}
-
-static int
-lnet_ioctl(unsigned int cmd, struct libcfs_ioctl_hdr *hdr)
-{
- int rc;
-
- switch (cmd) {
- case IOC_LIBCFS_CONFIGURE: {
- struct libcfs_ioctl_data *data =
- (struct libcfs_ioctl_data *)hdr;
-
- if (data->ioc_hdr.ioc_len < sizeof(*data))
- return -EINVAL;
-
- the_lnet.ln_nis_from_mod_params = data->ioc_flags;
- return lnet_configure(NULL);
- }
-
- case IOC_LIBCFS_UNCONFIGURE:
- return lnet_unconfigure();
-
- case IOC_LIBCFS_ADD_NET:
- return lnet_dyn_configure(hdr);
-
- case IOC_LIBCFS_DEL_NET:
- return lnet_dyn_unconfigure(hdr);
-
- default:
- /*
- * Passing LNET_PID_ANY only gives me a ref if the net is up
- * already; I'll need it to ensure the net can't go down while
- * I'm called into it
- */
- rc = LNetNIInit(LNET_PID_ANY);
- if (rc >= 0) {
- rc = LNetCtl(cmd, hdr);
- LNetNIFini();
- }
- return rc;
- }
-}
-
-static DECLARE_IOCTL_HANDLER(lnet_ioctl_handler, lnet_ioctl);
-
-static int __init lnet_init(void)
-{
- int rc;
-
- mutex_init(&lnet_config_mutex);
-
- rc = lnet_lib_init();
- if (rc) {
- CERROR("lnet_lib_init: error %d\n", rc);
- return rc;
- }
-
- rc = libcfs_register_ioctl(&lnet_ioctl_handler);
- LASSERT(!rc);
-
- if (config_on_load) {
- /*
- * Have to schedule a separate thread to avoid deadlocking
- * in modload
- */
- (void)kthread_run(lnet_configure, NULL, "lnet_initd");
- }
-
- return 0;
-}
-
-static void __exit lnet_exit(void)
-{
- int rc;
-
- rc = libcfs_deregister_ioctl(&lnet_ioctl_handler);
- LASSERT(!rc);
-
- lnet_lib_exit();
-}
-
-MODULE_AUTHOR("OpenSFS, Inc. <http://www.lustre.org/>");
-MODULE_DESCRIPTION("Lustre Networking layer");
-MODULE_VERSION(LNET_VERSION);
-MODULE_LICENSE("GPL");
-
-module_init(lnet_init);
-module_exit(lnet_exit);
diff --git a/drivers/staging/lustre/lnet/lnet/net_fault.c b/drivers/staging/lustre/lnet/lnet/net_fault.c
deleted file mode 100644
index a63b7941d435..000000000000
--- a/drivers/staging/lustre/lnet/lnet/net_fault.c
+++ /dev/null
@@ -1,1023 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/*
- * GPL HEADER START
- *
- * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 only,
- * as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * General Public License version 2 for more details (a copy is included
- * in the LICENSE file that accompanied this code).
- *
- * You should have received a copy of the GNU General Public License
- * version 2 along with this program; If not, see
- * http://www.gnu.org/licenses/gpl-2.0.html
- *
- * GPL HEADER END
- */
-/*
- * Copyright (c) 2014, Intel Corporation.
- */
-/*
- * This file is part of Lustre, http://www.lustre.org/
- * Lustre is a trademark of Seagate, Inc.
- *
- * lnet/lnet/net_fault.c
- *
- * Lustre network fault simulation
- *
- * Author: liang.zhen@intel.com
- */
-
-#define DEBUG_SUBSYSTEM S_LNET
-
-#include <linux/lnet/lib-lnet.h>
-#include <uapi/linux/lnet/lnetctl.h>
-
-#define LNET_MSG_MASK (LNET_PUT_BIT | LNET_ACK_BIT | \
- LNET_GET_BIT | LNET_REPLY_BIT)
-
-struct lnet_drop_rule {
- /** link chain on the_lnet.ln_drop_rules */
- struct list_head dr_link;
- /** attributes of this rule */
- struct lnet_fault_attr dr_attr;
- /** lock to protect \a dr_drop_at and \a dr_stat */
- spinlock_t dr_lock;
- /**
- * the message sequence number to drop; the message is dropped when
- * dr_stat.fs_count == dr_drop_at
- */
- unsigned long dr_drop_at;
- /**
- * time (in seconds) to drop the next message; mutually exclusive
- * with dr_drop_at
- */
- unsigned long dr_drop_time;
- /** baseline to calculate dr_drop_time */
- unsigned long dr_time_base;
- /** statistics of dropped messages */
- struct lnet_fault_stat dr_stat;
-};
-
-static bool
-lnet_fault_nid_match(lnet_nid_t nid, lnet_nid_t msg_nid)
-{
- if (nid == msg_nid || nid == LNET_NID_ANY)
- return true;
-
- if (LNET_NIDNET(nid) != LNET_NIDNET(msg_nid))
- return false;
-
- /* 255.255.255.255@net is wildcard for all addresses in a network */
- return LNET_NIDADDR(nid) == LNET_NIDADDR(LNET_NID_ANY);
-}
-
-static bool
-lnet_fault_attr_match(struct lnet_fault_attr *attr, lnet_nid_t src,
- lnet_nid_t dst, unsigned int type, unsigned int portal)
-{
- if (!lnet_fault_nid_match(attr->fa_src, src) ||
- !lnet_fault_nid_match(attr->fa_dst, dst))
- return false;
-
- if (!(attr->fa_msg_mask & (1 << type)))
- return false;
-
- /**
- * NB: ACK and REPLY have no portal, but they should have been
- * rejected by message mask
- */
- if (attr->fa_ptl_mask && /* has portal filter */
- !(attr->fa_ptl_mask & (1ULL << portal)))
- return false;
-
- return true;
-}
-
-static int
-lnet_fault_attr_validate(struct lnet_fault_attr *attr)
-{
- if (!attr->fa_msg_mask)
- attr->fa_msg_mask = LNET_MSG_MASK; /* all message types */
-
- if (!attr->fa_ptl_mask) /* no portal filter */
- return 0;
-
- /* NB: only PUT and GET can be filtered if portal filter has been set */
- attr->fa_msg_mask &= LNET_GET_BIT | LNET_PUT_BIT;
- if (!attr->fa_msg_mask) {
- CDEBUG(D_NET, "can't find valid message type bits %x\n",
- attr->fa_msg_mask);
- return -EINVAL;
- }
- return 0;
-}
-
-static void
-lnet_fault_stat_inc(struct lnet_fault_stat *stat, unsigned int type)
-{
- /* NB: fs_counter is NOT updated by this function */
- switch (type) {
- case LNET_MSG_PUT:
- stat->fs_put++;
- return;
- case LNET_MSG_ACK:
- stat->fs_ack++;
- return;
- case LNET_MSG_GET:
- stat->fs_get++;
- return;
- case LNET_MSG_REPLY:
- stat->fs_reply++;
- return;
- }
-}
-
-/**
- * LNet message drop simulation
- */
-
-/**
- * Add a new drop rule to LNet
- * There is no check for duplicate drop rules; all rules will be checked
- * against each incoming message.
- */
-static int
-lnet_drop_rule_add(struct lnet_fault_attr *attr)
-{
- struct lnet_drop_rule *rule;
-
- if (attr->u.drop.da_rate && attr->u.drop.da_interval) {
- CDEBUG(D_NET, "please provide either drop rate or drop interval, but not both at the same time %d/%d\n",
- attr->u.drop.da_rate, attr->u.drop.da_interval);
- return -EINVAL;
- }
-
- if (lnet_fault_attr_validate(attr))
- return -EINVAL;
-
- rule = kzalloc(sizeof(*rule), GFP_NOFS);
- if (!rule)
- return -ENOMEM;
-
- spin_lock_init(&rule->dr_lock);
-
- rule->dr_attr = *attr;
- if (attr->u.drop.da_interval) {
- rule->dr_time_base = cfs_time_shift(attr->u.drop.da_interval);
- rule->dr_drop_time = cfs_time_shift(
- prandom_u32_max(attr->u.drop.da_interval));
- } else {
- rule->dr_drop_at = prandom_u32_max(attr->u.drop.da_rate);
- }
-
- lnet_net_lock(LNET_LOCK_EX);
- list_add(&rule->dr_link, &the_lnet.ln_drop_rules);
- lnet_net_unlock(LNET_LOCK_EX);
-
- CDEBUG(D_NET, "Added drop rule: src %s, dst %s, rate %d, interval %d\n",
- libcfs_nid2str(attr->fa_src), libcfs_nid2str(attr->fa_dst),
- attr->u.drop.da_rate, attr->u.drop.da_interval);
- return 0;
-}
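
For illustration, a hedged sketch of the attribute needed for a
rate-based rule; the wildcard NIDs are placeholders, and in-tree callers
reach this static function through lnet_fault_ctl() below:

	struct lnet_fault_attr attr = { 0 };
	int rc;

	attr.fa_src = LNET_NID_ANY;	/* match any source */
	attr.fa_dst = LNET_NID_ANY;	/* match any destination */
	attr.u.drop.da_rate = 100;	/* drop ~1 of every 100 messages */

	rc = lnet_drop_rule_add(&attr);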
-
-/**
- * Remove matched drop rules from LNet; all rules that match \a src and
- * \a dst will be removed.
- * If \a src is zero, all rules that have \a dst as their destination
- * will be removed.
- * If \a dst is zero, all rules that have \a src as their source will be
- * removed.
- * If both are zero, all rules will be removed.
- */
-static int
-lnet_drop_rule_del(lnet_nid_t src, lnet_nid_t dst)
-{
- struct lnet_drop_rule *rule;
- struct lnet_drop_rule *tmp;
- struct list_head zombies;
- int n = 0;
-
- INIT_LIST_HEAD(&zombies);
-
- lnet_net_lock(LNET_LOCK_EX);
- list_for_each_entry_safe(rule, tmp, &the_lnet.ln_drop_rules, dr_link) {
- if (rule->dr_attr.fa_src != src && src)
- continue;
-
- if (rule->dr_attr.fa_dst != dst && dst)
- continue;
-
- list_move(&rule->dr_link, &zombies);
- }
- lnet_net_unlock(LNET_LOCK_EX);
-
- list_for_each_entry_safe(rule, tmp, &zombies, dr_link) {
- CDEBUG(D_NET, "Remove drop rule: src %s->dst: %s (1/%d, %d)\n",
- libcfs_nid2str(rule->dr_attr.fa_src),
- libcfs_nid2str(rule->dr_attr.fa_dst),
- rule->dr_attr.u.drop.da_rate,
- rule->dr_attr.u.drop.da_interval);
-
- list_del(&rule->dr_link);
- kfree(rule);
- n++;
- }
-
- return n;
-}
-
-/**
- * List the drop rule at position \a pos
- */
-static int
-lnet_drop_rule_list(int pos, struct lnet_fault_attr *attr,
- struct lnet_fault_stat *stat)
-{
- struct lnet_drop_rule *rule;
- int cpt;
- int i = 0;
- int rc = -ENOENT;
-
- cpt = lnet_net_lock_current();
- list_for_each_entry(rule, &the_lnet.ln_drop_rules, dr_link) {
- if (i++ < pos)
- continue;
-
- spin_lock(&rule->dr_lock);
- *attr = rule->dr_attr;
- *stat = rule->dr_stat;
- spin_unlock(&rule->dr_lock);
- rc = 0;
- break;
- }
-
- lnet_net_unlock(cpt);
- return rc;
-}
-
-/**
- * reset counters for all drop rules
- */
-static void
-lnet_drop_rule_reset(void)
-{
- struct lnet_drop_rule *rule;
- int cpt;
-
- cpt = lnet_net_lock_current();
-
- list_for_each_entry(rule, &the_lnet.ln_drop_rules, dr_link) {
- struct lnet_fault_attr *attr = &rule->dr_attr;
-
- spin_lock(&rule->dr_lock);
-
- memset(&rule->dr_stat, 0, sizeof(rule->dr_stat));
- if (attr->u.drop.da_rate) {
- rule->dr_drop_at = prandom_u32_max(attr->u.drop.da_rate);
- } else {
- rule->dr_drop_time = cfs_time_shift(
- prandom_u32_max(attr->u.drop.da_interval));
- rule->dr_time_base = cfs_time_shift(attr->u.drop.da_interval);
- }
- spin_unlock(&rule->dr_lock);
- }
-
- lnet_net_unlock(cpt);
-}
-
-/**
- * check source/destination NID, portal, message type and drop rate,
- * and decide whether this message should be dropped
- */
-static bool
-drop_rule_match(struct lnet_drop_rule *rule, lnet_nid_t src,
- lnet_nid_t dst, unsigned int type, unsigned int portal)
-{
- struct lnet_fault_attr *attr = &rule->dr_attr;
- bool drop;
-
- if (!lnet_fault_attr_match(attr, src, dst, type, portal))
- return false;
-
- /* match this rule, check drop rate now */
- spin_lock(&rule->dr_lock);
- if (rule->dr_drop_time) { /* time based drop */
- unsigned long now = cfs_time_current();
-
- rule->dr_stat.fs_count++;
- drop = cfs_time_aftereq(now, rule->dr_drop_time);
- if (drop) {
- if (cfs_time_after(now, rule->dr_time_base))
- rule->dr_time_base = now;
-
- rule->dr_drop_time = rule->dr_time_base +
- prandom_u32_max(attr->u.drop.da_interval) * HZ;
- rule->dr_time_base += attr->u.drop.da_interval * HZ;
-
- CDEBUG(D_NET, "Drop Rule %s->%s: next drop : %lu\n",
- libcfs_nid2str(attr->fa_src),
- libcfs_nid2str(attr->fa_dst),
- rule->dr_drop_time);
- }
-
- } else { /* rate based drop */
- u64 count;
-
- drop = rule->dr_stat.fs_count++ == rule->dr_drop_at;
- /* copy, since do_div() modifies its dividend in place */
- count = rule->dr_stat.fs_count;
- if (!do_div(count, attr->u.drop.da_rate)) {
- rule->dr_drop_at = rule->dr_stat.fs_count +
- prandom_u32_max(attr->u.drop.da_rate);
- CDEBUG(D_NET, "Drop Rule %s->%s: next drop: %lu\n",
- libcfs_nid2str(attr->fa_src),
- libcfs_nid2str(attr->fa_dst), rule->dr_drop_at);
- }
- }
-
- if (drop) { /* drop this message, update counters */
- lnet_fault_stat_inc(&rule->dr_stat, type);
- rule->dr_stat.u.drop.ds_dropped++;
- }
-
- spin_unlock(&rule->dr_lock);
- return drop;
-}
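
The rate-based branch above drops exactly one message per window of
da_rate messages, at a uniformly random slot within each window. A
standalone restatement of the scheme (the function name is invented):

	static bool should_drop(u64 *count, u64 *drop_at, u32 rate)
	{
		bool drop = ((*count)++ == *drop_at);

		if (!(*count % rate))	/* window ended: pick the next slot */
			*drop_at = *count + prandom_u32_max(rate);
		return drop;
	}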
-
-/**
- * Check if a message from \a src to \a dst matches any existing drop rule
- */
-bool
-lnet_drop_rule_match(struct lnet_hdr *hdr)
-{
- struct lnet_drop_rule *rule;
- lnet_nid_t src = le64_to_cpu(hdr->src_nid);
- lnet_nid_t dst = le64_to_cpu(hdr->dest_nid);
- unsigned int typ = le32_to_cpu(hdr->type);
- unsigned int ptl = -1;
- bool drop = false;
- int cpt;
-
- /**
- * NB: if Portal is specified, then only PUT and GET will be
- * filtered by drop rule
- */
- if (typ == LNET_MSG_PUT)
- ptl = le32_to_cpu(hdr->msg.put.ptl_index);
- else if (typ == LNET_MSG_GET)
- ptl = le32_to_cpu(hdr->msg.get.ptl_index);
-
- cpt = lnet_net_lock_current();
- list_for_each_entry(rule, &the_lnet.ln_drop_rules, dr_link) {
- drop = drop_rule_match(rule, src, dst, typ, ptl);
- if (drop)
- break;
- }
-
- lnet_net_unlock(cpt);
- return drop;
-}
-
-/**
- * LNet Delay Simulation
- */
-/** time (in jiffies) at which to send the delayed message */
-#define msg_delay_send msg_ev.hdr_data
-
-struct lnet_delay_rule {
- /** link chain on the_lnet.ln_delay_rules */
- struct list_head dl_link;
- /** link chain on delay_dd.dd_sched_rules */
- struct list_head dl_sched_link;
- /** attributes of this rule */
- struct lnet_fault_attr dl_attr;
- /** lock to protect \a below members */
- spinlock_t dl_lock;
- /** refcount of delay rule */
- atomic_t dl_refcount;
- /**
- * the message sequence number to delay; the message is delayed when
- * dl_stat.fs_count == dl_delay_at
- */
- unsigned long dl_delay_at;
- /**
- * time (in seconds) to delay the next message; mutually exclusive
- * with dl_delay_at
- */
- unsigned long dl_delay_time;
- /** baseline to calculate dl_delay_time */
- unsigned long dl_time_base;
- /** jiffies to send the next delayed message */
- unsigned long dl_msg_send;
- /** delayed message list */
- struct list_head dl_msg_list;
- /** statistics of delayed messages */
- struct lnet_fault_stat dl_stat;
- /** timer to wakeup delay_daemon */
- struct timer_list dl_timer;
-};
-
-struct delay_daemon_data {
- /** serialise rule add/remove */
- struct mutex dd_mutex;
- /** protect rules on \a dd_sched_rules */
- spinlock_t dd_lock;
- /** scheduled delay rules (by timer) */
- struct list_head dd_sched_rules;
- /** daemon thread sleeps here */
- wait_queue_head_t dd_waitq;
- /** controller (lctl command) waits here */
- wait_queue_head_t dd_ctl_waitq;
- /** daemon is running */
- unsigned int dd_running;
- /** daemon stopped */
- unsigned int dd_stopped;
-};
-
-static struct delay_daemon_data delay_dd;
-
-static unsigned long
-round_timeout(unsigned long timeout)
-{
- return (unsigned int)rounddown(timeout, HZ) + HZ;
-}
-
-static void
-delay_rule_decref(struct lnet_delay_rule *rule)
-{
- if (atomic_dec_and_test(&rule->dl_refcount)) {
- LASSERT(list_empty(&rule->dl_sched_link));
- LASSERT(list_empty(&rule->dl_msg_list));
- LASSERT(list_empty(&rule->dl_link));
-
- kfree(rule);
- }
-}
-
-/**
- * check source/destination NID, portal, message type and delay rate,
- * and decide whether this message should be delayed
- */
-static bool
-delay_rule_match(struct lnet_delay_rule *rule, lnet_nid_t src,
- lnet_nid_t dst, unsigned int type, unsigned int portal,
- struct lnet_msg *msg)
-{
- struct lnet_fault_attr *attr = &rule->dl_attr;
- bool delay;
-
- if (!lnet_fault_attr_match(attr, src, dst, type, portal))
- return false;
-
- /* match this rule, check delay rate now */
- spin_lock(&rule->dl_lock);
- if (rule->dl_delay_time) { /* time based delay */
- unsigned long now = cfs_time_current();
-
- rule->dl_stat.fs_count++;
- delay = cfs_time_aftereq(now, rule->dl_delay_time);
- if (delay) {
- if (cfs_time_after(now, rule->dl_time_base))
- rule->dl_time_base = now;
-
- rule->dl_delay_time = rule->dl_time_base +
- prandom_u32_max(attr->u.delay.la_interval) * HZ;
- rule->dl_time_base += attr->u.delay.la_interval * HZ;
-
- CDEBUG(D_NET, "Delay Rule %s->%s: next delay : %lu\n",
- libcfs_nid2str(attr->fa_src),
- libcfs_nid2str(attr->fa_dst),
- rule->dl_delay_time);
- }
-
- } else { /* rate based delay */
- u64 count;
-
- delay = rule->dl_stat.fs_count++ == rule->dl_delay_at;
- /* next random slot; copy, since do_div() modifies its dividend */
- count = rule->dl_stat.fs_count;
- if (!do_div(count, attr->u.delay.la_rate)) {
- rule->dl_delay_at = rule->dl_stat.fs_count +
- prandom_u32_max(attr->u.delay.la_rate);
- CDEBUG(D_NET, "Delay Rule %s->%s: next delay: %lu\n",
- libcfs_nid2str(attr->fa_src),
- libcfs_nid2str(attr->fa_dst), rule->dl_delay_at);
- }
- }
-
- if (!delay) {
- spin_unlock(&rule->dl_lock);
- return false;
- }
-
- /* delay this message, update counters */
- lnet_fault_stat_inc(&rule->dl_stat, type);
- rule->dl_stat.u.delay.ls_delayed++;
-
- list_add_tail(&msg->msg_list, &rule->dl_msg_list);
- msg->msg_delay_send = round_timeout(
- cfs_time_shift(attr->u.delay.la_latency));
- if (rule->dl_msg_send == -1) {
- rule->dl_msg_send = msg->msg_delay_send;
- mod_timer(&rule->dl_timer, rule->dl_msg_send);
- }
-
- spin_unlock(&rule->dl_lock);
- return true;
-}
-
-/**
- * check if \a msg matches any delay rule; reception of this message
- * will be delayed if there is a match.
- */
-bool
-lnet_delay_rule_match_locked(struct lnet_hdr *hdr, struct lnet_msg *msg)
-{
- struct lnet_delay_rule *rule;
- lnet_nid_t src = le64_to_cpu(hdr->src_nid);
- lnet_nid_t dst = le64_to_cpu(hdr->dest_nid);
- unsigned int typ = le32_to_cpu(hdr->type);
- unsigned int ptl = -1;
-
- /* NB: called with hold of lnet_net_lock */
-
- /**
- * NB: if Portal is specified, then only PUT and GET will be
- * filtered by delay rule
- */
- if (typ == LNET_MSG_PUT)
- ptl = le32_to_cpu(hdr->msg.put.ptl_index);
- else if (typ == LNET_MSG_GET)
- ptl = le32_to_cpu(hdr->msg.get.ptl_index);
-
- list_for_each_entry(rule, &the_lnet.ln_delay_rules, dl_link) {
- if (delay_rule_match(rule, src, dst, typ, ptl, msg))
- return true;
- }
-
- return false;
-}
-
-/** check out delayed messages for send */
-static void
-delayed_msg_check(struct lnet_delay_rule *rule, bool all,
- struct list_head *msg_list)
-{
- struct lnet_msg *msg;
- struct lnet_msg *tmp;
- unsigned long now = cfs_time_current();
-
- if (!all && rule->dl_msg_send > now)
- return;
-
- spin_lock(&rule->dl_lock);
- list_for_each_entry_safe(msg, tmp, &rule->dl_msg_list, msg_list) {
- if (!all && msg->msg_delay_send > now)
- break;
-
- msg->msg_delay_send = 0;
- list_move_tail(&msg->msg_list, msg_list);
- }
-
- if (list_empty(&rule->dl_msg_list)) {
- del_timer(&rule->dl_timer);
- rule->dl_msg_send = -1;
-
- } else if (!list_empty(msg_list)) {
- /*
- * dequeued some timed-out messages; update the timer for
- * the next delayed message on this rule
- */
- msg = list_entry(rule->dl_msg_list.next,
- struct lnet_msg, msg_list);
- rule->dl_msg_send = msg->msg_delay_send;
- mod_timer(&rule->dl_timer, rule->dl_msg_send);
- }
- spin_unlock(&rule->dl_lock);
-}
-
-static void
-delayed_msg_process(struct list_head *msg_list, bool drop)
-{
- struct lnet_msg *msg;
-
- while (!list_empty(msg_list)) {
- struct lnet_ni *ni;
- int cpt;
- int rc;
-
- msg = list_entry(msg_list->next, struct lnet_msg, msg_list);
- LASSERT(msg->msg_rxpeer);
-
- ni = msg->msg_rxpeer->lp_ni;
- cpt = msg->msg_rx_cpt;
-
- list_del_init(&msg->msg_list);
- if (drop) {
- rc = -ECANCELED;
-
- } else if (!msg->msg_routing) {
- rc = lnet_parse_local(ni, msg);
- if (!rc)
- continue;
-
- } else {
- lnet_net_lock(cpt);
- rc = lnet_parse_forward_locked(ni, msg);
- lnet_net_unlock(cpt);
-
- switch (rc) {
- case LNET_CREDIT_OK:
- lnet_ni_recv(ni, msg->msg_private, msg, 0,
- 0, msg->msg_len, msg->msg_len);
- /* fall through */
- case LNET_CREDIT_WAIT:
- continue;
- default: /* failures */
- break;
- }
- }
-
- lnet_drop_message(ni, cpt, msg->msg_private, msg->msg_len);
- lnet_finalize(ni, msg, rc);
- }
-}
-
-/**
- * Process delayed messages for scheduled rules.
- * This function can be called either by the delay rule daemon or by
- * lnet_finalize().
- */
-void
-lnet_delay_rule_check(void)
-{
- struct lnet_delay_rule *rule;
- struct list_head msgs;
-
- INIT_LIST_HEAD(&msgs);
- while (1) {
- if (list_empty(&delay_dd.dd_sched_rules))
- break;
-
- spin_lock_bh(&delay_dd.dd_lock);
- if (list_empty(&delay_dd.dd_sched_rules)) {
- spin_unlock_bh(&delay_dd.dd_lock);
- break;
- }
-
- rule = list_entry(delay_dd.dd_sched_rules.next,
- struct lnet_delay_rule, dl_sched_link);
- list_del_init(&rule->dl_sched_link);
- spin_unlock_bh(&delay_dd.dd_lock);
-
- delayed_msg_check(rule, false, &msgs);
- delay_rule_decref(rule); /* -1 for delay_dd.dd_sched_rules */
- }
-
- if (!list_empty(&msgs))
- delayed_msg_process(&msgs, false);
-}
-
-/** daemon thread to handle delayed messages */
-static int
-lnet_delay_rule_daemon(void *arg)
-{
- delay_dd.dd_running = 1;
- wake_up(&delay_dd.dd_ctl_waitq);
-
- while (delay_dd.dd_running) {
- wait_event_interruptible(delay_dd.dd_waitq,
- !delay_dd.dd_running ||
- !list_empty(&delay_dd.dd_sched_rules));
- lnet_delay_rule_check();
- }
-
- /* in case more rules have been enqueued after my last check */
- lnet_delay_rule_check();
- delay_dd.dd_stopped = 1;
- wake_up(&delay_dd.dd_ctl_waitq);
-
- return 0;
-}
-
-static void
-delay_timer_cb(struct timer_list *t)
-{
- struct lnet_delay_rule *rule = from_timer(rule, t, dl_timer);
-
- spin_lock_bh(&delay_dd.dd_lock);
- if (list_empty(&rule->dl_sched_link) && delay_dd.dd_running) {
- atomic_inc(&rule->dl_refcount);
- list_add_tail(&rule->dl_sched_link, &delay_dd.dd_sched_rules);
- wake_up(&delay_dd.dd_waitq);
- }
- spin_unlock_bh(&delay_dd.dd_lock);
-}
-
-/**
- * Add a new delay rule to LNet
- * There is no check for duplicate delay rules; all rules will be checked
- * against each incoming message.
- */
-int
-lnet_delay_rule_add(struct lnet_fault_attr *attr)
-{
- struct lnet_delay_rule *rule;
- int rc = 0;
-
- if (attr->u.delay.la_rate && attr->u.delay.la_interval) {
- CDEBUG(D_NET, "please provide either delay rate or delay interval, but not both at the same time %d/%d\n",
- attr->u.delay.la_rate, attr->u.delay.la_interval);
- return -EINVAL;
- }
-
- if (!attr->u.delay.la_latency) {
- CDEBUG(D_NET, "delay latency cannot be zero\n");
- return -EINVAL;
- }
-
- if (lnet_fault_attr_validate(attr))
- return -EINVAL;
-
- rule = kzalloc(sizeof(*rule), GFP_NOFS);
- if (!rule)
- return -ENOMEM;
-
- mutex_lock(&delay_dd.dd_mutex);
- if (!delay_dd.dd_running) {
- struct task_struct *task;
-
- /**
- * NB: although LND threads will process delayed messages
- * in lnet_finalize(), there is no guarantee that they
- * will be woken up if no other message needs to be
- * handled.
- * Only one daemon thread is used; performance is not a
- * concern of this simulation module.
- */
- task = kthread_run(lnet_delay_rule_daemon, NULL, "lnet_dd");
- if (IS_ERR(task)) {
- rc = PTR_ERR(task);
- goto failed;
- }
- wait_event(delay_dd.dd_ctl_waitq, delay_dd.dd_running);
- }
-
- timer_setup(&rule->dl_timer, delay_timer_cb, 0);
-
- spin_lock_init(&rule->dl_lock);
- INIT_LIST_HEAD(&rule->dl_msg_list);
- INIT_LIST_HEAD(&rule->dl_sched_link);
-
- rule->dl_attr = *attr;
- if (attr->u.delay.la_interval) {
- rule->dl_time_base = cfs_time_shift(attr->u.delay.la_interval);
- rule->dl_delay_time = cfs_time_shift(
- prandom_u32_max(attr->u.delay.la_interval));
- } else {
- rule->dl_delay_at = prandom_u32_max(attr->u.delay.la_rate);
- }
-
- rule->dl_msg_send = -1;
-
- lnet_net_lock(LNET_LOCK_EX);
- atomic_set(&rule->dl_refcount, 1);
- list_add(&rule->dl_link, &the_lnet.ln_delay_rules);
- lnet_net_unlock(LNET_LOCK_EX);
-
- CDEBUG(D_NET, "Added delay rule: src %s, dst %s, rate %d\n",
- libcfs_nid2str(attr->fa_src), libcfs_nid2str(attr->fa_dst),
- attr->u.delay.la_rate);
-
- mutex_unlock(&delay_dd.dd_mutex);
- return 0;
-failed:
- mutex_unlock(&delay_dd.dd_mutex);
- kfree(rule);
- return rc;
-}
-
-/**
- * Remove matched delay rules from LNet. If \a shutdown is true or both
- * \a src and \a dst are zero, all rules will be removed; otherwise only
- * matched rules will be removed.
- * If \a src is zero, all rules that have \a dst as their destination
- * will be removed.
- * If \a dst is zero, all rules that have \a src as their source will be
- * removed.
- *
- * When a delay rule is removed, all delayed messages of this rule will be
- * processed immediately.
- */
-int
-lnet_delay_rule_del(lnet_nid_t src, lnet_nid_t dst, bool shutdown)
-{
- struct lnet_delay_rule *rule;
- struct lnet_delay_rule *tmp;
- struct list_head rule_list;
- struct list_head msg_list;
- int n = 0;
- bool cleanup;
-
- INIT_LIST_HEAD(&rule_list);
- INIT_LIST_HEAD(&msg_list);
-
- if (shutdown) {
- src = 0;
- dst = 0;
- }
-
- mutex_lock(&delay_dd.dd_mutex);
- lnet_net_lock(LNET_LOCK_EX);
-
- list_for_each_entry_safe(rule, tmp, &the_lnet.ln_delay_rules, dl_link) {
- if (rule->dl_attr.fa_src != src && src)
- continue;
-
- if (rule->dl_attr.fa_dst != dst && dst)
- continue;
-
- CDEBUG(D_NET, "Remove delay rule: src %s->dst: %s (1/%d, %d)\n",
- libcfs_nid2str(rule->dl_attr.fa_src),
- libcfs_nid2str(rule->dl_attr.fa_dst),
- rule->dl_attr.u.delay.la_rate,
- rule->dl_attr.u.delay.la_interval);
- /* refcount is taken over by rule_list */
- list_move(&rule->dl_link, &rule_list);
- }
-
- /* check if we need to shutdown delay_daemon */
- cleanup = list_empty(&the_lnet.ln_delay_rules) &&
- !list_empty(&rule_list);
- lnet_net_unlock(LNET_LOCK_EX);
-
- list_for_each_entry_safe(rule, tmp, &rule_list, dl_link) {
- list_del_init(&rule->dl_link);
-
- del_timer_sync(&rule->dl_timer);
- delayed_msg_check(rule, true, &msg_list);
- delay_rule_decref(rule); /* -1 for the_lnet.ln_delay_rules */
- n++;
- }
-
- if (cleanup) { /* no more delay rule, shutdown delay_daemon */
- LASSERT(delay_dd.dd_running);
- delay_dd.dd_running = 0;
- wake_up(&delay_dd.dd_waitq);
-
- while (!delay_dd.dd_stopped)
- wait_event(delay_dd.dd_ctl_waitq, delay_dd.dd_stopped);
- }
- mutex_unlock(&delay_dd.dd_mutex);
-
- if (!list_empty(&msg_list))
- delayed_msg_process(&msg_list, shutdown);
-
- return n;
-}
-
-/**
- * List the delay rule at position \a pos
- */
-int
-lnet_delay_rule_list(int pos, struct lnet_fault_attr *attr,
- struct lnet_fault_stat *stat)
-{
- struct lnet_delay_rule *rule;
- int cpt;
- int i = 0;
- int rc = -ENOENT;
-
- cpt = lnet_net_lock_current();
- list_for_each_entry(rule, &the_lnet.ln_delay_rules, dl_link) {
- if (i++ < pos)
- continue;
-
- spin_lock(&rule->dl_lock);
- *attr = rule->dl_attr;
- *stat = rule->dl_stat;
- spin_unlock(&rule->dl_lock);
- rc = 0;
- break;
- }
-
- lnet_net_unlock(cpt);
- return rc;
-}
-
-/**
- * reset counters for all Delay Rules
- */
-void
-lnet_delay_rule_reset(void)
-{
- struct lnet_delay_rule *rule;
- int cpt;
-
- cpt = lnet_net_lock_current();
-
- list_for_each_entry(rule, &the_lnet.ln_delay_rules, dl_link) {
- struct lnet_fault_attr *attr = &rule->dl_attr;
-
- spin_lock(&rule->dl_lock);
-
- memset(&rule->dl_stat, 0, sizeof(rule->dl_stat));
- if (attr->u.delay.la_rate) {
- rule->dl_delay_at = prandom_u32_max(attr->u.delay.la_rate);
- } else {
- rule->dl_delay_time =
- cfs_time_shift(prandom_u32_max(
- attr->u.delay.la_interval));
- rule->dl_time_base = cfs_time_shift(attr->u.delay.la_interval);
- }
- spin_unlock(&rule->dl_lock);
- }
-
- lnet_net_unlock(cpt);
-}
-
-int
-lnet_fault_ctl(int opc, struct libcfs_ioctl_data *data)
-{
- struct lnet_fault_attr *attr;
- struct lnet_fault_stat *stat;
-
- attr = (struct lnet_fault_attr *)data->ioc_inlbuf1;
-
- switch (opc) {
- default:
- return -EINVAL;
-
- case LNET_CTL_DROP_ADD:
- if (!attr)
- return -EINVAL;
-
- return lnet_drop_rule_add(attr);
-
- case LNET_CTL_DROP_DEL:
- if (!attr)
- return -EINVAL;
-
- data->ioc_count = lnet_drop_rule_del(attr->fa_src,
- attr->fa_dst);
- return 0;
-
- case LNET_CTL_DROP_RESET:
- lnet_drop_rule_reset();
- return 0;
-
- case LNET_CTL_DROP_LIST:
- stat = (struct lnet_fault_stat *)data->ioc_inlbuf2;
- if (!attr || !stat)
- return -EINVAL;
-
- return lnet_drop_rule_list(data->ioc_count, attr, stat);
-
- case LNET_CTL_DELAY_ADD:
- if (!attr)
- return -EINVAL;
-
- return lnet_delay_rule_add(attr);
-
- case LNET_CTL_DELAY_DEL:
- if (!attr)
- return -EINVAL;
-
- data->ioc_count = lnet_delay_rule_del(attr->fa_src,
- attr->fa_dst, false);
- return 0;
-
- case LNET_CTL_DELAY_RESET:
- lnet_delay_rule_reset();
- return 0;
-
- case LNET_CTL_DELAY_LIST:
- stat = (struct lnet_fault_stat *)data->ioc_inlbuf2;
- if (!attr || !stat)
- return -EINVAL;
-
- return lnet_delay_rule_list(data->ioc_count, attr, stat);
- }
-}
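
A minimal sketch of driving this dispatcher from kernel code. The caller,
example_add_delay_rule(), is hypothetical; only the fields the code above
actually reads are filled in, everything else is left zeroed:

	static int example_add_delay_rule(lnet_nid_t src, lnet_nid_t dst)
	{
		struct lnet_fault_attr attr;
		struct libcfs_ioctl_data data;

		memset(&attr, 0, sizeof(attr));
		memset(&data, 0, sizeof(data));

		attr.fa_src = src;		/* 0 matches any source NID */
		attr.fa_dst = dst;		/* 0 matches any destination NID */
		attr.u.delay.la_rate = 10;	/* delay 1 message in every 10 */

		data.ioc_inlbuf1 = (char *)&attr; /* cast back by lnet_fault_ctl() */
		return lnet_fault_ctl(LNET_CTL_DELAY_ADD, &data);
	}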
-
-int
-lnet_fault_init(void)
-{
- BUILD_BUG_ON(LNET_PUT_BIT != 1 << LNET_MSG_PUT);
- BUILD_BUG_ON(LNET_ACK_BIT != 1 << LNET_MSG_ACK);
- BUILD_BUG_ON(LNET_GET_BIT != 1 << LNET_MSG_GET);
- BUILD_BUG_ON(LNET_REPLY_BIT != 1 << LNET_MSG_REPLY);
-
- mutex_init(&delay_dd.dd_mutex);
- spin_lock_init(&delay_dd.dd_lock);
- init_waitqueue_head(&delay_dd.dd_waitq);
- init_waitqueue_head(&delay_dd.dd_ctl_waitq);
- INIT_LIST_HEAD(&delay_dd.dd_sched_rules);
-
- return 0;
-}
-
-void
-lnet_fault_fini(void)
-{
- lnet_drop_rule_del(0, 0);
- lnet_delay_rule_del(0, 0, true);
-
- LASSERT(list_empty(&the_lnet.ln_drop_rules));
- LASSERT(list_empty(&the_lnet.ln_delay_rules));
- LASSERT(list_empty(&delay_dd.dd_sched_rules));
-}
diff --git a/drivers/staging/lustre/lnet/lnet/nidstrings.c b/drivers/staging/lustre/lnet/lnet/nidstrings.c
deleted file mode 100644
index 3aba1421c741..000000000000
--- a/drivers/staging/lustre/lnet/lnet/nidstrings.c
+++ /dev/null
@@ -1,1258 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/*
- * GPL HEADER START
- *
- * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 only,
- * as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * General Public License version 2 for more details (a copy is included
- * in the LICENSE file that accompanied this code).
- *
- * You should have received a copy of the GNU General Public License
- * version 2 along with this program; If not, see
- * http://www.gnu.org/licenses/gpl-2.0.html
- *
- * GPL HEADER END
- */
-/*
- * Copyright (c) 2008, 2010, Oracle and/or its affiliates. All rights reserved.
- * Use is subject to license terms.
- *
- * Copyright (c) 2011, 2012, Intel Corporation.
- */
-/*
- * This file is part of Lustre, http://www.lustre.org/
- * Lustre is a trademark of Sun Microsystems, Inc.
- *
- * lnet/lnet/nidstrings.c
- *
- * Author: Phil Schwan <phil@clusterfs.com>
- */
-
-#define DEBUG_SUBSYSTEM S_LNET
-
-#include <linux/libcfs/libcfs.h>
-#include <uapi/linux/lnet/nidstr.h>
-
-/* max value for numeric network address */
-#define MAX_NUMERIC_VALUE 0xffffffff
-
-#define IPSTRING_LENGTH 16
-
-/* CAVEAT VENDITOR! Keep the canonical string representation of nets/nids
- * consistent in all conversion functions. Some code fragments are copied
- * around for the sake of clarity...
- */
-
-/* CAVEAT EMPTOR! Racy temporary buffer allocation!
- * Choose the number of nidstrings to support the MAXIMUM expected number of
- * concurrent users. If there are more, the returned string will be volatile.
- * NB this number must allow for a process to be descheduled for a timeslice
- * between getting its string and using it.
- */
-
-static char libcfs_nidstrings[LNET_NIDSTR_COUNT][LNET_NIDSTR_SIZE];
-static int libcfs_nidstring_idx;
-
-static DEFINE_SPINLOCK(libcfs_nidstring_lock);
-
-static struct netstrfns *libcfs_namenum2netstrfns(const char *name);
-
-char *
-libcfs_next_nidstring(void)
-{
- char *str;
- unsigned long flags;
-
- spin_lock_irqsave(&libcfs_nidstring_lock, flags);
-
- str = libcfs_nidstrings[libcfs_nidstring_idx++];
- if (libcfs_nidstring_idx == ARRAY_SIZE(libcfs_nidstrings))
- libcfs_nidstring_idx = 0;
-
- spin_unlock_irqrestore(&libcfs_nidstring_lock, flags);
- return str;
-}
-EXPORT_SYMBOL(libcfs_next_nidstring);
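
The caveat above is the entire contract of this helper: the returned pointer
is one slot of a fixed ring and is recycled after LNET_NIDSTR_COUNT further
calls. A usage sketch (example_log_peer() is hypothetical):

	static void example_log_peer(lnet_nid_t nid)
	{
		/* Safe: the string is formatted and consumed in a single
		 * statement, before the ring slot can be handed out again. */
		CDEBUG(D_NET, "peer is %s\n", libcfs_nid2str(nid));

		/* Unsafe: caching the pointer; after enough conversions on
		 * any CPU the slot is reused under the caller's feet.
		 *
		 *	const char *cached = libcfs_nid2str(nid);
		 */
	}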
-
-/**
- * Nid range list syntax.
- * \verbatim
- *
- * <nidlist> :== <nidrange> [ ' ' <nidrange> ]
- * <nidrange> :== <addrrange> '@' <net>
- * <addrrange> :== '*' |
- * <ipaddr_range> |
- * <cfs_expr_list>
- * <ipaddr_range> :== <cfs_expr_list>.<cfs_expr_list>.<cfs_expr_list>.
- * <cfs_expr_list>
- * <cfs_expr_list> :== <number> |
- * <expr_list>
- * <expr_list> :== '[' <range_expr> [ ',' <range_expr>] ']'
- * <range_expr> :== <number> |
- * <number> '-' <number> |
- * <number> '-' <number> '/' <number>
- * <net> :== <netname> | <netname><number>
- * <netname> :== "lo" | "tcp" | "o2ib" | "cib" | "openib" | "iib" |
- * "vib" | "ra" | "elan" | "mx" | "ptl"
- * \endverbatim
- */
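
For concreteness, a few strings that are valid under this grammar
(illustrative examples, not taken from the source):

	"*@tcp"				/* every address on network tcp0 */
	"192.168.0.[2-10/2]@o2ib1"	/* hosts 2,4,6,8,10 on o2ib1 */
	"[1-64]@elan5 10.0.0.1@tcp"	/* two nidranges separated by a space */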
-
-/**
- * Structure to represent \<nidrange\> token of the syntax.
- *
- * One of this is created for each \<net\> parsed.
- */
-struct nidrange {
- /**
- * Link into the list of these structures, which is built during
- * nid range list parsing.
- */
- struct list_head nr_link;
- /**
- * List head for addrrange::ar_link.
- */
- struct list_head nr_addrranges;
- /**
- * Flag indicating that *@<net> is found.
- */
- int nr_all;
- /**
- * Pointer to corresponding element of libcfs_netstrfns.
- */
- struct netstrfns *nr_netstrfns;
- /**
- * Network number, e.g. 5 if \<net\> is "elan5".
- */
- int nr_netnum;
-};
-
-/**
- * Structure to represent \<addrrange\> token of the syntax.
- */
-struct addrrange {
- /**
- * Link to nidrange::nr_addrranges.
- */
- struct list_head ar_link;
- /**
- * List head for cfs_expr_list::el_list.
- */
- struct list_head ar_numaddr_ranges;
-};
-
-/**
- * Parses \<addrrange\> token of the syntax.
- *
- * Allocates struct addrrange and links to \a nidrange via
- * (nidrange::nr_addrranges)
- *
- * \retval 0 if \a src parses to '*' | \<ipaddr_range\> | \<cfs_expr_list\>
- * \retval -errno otherwise
- */
-static int
-parse_addrange(const struct cfs_lstr *src, struct nidrange *nidrange)
-{
- struct addrrange *addrrange;
-
- if (src->ls_len == 1 && src->ls_str[0] == '*') {
- nidrange->nr_all = 1;
- return 0;
- }
-
- addrrange = kzalloc(sizeof(struct addrrange), GFP_NOFS);
- if (!addrrange)
- return -ENOMEM;
- list_add_tail(&addrrange->ar_link, &nidrange->nr_addrranges);
- INIT_LIST_HEAD(&addrrange->ar_numaddr_ranges);
-
- return nidrange->nr_netstrfns->nf_parse_addrlist(src->ls_str,
- src->ls_len,
- &addrrange->ar_numaddr_ranges);
-}
-
-/**
- * Finds or creates struct nidrange.
- *
- * Checks if \a src is a valid network name, looks for the corresponding
- * nidrange in the list of nidranges (\a nidlist), and creates a new
- * struct nidrange if none is found.
- *
- * \retval pointer to struct nidrange matching network specified via \a src
- * \retval NULL if \a src does not match any network
- */
-static struct nidrange *
-add_nidrange(const struct cfs_lstr *src,
- struct list_head *nidlist)
-{
- struct netstrfns *nf;
- struct nidrange *nr;
- int endlen;
- unsigned int netnum;
-
- if (src->ls_len >= LNET_NIDSTR_SIZE)
- return NULL;
-
- nf = libcfs_namenum2netstrfns(src->ls_str);
- if (!nf)
- return NULL;
- endlen = src->ls_len - strlen(nf->nf_name);
- if (!endlen)
- /* network name only, e.g. "elan" or "tcp" */
- netnum = 0;
- else {
- /*
- * e.g. "elan25" or "tcp23", refuse to parse if
- * network name is not appended with decimal or
- * hexadecimal number
- */
- if (!cfs_str2num_check(src->ls_str + strlen(nf->nf_name),
- endlen, &netnum, 0, MAX_NUMERIC_VALUE))
- return NULL;
- }
-
- list_for_each_entry(nr, nidlist, nr_link) {
- if (nr->nr_netstrfns != nf)
- continue;
- if (nr->nr_netnum != netnum)
- continue;
- return nr;
- }
-
- nr = kzalloc(sizeof(struct nidrange), GFP_NOFS);
- if (!nr)
- return NULL;
- list_add_tail(&nr->nr_link, nidlist);
- INIT_LIST_HEAD(&nr->nr_addrranges);
- nr->nr_netstrfns = nf;
- nr->nr_all = 0;
- nr->nr_netnum = netnum;
-
- return nr;
-}
-
-/**
- * Parses \<nidrange\> token of the syntax.
- *
- * \retval 1 if \a src parses to \<addrrange\> '@' \<net\>
- * \retval 0 otherwise
- */
-static int
-parse_nidrange(struct cfs_lstr *src, struct list_head *nidlist)
-{
- struct cfs_lstr addrrange;
- struct cfs_lstr net;
- struct nidrange *nr;
-
- if (!cfs_gettok(src, '@', &addrrange))
- goto failed;
-
- if (!cfs_gettok(src, '@', &net) || src->ls_str)
- goto failed;
-
- nr = add_nidrange(&net, nidlist);
- if (!nr)
- goto failed;
-
- if (parse_addrange(&addrrange, nr))
- goto failed;
-
- return 1;
-failed:
- return 0;
-}
-
-/**
- * Frees addrrange structures of \a list.
- *
- * For each struct addrrange structure found on \a list it frees
- * cfs_expr_list list attached to it and frees the addrrange itself.
- *
- * \retval none
- */
-static void
-free_addrranges(struct list_head *list)
-{
- while (!list_empty(list)) {
- struct addrrange *ar;
-
- ar = list_entry(list->next, struct addrrange, ar_link);
-
- cfs_expr_list_free_list(&ar->ar_numaddr_ranges);
- list_del(&ar->ar_link);
- kfree(ar);
- }
-}
-
-/**
- * Frees nidrange structures of \a list.
- *
- * For each struct nidrange structure found on \a list it frees
- * addrrange list attached to it and frees the nidrange itself.
- *
- * \retval none
- */
-void
-cfs_free_nidlist(struct list_head *list)
-{
- struct list_head *pos, *next;
- struct nidrange *nr;
-
- list_for_each_safe(pos, next, list) {
- nr = list_entry(pos, struct nidrange, nr_link);
- free_addrranges(&nr->nr_addrranges);
- list_del(pos);
- kfree(nr);
- }
-}
-EXPORT_SYMBOL(cfs_free_nidlist);
-
-/**
- * Parses nid range list.
- *
- * Parses \a str, with rigorous syntax and overflow checking, into
- * \<nidrange\> [ ' ' \<nidrange\> ], compiles \a str into a set of
- * structures and links them to \a nidlist. The resulting list can
- * be used to match a NID against the set of NIDs defined by \a str.
- * \see cfs_match_nid
- *
- * \retval 1 on success
- * \retval 0 otherwise
- */
-int
-cfs_parse_nidlist(char *str, int len, struct list_head *nidlist)
-{
- struct cfs_lstr src;
- struct cfs_lstr res;
- int rc;
-
- src.ls_str = str;
- src.ls_len = len;
- INIT_LIST_HEAD(nidlist);
- while (src.ls_str) {
- rc = cfs_gettok(&src, ' ', &res);
- if (!rc) {
- cfs_free_nidlist(nidlist);
- return 0;
- }
- rc = parse_nidrange(&res, nidlist);
- if (!rc) {
- cfs_free_nidlist(nidlist);
- return 0;
- }
- }
- return 1;
-}
-EXPORT_SYMBOL(cfs_parse_nidlist);
-
-/**
- * Matches a nid (\a nid) against the compiled list of nidranges (\a nidlist).
- *
- * \see cfs_parse_nidlist()
- *
- * \retval 1 on match
- * \retval 0 otherwise
- */
-int cfs_match_nid(lnet_nid_t nid, struct list_head *nidlist)
-{
- struct nidrange *nr;
- struct addrrange *ar;
-
- list_for_each_entry(nr, nidlist, nr_link) {
- if (nr->nr_netstrfns->nf_type != LNET_NETTYP(LNET_NIDNET(nid)))
- continue;
- if (nr->nr_netnum != LNET_NETNUM(LNET_NIDNET(nid)))
- continue;
- if (nr->nr_all)
- return 1;
- list_for_each_entry(ar, &nr->nr_addrranges, ar_link)
- if (nr->nr_netstrfns->nf_match_addr(LNET_NIDADDR(nid),
- &ar->ar_numaddr_ranges))
- return 1;
- }
- return 0;
-}
-EXPORT_SYMBOL(cfs_match_nid);
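
A sketch of the parse/match/free life cycle these exports are designed
around; example_nid_allowed() and its policy string are hypothetical:

	static bool example_nid_allowed(lnet_nid_t nid)
	{
		static char expr[] = "10.0.[0-3].*@tcp";
		struct list_head nidlist;
		bool ok = false;

		/* cfs_parse_nidlist() returns 1 on success and cleans up
		 * after itself on failure, so nothing is leaked here. */
		if (cfs_parse_nidlist(expr, strlen(expr), &nidlist)) {
			ok = cfs_match_nid(nid, &nidlist);
			cfs_free_nidlist(&nidlist);
		}
		return ok;
	}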
-
-/**
- * Print the network part of the nidrange \a nr into the specified \a buffer.
- *
- * \retval number of characters written
- */
-static int
-cfs_print_network(char *buffer, int count, struct nidrange *nr)
-{
- struct netstrfns *nf = nr->nr_netstrfns;
-
- if (!nr->nr_netnum)
- return scnprintf(buffer, count, "@%s", nf->nf_name);
- else
- return scnprintf(buffer, count, "@%s%u",
- nf->nf_name, nr->nr_netnum);
-}
-
-/**
- * Print a list of addrrange (\a addrranges) into the specified \a buffer.
- * At max \a count characters can be printed into \a buffer.
- *
- * \retval number of characters written
- */
-static int
-cfs_print_addrranges(char *buffer, int count, struct list_head *addrranges,
- struct nidrange *nr)
-{
- int i = 0;
- struct addrrange *ar;
- struct netstrfns *nf = nr->nr_netstrfns;
-
- list_for_each_entry(ar, addrranges, ar_link) {
- if (i)
- i += scnprintf(buffer + i, count - i, " ");
- i += nf->nf_print_addrlist(buffer + i, count - i,
- &ar->ar_numaddr_ranges);
- i += cfs_print_network(buffer + i, count - i, nr);
- }
- return i;
-}
-
-/**
- * Print a list of nidranges (\a nidlist) into the specified \a buffer.
- * At max \a count characters can be printed into \a buffer.
- * Nidranges are separated by a space character.
- *
- * \retval number of characters written
- */
-int cfs_print_nidlist(char *buffer, int count, struct list_head *nidlist)
-{
- int i = 0;
- struct nidrange *nr;
-
- if (count <= 0)
- return 0;
-
- list_for_each_entry(nr, nidlist, nr_link) {
- if (i)
- i += scnprintf(buffer + i, count - i, " ");
-
- if (nr->nr_all) {
- LASSERT(list_empty(&nr->nr_addrranges));
- i += scnprintf(buffer + i, count - i, "*");
- i += cfs_print_network(buffer + i, count - i, nr);
- } else {
- i += cfs_print_addrranges(buffer + i, count - i,
- &nr->nr_addrranges, nr);
- }
- }
- return i;
-}
-EXPORT_SYMBOL(cfs_print_nidlist);
-
-/**
- * Determines the minimum and maximum addresses for a single
- * IP address range.
- *
- * \param ar the address range to scan
- * \param min_nid [out] lowest address in the range, may be NULL
- * \param max_nid [out] highest address in the range, may be NULL
- */
-static void cfs_ip_ar_min_max(struct addrrange *ar, __u32 *min_nid,
- __u32 *max_nid)
-{
- struct cfs_expr_list *el;
- struct cfs_range_expr *re;
- __u32 tmp_ip_addr = 0;
- unsigned int min_ip[4] = {0};
- unsigned int max_ip[4] = {0};
- int re_count = 0;
-
- list_for_each_entry(el, &ar->ar_numaddr_ranges, el_link) {
- list_for_each_entry(re, &el->el_exprs, re_link) {
- min_ip[re_count] = re->re_lo;
- max_ip[re_count] = re->re_hi;
- re_count++;
- }
- }
-
- tmp_ip_addr = ((min_ip[0] << 24) | (min_ip[1] << 16) |
- (min_ip[2] << 8) | min_ip[3]);
-
- if (min_nid)
- *min_nid = tmp_ip_addr;
-
- tmp_ip_addr = ((max_ip[0] << 24) | (max_ip[1] << 16) |
- (max_ip[2] << 8) | max_ip[3]);
-
- if (max_nid)
- *max_nid = tmp_ip_addr;
-}
-
-/**
- * Determines the minimum and maximum addresses for a single
- * numeric address range.
- *
- * \param ar the address range to scan
- * \param min_nid [out] lowest address in the range, may be NULL
- * \param max_nid [out] highest address in the range, may be NULL
- */
-static void cfs_num_ar_min_max(struct addrrange *ar, __u32 *min_nid,
- __u32 *max_nid)
-{
- struct cfs_expr_list *el;
- struct cfs_range_expr *re;
- unsigned int min_addr = 0;
- unsigned int max_addr = 0;
-
- list_for_each_entry(el, &ar->ar_numaddr_ranges, el_link) {
- list_for_each_entry(re, &el->el_exprs, re_link) {
- if (re->re_lo < min_addr || !min_addr)
- min_addr = re->re_lo;
- if (re->re_hi > max_addr)
- max_addr = re->re_hi;
- }
- }
-
- if (min_nid)
- *min_nid = min_addr;
- if (max_nid)
- *max_nid = max_addr;
-}
-
-/**
- * Determines whether an expression list in a nidrange contains exactly
- * one contiguous address range. Calls the correct netstrfns for the LND.
- *
- * \param *nidlist
- *
- * \retval true if contiguous
- * \retval false if not contiguous
- */
-bool cfs_nidrange_is_contiguous(struct list_head *nidlist)
-{
- struct nidrange *nr;
- struct netstrfns *nf = NULL;
- char *lndname = NULL;
- int netnum = -1;
-
- list_for_each_entry(nr, nidlist, nr_link) {
- nf = nr->nr_netstrfns;
- if (!lndname)
- lndname = nf->nf_name;
- if (netnum == -1)
- netnum = nr->nr_netnum;
-
- if (strcmp(lndname, nf->nf_name) ||
- netnum != nr->nr_netnum)
- return false;
- }
-
- if (!nf)
- return false;
-
- if (!nf->nf_is_contiguous(nidlist))
- return false;
-
- return true;
-}
-EXPORT_SYMBOL(cfs_nidrange_is_contiguous);
-
-/**
- * Determines whether an expression list in a numeric nidrange contains exactly
- * one contiguous address range.
- *
- * \param *nidlist
- *
- * \retval true if contiguous
- * \retval false if not contiguous
- */
-static bool cfs_num_is_contiguous(struct list_head *nidlist)
-{
- struct nidrange *nr;
- struct addrrange *ar;
- struct cfs_expr_list *el;
- struct cfs_range_expr *re;
- int last_hi = 0;
- __u32 last_end_nid = 0;
- __u32 current_start_nid = 0;
- __u32 current_end_nid = 0;
-
- list_for_each_entry(nr, nidlist, nr_link) {
- list_for_each_entry(ar, &nr->nr_addrranges, ar_link) {
- cfs_num_ar_min_max(ar, &current_start_nid,
- &current_end_nid);
- if (last_end_nid &&
- (current_start_nid - last_end_nid != 1))
- return false;
- last_end_nid = current_end_nid;
- list_for_each_entry(el, &ar->ar_numaddr_ranges,
- el_link) {
- list_for_each_entry(re, &el->el_exprs,
- re_link) {
- if (re->re_stride > 1)
- return false;
- else if (last_hi &&
- re->re_hi - last_hi != 1)
- return false;
- last_hi = re->re_hi;
- }
- }
- }
- }
-
- return true;
-}
-
-/**
- * Determines whether an expression list in an IP nidrange contains exactly
- * one contiguous address range.
- *
- * \param *nidlist
- *
- * \retval true if contiguous
- * \retval false if not contiguous
- */
-static bool cfs_ip_is_contiguous(struct list_head *nidlist)
-{
- struct nidrange *nr;
- struct addrrange *ar;
- struct cfs_expr_list *el;
- struct cfs_range_expr *re;
- int expr_count;
- int last_hi = 255;
- int last_diff = 0;
- __u32 last_end_nid = 0;
- __u32 current_start_nid = 0;
- __u32 current_end_nid = 0;
-
- list_for_each_entry(nr, nidlist, nr_link) {
- list_for_each_entry(ar, &nr->nr_addrranges, ar_link) {
- last_hi = 255;
- last_diff = 0;
- cfs_ip_ar_min_max(ar, &current_start_nid,
- &current_end_nid);
- if (last_end_nid &&
- (current_start_nid - last_end_nid != 1))
- return false;
- last_end_nid = current_end_nid;
- list_for_each_entry(el, &ar->ar_numaddr_ranges,
- el_link) {
- expr_count = 0;
- list_for_each_entry(re, &el->el_exprs,
- re_link) {
- expr_count++;
- if (re->re_stride > 1 ||
- (last_diff > 0 && last_hi != 255) ||
- (last_diff > 0 && last_hi == 255 &&
- re->re_lo > 0))
- return false;
- last_hi = re->re_hi;
- last_diff = re->re_hi - re->re_lo;
- }
- }
- }
- }
-
- return true;
-}
-
-/**
- * Takes a linked list of nidrange expressions, determines the minimum
- * and maximum NIDs, and writes them as NID strings into \a min_nid and
- * \a max_nid.
- *
- * \param *nidlist
- * \param *min_nid
- * \param *max_nid
- * \param nidstr_length size of the \a min_nid and \a max_nid buffers
- */
-void cfs_nidrange_find_min_max(struct list_head *nidlist, char *min_nid,
- char *max_nid, size_t nidstr_length)
-{
- struct nidrange *nr;
- struct netstrfns *nf = NULL;
- int netnum = -1;
- __u32 min_addr;
- __u32 max_addr;
- char *lndname = NULL;
- char min_addr_str[IPSTRING_LENGTH];
- char max_addr_str[IPSTRING_LENGTH];
-
- list_for_each_entry(nr, nidlist, nr_link) {
- nf = nr->nr_netstrfns;
- lndname = nf->nf_name;
- if (netnum == -1)
- netnum = nr->nr_netnum;
-
- nf->nf_min_max(nidlist, &min_addr, &max_addr);
- }
- nf->nf_addr2str(min_addr, min_addr_str, sizeof(min_addr_str));
- nf->nf_addr2str(max_addr, max_addr_str, sizeof(max_addr_str));
-
- snprintf(min_nid, nidstr_length, "%s@%s%d", min_addr_str, lndname,
- netnum);
- snprintf(max_nid, nidstr_length, "%s@%s%d", max_addr_str, lndname,
- netnum);
-}
-EXPORT_SYMBOL(cfs_nidrange_find_min_max);
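
A sketch of how the two queries compose (hypothetical caller): the min/max
bounds are only meaningful when the compiled list is one unbroken range, so
contiguity is checked first:

	static void example_range_bounds(struct list_head *nidlist)
	{
		char min_nid[LNET_NIDSTR_SIZE];
		char max_nid[LNET_NIDSTR_SIZE];

		if (!cfs_nidrange_is_contiguous(nidlist))
			return;	/* bounds of a gapped list are misleading */

		cfs_nidrange_find_min_max(nidlist, min_nid, max_nid,
					  sizeof(min_nid));
		CDEBUG(D_NET, "range spans %s .. %s\n", min_nid, max_nid);
	}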
-
-/**
- * Determines the min and max NID values for num LNDs
- *
- * \param *nidlist
- * \param *min_nid
- * \param *max_nid
- */
-static void cfs_num_min_max(struct list_head *nidlist, __u32 *min_nid,
- __u32 *max_nid)
-{
- struct nidrange *nr;
- struct addrrange *ar;
- unsigned int tmp_min_addr = 0;
- unsigned int tmp_max_addr = 0;
- unsigned int min_addr = 0;
- unsigned int max_addr = 0;
-
- list_for_each_entry(nr, nidlist, nr_link) {
- list_for_each_entry(ar, &nr->nr_addrranges, ar_link) {
- cfs_num_ar_min_max(ar, &tmp_min_addr,
- &tmp_max_addr);
- if (tmp_min_addr < min_addr || !min_addr)
- min_addr = tmp_min_addr;
- if (tmp_max_addr > max_addr)
- max_addr = tmp_max_addr;
- }
- }
- *max_nid = max_addr;
- *min_nid = min_addr;
-}
-
-/**
- * Takes an nidlist and determines the minimum and maximum
- * ip addresses.
- *
- * \param *nidlist
- * \param *min_nid
- * \param *max_nid
- */
-static void cfs_ip_min_max(struct list_head *nidlist, __u32 *min_nid,
- __u32 *max_nid)
-{
- struct nidrange *nr;
- struct addrrange *ar;
- __u32 tmp_min_ip_addr = 0;
- __u32 tmp_max_ip_addr = 0;
- __u32 min_ip_addr = 0;
- __u32 max_ip_addr = 0;
-
- list_for_each_entry(nr, nidlist, nr_link) {
- list_for_each_entry(ar, &nr->nr_addrranges, ar_link) {
- cfs_ip_ar_min_max(ar, &tmp_min_ip_addr,
- &tmp_max_ip_addr);
- if (tmp_min_ip_addr < min_ip_addr || !min_ip_addr)
- min_ip_addr = tmp_min_ip_addr;
- if (tmp_max_ip_addr > max_ip_addr)
- max_ip_addr = tmp_max_ip_addr;
- }
- }
-
- if (min_nid)
- *min_nid = min_ip_addr;
- if (max_nid)
- *max_nid = max_ip_addr;
-}
-
-static int
-libcfs_lo_str2addr(const char *str, int nob, __u32 *addr)
-{
- *addr = 0;
- return 1;
-}
-
-static void
-libcfs_ip_addr2str(__u32 addr, char *str, size_t size)
-{
- snprintf(str, size, "%u.%u.%u.%u",
- (addr >> 24) & 0xff, (addr >> 16) & 0xff,
- (addr >> 8) & 0xff, addr & 0xff);
-}
-
-/*
- * CAVEAT EMPTOR XscanfX
- * I use "%n" at the end of a sscanf format to detect trailing junk. However
- * sscanf may return immediately if it sees the terminating '\0' in a string,
- * so I initialise the %n variable to the expected length. If sscanf sets it,
- * fine; if it doesn't, then the scan ended at the end of the string, which is
- * fine too :)
- */
-static int
-libcfs_ip_str2addr(const char *str, int nob, __u32 *addr)
-{
- unsigned int a;
- unsigned int b;
- unsigned int c;
- unsigned int d;
- int n = nob; /* XscanfX */
-
- /* numeric IP? */
- if (sscanf(str, "%u.%u.%u.%u%n", &a, &b, &c, &d, &n) >= 4 &&
- n == nob &&
- !(a & ~0xff) && !(b & ~0xff) &&
- !(c & ~0xff) && !(d & ~0xff)) {
- *addr = ((a << 24) | (b << 16) | (c << 8) | d);
- return 1;
- }
-
- return 0;
-}
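
The trick above can be verified in plain userspace C; this standalone demo
(ordinary C, not kernel code) accepts an exact dotted quad and rejects
trailing junk, mirroring the checks in the function above:

	#include <stdio.h>
	#include <string.h>

	static int ip_ok(const char *str)
	{
		unsigned int a, b, c, d;
		int nob = strlen(str);
		int n = nob;	/* stays == nob if the scan consumed it all */

		return sscanf(str, "%u.%u.%u.%u%n", &a, &b, &c, &d, &n) >= 4 &&
		       n == nob &&
		       !(a & ~0xffU) && !(b & ~0xffU) &&
		       !(c & ~0xffU) && !(d & ~0xffU);
	}

	int main(void)
	{
		printf("%d %d\n", ip_ok("1.2.3.4"), ip_ok("1.2.3.4x"));
		return 0;	/* prints "1 0" */
	}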
-
-/* Used by lnet/config.c so it can't be static */
-int
-cfs_ip_addr_parse(char *str, int len, struct list_head *list)
-{
- struct cfs_expr_list *el;
- struct cfs_lstr src;
- int rc;
- int i;
-
- src.ls_str = str;
- src.ls_len = len;
- i = 0;
-
- while (src.ls_str) {
- struct cfs_lstr res;
-
- if (!cfs_gettok(&src, '.', &res)) {
- rc = -EINVAL;
- goto out;
- }
-
- rc = cfs_expr_list_parse(res.ls_str, res.ls_len, 0, 255, &el);
- if (rc)
- goto out;
-
- list_add_tail(&el->el_link, list);
- i++;
- }
-
- if (i == 4)
- return 0;
-
- rc = -EINVAL;
-out:
- cfs_expr_list_free_list(list);
-
- return rc;
-}
-
-static int
-libcfs_ip_addr_range_print(char *buffer, int count, struct list_head *list)
-{
- int i = 0, j = 0;
- struct cfs_expr_list *el;
-
- list_for_each_entry(el, list, el_link) {
- LASSERT(j++ < 4);
- if (i)
- i += scnprintf(buffer + i, count - i, ".");
- i += cfs_expr_list_print(buffer + i, count - i, el);
- }
- return i;
-}
-
-/**
- * Matches address (\a addr) against address set encoded in \a list.
- *
- * \retval 1 if \a addr matches
- * \retval 0 otherwise
- */
-int
-cfs_ip_addr_match(__u32 addr, struct list_head *list)
-{
- struct cfs_expr_list *el;
- int i = 0;
-
- list_for_each_entry_reverse(el, list, el_link) {
- if (!cfs_expr_list_match(addr & 0xff, el))
- return 0;
- addr >>= 8;
- i++;
- }
-
- return i == 4;
-}
-
-static void
-libcfs_decnum_addr2str(__u32 addr, char *str, size_t size)
-{
- snprintf(str, size, "%u", addr);
-}
-
-static int
-libcfs_num_str2addr(const char *str, int nob, __u32 *addr)
-{
- int n;
-
- n = nob;
- if (sscanf(str, "0x%x%n", addr, &n) >= 1 && n == nob)
- return 1;
-
- n = nob;
- if (sscanf(str, "0X%x%n", addr, &n) >= 1 && n == nob)
- return 1;
-
- n = nob;
- if (sscanf(str, "%u%n", addr, &n) >= 1 && n == nob)
- return 1;
-
- return 0;
-}
-
-/**
- * Nf_parse_addrlist method for networks using numeric addresses.
- *
- * Examples of such networks are gm and elan.
- *
- * \retval 0 if \a str parses to a numeric address
- * \retval -errno otherwise
- */
-static int
-libcfs_num_parse(char *str, int len, struct list_head *list)
-{
- struct cfs_expr_list *el;
- int rc;
-
- rc = cfs_expr_list_parse(str, len, 0, MAX_NUMERIC_VALUE, &el);
- if (!rc)
- list_add_tail(&el->el_link, list);
-
- return rc;
-}
-
-static int
-libcfs_num_addr_range_print(char *buffer, int count, struct list_head *list)
-{
- int i = 0, j = 0;
- struct cfs_expr_list *el;
-
- list_for_each_entry(el, list, el_link) {
- LASSERT(j++ < 1);
- i += cfs_expr_list_print(buffer + i, count - i, el);
- }
- return i;
-}
-
-/*
- * Nf_match_addr method for networks using numeric addresses
- *
- * \retval 1 on match
- * \retval 0 otherwise
- */
-static int
-libcfs_num_match(__u32 addr, struct list_head *numaddr)
-{
- struct cfs_expr_list *el;
-
- LASSERT(!list_empty(numaddr));
- el = list_entry(numaddr->next, struct cfs_expr_list, el_link);
-
- return cfs_expr_list_match(addr, el);
-}
-
-static struct netstrfns libcfs_netstrfns[] = {
- { .nf_type = LOLND,
- .nf_name = "lo",
- .nf_modname = "klolnd",
- .nf_addr2str = libcfs_decnum_addr2str,
- .nf_str2addr = libcfs_lo_str2addr,
- .nf_parse_addrlist = libcfs_num_parse,
- .nf_print_addrlist = libcfs_num_addr_range_print,
- .nf_match_addr = libcfs_num_match,
- .nf_is_contiguous = cfs_num_is_contiguous,
- .nf_min_max = cfs_num_min_max },
- { .nf_type = SOCKLND,
- .nf_name = "tcp",
- .nf_modname = "ksocklnd",
- .nf_addr2str = libcfs_ip_addr2str,
- .nf_str2addr = libcfs_ip_str2addr,
- .nf_parse_addrlist = cfs_ip_addr_parse,
- .nf_print_addrlist = libcfs_ip_addr_range_print,
- .nf_match_addr = cfs_ip_addr_match,
- .nf_is_contiguous = cfs_ip_is_contiguous,
- .nf_min_max = cfs_ip_min_max },
- { .nf_type = O2IBLND,
- .nf_name = "o2ib",
- .nf_modname = "ko2iblnd",
- .nf_addr2str = libcfs_ip_addr2str,
- .nf_str2addr = libcfs_ip_str2addr,
- .nf_parse_addrlist = cfs_ip_addr_parse,
- .nf_print_addrlist = libcfs_ip_addr_range_print,
- .nf_match_addr = cfs_ip_addr_match,
- .nf_is_contiguous = cfs_ip_is_contiguous,
- .nf_min_max = cfs_ip_min_max },
- { .nf_type = GNILND,
- .nf_name = "gni",
- .nf_modname = "kgnilnd",
- .nf_addr2str = libcfs_decnum_addr2str,
- .nf_str2addr = libcfs_num_str2addr,
- .nf_parse_addrlist = libcfs_num_parse,
- .nf_print_addrlist = libcfs_num_addr_range_print,
- .nf_match_addr = libcfs_num_match,
- .nf_is_contiguous = cfs_num_is_contiguous,
- .nf_min_max = cfs_num_min_max },
- { .nf_type = GNIIPLND,
- .nf_name = "gip",
- .nf_modname = "kgnilnd",
- .nf_addr2str = libcfs_ip_addr2str,
- .nf_str2addr = libcfs_ip_str2addr,
- .nf_parse_addrlist = cfs_ip_addr_parse,
- .nf_print_addrlist = libcfs_ip_addr_range_print,
- .nf_match_addr = cfs_ip_addr_match,
- .nf_is_contiguous = cfs_ip_is_contiguous,
- .nf_min_max = cfs_ip_min_max },
-};
-
-static const size_t libcfs_nnetstrfns = ARRAY_SIZE(libcfs_netstrfns);
-
-static struct netstrfns *
-libcfs_lnd2netstrfns(__u32 lnd)
-{
- int i;
-
- for (i = 0; i < libcfs_nnetstrfns; i++)
- if (lnd == libcfs_netstrfns[i].nf_type)
- return &libcfs_netstrfns[i];
-
- return NULL;
-}
-
-static struct netstrfns *
-libcfs_namenum2netstrfns(const char *name)
-{
- struct netstrfns *nf;
- int i;
-
- for (i = 0; i < libcfs_nnetstrfns; i++) {
- nf = &libcfs_netstrfns[i];
- if (!strncmp(name, nf->nf_name, strlen(nf->nf_name)))
- return nf;
- }
- return NULL;
-}
-
-static struct netstrfns *
-libcfs_name2netstrfns(const char *name)
-{
- int i;
-
- for (i = 0; i < libcfs_nnetstrfns; i++)
- if (!strcmp(libcfs_netstrfns[i].nf_name, name))
- return &libcfs_netstrfns[i];
-
- return NULL;
-}
-
-int
-libcfs_isknown_lnd(__u32 lnd)
-{
- return !!libcfs_lnd2netstrfns(lnd);
-}
-EXPORT_SYMBOL(libcfs_isknown_lnd);
-
-char *
-libcfs_lnd2modname(__u32 lnd)
-{
- struct netstrfns *nf = libcfs_lnd2netstrfns(lnd);
-
- return nf ? nf->nf_modname : NULL;
-}
-EXPORT_SYMBOL(libcfs_lnd2modname);
-
-int
-libcfs_str2lnd(const char *str)
-{
- struct netstrfns *nf = libcfs_name2netstrfns(str);
-
- if (nf)
- return nf->nf_type;
-
- return -ENXIO;
-}
-EXPORT_SYMBOL(libcfs_str2lnd);
-
-char *
-libcfs_lnd2str_r(__u32 lnd, char *buf, size_t buf_size)
-{
- struct netstrfns *nf;
-
- nf = libcfs_lnd2netstrfns(lnd);
- if (!nf)
- snprintf(buf, buf_size, "?%u?", lnd);
- else
- snprintf(buf, buf_size, "%s", nf->nf_name);
-
- return buf;
-}
-EXPORT_SYMBOL(libcfs_lnd2str_r);
-
-char *
-libcfs_net2str_r(__u32 net, char *buf, size_t buf_size)
-{
- __u32 nnum = LNET_NETNUM(net);
- __u32 lnd = LNET_NETTYP(net);
- struct netstrfns *nf;
-
- nf = libcfs_lnd2netstrfns(lnd);
- if (!nf)
- snprintf(buf, buf_size, "<%u:%u>", lnd, nnum);
- else if (!nnum)
- snprintf(buf, buf_size, "%s", nf->nf_name);
- else
- snprintf(buf, buf_size, "%s%u", nf->nf_name, nnum);
-
- return buf;
-}
-EXPORT_SYMBOL(libcfs_net2str_r);
-
-char *
-libcfs_nid2str_r(lnet_nid_t nid, char *buf, size_t buf_size)
-{
- __u32 addr = LNET_NIDADDR(nid);
- __u32 net = LNET_NIDNET(nid);
- __u32 nnum = LNET_NETNUM(net);
- __u32 lnd = LNET_NETTYP(net);
- struct netstrfns *nf;
-
- if (nid == LNET_NID_ANY) {
- strncpy(buf, "<?>", buf_size);
- buf[buf_size - 1] = '\0';
- return buf;
- }
-
- nf = libcfs_lnd2netstrfns(lnd);
- if (!nf) {
- snprintf(buf, buf_size, "%x@<%u:%u>", addr, lnd, nnum);
- } else {
- size_t addr_len;
-
- nf->nf_addr2str(addr, buf, buf_size);
- addr_len = strlen(buf);
- if (!nnum)
- snprintf(buf + addr_len, buf_size - addr_len, "@%s",
- nf->nf_name);
- else
- snprintf(buf + addr_len, buf_size - addr_len, "@%s%u",
- nf->nf_name, nnum);
- }
-
- return buf;
-}
-EXPORT_SYMBOL(libcfs_nid2str_r);
-
-static struct netstrfns *
-libcfs_str2net_internal(const char *str, __u32 *net)
-{
- struct netstrfns *nf = NULL;
- int nob;
- unsigned int netnum;
- int i;
-
- for (i = 0; i < libcfs_nnetstrfns; i++) {
- nf = &libcfs_netstrfns[i];
- if (!strncmp(str, nf->nf_name, strlen(nf->nf_name)))
- break;
- }
-
- if (i == libcfs_nnetstrfns)
- return NULL;
-
- nob = strlen(nf->nf_name);
-
- if (strlen(str) == (unsigned int)nob) {
- netnum = 0;
- } else {
- if (nf->nf_type == LOLND) /* net number not allowed */
- return NULL;
-
- str += nob;
- i = strlen(str);
- if (sscanf(str, "%u%n", &netnum, &i) < 1 ||
- i != (int)strlen(str))
- return NULL;
- }
-
- *net = LNET_MKNET(nf->nf_type, netnum);
- return nf;
-}
-
-__u32
-libcfs_str2net(const char *str)
-{
- __u32 net;
-
- if (libcfs_str2net_internal(str, &net))
- return net;
-
- return LNET_NIDNET(LNET_NID_ANY);
-}
-EXPORT_SYMBOL(libcfs_str2net);
-
-lnet_nid_t
-libcfs_str2nid(const char *str)
-{
- const char *sep = strchr(str, '@');
- struct netstrfns *nf;
- __u32 net;
- __u32 addr;
-
- if (sep) {
- nf = libcfs_str2net_internal(sep + 1, &net);
- if (!nf)
- return LNET_NID_ANY;
- } else {
- sep = str + strlen(str);
- net = LNET_MKNET(SOCKLND, 0);
- nf = libcfs_lnd2netstrfns(SOCKLND);
- LASSERT(nf);
- }
-
- if (!nf->nf_str2addr(str, (int)(sep - str), &addr))
- return LNET_NID_ANY;
-
- return LNET_MKNID(net, addr);
-}
-EXPORT_SYMBOL(libcfs_str2nid);
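
A round-trip sketch (hypothetical caller) through the table-driven helpers
above, using the reentrant printer so no shared nidstring slot is involved:

	static void example_nid_roundtrip(void)
	{
		char buf[LNET_NIDSTR_SIZE];
		lnet_nid_t nid = libcfs_str2nid("192.168.0.7@tcp1");

		if (nid != LNET_NID_ANY)
			CDEBUG(D_NET, "parsed and printed back: %s\n",
			       libcfs_nid2str_r(nid, buf, sizeof(buf)));
	}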
-
-char *
-libcfs_id2str(struct lnet_process_id id)
-{
- char *str = libcfs_next_nidstring();
-
- if (id.pid == LNET_PID_ANY) {
- snprintf(str, LNET_NIDSTR_SIZE,
- "LNET_PID_ANY-%s", libcfs_nid2str(id.nid));
- return str;
- }
-
- snprintf(str, LNET_NIDSTR_SIZE, "%s%u-%s",
- id.pid & LNET_PID_USERFLAG ? "U" : "",
- id.pid & ~LNET_PID_USERFLAG, libcfs_nid2str(id.nid));
- return str;
-}
-EXPORT_SYMBOL(libcfs_id2str);
-
-int
-libcfs_str2anynid(lnet_nid_t *nidp, const char *str)
-{
- if (!strcmp(str, "*")) {
- *nidp = LNET_NID_ANY;
- return 1;
- }
-
- *nidp = libcfs_str2nid(str);
- return *nidp != LNET_NID_ANY;
-}
-EXPORT_SYMBOL(libcfs_str2anynid);
diff --git a/drivers/staging/lustre/lnet/lnet/peer.c b/drivers/staging/lustre/lnet/lnet/peer.c
deleted file mode 100644
index 3d4caa609c83..000000000000
--- a/drivers/staging/lustre/lnet/lnet/peer.c
+++ /dev/null
@@ -1,456 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/*
- * GPL HEADER START
- *
- * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 only,
- * as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * General Public License version 2 for more details (a copy is included
- * in the LICENSE file that accompanied this code).
- *
- * You should have received a copy of the GNU General Public License
- * version 2 along with this program; If not, see
- * http://www.gnu.org/licenses/gpl-2.0.html
- *
- * GPL HEADER END
- */
-/*
- * Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved.
- * Use is subject to license terms.
- *
- * Copyright (c) 2012, Intel Corporation.
- */
-/*
- * This file is part of Lustre, http://www.lustre.org/
- * Lustre is a trademark of Sun Microsystems, Inc.
- *
- * lnet/lnet/peer.c
- */
-
-#define DEBUG_SUBSYSTEM S_LNET
-
-#include <linux/lnet/lib-lnet.h>
-#include <uapi/linux/lnet/lnet-dlc.h>
-
-int
-lnet_peer_tables_create(void)
-{
- struct lnet_peer_table *ptable;
- struct list_head *hash;
- int i;
- int j;
-
- the_lnet.ln_peer_tables = cfs_percpt_alloc(lnet_cpt_table(),
- sizeof(*ptable));
- if (!the_lnet.ln_peer_tables) {
- CERROR("Failed to allocate cpu-partition peer tables\n");
- return -ENOMEM;
- }
-
- cfs_percpt_for_each(ptable, i, the_lnet.ln_peer_tables) {
- INIT_LIST_HEAD(&ptable->pt_deathrow);
-
- hash = kvmalloc_cpt(LNET_PEER_HASH_SIZE * sizeof(*hash),
- GFP_KERNEL, i);
- if (!hash) {
- CERROR("Failed to create peer hash table\n");
- lnet_peer_tables_destroy();
- return -ENOMEM;
- }
-
- for (j = 0; j < LNET_PEER_HASH_SIZE; j++)
- INIT_LIST_HEAD(&hash[j]);
- ptable->pt_hash = hash; /* sign of initialization */
- }
-
- return 0;
-}
-
-void
-lnet_peer_tables_destroy(void)
-{
- struct lnet_peer_table *ptable;
- struct list_head *hash;
- int i;
- int j;
-
- if (!the_lnet.ln_peer_tables)
- return;
-
- cfs_percpt_for_each(ptable, i, the_lnet.ln_peer_tables) {
- hash = ptable->pt_hash;
- if (!hash) /* not initialized */
- break;
-
- LASSERT(list_empty(&ptable->pt_deathrow));
-
- ptable->pt_hash = NULL;
- for (j = 0; j < LNET_PEER_HASH_SIZE; j++)
- LASSERT(list_empty(&hash[j]));
-
- kvfree(hash);
- }
-
- cfs_percpt_free(the_lnet.ln_peer_tables);
- the_lnet.ln_peer_tables = NULL;
-}
-
-static void
-lnet_peer_table_cleanup_locked(struct lnet_ni *ni,
- struct lnet_peer_table *ptable)
-{
- int i;
- struct lnet_peer *lp;
- struct lnet_peer *tmp;
-
- for (i = 0; i < LNET_PEER_HASH_SIZE; i++) {
- list_for_each_entry_safe(lp, tmp, &ptable->pt_hash[i],
- lp_hashlist) {
- if (ni && ni != lp->lp_ni)
- continue;
- list_del_init(&lp->lp_hashlist);
- /* Lose hash table's ref */
- ptable->pt_zombies++;
- lnet_peer_decref_locked(lp);
- }
- }
-}
-
-static void
-lnet_peer_table_deathrow_wait_locked(struct lnet_peer_table *ptable,
- int cpt_locked)
-{
- int i;
-
- for (i = 3; ptable->pt_zombies; i++) {
- lnet_net_unlock(cpt_locked);
-
- if (is_power_of_2(i)) {
- CDEBUG(D_WARNING,
- "Waiting for %d zombies on peer table\n",
- ptable->pt_zombies);
- }
- set_current_state(TASK_UNINTERRUPTIBLE);
- schedule_timeout(HZ >> 1);
- lnet_net_lock(cpt_locked);
- }
-}
-
-static void
-lnet_peer_table_del_rtrs_locked(struct lnet_ni *ni,
- struct lnet_peer_table *ptable,
- int cpt_locked)
-{
- struct lnet_peer *lp;
- struct lnet_peer *tmp;
- lnet_nid_t lp_nid;
- int i;
-
- for (i = 0; i < LNET_PEER_HASH_SIZE; i++) {
- list_for_each_entry_safe(lp, tmp, &ptable->pt_hash[i],
- lp_hashlist) {
- if (ni != lp->lp_ni)
- continue;
-
- if (!lp->lp_rtr_refcount)
- continue;
-
- lp_nid = lp->lp_nid;
-
- lnet_net_unlock(cpt_locked);
- lnet_del_route(LNET_NIDNET(LNET_NID_ANY), lp_nid);
- lnet_net_lock(cpt_locked);
- }
- }
-}
-
-void
-lnet_peer_tables_cleanup(struct lnet_ni *ni)
-{
- struct lnet_peer_table *ptable;
- struct list_head deathrow;
- struct lnet_peer *lp;
- struct lnet_peer *temp;
- int i;
-
- INIT_LIST_HEAD(&deathrow);
-
- LASSERT(the_lnet.ln_shutdown || ni);
- /*
- * If just deleting the peers for a NI, get rid of any routes these
- * peers are gateways for.
- */
- cfs_percpt_for_each(ptable, i, the_lnet.ln_peer_tables) {
- lnet_net_lock(i);
- lnet_peer_table_del_rtrs_locked(ni, ptable, i);
- lnet_net_unlock(i);
- }
-
- /*
- * Start the process of moving the applicable peers to
- * deathrow.
- */
- cfs_percpt_for_each(ptable, i, the_lnet.ln_peer_tables) {
- lnet_net_lock(i);
- lnet_peer_table_cleanup_locked(ni, ptable);
- lnet_net_unlock(i);
- }
-
- /* Cleanup all entries on deathrow. */
- cfs_percpt_for_each(ptable, i, the_lnet.ln_peer_tables) {
- lnet_net_lock(i);
- lnet_peer_table_deathrow_wait_locked(ptable, i);
- list_splice_init(&ptable->pt_deathrow, &deathrow);
- lnet_net_unlock(i);
- }
-
- list_for_each_entry_safe(lp, temp, &deathrow, lp_hashlist) {
- list_del(&lp->lp_hashlist);
- kfree(lp);
- }
-}
-
-void
-lnet_destroy_peer_locked(struct lnet_peer *lp)
-{
- struct lnet_peer_table *ptable;
-
- LASSERT(!lp->lp_refcount);
- LASSERT(!lp->lp_rtr_refcount);
- LASSERT(list_empty(&lp->lp_txq));
- LASSERT(list_empty(&lp->lp_hashlist));
- LASSERT(!lp->lp_txqnob);
-
- ptable = the_lnet.ln_peer_tables[lp->lp_cpt];
- LASSERT(ptable->pt_number > 0);
- ptable->pt_number--;
-
- lnet_ni_decref_locked(lp->lp_ni, lp->lp_cpt);
- lp->lp_ni = NULL;
-
- list_add(&lp->lp_hashlist, &ptable->pt_deathrow);
- LASSERT(ptable->pt_zombies > 0);
- ptable->pt_zombies--;
-}
-
-struct lnet_peer *
-lnet_find_peer_locked(struct lnet_peer_table *ptable, lnet_nid_t nid)
-{
- struct list_head *peers;
- struct lnet_peer *lp;
-
- LASSERT(!the_lnet.ln_shutdown);
-
- peers = &ptable->pt_hash[lnet_nid2peerhash(nid)];
- list_for_each_entry(lp, peers, lp_hashlist) {
- if (lp->lp_nid == nid) {
- lnet_peer_addref_locked(lp);
- return lp;
- }
- }
-
- return NULL;
-}
-
-int
-lnet_nid2peer_locked(struct lnet_peer **lpp, lnet_nid_t nid, int cpt)
-{
- struct lnet_peer_table *ptable;
- struct lnet_peer *lp = NULL;
- struct lnet_peer *lp2;
- int cpt2;
- int rc = 0;
-
- *lpp = NULL;
- if (the_lnet.ln_shutdown) /* it's shutting down */
- return -ESHUTDOWN;
-
- /* cpt can be LNET_LOCK_EX if it's called from router functions */
- cpt2 = cpt != LNET_LOCK_EX ? cpt : lnet_cpt_of_nid_locked(nid);
-
- ptable = the_lnet.ln_peer_tables[cpt2];
- lp = lnet_find_peer_locked(ptable, nid);
- if (lp) {
- *lpp = lp;
- return 0;
- }
-
- if (!list_empty(&ptable->pt_deathrow)) {
- lp = list_entry(ptable->pt_deathrow.next,
- struct lnet_peer, lp_hashlist);
- list_del(&lp->lp_hashlist);
- }
-
- /*
- * take extra refcount in case another thread has shutdown LNet
- * and destroyed locks and peer-table before I finish the allocation
- */
- ptable->pt_number++;
- lnet_net_unlock(cpt);
-
- if (lp)
- memset(lp, 0, sizeof(*lp));
- else
- lp = kzalloc_cpt(sizeof(*lp), GFP_NOFS, cpt2);
-
- if (!lp) {
- rc = -ENOMEM;
- lnet_net_lock(cpt);
- goto out;
- }
-
- INIT_LIST_HEAD(&lp->lp_txq);
- INIT_LIST_HEAD(&lp->lp_rtrq);
- INIT_LIST_HEAD(&lp->lp_routes);
-
- lp->lp_notify = 0;
- lp->lp_notifylnd = 0;
- lp->lp_notifying = 0;
- lp->lp_alive_count = 0;
- lp->lp_timestamp = 0;
- lp->lp_alive = !lnet_peers_start_down(); /* 1 bit!! */
- lp->lp_last_alive = cfs_time_current(); /* assumes alive */
- lp->lp_last_query = 0; /* haven't asked NI yet */
- lp->lp_ping_timestamp = 0;
- lp->lp_ping_feats = LNET_PING_FEAT_INVAL;
- lp->lp_nid = nid;
- lp->lp_cpt = cpt2;
- lp->lp_refcount = 2; /* 1 for caller; 1 for hash */
- lp->lp_rtr_refcount = 0;
-
- lnet_net_lock(cpt);
-
- if (the_lnet.ln_shutdown) {
- rc = -ESHUTDOWN;
- goto out;
- }
-
- lp2 = lnet_find_peer_locked(ptable, nid);
- if (lp2) {
- *lpp = lp2;
- goto out;
- }
-
- lp->lp_ni = lnet_net2ni_locked(LNET_NIDNET(nid), cpt2);
- if (!lp->lp_ni) {
- rc = -EHOSTUNREACH;
- goto out;
- }
-
- lp->lp_txcredits = lp->lp_ni->ni_peertxcredits;
- lp->lp_mintxcredits = lp->lp_ni->ni_peertxcredits;
- lp->lp_rtrcredits = lnet_peer_buffer_credits(lp->lp_ni);
- lp->lp_minrtrcredits = lnet_peer_buffer_credits(lp->lp_ni);
-
- list_add_tail(&lp->lp_hashlist,
- &ptable->pt_hash[lnet_nid2peerhash(nid)]);
- ptable->pt_version++;
- *lpp = lp;
-
- return 0;
-out:
- if (lp)
- list_add(&lp->lp_hashlist, &ptable->pt_deathrow);
- ptable->pt_number--;
- return rc;
-}
-
-void
-lnet_debug_peer(lnet_nid_t nid)
-{
- char *aliveness = "NA";
- struct lnet_peer *lp;
- int rc;
- int cpt;
-
- cpt = lnet_cpt_of_nid(nid);
- lnet_net_lock(cpt);
-
- rc = lnet_nid2peer_locked(&lp, nid, cpt);
- if (rc) {
- lnet_net_unlock(cpt);
- CDEBUG(D_WARNING, "No peer %s\n", libcfs_nid2str(nid));
- return;
- }
-
- if (lnet_isrouter(lp) || lnet_peer_aliveness_enabled(lp))
- aliveness = lp->lp_alive ? "up" : "down";
-
- CDEBUG(D_WARNING, "%-24s %4d %5s %5d %5d %5d %5d %5d %ld\n",
- libcfs_nid2str(lp->lp_nid), lp->lp_refcount,
- aliveness, lp->lp_ni->ni_peertxcredits,
- lp->lp_rtrcredits, lp->lp_minrtrcredits,
- lp->lp_txcredits, lp->lp_mintxcredits, lp->lp_txqnob);
-
- lnet_peer_decref_locked(lp);
-
- lnet_net_unlock(cpt);
-}
-
-int
-lnet_get_peer_info(__u32 peer_index, __u64 *nid,
- char aliveness[LNET_MAX_STR_LEN],
- __u32 *cpt_iter, __u32 *refcount,
- __u32 *ni_peer_tx_credits, __u32 *peer_tx_credits,
- __u32 *peer_rtr_credits, __u32 *peer_min_rtr_credits,
- __u32 *peer_tx_qnob)
-{
- struct lnet_peer_table *peer_table;
- struct lnet_peer *lp;
- bool found = false;
- int lncpt, j;
-
- /* get the number of CPTs */
- lncpt = cfs_percpt_number(the_lnet.ln_peer_tables);
-
- /*
- * if the cpt number to be examined is >= the number of cpts in
- * the system, then indicate that there are no more cpts to examine
- */
- if (*cpt_iter >= lncpt)
- return -ENOENT;
-
- /* get the current table */
- peer_table = the_lnet.ln_peer_tables[*cpt_iter];
- /* if the ptable is NULL then there are no more cpts to examine */
- if (!peer_table)
- return -ENOENT;
-
- lnet_net_lock(*cpt_iter);
-
- for (j = 0; j < LNET_PEER_HASH_SIZE && !found; j++) {
- struct list_head *peers = &peer_table->pt_hash[j];
-
- list_for_each_entry(lp, peers, lp_hashlist) {
- if (peer_index-- > 0)
- continue;
-
- snprintf(aliveness, LNET_MAX_STR_LEN, "NA");
- if (lnet_isrouter(lp) ||
- lnet_peer_aliveness_enabled(lp))
- snprintf(aliveness, LNET_MAX_STR_LEN,
- lp->lp_alive ? "up" : "down");
-
- *nid = lp->lp_nid;
- *refcount = lp->lp_refcount;
- *ni_peer_tx_credits = lp->lp_ni->ni_peertxcredits;
- *peer_tx_credits = lp->lp_txcredits;
- *peer_rtr_credits = lp->lp_rtrcredits;
- *peer_min_rtr_credits = lp->lp_minrtrcredits;
- *peer_tx_qnob = lp->lp_txqnob;
-
- found = true;
- }
- }
- lnet_net_unlock(*cpt_iter);
-
- *cpt_iter = lncpt;
-
- return found ? 0 : -ENOENT;
-}
diff --git a/drivers/staging/lustre/lnet/lnet/router.c b/drivers/staging/lustre/lnet/lnet/router.c
deleted file mode 100644
index a3c3f4959f46..000000000000
--- a/drivers/staging/lustre/lnet/lnet/router.c
+++ /dev/null
@@ -1,1800 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/*
- * Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved.
- *
- * Copyright (c) 2011, 2015, Intel Corporation.
- *
- * This file is part of Portals
- * http://sourceforge.net/projects/sandiaportals/
- *
- * Portals is free software; you can redistribute it and/or
- * modify it under the terms of version 2 of the GNU General Public
- * License as published by the Free Software Foundation.
- *
- * Portals is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- */
-
-#define DEBUG_SUBSYSTEM S_LNET
-
-#include <linux/completion.h>
-#include <linux/lnet/lib-lnet.h>
-
-#define LNET_NRB_TINY_MIN 512 /* min value for each CPT */
-#define LNET_NRB_TINY (LNET_NRB_TINY_MIN * 4)
-#define LNET_NRB_SMALL_MIN 4096 /* min value for each CPT */
-#define LNET_NRB_SMALL (LNET_NRB_SMALL_MIN * 4)
-#define LNET_NRB_SMALL_PAGES 1
-#define LNET_NRB_LARGE_MIN 256 /* min value for each CPT */
-#define LNET_NRB_LARGE (LNET_NRB_LARGE_MIN * 4)
-#define LNET_NRB_LARGE_PAGES ((LNET_MTU + PAGE_SIZE - 1) >> \
- PAGE_SHIFT)
-
-static char *forwarding = "";
-module_param(forwarding, charp, 0444);
-MODULE_PARM_DESC(forwarding, "Explicitly enable/disable forwarding between networks");
-
-static int tiny_router_buffers;
-module_param(tiny_router_buffers, int, 0444);
-MODULE_PARM_DESC(tiny_router_buffers, "# of 0 payload messages to buffer in the router");
-static int small_router_buffers;
-module_param(small_router_buffers, int, 0444);
-MODULE_PARM_DESC(small_router_buffers, "# of small (1 page) messages to buffer in the router");
-static int large_router_buffers;
-module_param(large_router_buffers, int, 0444);
-MODULE_PARM_DESC(large_router_buffers, "# of large messages to buffer in the router");
-static int peer_buffer_credits;
-module_param(peer_buffer_credits, int, 0444);
-MODULE_PARM_DESC(peer_buffer_credits, "# router buffer credits per peer");
-
-static int auto_down = 1;
-module_param(auto_down, int, 0444);
-MODULE_PARM_DESC(auto_down, "Automatically mark peers down on comms error");
-
-int
-lnet_peer_buffer_credits(struct lnet_ni *ni)
-{
- /* NI option overrides LNet default */
- if (ni->ni_peerrtrcredits > 0)
- return ni->ni_peerrtrcredits;
- if (peer_buffer_credits > 0)
- return peer_buffer_credits;
-
- /*
- * As an approximation, allow this peer the same number of router
- * buffers as it is allowed outstanding sends
- */
- return ni->ni_peertxcredits;
-}
-
-/* forward ref's */
-static int lnet_router_checker(void *);
-
-static int check_routers_before_use;
-module_param(check_routers_before_use, int, 0444);
-MODULE_PARM_DESC(check_routers_before_use, "Assume routers are down and ping them before use");
-
-int avoid_asym_router_failure = 1;
-module_param(avoid_asym_router_failure, int, 0644);
-MODULE_PARM_DESC(avoid_asym_router_failure, "Avoid asymmetrical router failures (0 to disable)");
-
-static int dead_router_check_interval = 60;
-module_param(dead_router_check_interval, int, 0644);
-MODULE_PARM_DESC(dead_router_check_interval, "Seconds between dead router health checks (<= 0 to disable)");
-
-static int live_router_check_interval = 60;
-module_param(live_router_check_interval, int, 0644);
-MODULE_PARM_DESC(live_router_check_interval, "Seconds between live router health checks (<= 0 to disable)");
-
-static int router_ping_timeout = 50;
-module_param(router_ping_timeout, int, 0644);
-MODULE_PARM_DESC(router_ping_timeout, "Seconds to wait for the reply to a router health query");
-
-int
-lnet_peers_start_down(void)
-{
- return check_routers_before_use;
-}
-
-void
-lnet_notify_locked(struct lnet_peer *lp, int notifylnd, int alive,
- unsigned long when)
-{
- if (time_before(when, lp->lp_timestamp)) { /* out of date information */
- CDEBUG(D_NET, "Out of date\n");
- return;
- }
-
- lp->lp_timestamp = when; /* update timestamp */
- lp->lp_ping_deadline = 0; /* disable ping timeout */
-
- if (lp->lp_alive_count && /* got old news */
- (!lp->lp_alive) == (!alive)) { /* new date for old news */
- CDEBUG(D_NET, "Old news\n");
- return;
- }
-
- /* Flag that notification is outstanding */
-
- lp->lp_alive_count++;
- lp->lp_alive = !(!alive); /* 1 bit! */
- lp->lp_notify = 1;
- lp->lp_notifylnd |= notifylnd;
- if (lp->lp_alive)
- lp->lp_ping_feats = LNET_PING_FEAT_INVAL; /* reset */
-
- CDEBUG(D_NET, "set %s %d\n", libcfs_nid2str(lp->lp_nid), alive);
-}
-
-static void
-lnet_ni_notify_locked(struct lnet_ni *ni, struct lnet_peer *lp)
-{
- int alive;
- int notifylnd;
-
- /*
- * Notify only in 1 thread at any time to ensure ordered notification.
- * NB individual events can be missed; the only guarantee is that you
- * always get the most recent news
- */
- if (lp->lp_notifying || !ni)
- return;
-
- lp->lp_notifying = 1;
-
- while (lp->lp_notify) {
- alive = lp->lp_alive;
- notifylnd = lp->lp_notifylnd;
-
- lp->lp_notifylnd = 0;
- lp->lp_notify = 0;
-
- if (notifylnd && ni->ni_lnd->lnd_notify) {
- lnet_net_unlock(lp->lp_cpt);
-
- /*
- * A new notification could happen now; I'll handle it
- * when control returns to me
- */
- ni->ni_lnd->lnd_notify(ni, lp->lp_nid, alive);
-
- lnet_net_lock(lp->lp_cpt);
- }
- }
-
- lp->lp_notifying = 0;
-}
-
-static void
-lnet_rtr_addref_locked(struct lnet_peer *lp)
-{
- LASSERT(lp->lp_refcount > 0);
- LASSERT(lp->lp_rtr_refcount >= 0);
-
- /* lnet_net_lock must be exclusively locked */
- lp->lp_rtr_refcount++;
- if (lp->lp_rtr_refcount == 1) {
- struct list_head *pos;
-
- /* a simple insertion sort */
- list_for_each_prev(pos, &the_lnet.ln_routers) {
- struct lnet_peer *rtr;
-
- rtr = list_entry(pos, struct lnet_peer, lp_rtr_list);
- if (rtr->lp_nid < lp->lp_nid)
- break;
- }
-
- list_add(&lp->lp_rtr_list, pos);
- /* addref for the_lnet.ln_routers */
- lnet_peer_addref_locked(lp);
- the_lnet.ln_routers_version++;
- }
-}
-
-static void
-lnet_rtr_decref_locked(struct lnet_peer *lp)
-{
- LASSERT(lp->lp_refcount > 0);
- LASSERT(lp->lp_rtr_refcount > 0);
-
- /* lnet_net_lock must be exclusively locked */
- lp->lp_rtr_refcount--;
- if (!lp->lp_rtr_refcount) {
- LASSERT(list_empty(&lp->lp_routes));
-
- if (lp->lp_rcd) {
- list_add(&lp->lp_rcd->rcd_list,
- &the_lnet.ln_rcd_deathrow);
- lp->lp_rcd = NULL;
- }
-
- list_del(&lp->lp_rtr_list);
- /* decref for the_lnet.ln_routers */
- lnet_peer_decref_locked(lp);
- the_lnet.ln_routers_version++;
- }
-}
-
-struct lnet_remotenet *
-lnet_find_net_locked(__u32 net)
-{
- struct lnet_remotenet *rnet;
- struct list_head *rn_list;
-
- LASSERT(!the_lnet.ln_shutdown);
-
- rn_list = lnet_net2rnethash(net);
- list_for_each_entry(rnet, rn_list, lrn_list) {
- if (rnet->lrn_net == net)
- return rnet;
- }
- return NULL;
-}
-
-static void lnet_shuffle_seed(void)
-{
- static int seeded;
- struct lnet_ni *ni;
-
- if (seeded)
- return;
-
- /*
- * Nodes with small feet have little entropy
- * the NID for this node gives the most entropy in the low bits
- */
- list_for_each_entry(ni, &the_lnet.ln_nis, ni_list) {
- __u32 lnd_type, seed;
-
- lnd_type = LNET_NETTYP(LNET_NIDNET(ni->ni_nid));
- if (lnd_type != LOLND) {
- seed = (LNET_NIDADDR(ni->ni_nid) | lnd_type);
- add_device_randomness(&seed, sizeof(seed));
- }
- }
-
- seeded = 1;
-}
-
-/* NB expects LNET_LOCK held */
-static void
-lnet_add_route_to_rnet(struct lnet_remotenet *rnet, struct lnet_route *route)
-{
- unsigned int len = 0;
- unsigned int offset = 0;
- struct list_head *e;
-
- lnet_shuffle_seed();
-
- list_for_each(e, &rnet->lrn_routes) {
- len++;
- }
-
- /* len+1 positions to add a new entry */
- offset = prandom_u32_max(len + 1);
- list_for_each(e, &rnet->lrn_routes) {
- if (!offset)
- break;
- offset--;
- }
- list_add(&route->lr_list, e);
- list_add(&route->lr_gwlist, &route->lr_gateway->lp_routes);
-
- the_lnet.ln_remote_nets_version++;
- lnet_rtr_addref_locked(route->lr_gateway);
-}
-
-int
-lnet_add_route(__u32 net, __u32 hops, lnet_nid_t gateway,
- unsigned int priority)
-{
- struct list_head *e;
- struct lnet_remotenet *rnet;
- struct lnet_remotenet *rnet2;
- struct lnet_route *route;
- struct lnet_ni *ni;
- int add_route;
- int rc;
-
- CDEBUG(D_NET, "Add route: net %s hops %d priority %u gw %s\n",
- libcfs_net2str(net), hops, priority, libcfs_nid2str(gateway));
-
- if (gateway == LNET_NID_ANY ||
- LNET_NETTYP(LNET_NIDNET(gateway)) == LOLND ||
- net == LNET_NIDNET(LNET_NID_ANY) ||
- LNET_NETTYP(net) == LOLND ||
- LNET_NIDNET(gateway) == net ||
- (hops != LNET_UNDEFINED_HOPS && (hops < 1 || hops > 255)))
- return -EINVAL;
-
- if (lnet_islocalnet(net)) /* it's a local network */
- return -EEXIST;
-
- /* Assume net, route, all new */
- route = kzalloc(sizeof(*route), GFP_NOFS);
- rnet = kzalloc(sizeof(*rnet), GFP_NOFS);
- if (!route || !rnet) {
- CERROR("Out of memory creating route %s %d %s\n",
- libcfs_net2str(net), hops, libcfs_nid2str(gateway));
- kfree(route);
- kfree(rnet);
- return -ENOMEM;
- }
-
- INIT_LIST_HEAD(&rnet->lrn_routes);
- rnet->lrn_net = net;
- route->lr_hops = hops;
- route->lr_net = net;
- route->lr_priority = priority;
-
- lnet_net_lock(LNET_LOCK_EX);
-
- rc = lnet_nid2peer_locked(&route->lr_gateway, gateway, LNET_LOCK_EX);
- if (rc) {
- lnet_net_unlock(LNET_LOCK_EX);
-
- kfree(route);
- kfree(rnet);
-
- if (rc == -EHOSTUNREACH) /* gateway is not on a local net */
- return rc; /* ignore the route entry */
- CERROR("Error %d creating route %s %d %s\n", rc,
- libcfs_net2str(net), hops,
- libcfs_nid2str(gateway));
- return rc;
- }
-
- LASSERT(!the_lnet.ln_shutdown);
-
- rnet2 = lnet_find_net_locked(net);
- if (!rnet2) {
- /* new network */
- list_add_tail(&rnet->lrn_list, lnet_net2rnethash(net));
- rnet2 = rnet;
- }
-
- /* Search for a duplicate route (adding a duplicate is a no-op) */
- add_route = 1;
- list_for_each(e, &rnet2->lrn_routes) {
- struct lnet_route *route2;
-
- route2 = list_entry(e, struct lnet_route, lr_list);
- if (route2->lr_gateway == route->lr_gateway) {
- add_route = 0;
- break;
- }
-
- /* our lookups must be true */
- LASSERT(route2->lr_gateway->lp_nid != gateway);
- }
-
- if (add_route) {
- lnet_peer_addref_locked(route->lr_gateway); /* +1 for notify */
- lnet_add_route_to_rnet(rnet2, route);
-
- ni = route->lr_gateway->lp_ni;
- lnet_net_unlock(LNET_LOCK_EX);
-
- /* XXX Assume alive */
- if (ni->ni_lnd->lnd_notify)
- ni->ni_lnd->lnd_notify(ni, gateway, 1);
-
- lnet_net_lock(LNET_LOCK_EX);
- }
-
- /* -1 for notify or !add_route */
- lnet_peer_decref_locked(route->lr_gateway);
- lnet_net_unlock(LNET_LOCK_EX);
- rc = 0;
-
- if (!add_route) {
- rc = -EEXIST;
- kfree(route);
- }
-
- if (rnet != rnet2)
- kfree(rnet);
-
- /* kick the router checker to start up, if it is configured */
- wake_up(&the_lnet.ln_rc_waitq);
-
- return rc;
-}
-
-int
-lnet_check_routes(void)
-{
- struct lnet_remotenet *rnet;
- struct lnet_route *route;
- struct lnet_route *route2;
- struct list_head *e1;
- struct list_head *e2;
- int cpt;
- struct list_head *rn_list;
- int i;
-
- cpt = lnet_net_lock_current();
-
- for (i = 0; i < LNET_REMOTE_NETS_HASH_SIZE; i++) {
- rn_list = &the_lnet.ln_remote_nets_hash[i];
- list_for_each(e1, rn_list) {
- rnet = list_entry(e1, struct lnet_remotenet, lrn_list);
-
- route2 = NULL;
- list_for_each(e2, &rnet->lrn_routes) {
- lnet_nid_t nid1;
- lnet_nid_t nid2;
- int net;
-
- route = list_entry(e2, struct lnet_route, lr_list);
-
- if (!route2) {
- route2 = route;
- continue;
- }
-
- if (route->lr_gateway->lp_ni ==
- route2->lr_gateway->lp_ni)
- continue;
-
- nid1 = route->lr_gateway->lp_nid;
- nid2 = route2->lr_gateway->lp_nid;
- net = rnet->lrn_net;
-
- lnet_net_unlock(cpt);
-
- CERROR("Routes to %s via %s and %s not supported\n",
- libcfs_net2str(net),
- libcfs_nid2str(nid1),
- libcfs_nid2str(nid2));
- return -EINVAL;
- }
- }
- }
-
- lnet_net_unlock(cpt);
- return 0;
-}
-
-int
-lnet_del_route(__u32 net, lnet_nid_t gw_nid)
-{
- struct lnet_peer *gateway;
- struct lnet_remotenet *rnet;
- struct lnet_route *route;
- struct list_head *e1;
- struct list_head *e2;
- int rc = -ENOENT;
- struct list_head *rn_list;
- int idx = 0;
-
- CDEBUG(D_NET, "Del route: net %s : gw %s\n",
- libcfs_net2str(net), libcfs_nid2str(gw_nid));
-
- /*
- * NB Caller may specify either all routes via the given gateway
- * or a specific route entry (actual NIDs)
- */
- lnet_net_lock(LNET_LOCK_EX);
- if (net == LNET_NIDNET(LNET_NID_ANY))
- rn_list = &the_lnet.ln_remote_nets_hash[0];
- else
- rn_list = lnet_net2rnethash(net);
-
- again:
- list_for_each(e1, rn_list) {
- rnet = list_entry(e1, struct lnet_remotenet, lrn_list);
-
- if (!(net == LNET_NIDNET(LNET_NID_ANY) ||
- net == rnet->lrn_net))
- continue;
-
- list_for_each(e2, &rnet->lrn_routes) {
- route = list_entry(e2, struct lnet_route, lr_list);
-
- gateway = route->lr_gateway;
- if (!(gw_nid == LNET_NID_ANY ||
- gw_nid == gateway->lp_nid))
- continue;
-
- list_del(&route->lr_list);
- list_del(&route->lr_gwlist);
- the_lnet.ln_remote_nets_version++;
-
- if (list_empty(&rnet->lrn_routes))
- list_del(&rnet->lrn_list);
- else
- rnet = NULL;
-
- lnet_rtr_decref_locked(gateway);
- lnet_peer_decref_locked(gateway);
-
- lnet_net_unlock(LNET_LOCK_EX);
-
- kfree(route);
- kfree(rnet);
-
- rc = 0;
- lnet_net_lock(LNET_LOCK_EX);
- goto again;
- }
- }
-
- if (net == LNET_NIDNET(LNET_NID_ANY) &&
- ++idx < LNET_REMOTE_NETS_HASH_SIZE) {
- rn_list = &the_lnet.ln_remote_nets_hash[idx];
- goto again;
- }
- lnet_net_unlock(LNET_LOCK_EX);
-
- return rc;
-}
-
-void
-lnet_destroy_routes(void)
-{
- lnet_del_route(LNET_NIDNET(LNET_NID_ANY), LNET_NID_ANY);
-}
-
-int lnet_get_rtr_pool_cfg(int idx, struct lnet_ioctl_pool_cfg *pool_cfg)
-{
- int i, rc = -ENOENT, j;
-
- if (!the_lnet.ln_rtrpools)
- return rc;
-
- for (i = 0; i < LNET_NRBPOOLS; i++) {
- struct lnet_rtrbufpool *rbp;
-
- lnet_net_lock(LNET_LOCK_EX);
- cfs_percpt_for_each(rbp, j, the_lnet.ln_rtrpools) {
- if (i++ != idx)
- continue;
-
- pool_cfg->pl_pools[i].pl_npages = rbp[i].rbp_npages;
- pool_cfg->pl_pools[i].pl_nbuffers = rbp[i].rbp_nbuffers;
- pool_cfg->pl_pools[i].pl_credits = rbp[i].rbp_credits;
- pool_cfg->pl_pools[i].pl_mincredits = rbp[i].rbp_mincredits;
- rc = 0;
- break;
- }
- lnet_net_unlock(LNET_LOCK_EX);
- }
-
- lnet_net_lock(LNET_LOCK_EX);
- pool_cfg->pl_routing = the_lnet.ln_routing;
- lnet_net_unlock(LNET_LOCK_EX);
-
- return rc;
-}
-
-int
-lnet_get_route(int idx, __u32 *net, __u32 *hops,
- lnet_nid_t *gateway, __u32 *alive, __u32 *priority)
-{
- struct list_head *e1;
- struct list_head *e2;
- struct lnet_remotenet *rnet;
- struct lnet_route *route;
- int cpt;
- int i;
- struct list_head *rn_list;
-
- cpt = lnet_net_lock_current();
-
- for (i = 0; i < LNET_REMOTE_NETS_HASH_SIZE; i++) {
- rn_list = &the_lnet.ln_remote_nets_hash[i];
- list_for_each(e1, rn_list) {
- rnet = list_entry(e1, struct lnet_remotenet, lrn_list);
-
- list_for_each(e2, &rnet->lrn_routes) {
- route = list_entry(e2, struct lnet_route,
- lr_list);
-
- if (!idx--) {
- *net = rnet->lrn_net;
- *hops = route->lr_hops;
- *priority = route->lr_priority;
- *gateway = route->lr_gateway->lp_nid;
- *alive = lnet_is_route_alive(route);
- lnet_net_unlock(cpt);
- return 0;
- }
- }
- }
- }
-
- lnet_net_unlock(cpt);
- return -ENOENT;
-}
-
-void
-lnet_swap_pinginfo(struct lnet_ping_info *info)
-{
- int i;
- struct lnet_ni_status *stat;
-
- __swab32s(&info->pi_magic);
- __swab32s(&info->pi_features);
- __swab32s(&info->pi_pid);
- __swab32s(&info->pi_nnis);
- for (i = 0; i < info->pi_nnis && i < LNET_MAX_RTR_NIS; i++) {
- stat = &info->pi_ni[i];
- __swab64s(&stat->ns_nid);
- __swab32s(&stat->ns_status);
- }
-}
-
-/**
- * Parse router-checker pinginfo and record the number of down NIs for
- * remote networks on that router.
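- *
- * For example (hypothetical numbers): if the gateway's reply lists three
- * NIs and two report LNET_NI_STATUS_DOWN, each route through the gateway
- * gets lr_downis = 2, unless an NI on that route's target network is up,
- * in which case its lr_downis is 0.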
- */
-static void
-lnet_parse_rc_info(struct lnet_rc_data *rcd)
-{
- struct lnet_ping_info *info = rcd->rcd_pinginfo;
- struct lnet_peer *gw = rcd->rcd_gateway;
- struct lnet_route *rte;
-
- if (!gw->lp_alive)
- return;
-
- if (info->pi_magic == __swab32(LNET_PROTO_PING_MAGIC))
- lnet_swap_pinginfo(info);
-
- /* NB always racing with network! */
- if (info->pi_magic != LNET_PROTO_PING_MAGIC) {
- CDEBUG(D_NET, "%s: Unexpected magic %08x\n",
- libcfs_nid2str(gw->lp_nid), info->pi_magic);
- gw->lp_ping_feats = LNET_PING_FEAT_INVAL;
- return;
- }
-
- gw->lp_ping_feats = info->pi_features;
- if (!(gw->lp_ping_feats & LNET_PING_FEAT_MASK)) {
- CDEBUG(D_NET, "%s: Unexpected features 0x%x\n",
- libcfs_nid2str(gw->lp_nid), gw->lp_ping_feats);
- return; /* nothing I can understand */
- }
-
- if (!(gw->lp_ping_feats & LNET_PING_FEAT_NI_STATUS))
- return; /* can't carry NI status info */
-
- list_for_each_entry(rte, &gw->lp_routes, lr_gwlist) {
- int down = 0;
- int up = 0;
- int i;
-
- if (gw->lp_ping_feats & LNET_PING_FEAT_RTE_DISABLED) {
- rte->lr_downis = 1;
- continue;
- }
-
- for (i = 0; i < info->pi_nnis && i < LNET_MAX_RTR_NIS; i++) {
- struct lnet_ni_status *stat = &info->pi_ni[i];
- lnet_nid_t nid = stat->ns_nid;
-
- if (nid == LNET_NID_ANY) {
- CDEBUG(D_NET, "%s: unexpected LNET_NID_ANY\n",
- libcfs_nid2str(gw->lp_nid));
- gw->lp_ping_feats = LNET_PING_FEAT_INVAL;
- return;
- }
-
- if (LNET_NETTYP(LNET_NIDNET(nid)) == LOLND)
- continue;
-
- if (stat->ns_status == LNET_NI_STATUS_DOWN) {
- down++;
- continue;
- }
-
- if (stat->ns_status == LNET_NI_STATUS_UP) {
- if (LNET_NIDNET(nid) == rte->lr_net) {
- up = 1;
- break;
- }
- continue;
- }
-
- CDEBUG(D_NET, "%s: Unexpected status 0x%x\n",
- libcfs_nid2str(gw->lp_nid), stat->ns_status);
- gw->lp_ping_feats = LNET_PING_FEAT_INVAL;
- return;
- }
-
- if (up) { /* ignore downed NIs if NI for dest network is up */
- rte->lr_downis = 0;
- continue;
- }
-		/*
-		 * if @down is zero and this route is single-hop, it means
-		 * we can't find an NI for the target network
-		 */
- if (!down && rte->lr_hops == 1)
- down = 1;
-
- rte->lr_downis = down;
- }
-}
-
-static void
-lnet_router_checker_event(struct lnet_event *event)
-{
- struct lnet_rc_data *rcd = event->md.user_ptr;
- struct lnet_peer *lp;
-
- LASSERT(rcd);
-
- if (event->unlinked) {
- LNetInvalidateMDHandle(&rcd->rcd_mdh);
- return;
- }
-
- LASSERT(event->type == LNET_EVENT_SEND ||
- event->type == LNET_EVENT_REPLY);
-
- lp = rcd->rcd_gateway;
- LASSERT(lp);
-
- /*
-	 * NB: this is called while holding lnet_res_lock; a few places
-	 * need to hold both locks at the same time, so take care of the
-	 * lock ordering.
- */
- lnet_net_lock(lp->lp_cpt);
- if (!lnet_isrouter(lp) || lp->lp_rcd != rcd) {
- /* ignore if no longer a router or rcd is replaced */
- goto out;
- }
-
- if (event->type == LNET_EVENT_SEND) {
- lp->lp_ping_notsent = 0;
- if (!event->status)
- goto out;
- }
-
- /* LNET_EVENT_REPLY */
- /*
- * A successful REPLY means the router is up. If _any_ comms
- * to the router fail I assume it's down (this will happen if
- * we ping alive routers to try to detect router death before
- * apps get burned).
- */
- lnet_notify_locked(lp, 1, !event->status, cfs_time_current());
-
- /*
- * The router checker will wake up very shortly and do the
- * actual notification.
- * XXX If 'lp' stops being a router before then, it will still
- * have the notification pending!!!
- */
- if (avoid_asym_router_failure && !event->status)
- lnet_parse_rc_info(rcd);
-
- out:
- lnet_net_unlock(lp->lp_cpt);
-}
-
-static void
-lnet_wait_known_routerstate(void)
-{
- struct lnet_peer *rtr;
- struct list_head *entry;
- int all_known;
-
- LASSERT(the_lnet.ln_rc_state == LNET_RC_STATE_RUNNING);
-
- for (;;) {
- int cpt = lnet_net_lock_current();
-
- all_known = 1;
- list_for_each(entry, &the_lnet.ln_routers) {
- rtr = list_entry(entry, struct lnet_peer, lp_rtr_list);
-
- if (!rtr->lp_alive_count) {
- all_known = 0;
- break;
- }
- }
-
- lnet_net_unlock(cpt);
-
- if (all_known)
- return;
-
- set_current_state(TASK_UNINTERRUPTIBLE);
- schedule_timeout(HZ);
- }
-}
-
-void
-lnet_router_ni_update_locked(struct lnet_peer *gw, __u32 net)
-{
- struct lnet_route *rte;
-
- if ((gw->lp_ping_feats & LNET_PING_FEAT_NI_STATUS)) {
- list_for_each_entry(rte, &gw->lp_routes, lr_gwlist) {
- if (rte->lr_net == net) {
- rte->lr_downis = 0;
- break;
- }
- }
- }
-}
-
-static void
-lnet_update_ni_status_locked(void)
-{
- struct lnet_ni *ni;
- time64_t now;
- int timeout;
-
- LASSERT(the_lnet.ln_routing);
-
- timeout = router_ping_timeout +
- max(live_router_check_interval, dead_router_check_interval);
-
- now = ktime_get_real_seconds();
- list_for_each_entry(ni, &the_lnet.ln_nis, ni_list) {
- if (ni->ni_lnd->lnd_type == LOLND)
- continue;
-
- if (now < ni->ni_last_alive + timeout)
- continue;
-
- lnet_ni_lock(ni);
- /* re-check with lock */
- if (now < ni->ni_last_alive + timeout) {
- lnet_ni_unlock(ni);
- continue;
- }
-
- LASSERT(ni->ni_status);
-
- if (ni->ni_status->ns_status != LNET_NI_STATUS_DOWN) {
- CDEBUG(D_NET, "NI(%s:%d) status changed to down\n",
- libcfs_nid2str(ni->ni_nid), timeout);
- /*
- * NB: so far, this is the only place to set
- * NI status to "down"
- */
- ni->ni_status->ns_status = LNET_NI_STATUS_DOWN;
- }
- lnet_ni_unlock(ni);
- }
-}
-
-static void
-lnet_destroy_rc_data(struct lnet_rc_data *rcd)
-{
- LASSERT(list_empty(&rcd->rcd_list));
- /* detached from network */
- LASSERT(LNetMDHandleIsInvalid(rcd->rcd_mdh));
-
- if (rcd->rcd_gateway) {
- int cpt = rcd->rcd_gateway->lp_cpt;
-
- lnet_net_lock(cpt);
- lnet_peer_decref_locked(rcd->rcd_gateway);
- lnet_net_unlock(cpt);
- }
-
- kfree(rcd->rcd_pinginfo);
-
- kfree(rcd);
-}
-
-static struct lnet_rc_data *
-lnet_create_rc_data_locked(struct lnet_peer *gateway)
-{
- struct lnet_rc_data *rcd = NULL;
- struct lnet_ping_info *pi;
- struct lnet_md md;
- int rc;
- int i;
-
- lnet_net_unlock(gateway->lp_cpt);
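-	/*
-	 * NB: the net lock is dropped across the blocking allocations
-	 * below; the router table can change meanwhile, so the gateway
-	 * is re-validated once the lock is re-taken.
-	 */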
-
- rcd = kzalloc(sizeof(*rcd), GFP_NOFS);
- if (!rcd)
- goto out;
-
- LNetInvalidateMDHandle(&rcd->rcd_mdh);
- INIT_LIST_HEAD(&rcd->rcd_list);
-
- pi = kzalloc(LNET_PINGINFO_SIZE, GFP_NOFS);
- if (!pi)
- goto out;
-
- for (i = 0; i < LNET_MAX_RTR_NIS; i++) {
- pi->pi_ni[i].ns_nid = LNET_NID_ANY;
- pi->pi_ni[i].ns_status = LNET_NI_STATUS_INVALID;
- }
- rcd->rcd_pinginfo = pi;
-
- md.start = pi;
- md.user_ptr = rcd;
- md.length = LNET_PINGINFO_SIZE;
- md.threshold = LNET_MD_THRESH_INF;
- md.options = LNET_MD_TRUNCATE;
- md.eq_handle = the_lnet.ln_rc_eqh;
-
- LASSERT(!LNetEQHandleIsInvalid(the_lnet.ln_rc_eqh));
- rc = LNetMDBind(md, LNET_UNLINK, &rcd->rcd_mdh);
- if (rc < 0) {
- CERROR("Can't bind MD: %d\n", rc);
- goto out;
- }
- LASSERT(!rc);
-
- lnet_net_lock(gateway->lp_cpt);
- /* router table changed or someone has created rcd for this gateway */
- if (!lnet_isrouter(gateway) || gateway->lp_rcd) {
- lnet_net_unlock(gateway->lp_cpt);
- goto out;
- }
-
- lnet_peer_addref_locked(gateway);
- rcd->rcd_gateway = gateway;
- gateway->lp_rcd = rcd;
- gateway->lp_ping_notsent = 0;
-
- return rcd;
-
- out:
- if (rcd) {
- if (!LNetMDHandleIsInvalid(rcd->rcd_mdh)) {
- rc = LNetMDUnlink(rcd->rcd_mdh);
- LASSERT(!rc);
- }
- lnet_destroy_rc_data(rcd);
- }
-
- lnet_net_lock(gateway->lp_cpt);
- return gateway->lp_rcd;
-}
-
-static int
-lnet_router_check_interval(struct lnet_peer *rtr)
-{
- int secs;
-
- secs = rtr->lp_alive ? live_router_check_interval :
- dead_router_check_interval;
- if (secs < 0)
- secs = 0;
-
- return secs;
-}
-
-static void
-lnet_ping_router_locked(struct lnet_peer *rtr)
-{
- struct lnet_rc_data *rcd = NULL;
- unsigned long now = cfs_time_current();
- int secs;
-
- lnet_peer_addref_locked(rtr);
-
- if (rtr->lp_ping_deadline && /* ping timed out? */
- cfs_time_after(now, rtr->lp_ping_deadline))
- lnet_notify_locked(rtr, 1, 0, now);
-
- /* Run any outstanding notifications */
- lnet_ni_notify_locked(rtr->lp_ni, rtr);
-
- if (!lnet_isrouter(rtr) ||
- the_lnet.ln_rc_state != LNET_RC_STATE_RUNNING) {
- /* router table changed or router checker is shutting down */
- lnet_peer_decref_locked(rtr);
- return;
- }
-
- rcd = rtr->lp_rcd ?
- rtr->lp_rcd : lnet_create_rc_data_locked(rtr);
-
- if (!rcd)
- return;
-
- secs = lnet_router_check_interval(rtr);
-
- CDEBUG(D_NET,
- "rtr %s %d: deadline %lu ping_notsent %d alive %d alive_count %d lp_ping_timestamp %lu\n",
- libcfs_nid2str(rtr->lp_nid), secs,
- rtr->lp_ping_deadline, rtr->lp_ping_notsent,
- rtr->lp_alive, rtr->lp_alive_count, rtr->lp_ping_timestamp);
-
- if (secs && !rtr->lp_ping_notsent &&
- cfs_time_after(now, cfs_time_add(rtr->lp_ping_timestamp,
- secs * HZ))) {
- int rc;
- struct lnet_process_id id;
- struct lnet_handle_md mdh;
-
- id.nid = rtr->lp_nid;
- id.pid = LNET_PID_LUSTRE;
- CDEBUG(D_NET, "Check: %s\n", libcfs_id2str(id));
-
- rtr->lp_ping_notsent = 1;
- rtr->lp_ping_timestamp = now;
-
- mdh = rcd->rcd_mdh;
-
- if (!rtr->lp_ping_deadline) {
- rtr->lp_ping_deadline =
- cfs_time_shift(router_ping_timeout);
- }
-
- lnet_net_unlock(rtr->lp_cpt);
-
- rc = LNetGet(LNET_NID_ANY, mdh, id, LNET_RESERVED_PORTAL,
- LNET_PROTO_PING_MATCHBITS, 0);
-
- lnet_net_lock(rtr->lp_cpt);
- if (rc)
- rtr->lp_ping_notsent = 0; /* no event pending */
- }
-
- lnet_peer_decref_locked(rtr);
-}
-
-int
-lnet_router_checker_start(void)
-{
- struct task_struct *task;
- int rc;
- int eqsz = 0;
-
- LASSERT(the_lnet.ln_rc_state == LNET_RC_STATE_SHUTDOWN);
-
- if (check_routers_before_use &&
- dead_router_check_interval <= 0) {
- LCONSOLE_ERROR_MSG(0x10a, "'dead_router_check_interval' must be set if 'check_routers_before_use' is set\n");
- return -EINVAL;
- }
-
- init_completion(&the_lnet.ln_rc_signal);
-
- rc = LNetEQAlloc(0, lnet_router_checker_event, &the_lnet.ln_rc_eqh);
- if (rc) {
- CERROR("Can't allocate EQ(%d): %d\n", eqsz, rc);
- return -ENOMEM;
- }
-
- the_lnet.ln_rc_state = LNET_RC_STATE_RUNNING;
- task = kthread_run(lnet_router_checker, NULL, "router_checker");
- if (IS_ERR(task)) {
- rc = PTR_ERR(task);
- CERROR("Can't start router checker thread: %d\n", rc);
- /* block until event callback signals exit */
- wait_for_completion(&the_lnet.ln_rc_signal);
- rc = LNetEQFree(the_lnet.ln_rc_eqh);
- LASSERT(!rc);
- the_lnet.ln_rc_state = LNET_RC_STATE_SHUTDOWN;
- return -ENOMEM;
- }
-
- if (check_routers_before_use) {
- /*
- * Note that a helpful side-effect of pinging all known routers
- * at startup is that it makes them drop stale connections they
- * may have to a previous instance of me.
- */
- lnet_wait_known_routerstate();
- }
-
- return 0;
-}
-
-void
-lnet_router_checker_stop(void)
-{
- int rc;
-
- if (the_lnet.ln_rc_state == LNET_RC_STATE_SHUTDOWN)
- return;
-
- LASSERT(the_lnet.ln_rc_state == LNET_RC_STATE_RUNNING);
- the_lnet.ln_rc_state = LNET_RC_STATE_STOPPING;
- /* wakeup the RC thread if it's sleeping */
- wake_up(&the_lnet.ln_rc_waitq);
-
- /* block until event callback signals exit */
- wait_for_completion(&the_lnet.ln_rc_signal);
- LASSERT(the_lnet.ln_rc_state == LNET_RC_STATE_SHUTDOWN);
-
- rc = LNetEQFree(the_lnet.ln_rc_eqh);
- LASSERT(!rc);
-}
-
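-/*
- * An RCD is detached from its gateway onto ln_rcd_deathrow, its MD is
- * unlinked, and it then sits on ln_rcd_zombie until the unlink event
- * invalidates rcd_mdh, at which point it can finally be freed.
- */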
-static void
-lnet_prune_rc_data(int wait_unlink)
-{
- struct lnet_rc_data *rcd;
- struct lnet_rc_data *tmp;
- struct lnet_peer *lp;
- struct list_head head;
- int i = 2;
-
- if (likely(the_lnet.ln_rc_state == LNET_RC_STATE_RUNNING &&
- list_empty(&the_lnet.ln_rcd_deathrow) &&
- list_empty(&the_lnet.ln_rcd_zombie)))
- return;
-
- INIT_LIST_HEAD(&head);
-
- lnet_net_lock(LNET_LOCK_EX);
-
- if (the_lnet.ln_rc_state != LNET_RC_STATE_RUNNING) {
- /* router checker is stopping, prune all */
- list_for_each_entry(lp, &the_lnet.ln_routers,
- lp_rtr_list) {
- if (!lp->lp_rcd)
- continue;
-
- LASSERT(list_empty(&lp->lp_rcd->rcd_list));
- list_add(&lp->lp_rcd->rcd_list,
- &the_lnet.ln_rcd_deathrow);
- lp->lp_rcd = NULL;
- }
- }
-
- /* unlink all RCDs on deathrow list */
- list_splice_init(&the_lnet.ln_rcd_deathrow, &head);
-
- if (!list_empty(&head)) {
- lnet_net_unlock(LNET_LOCK_EX);
-
- list_for_each_entry(rcd, &head, rcd_list)
- LNetMDUnlink(rcd->rcd_mdh);
-
- lnet_net_lock(LNET_LOCK_EX);
- }
-
- list_splice_init(&head, &the_lnet.ln_rcd_zombie);
-
- /* release all zombie RCDs */
- while (!list_empty(&the_lnet.ln_rcd_zombie)) {
- list_for_each_entry_safe(rcd, tmp, &the_lnet.ln_rcd_zombie,
- rcd_list) {
- if (LNetMDHandleIsInvalid(rcd->rcd_mdh))
- list_move(&rcd->rcd_list, &head);
- }
-
- wait_unlink = wait_unlink &&
- !list_empty(&the_lnet.ln_rcd_zombie);
-
- lnet_net_unlock(LNET_LOCK_EX);
-
- while (!list_empty(&head)) {
- rcd = list_entry(head.next,
- struct lnet_rc_data, rcd_list);
- list_del_init(&rcd->rcd_list);
- lnet_destroy_rc_data(rcd);
- }
-
- if (!wait_unlink)
- return;
-
- i++;
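-		/* log at D_WARNING only when i is a power of 2, to rate-limit */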
- CDEBUG(((i & (-i)) == i) ? D_WARNING : D_NET,
- "Waiting for rc buffers to unlink\n");
- set_current_state(TASK_UNINTERRUPTIBLE);
- schedule_timeout(HZ / 4);
-
- lnet_net_lock(LNET_LOCK_EX);
- }
-
- lnet_net_unlock(LNET_LOCK_EX);
-}
-
-/*
- * This function is called to check if the RC should block indefinitely.
- * It's called from lnet_router_checker() as well as being passed to
- * wait_event_interruptible() to avoid the lost wake_up problem.
- *
- * When it's called from wait_event_interruptible(), it must also not
- * sleep if the RC state is not running, to avoid a deadlock while the
- * system is shutting down.
- */
-static inline bool
-lnet_router_checker_active(void)
-{
- if (the_lnet.ln_rc_state != LNET_RC_STATE_RUNNING)
- return true;
-
- /*
- * Router Checker thread needs to run when routing is enabled in
- * order to call lnet_update_ni_status_locked()
- */
- if (the_lnet.ln_routing)
- return true;
-
- return !list_empty(&the_lnet.ln_routers) &&
- (live_router_check_interval > 0 ||
- dead_router_check_interval > 0);
-}
-
-static int
-lnet_router_checker(void *arg)
-{
- struct lnet_peer *rtr;
- struct list_head *entry;
-
- while (the_lnet.ln_rc_state == LNET_RC_STATE_RUNNING) {
- __u64 version;
- int cpt;
- int cpt2;
-
- cpt = lnet_net_lock_current();
-rescan:
- version = the_lnet.ln_routers_version;
-
- list_for_each(entry, &the_lnet.ln_routers) {
- rtr = list_entry(entry, struct lnet_peer, lp_rtr_list);
-
- cpt2 = lnet_cpt_of_nid_locked(rtr->lp_nid);
- if (cpt != cpt2) {
- lnet_net_unlock(cpt);
- cpt = cpt2;
- lnet_net_lock(cpt);
- /* the routers list has changed */
- if (version != the_lnet.ln_routers_version)
- goto rescan;
- }
-
- lnet_ping_router_locked(rtr);
-
- /* NB dropped lock */
- if (version != the_lnet.ln_routers_version) {
- /* the routers list has changed */
- goto rescan;
- }
- }
-
- if (the_lnet.ln_routing)
- lnet_update_ni_status_locked();
-
- lnet_net_unlock(cpt);
-
- lnet_prune_rc_data(0); /* don't wait for UNLINK */
-
-		/*
-		 * Calling schedule_timeout() here always adds 1 to the load
-		 * average because the kernel counts # active tasks as
-		 * nr_running + nr_uninterruptible.
-		 */
-		/*
-		 * If there are any routes then wake up every second; if
-		 * there are no routes then sleep indefinitely until woken
-		 * up by a user adding a route.
-		 */
- if (!lnet_router_checker_active())
- wait_event_interruptible(the_lnet.ln_rc_waitq,
- lnet_router_checker_active());
- else
- wait_event_interruptible_timeout(the_lnet.ln_rc_waitq,
- false,
- HZ);
- }
-
- lnet_prune_rc_data(1); /* wait for UNLINK */
-
- the_lnet.ln_rc_state = LNET_RC_STATE_SHUTDOWN;
- complete(&the_lnet.ln_rc_signal);
- /* The unlink event callback will signal final completion */
- return 0;
-}
-
-void
-lnet_destroy_rtrbuf(struct lnet_rtrbuf *rb, int npages)
-{
- while (--npages >= 0)
- __free_page(rb->rb_kiov[npages].bv_page);
-
- kfree(rb);
-}
-
-static struct lnet_rtrbuf *
-lnet_new_rtrbuf(struct lnet_rtrbufpool *rbp, int cpt)
-{
- int npages = rbp->rbp_npages;
- int sz = offsetof(struct lnet_rtrbuf, rb_kiov[npages]);
- struct page *page;
- struct lnet_rtrbuf *rb;
- int i;
-
- rb = kzalloc_cpt(sz, GFP_NOFS, cpt);
- if (!rb)
- return NULL;
-
- rb->rb_pool = rbp;
-
- for (i = 0; i < npages; i++) {
- page = alloc_pages_node(
- cfs_cpt_spread_node(lnet_cpt_table(), cpt),
- GFP_KERNEL | __GFP_ZERO, 0);
- if (!page) {
- while (--i >= 0)
- __free_page(rb->rb_kiov[i].bv_page);
-
- kfree(rb);
- return NULL;
- }
-
- rb->rb_kiov[i].bv_len = PAGE_SIZE;
- rb->rb_kiov[i].bv_offset = 0;
- rb->rb_kiov[i].bv_page = page;
- }
-
- return rb;
-}
-
-static void
-lnet_rtrpool_free_bufs(struct lnet_rtrbufpool *rbp, int cpt)
-{
- int npages = rbp->rbp_npages;
- struct list_head tmp;
- struct lnet_rtrbuf *rb;
- struct lnet_rtrbuf *temp;
-
- if (!rbp->rbp_nbuffers) /* not initialized or already freed */
- return;
-
- INIT_LIST_HEAD(&tmp);
-
- lnet_net_lock(cpt);
- lnet_drop_routed_msgs_locked(&rbp->rbp_msgs, cpt);
- list_splice_init(&rbp->rbp_bufs, &tmp);
- rbp->rbp_req_nbuffers = 0;
- rbp->rbp_nbuffers = 0;
- rbp->rbp_credits = 0;
- rbp->rbp_mincredits = 0;
- lnet_net_unlock(cpt);
-
- /* Free buffers on the free list. */
- list_for_each_entry_safe(rb, temp, &tmp, rb_list) {
- list_del(&rb->rb_list);
- lnet_destroy_rtrbuf(rb, npages);
- }
-}
-
-static int
-lnet_rtrpool_adjust_bufs(struct lnet_rtrbufpool *rbp, int nbufs, int cpt)
-{
- struct list_head rb_list;
- struct lnet_rtrbuf *rb;
- int num_rb;
- int num_buffers = 0;
- int old_req_nbufs;
- int npages = rbp->rbp_npages;
-
- lnet_net_lock(cpt);
- /*
-	 * If we are called for fewer buffers than are already in the pool, we
- * just lower the req_nbuffers number and excess buffers will be
- * thrown away as they are returned to the free list. Credits
- * then get adjusted as well.
- * If we already have enough buffers allocated to serve the
- * increase requested, then we can treat that the same way as we
- * do the decrease.
- */
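-	/*
-	 * E.g. (hypothetical numbers): with 512 buffers already in the
-	 * pool, a request for 256 just lowers rbp_req_nbuffers to 256,
-	 * while a request for 768 falls through and allocates 256 new
-	 * buffers below.
-	 */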
- num_rb = nbufs - rbp->rbp_nbuffers;
- if (nbufs <= rbp->rbp_req_nbuffers || num_rb <= 0) {
- rbp->rbp_req_nbuffers = nbufs;
- lnet_net_unlock(cpt);
- return 0;
- }
- /*
-	 * store the old value of rbp_req_nbuffers and then set it to
- * the new request to prevent lnet_return_rx_credits_locked() from
- * freeing buffers that we need to keep around
- */
- old_req_nbufs = rbp->rbp_req_nbuffers;
- rbp->rbp_req_nbuffers = nbufs;
- lnet_net_unlock(cpt);
-
- INIT_LIST_HEAD(&rb_list);
-
- /*
- * allocate the buffers on a local list first. If all buffers are
- * allocated successfully then join this list to the rbp buffer
- * list. If not then free all allocated buffers.
- */
- while (num_rb-- > 0) {
- rb = lnet_new_rtrbuf(rbp, cpt);
- if (!rb) {
- CERROR("Failed to allocate %d route bufs of %d pages\n",
- nbufs, npages);
-
- lnet_net_lock(cpt);
- rbp->rbp_req_nbuffers = old_req_nbufs;
- lnet_net_unlock(cpt);
-
- goto failed;
- }
-
- list_add(&rb->rb_list, &rb_list);
- num_buffers++;
- }
-
- lnet_net_lock(cpt);
-
- list_splice_tail(&rb_list, &rbp->rbp_bufs);
- rbp->rbp_nbuffers += num_buffers;
- rbp->rbp_credits += num_buffers;
- rbp->rbp_mincredits = rbp->rbp_credits;
- /*
-	 * We need to schedule the blocked msgs using the newly
-	 * added buffers.
- */
- while (!list_empty(&rbp->rbp_bufs) &&
- !list_empty(&rbp->rbp_msgs))
- lnet_schedule_blocked_locked(rbp);
-
- lnet_net_unlock(cpt);
-
- return 0;
-
-failed:
- while (!list_empty(&rb_list)) {
- rb = list_entry(rb_list.next, struct lnet_rtrbuf, rb_list);
- list_del(&rb->rb_list);
- lnet_destroy_rtrbuf(rb, npages);
- }
-
- return -ENOMEM;
-}
-
-static void
-lnet_rtrpool_init(struct lnet_rtrbufpool *rbp, int npages)
-{
- INIT_LIST_HEAD(&rbp->rbp_msgs);
- INIT_LIST_HEAD(&rbp->rbp_bufs);
-
- rbp->rbp_npages = npages;
- rbp->rbp_credits = 0;
- rbp->rbp_mincredits = 0;
-}
-
-void
-lnet_rtrpools_free(int keep_pools)
-{
- struct lnet_rtrbufpool *rtrp;
- int i;
-
- if (!the_lnet.ln_rtrpools) /* uninitialized or freed */
- return;
-
- cfs_percpt_for_each(rtrp, i, the_lnet.ln_rtrpools) {
- lnet_rtrpool_free_bufs(&rtrp[LNET_TINY_BUF_IDX], i);
- lnet_rtrpool_free_bufs(&rtrp[LNET_SMALL_BUF_IDX], i);
- lnet_rtrpool_free_bufs(&rtrp[LNET_LARGE_BUF_IDX], i);
- }
-
- if (!keep_pools) {
- cfs_percpt_free(the_lnet.ln_rtrpools);
- the_lnet.ln_rtrpools = NULL;
- }
-}
-
-static int
-lnet_nrb_tiny_calculate(void)
-{
- int nrbs = LNET_NRB_TINY;
-
- if (tiny_router_buffers < 0) {
- LCONSOLE_ERROR_MSG(0x10c,
- "tiny_router_buffers=%d invalid when routing enabled\n",
- tiny_router_buffers);
- return -EINVAL;
- }
-
- if (tiny_router_buffers > 0)
- nrbs = tiny_router_buffers;
-
- nrbs /= LNET_CPT_NUMBER;
- return max(nrbs, LNET_NRB_TINY_MIN);
-}
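-
-/*
- * E.g. (hypothetical numbers): tiny_router_buffers = 2048 on a node with
- * 4 CPTs yields 512 buffers per CPT pool, but never fewer than
- * LNET_NRB_TINY_MIN; lnet_nrb_small_calculate() and
- * lnet_nrb_large_calculate() below work the same way.
- */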
-
-static int
-lnet_nrb_small_calculate(void)
-{
- int nrbs = LNET_NRB_SMALL;
-
- if (small_router_buffers < 0) {
- LCONSOLE_ERROR_MSG(0x10c,
- "small_router_buffers=%d invalid when routing enabled\n",
- small_router_buffers);
- return -EINVAL;
- }
-
- if (small_router_buffers > 0)
- nrbs = small_router_buffers;
-
- nrbs /= LNET_CPT_NUMBER;
- return max(nrbs, LNET_NRB_SMALL_MIN);
-}
-
-static int
-lnet_nrb_large_calculate(void)
-{
- int nrbs = LNET_NRB_LARGE;
-
- if (large_router_buffers < 0) {
- LCONSOLE_ERROR_MSG(0x10c,
- "large_router_buffers=%d invalid when routing enabled\n",
- large_router_buffers);
- return -EINVAL;
- }
-
- if (large_router_buffers > 0)
- nrbs = large_router_buffers;
-
- nrbs /= LNET_CPT_NUMBER;
- return max(nrbs, LNET_NRB_LARGE_MIN);
-}
-
-int
-lnet_rtrpools_alloc(int im_a_router)
-{
- struct lnet_rtrbufpool *rtrp;
- int nrb_tiny;
- int nrb_small;
- int nrb_large;
- int rc;
- int i;
-
- if (!strcmp(forwarding, "")) {
- /* not set either way */
- if (!im_a_router)
- return 0;
- } else if (!strcmp(forwarding, "disabled")) {
- /* explicitly disabled */
- return 0;
- } else if (!strcmp(forwarding, "enabled")) {
- /* explicitly enabled */
- } else {
- LCONSOLE_ERROR_MSG(0x10b, "'forwarding' not set to either 'enabled' or 'disabled'\n");
- return -EINVAL;
- }
-
- nrb_tiny = lnet_nrb_tiny_calculate();
- if (nrb_tiny < 0)
- return -EINVAL;
-
- nrb_small = lnet_nrb_small_calculate();
- if (nrb_small < 0)
- return -EINVAL;
-
- nrb_large = lnet_nrb_large_calculate();
- if (nrb_large < 0)
- return -EINVAL;
-
- the_lnet.ln_rtrpools = cfs_percpt_alloc(lnet_cpt_table(),
- LNET_NRBPOOLS *
- sizeof(struct lnet_rtrbufpool));
- if (!the_lnet.ln_rtrpools) {
- LCONSOLE_ERROR_MSG(0x10c,
-				   "Failed to initialize router buffer pool\n");
- return -ENOMEM;
- }
-
- cfs_percpt_for_each(rtrp, i, the_lnet.ln_rtrpools) {
- lnet_rtrpool_init(&rtrp[LNET_TINY_BUF_IDX], 0);
- rc = lnet_rtrpool_adjust_bufs(&rtrp[LNET_TINY_BUF_IDX],
- nrb_tiny, i);
- if (rc)
- goto failed;
-
- lnet_rtrpool_init(&rtrp[LNET_SMALL_BUF_IDX],
- LNET_NRB_SMALL_PAGES);
- rc = lnet_rtrpool_adjust_bufs(&rtrp[LNET_SMALL_BUF_IDX],
- nrb_small, i);
- if (rc)
- goto failed;
-
- lnet_rtrpool_init(&rtrp[LNET_LARGE_BUF_IDX],
- LNET_NRB_LARGE_PAGES);
- rc = lnet_rtrpool_adjust_bufs(&rtrp[LNET_LARGE_BUF_IDX],
- nrb_large, i);
- if (rc)
- goto failed;
- }
-
- lnet_net_lock(LNET_LOCK_EX);
- the_lnet.ln_routing = 1;
- lnet_net_unlock(LNET_LOCK_EX);
-
- return 0;
-
- failed:
- lnet_rtrpools_free(0);
- return rc;
-}
-
-static int
-lnet_rtrpools_adjust_helper(int tiny, int small, int large)
-{
- int nrb = 0;
- int rc = 0;
- int i;
- struct lnet_rtrbufpool *rtrp;
-
- /*
-	 * If the provided values for each buffer pool are different from the
- * configured values, we need to take action.
- */
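-	/*
-	 * A negative value leaves the corresponding pool untouched, so
-	 * e.g. lnet_rtrpools_adjust(1024, -1, -1) resizes only the tiny
-	 * pool.
-	 */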
- if (tiny >= 0) {
- tiny_router_buffers = tiny;
- nrb = lnet_nrb_tiny_calculate();
- cfs_percpt_for_each(rtrp, i, the_lnet.ln_rtrpools) {
- rc = lnet_rtrpool_adjust_bufs(&rtrp[LNET_TINY_BUF_IDX],
- nrb, i);
- if (rc)
- return rc;
- }
- }
- if (small >= 0) {
- small_router_buffers = small;
- nrb = lnet_nrb_small_calculate();
- cfs_percpt_for_each(rtrp, i, the_lnet.ln_rtrpools) {
- rc = lnet_rtrpool_adjust_bufs(&rtrp[LNET_SMALL_BUF_IDX],
- nrb, i);
- if (rc)
- return rc;
- }
- }
- if (large >= 0) {
- large_router_buffers = large;
- nrb = lnet_nrb_large_calculate();
- cfs_percpt_for_each(rtrp, i, the_lnet.ln_rtrpools) {
- rc = lnet_rtrpool_adjust_bufs(&rtrp[LNET_LARGE_BUF_IDX],
- nrb, i);
- if (rc)
- return rc;
- }
- }
-
- return 0;
-}
-
-int
-lnet_rtrpools_adjust(int tiny, int small, int large)
-{
- /*
- * this function doesn't revert the changes if adding new buffers
- * failed. It's up to the user space caller to revert the
- * changes.
- */
- if (!the_lnet.ln_routing)
- return 0;
-
- return lnet_rtrpools_adjust_helper(tiny, small, large);
-}
-
-int
-lnet_rtrpools_enable(void)
-{
- int rc = 0;
-
- if (the_lnet.ln_routing)
- return 0;
-
- if (!the_lnet.ln_rtrpools)
- /*
- * If routing is turned off, and we have never
- * initialized the pools before, just call the
- * standard buffer pool allocation routine as
- * if we are just configuring this for the first
- * time.
- */
- rc = lnet_rtrpools_alloc(1);
- else
- rc = lnet_rtrpools_adjust_helper(0, 0, 0);
- if (rc)
- return rc;
-
- lnet_net_lock(LNET_LOCK_EX);
- the_lnet.ln_routing = 1;
-
- the_lnet.ln_ping_info->pi_features &= ~LNET_PING_FEAT_RTE_DISABLED;
- lnet_net_unlock(LNET_LOCK_EX);
-
- return rc;
-}
-
-void
-lnet_rtrpools_disable(void)
-{
- if (!the_lnet.ln_routing)
- return;
-
- lnet_net_lock(LNET_LOCK_EX);
- the_lnet.ln_routing = 0;
- the_lnet.ln_ping_info->pi_features |= LNET_PING_FEAT_RTE_DISABLED;
-
- tiny_router_buffers = 0;
- small_router_buffers = 0;
- large_router_buffers = 0;
- lnet_net_unlock(LNET_LOCK_EX);
- lnet_rtrpools_free(1);
-}
-
-int
-lnet_notify(struct lnet_ni *ni, lnet_nid_t nid, int alive, unsigned long when)
-{
- struct lnet_peer *lp = NULL;
- unsigned long now = cfs_time_current();
- int cpt = lnet_cpt_of_nid(nid);
-
- LASSERT(!in_interrupt());
-
- CDEBUG(D_NET, "%s notifying %s: %s\n",
- !ni ? "userspace" : libcfs_nid2str(ni->ni_nid),
- libcfs_nid2str(nid),
- alive ? "up" : "down");
-
- if (ni &&
- LNET_NIDNET(ni->ni_nid) != LNET_NIDNET(nid)) {
- CWARN("Ignoring notification of %s %s by %s (different net)\n",
- libcfs_nid2str(nid), alive ? "birth" : "death",
- libcfs_nid2str(ni->ni_nid));
- return -EINVAL;
- }
-
- /* can't do predictions... */
- if (cfs_time_after(when, now)) {
- CWARN("Ignoring prediction from %s of %s %s %ld seconds in the future\n",
- !ni ? "userspace" : libcfs_nid2str(ni->ni_nid),
- libcfs_nid2str(nid), alive ? "up" : "down",
- cfs_duration_sec(cfs_time_sub(when, now)));
- return -EINVAL;
- }
-
- if (ni && !alive && /* LND telling me she's down */
- !auto_down) { /* auto-down disabled */
- CDEBUG(D_NET, "Auto-down disabled\n");
- return 0;
- }
-
- lnet_net_lock(cpt);
-
- if (the_lnet.ln_shutdown) {
- lnet_net_unlock(cpt);
- return -ESHUTDOWN;
- }
-
- lp = lnet_find_peer_locked(the_lnet.ln_peer_tables[cpt], nid);
- if (!lp) {
- /* nid not found */
- lnet_net_unlock(cpt);
- CDEBUG(D_NET, "%s not found\n", libcfs_nid2str(nid));
- return 0;
- }
-
- /*
-	 * We can't fully trust the LND to report the exact peer last_alive
-	 * when it notifies us about a dead peer. For example, ksocklnd can
-	 * call us with when == _time_when_the_node_was_booted_ if no
-	 * connections were successfully established.
- */
- if (ni && !alive && when < lp->lp_last_alive)
- when = lp->lp_last_alive;
-
- lnet_notify_locked(lp, !ni, alive, when);
-
- if (ni)
- lnet_ni_notify_locked(ni, lp);
-
- lnet_peer_decref_locked(lp);
-
- lnet_net_unlock(cpt);
- return 0;
-}
-EXPORT_SYMBOL(lnet_notify);
diff --git a/drivers/staging/lustre/lnet/lnet/router_proc.c b/drivers/staging/lustre/lnet/lnet/router_proc.c
deleted file mode 100644
index 1a71ffebc889..000000000000
--- a/drivers/staging/lustre/lnet/lnet/router_proc.c
+++ /dev/null
@@ -1,909 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/*
- * Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved.
- *
- * Copyright (c) 2011, 2012, Intel Corporation.
- *
- * This file is part of Portals
- * http://sourceforge.net/projects/sandiaportals/
- *
- * Portals is free software; you can redistribute it and/or
- * modify it under the terms of version 2 of the GNU General Public
- * License as published by the Free Software Foundation.
- *
- * Portals is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- */
-
-#define DEBUG_SUBSYSTEM S_LNET
-
-#include <linux/libcfs/libcfs.h>
-#include <linux/lnet/lib-lnet.h>
-
-/*
- * This is really lnet_proc.c. You might need to update sanity test 215
- * if any file format is changed.
- */
-
-#define LNET_LOFFT_BITS (sizeof(loff_t) * 8)
-/*
- * NB: max allowed LNET_CPT_BITS is 8 on 64-bit systems and 2 on 32-bit systems
- */
-#define LNET_PROC_CPT_BITS (LNET_CPT_BITS + 1)
-/* change version, 16 bits or 8 bits */
-#define LNET_PROC_VER_BITS max_t(size_t, min_t(size_t, LNET_LOFFT_BITS, 64) / 4, 8)
-
-#define LNET_PROC_HASH_BITS LNET_PEER_HASH_BITS
-/*
- * bits for peer hash offset
- * NB: we don't use the highest bit of *ppos because it's signed
- */
-#define LNET_PROC_HOFF_BITS (LNET_LOFFT_BITS - \
- LNET_PROC_CPT_BITS - \
- LNET_PROC_VER_BITS - \
- LNET_PROC_HASH_BITS - 1)
-/* bits for hash index + position */
-#define LNET_PROC_HPOS_BITS (LNET_PROC_HASH_BITS + LNET_PROC_HOFF_BITS)
-/* bits for peer hash table + hash version */
-#define LNET_PROC_VPOS_BITS (LNET_PROC_HPOS_BITS + LNET_PROC_VER_BITS)
-
-#define LNET_PROC_CPT_MASK ((1ULL << LNET_PROC_CPT_BITS) - 1)
-#define LNET_PROC_VER_MASK ((1ULL << LNET_PROC_VER_BITS) - 1)
-#define LNET_PROC_HASH_MASK ((1ULL << LNET_PROC_HASH_BITS) - 1)
-#define LNET_PROC_HOFF_MASK ((1ULL << LNET_PROC_HOFF_BITS) - 1)
-
-#define LNET_PROC_CPT_GET(pos) \
- (int)(((pos) >> LNET_PROC_VPOS_BITS) & LNET_PROC_CPT_MASK)
-
-#define LNET_PROC_VER_GET(pos) \
- (int)(((pos) >> LNET_PROC_HPOS_BITS) & LNET_PROC_VER_MASK)
-
-#define LNET_PROC_HASH_GET(pos) \
- (int)(((pos) >> LNET_PROC_HOFF_BITS) & LNET_PROC_HASH_MASK)
-
-#define LNET_PROC_HOFF_GET(pos) \
- (int)((pos) & LNET_PROC_HOFF_MASK)
-
-#define LNET_PROC_POS_MAKE(cpt, ver, hash, off) \
- (((((loff_t)(cpt)) & LNET_PROC_CPT_MASK) << LNET_PROC_VPOS_BITS) | \
- ((((loff_t)(ver)) & LNET_PROC_VER_MASK) << LNET_PROC_HPOS_BITS) | \
- ((((loff_t)(hash)) & LNET_PROC_HASH_MASK) << LNET_PROC_HOFF_BITS) | \
- ((off) & LNET_PROC_HOFF_MASK))
-
-#define LNET_PROC_VERSION(v) ((unsigned int)((v) & LNET_PROC_VER_MASK))
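-
-/*
- * E.g. (hypothetical widths): with LNET_PROC_VER_BITS == 16,
- * LNET_PROC_CPT_BITS == 2 and LNET_PROC_HASH_BITS == 9 on a 64-bit
- * loff_t, a position packs as
- *
- *	[63 unused] [62..61 cpt] [60..45 ver] [44..36 hash] [35..0 off]
- *
- * and LNET_PROC_POS_MAKE(cpt, ver, hash, off) round-trips through the
- * *_GET() macros above.
- */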
-
-static int __proc_lnet_stats(void *data, int write,
- loff_t pos, void __user *buffer, int nob)
-{
- int rc;
- struct lnet_counters *ctrs;
- int len;
- char *tmpstr;
- const int tmpsiz = 256; /* 7 %u and 4 %llu */
-
- if (write) {
- lnet_counters_reset();
- return 0;
- }
-
- /* read */
-
- ctrs = kzalloc(sizeof(*ctrs), GFP_NOFS);
- if (!ctrs)
- return -ENOMEM;
-
- tmpstr = kmalloc(tmpsiz, GFP_KERNEL);
- if (!tmpstr) {
- kfree(ctrs);
- return -ENOMEM;
- }
-
- lnet_counters_get(ctrs);
-
- len = snprintf(tmpstr, tmpsiz,
- "%u %u %u %u %u %u %u %llu %llu %llu %llu",
- ctrs->msgs_alloc, ctrs->msgs_max,
- ctrs->errors,
- ctrs->send_count, ctrs->recv_count,
- ctrs->route_count, ctrs->drop_count,
- ctrs->send_length, ctrs->recv_length,
- ctrs->route_length, ctrs->drop_length);
-
- if (pos >= min_t(int, len, strlen(tmpstr)))
- rc = 0;
- else
- rc = cfs_trace_copyout_string(buffer, nob,
- tmpstr + pos, "\n");
-
- kfree(tmpstr);
- kfree(ctrs);
- return rc;
-}
-
-static int proc_lnet_stats(struct ctl_table *table, int write,
- void __user *buffer, size_t *lenp, loff_t *ppos)
-{
- return lprocfs_call_handler(table->data, write, ppos, buffer, lenp,
- __proc_lnet_stats);
-}
-
-static int proc_lnet_routes(struct ctl_table *table, int write,
- void __user *buffer, size_t *lenp, loff_t *ppos)
-{
- const int tmpsiz = 256;
- char *tmpstr;
- char *s;
- int rc = 0;
- int len;
- int ver;
- int off;
-
- BUILD_BUG_ON(sizeof(loff_t) < 4);
-
- off = LNET_PROC_HOFF_GET(*ppos);
- ver = LNET_PROC_VER_GET(*ppos);
-
- LASSERT(!write);
-
- if (!*lenp)
- return 0;
-
- tmpstr = kmalloc(tmpsiz, GFP_KERNEL);
- if (!tmpstr)
- return -ENOMEM;
-
- s = tmpstr; /* points to current position in tmpstr[] */
-
- if (!*ppos) {
- s += snprintf(s, tmpstr + tmpsiz - s, "Routing %s\n",
- the_lnet.ln_routing ? "enabled" : "disabled");
- LASSERT(tmpstr + tmpsiz - s > 0);
-
- s += snprintf(s, tmpstr + tmpsiz - s, "%-8s %4s %8s %7s %s\n",
- "net", "hops", "priority", "state", "router");
- LASSERT(tmpstr + tmpsiz - s > 0);
-
- lnet_net_lock(0);
- ver = (unsigned int)the_lnet.ln_remote_nets_version;
- lnet_net_unlock(0);
- *ppos = LNET_PROC_POS_MAKE(0, ver, 0, off);
- } else {
- struct list_head *n;
- struct list_head *r;
- struct lnet_route *route = NULL;
- struct lnet_remotenet *rnet = NULL;
- int skip = off - 1;
- struct list_head *rn_list;
- int i;
-
- lnet_net_lock(0);
-
- if (ver != LNET_PROC_VERSION(the_lnet.ln_remote_nets_version)) {
- lnet_net_unlock(0);
- kfree(tmpstr);
- return -ESTALE;
- }
-
- for (i = 0; i < LNET_REMOTE_NETS_HASH_SIZE && !route; i++) {
- rn_list = &the_lnet.ln_remote_nets_hash[i];
-
- n = rn_list->next;
-
- while (n != rn_list && !route) {
- rnet = list_entry(n, struct lnet_remotenet,
- lrn_list);
-
- r = rnet->lrn_routes.next;
-
- while (r != &rnet->lrn_routes) {
- struct lnet_route *re;
-
- re = list_entry(r, struct lnet_route,
- lr_list);
- if (!skip) {
- route = re;
- break;
- }
-
- skip--;
- r = r->next;
- }
-
- n = n->next;
- }
- }
-
- if (route) {
- __u32 net = rnet->lrn_net;
- __u32 hops = route->lr_hops;
- unsigned int priority = route->lr_priority;
- lnet_nid_t nid = route->lr_gateway->lp_nid;
- int alive = lnet_is_route_alive(route);
-
- s += snprintf(s, tmpstr + tmpsiz - s,
- "%-8s %4u %8u %7s %s\n",
- libcfs_net2str(net), hops,
- priority,
- alive ? "up" : "down",
- libcfs_nid2str(nid));
- LASSERT(tmpstr + tmpsiz - s > 0);
- }
-
- lnet_net_unlock(0);
- }
-
-	len = s - tmpstr; /* how many bytes were written */
-
- if (len > *lenp) { /* linux-supplied buffer is too small */
- rc = -EINVAL;
- } else if (len > 0) { /* wrote something */
- if (copy_to_user(buffer, tmpstr, len)) {
- rc = -EFAULT;
- } else {
- off += 1;
- *ppos = LNET_PROC_POS_MAKE(0, ver, 0, off);
- }
- }
-
- kfree(tmpstr);
-
- if (!rc)
- *lenp = len;
-
- return rc;
-}
-
-static int proc_lnet_routers(struct ctl_table *table, int write,
- void __user *buffer, size_t *lenp, loff_t *ppos)
-{
- int rc = 0;
- char *tmpstr;
- char *s;
- const int tmpsiz = 256;
- int len;
- int ver;
- int off;
-
- off = LNET_PROC_HOFF_GET(*ppos);
- ver = LNET_PROC_VER_GET(*ppos);
-
- LASSERT(!write);
-
- if (!*lenp)
- return 0;
-
- tmpstr = kmalloc(tmpsiz, GFP_KERNEL);
- if (!tmpstr)
- return -ENOMEM;
-
- s = tmpstr; /* points to current position in tmpstr[] */
-
- if (!*ppos) {
- s += snprintf(s, tmpstr + tmpsiz - s,
- "%-4s %7s %9s %6s %12s %9s %8s %7s %s\n",
- "ref", "rtr_ref", "alive_cnt", "state",
- "last_ping", "ping_sent", "deadline",
- "down_ni", "router");
- LASSERT(tmpstr + tmpsiz - s > 0);
-
- lnet_net_lock(0);
- ver = (unsigned int)the_lnet.ln_routers_version;
- lnet_net_unlock(0);
- *ppos = LNET_PROC_POS_MAKE(0, ver, 0, off);
- } else {
- struct list_head *r;
- struct lnet_peer *peer = NULL;
- int skip = off - 1;
-
- lnet_net_lock(0);
-
- if (ver != LNET_PROC_VERSION(the_lnet.ln_routers_version)) {
- lnet_net_unlock(0);
-
- kfree(tmpstr);
- return -ESTALE;
- }
-
- r = the_lnet.ln_routers.next;
-
- while (r != &the_lnet.ln_routers) {
- struct lnet_peer *lp;
-
- lp = list_entry(r, struct lnet_peer, lp_rtr_list);
- if (!skip) {
- peer = lp;
- break;
- }
-
- skip--;
- r = r->next;
- }
-
- if (peer) {
- lnet_nid_t nid = peer->lp_nid;
- unsigned long now = cfs_time_current();
- unsigned long deadline = peer->lp_ping_deadline;
- int nrefs = peer->lp_refcount;
- int nrtrrefs = peer->lp_rtr_refcount;
- int alive_cnt = peer->lp_alive_count;
- int alive = peer->lp_alive;
- int pingsent = !peer->lp_ping_notsent;
- int last_ping = cfs_duration_sec(cfs_time_sub(now,
- peer->lp_ping_timestamp));
- int down_ni = 0;
- struct lnet_route *rtr;
-
- if ((peer->lp_ping_feats &
- LNET_PING_FEAT_NI_STATUS)) {
- list_for_each_entry(rtr, &peer->lp_routes,
- lr_gwlist) {
- /*
-					 * lr_downis on any route should be
-					 * the number of down NIs on the
-					 * gateway
- */
- if (rtr->lr_downis) {
- down_ni = rtr->lr_downis;
- break;
- }
- }
- }
-
- if (!deadline)
- s += snprintf(s, tmpstr + tmpsiz - s,
- "%-4d %7d %9d %6s %12d %9d %8s %7d %s\n",
- nrefs, nrtrrefs, alive_cnt,
- alive ? "up" : "down", last_ping,
- pingsent, "NA", down_ni,
- libcfs_nid2str(nid));
- else
- s += snprintf(s, tmpstr + tmpsiz - s,
- "%-4d %7d %9d %6s %12d %9d %8lu %7d %s\n",
- nrefs, nrtrrefs, alive_cnt,
- alive ? "up" : "down", last_ping,
- pingsent,
- cfs_duration_sec(cfs_time_sub(deadline, now)),
- down_ni, libcfs_nid2str(nid));
- LASSERT(tmpstr + tmpsiz - s > 0);
- }
-
- lnet_net_unlock(0);
- }
-
-	len = s - tmpstr; /* how many bytes were written */
-
- if (len > *lenp) { /* linux-supplied buffer is too small */
- rc = -EINVAL;
- } else if (len > 0) { /* wrote something */
- if (copy_to_user(buffer, tmpstr, len)) {
- rc = -EFAULT;
- } else {
- off += 1;
- *ppos = LNET_PROC_POS_MAKE(0, ver, 0, off);
- }
- }
-
- kfree(tmpstr);
-
- if (!rc)
- *lenp = len;
-
- return rc;
-}
-
-static int proc_lnet_peers(struct ctl_table *table, int write,
- void __user *buffer, size_t *lenp, loff_t *ppos)
-{
- const int tmpsiz = 256;
- struct lnet_peer_table *ptable;
- char *tmpstr;
- char *s;
- int cpt = LNET_PROC_CPT_GET(*ppos);
- int ver = LNET_PROC_VER_GET(*ppos);
- int hash = LNET_PROC_HASH_GET(*ppos);
- int hoff = LNET_PROC_HOFF_GET(*ppos);
- int rc = 0;
- int len;
-
- BUILD_BUG_ON(LNET_PROC_HASH_BITS < LNET_PEER_HASH_BITS);
- LASSERT(!write);
-
- if (!*lenp)
- return 0;
-
- if (cpt >= LNET_CPT_NUMBER) {
- *lenp = 0;
- return 0;
- }
-
- tmpstr = kmalloc(tmpsiz, GFP_KERNEL);
- if (!tmpstr)
- return -ENOMEM;
-
- s = tmpstr; /* points to current position in tmpstr[] */
-
- if (!*ppos) {
- s += snprintf(s, tmpstr + tmpsiz - s,
- "%-24s %4s %5s %5s %5s %5s %5s %5s %5s %s\n",
- "nid", "refs", "state", "last", "max",
- "rtr", "min", "tx", "min", "queue");
- LASSERT(tmpstr + tmpsiz - s > 0);
-
- hoff++;
- } else {
- struct lnet_peer *peer;
- struct list_head *p;
- int skip;
- again:
- p = NULL;
- peer = NULL;
- skip = hoff - 1;
-
- lnet_net_lock(cpt);
- ptable = the_lnet.ln_peer_tables[cpt];
- if (hoff == 1)
- ver = LNET_PROC_VERSION(ptable->pt_version);
-
- if (ver != LNET_PROC_VERSION(ptable->pt_version)) {
- lnet_net_unlock(cpt);
- kfree(tmpstr);
- return -ESTALE;
- }
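-
-		/*
-		 * NB: the version captured in *ppos when iteration starts
-		 * guards against the peer table changing between reads; a
-		 * reader that gets -ESTALE should restart from offset 0.
-		 */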
-
- while (hash < LNET_PEER_HASH_SIZE) {
- if (!p)
- p = ptable->pt_hash[hash].next;
-
- while (p != &ptable->pt_hash[hash]) {
- struct lnet_peer *lp;
-
- lp = list_entry(p, struct lnet_peer,
- lp_hashlist);
- if (!skip) {
- peer = lp;
-
- /*
- * minor optimization: start from idx+1
- * on next iteration if we've just
- * drained lp_hashlist
- */
- if (lp->lp_hashlist.next ==
- &ptable->pt_hash[hash]) {
- hoff = 1;
- hash++;
- } else {
- hoff++;
- }
-
- break;
- }
-
- skip--;
- p = lp->lp_hashlist.next;
- }
-
- if (peer)
- break;
-
- p = NULL;
- hoff = 1;
- hash++;
- }
-
- if (peer) {
- lnet_nid_t nid = peer->lp_nid;
- int nrefs = peer->lp_refcount;
- int lastalive = -1;
- char *aliveness = "NA";
- int maxcr = peer->lp_ni->ni_peertxcredits;
- int txcr = peer->lp_txcredits;
- int mintxcr = peer->lp_mintxcredits;
- int rtrcr = peer->lp_rtrcredits;
- int minrtrcr = peer->lp_minrtrcredits;
- int txqnob = peer->lp_txqnob;
-
- if (lnet_isrouter(peer) ||
- lnet_peer_aliveness_enabled(peer))
- aliveness = peer->lp_alive ? "up" : "down";
-
- if (lnet_peer_aliveness_enabled(peer)) {
- unsigned long now = cfs_time_current();
- long delta;
-
- delta = cfs_time_sub(now, peer->lp_last_alive);
- lastalive = cfs_duration_sec(delta);
-
-					/* No need to mess up the output with
-					 * arbitrarily large integers - it
-					 * suffices to know that lastalive is
-					 * more than 10000s old
- */
- if (lastalive >= 10000)
- lastalive = 9999;
- }
-
- lnet_net_unlock(cpt);
-
- s += snprintf(s, tmpstr + tmpsiz - s,
- "%-24s %4d %5s %5d %5d %5d %5d %5d %5d %d\n",
- libcfs_nid2str(nid), nrefs, aliveness,
- lastalive, maxcr, rtrcr, minrtrcr, txcr,
- mintxcr, txqnob);
- LASSERT(tmpstr + tmpsiz - s > 0);
-
- } else { /* peer is NULL */
- lnet_net_unlock(cpt);
- }
-
- if (hash == LNET_PEER_HASH_SIZE) {
- cpt++;
- hash = 0;
- hoff = 1;
- if (!peer && cpt < LNET_CPT_NUMBER)
- goto again;
- }
- }
-
-	len = s - tmpstr; /* how many bytes were written */
-
- if (len > *lenp) { /* linux-supplied buffer is too small */
- rc = -EINVAL;
- } else if (len > 0) { /* wrote something */
- if (copy_to_user(buffer, tmpstr, len))
- rc = -EFAULT;
- else
- *ppos = LNET_PROC_POS_MAKE(cpt, ver, hash, hoff);
- }
-
- kfree(tmpstr);
-
- if (!rc)
- *lenp = len;
-
- return rc;
-}
-
-static int __proc_lnet_buffers(void *data, int write,
- loff_t pos, void __user *buffer, int nob)
-{
- char *s;
- char *tmpstr;
- int tmpsiz;
- int idx;
- int len;
- int rc;
- int i;
-
- LASSERT(!write);
-
-	/* 4 %d fields per line, (LNET_NRBPOOLS + 1) lines per CPT, 64 bytes each */
- tmpsiz = 64 * (LNET_NRBPOOLS + 1) * LNET_CPT_NUMBER;
- tmpstr = kvmalloc(tmpsiz, GFP_KERNEL);
- if (!tmpstr)
- return -ENOMEM;
-
- s = tmpstr; /* points to current position in tmpstr[] */
-
- s += snprintf(s, tmpstr + tmpsiz - s,
- "%5s %5s %7s %7s\n",
- "pages", "count", "credits", "min");
- LASSERT(tmpstr + tmpsiz - s > 0);
-
- if (!the_lnet.ln_rtrpools)
- goto out; /* I'm not a router */
-
- for (idx = 0; idx < LNET_NRBPOOLS; idx++) {
- struct lnet_rtrbufpool *rbp;
-
- lnet_net_lock(LNET_LOCK_EX);
- cfs_percpt_for_each(rbp, i, the_lnet.ln_rtrpools) {
- s += snprintf(s, tmpstr + tmpsiz - s,
- "%5d %5d %7d %7d\n",
- rbp[idx].rbp_npages,
- rbp[idx].rbp_nbuffers,
- rbp[idx].rbp_credits,
- rbp[idx].rbp_mincredits);
- LASSERT(tmpstr + tmpsiz - s > 0);
- }
- lnet_net_unlock(LNET_LOCK_EX);
- }
-
- out:
- len = s - tmpstr;
-
- if (pos >= min_t(int, len, strlen(tmpstr)))
- rc = 0;
- else
- rc = cfs_trace_copyout_string(buffer, nob,
- tmpstr + pos, NULL);
-
- kvfree(tmpstr);
- return rc;
-}
-
-static int proc_lnet_buffers(struct ctl_table *table, int write,
- void __user *buffer, size_t *lenp, loff_t *ppos)
-{
- return lprocfs_call_handler(table->data, write, ppos, buffer, lenp,
- __proc_lnet_buffers);
-}
-
-static int proc_lnet_nis(struct ctl_table *table, int write,
- void __user *buffer, size_t *lenp, loff_t *ppos)
-{
- int tmpsiz = 128 * LNET_CPT_NUMBER;
- int rc = 0;
- char *tmpstr;
- char *s;
- int len;
-
- LASSERT(!write);
-
- if (!*lenp)
- return 0;
-
- tmpstr = kvmalloc(tmpsiz, GFP_KERNEL);
- if (!tmpstr)
- return -ENOMEM;
-
- s = tmpstr; /* points to current position in tmpstr[] */
-
- if (!*ppos) {
- s += snprintf(s, tmpstr + tmpsiz - s,
- "%-24s %6s %5s %4s %4s %4s %5s %5s %5s\n",
- "nid", "status", "alive", "refs", "peer",
- "rtr", "max", "tx", "min");
- LASSERT(tmpstr + tmpsiz - s > 0);
- } else {
- struct list_head *n;
- struct lnet_ni *ni = NULL;
- int skip = *ppos - 1;
-
- lnet_net_lock(0);
-
- n = the_lnet.ln_nis.next;
-
- while (n != &the_lnet.ln_nis) {
- struct lnet_ni *a_ni;
-
- a_ni = list_entry(n, struct lnet_ni, ni_list);
- if (!skip) {
- ni = a_ni;
- break;
- }
-
- skip--;
- n = n->next;
- }
-
- if (ni) {
- struct lnet_tx_queue *tq;
- char *stat;
- time64_t now = ktime_get_real_seconds();
- int last_alive = -1;
- int i;
- int j;
-
- if (the_lnet.ln_routing)
- last_alive = now - ni->ni_last_alive;
-
-			/* the loopback NI (@lo) is forever alive */
- if (ni->ni_lnd->lnd_type == LOLND)
- last_alive = 0;
-
- lnet_ni_lock(ni);
- LASSERT(ni->ni_status);
- stat = (ni->ni_status->ns_status ==
- LNET_NI_STATUS_UP) ? "up" : "down";
- lnet_ni_unlock(ni);
-
- /*
-			 * we actually output credits information for the
-			 * TX queue of each partition
- */
- cfs_percpt_for_each(tq, i, ni->ni_tx_queues) {
- for (j = 0; ni->ni_cpts &&
- j < ni->ni_ncpts; j++) {
- if (i == ni->ni_cpts[j])
- break;
- }
-
- if (j == ni->ni_ncpts)
- continue;
-
- if (i)
- lnet_net_lock(i);
-
- s += snprintf(s, tmpstr + tmpsiz - s,
- "%-24s %6s %5d %4d %4d %4d %5d %5d %5d\n",
- libcfs_nid2str(ni->ni_nid), stat,
- last_alive, *ni->ni_refs[i],
- ni->ni_peertxcredits,
- ni->ni_peerrtrcredits,
- tq->tq_credits_max,
- tq->tq_credits,
- tq->tq_credits_min);
- if (i)
- lnet_net_unlock(i);
- }
- LASSERT(tmpstr + tmpsiz - s > 0);
- }
-
- lnet_net_unlock(0);
- }
-
-	len = s - tmpstr; /* how many bytes were written */
-
- if (len > *lenp) { /* linux-supplied buffer is too small */
- rc = -EINVAL;
- } else if (len > 0) { /* wrote something */
- if (copy_to_user(buffer, tmpstr, len))
- rc = -EFAULT;
- else
- *ppos += 1;
- }
-
- kvfree(tmpstr);
-
- if (!rc)
- *lenp = len;
-
- return rc;
-}
-
-struct lnet_portal_rotors {
- int pr_value;
- const char *pr_name;
- const char *pr_desc;
-};
-
-static struct lnet_portal_rotors portal_rotors[] = {
- {
- .pr_value = LNET_PTL_ROTOR_OFF,
- .pr_name = "OFF",
- .pr_desc = "Turn off message rotor for wildcard portals"
- },
- {
- .pr_value = LNET_PTL_ROTOR_ON,
- .pr_name = "ON",
- .pr_desc = "round-robin dispatch all PUT messages for wildcard portals"
- },
- {
- .pr_value = LNET_PTL_ROTOR_RR_RT,
- .pr_name = "RR_RT",
- .pr_desc = "round-robin dispatch routed PUT message for wildcard portals"
- },
- {
- .pr_value = LNET_PTL_ROTOR_HASH_RT,
- .pr_name = "HASH_RT",
- .pr_desc = "dispatch routed PUT message by hashing source NID for wildcard portals"
- },
- {
- .pr_value = -1,
- .pr_name = NULL,
- .pr_desc = NULL
- },
-};
-
-static int __proc_lnet_portal_rotor(void *data, int write,
- loff_t pos, void __user *buffer, int nob)
-{
- const int buf_len = 128;
- char *buf;
- char *tmp;
- int rc;
- int i;
-
- buf = kmalloc(buf_len, GFP_KERNEL);
- if (!buf)
- return -ENOMEM;
-
- if (!write) {
- lnet_res_lock(0);
-
- for (i = 0; portal_rotors[i].pr_value >= 0; i++) {
- if (portal_rotors[i].pr_value == portal_rotor)
- break;
- }
-
- LASSERT(portal_rotors[i].pr_value == portal_rotor);
- lnet_res_unlock(0);
-
- rc = snprintf(buf, buf_len,
- "{\n\tportals: all\n"
- "\trotor: %s\n\tdescription: %s\n}",
- portal_rotors[i].pr_name,
- portal_rotors[i].pr_desc);
-
- if (pos >= min_t(int, rc, buf_len)) {
- rc = 0;
- } else {
- rc = cfs_trace_copyout_string(buffer, nob,
- buf + pos, "\n");
- }
- goto out;
- }
-
- rc = cfs_trace_copyin_string(buf, buf_len, buffer, nob);
- if (rc < 0)
- goto out;
-
- tmp = strim(buf);
-
- rc = -EINVAL;
- lnet_res_lock(0);
- for (i = 0; portal_rotors[i].pr_name; i++) {
- if (!strncasecmp(portal_rotors[i].pr_name, tmp,
- strlen(portal_rotors[i].pr_name))) {
- portal_rotor = portal_rotors[i].pr_value;
- rc = 0;
- break;
- }
- }
- lnet_res_unlock(0);
-out:
- kfree(buf);
- return rc;
-}
-
-static int proc_lnet_portal_rotor(struct ctl_table *table, int write,
- void __user *buffer, size_t *lenp,
- loff_t *ppos)
-{
- return lprocfs_call_handler(table->data, write, ppos, buffer, lenp,
- __proc_lnet_portal_rotor);
-}
-
-static struct ctl_table lnet_table[] = {
- /*
- * NB No .strategy entries have been provided since sysctl(8) prefers
- * to go via /proc for portability.
- */
- {
- .procname = "stats",
- .mode = 0644,
- .proc_handler = &proc_lnet_stats,
- },
- {
- .procname = "routes",
- .mode = 0444,
- .proc_handler = &proc_lnet_routes,
- },
- {
- .procname = "routers",
- .mode = 0444,
- .proc_handler = &proc_lnet_routers,
- },
- {
- .procname = "peers",
- .mode = 0444,
- .proc_handler = &proc_lnet_peers,
- },
- {
- .procname = "buffers",
- .mode = 0444,
- .proc_handler = &proc_lnet_buffers,
- },
- {
- .procname = "nis",
- .mode = 0444,
- .proc_handler = &proc_lnet_nis,
- },
- {
- .procname = "portal_rotor",
- .mode = 0644,
- .proc_handler = &proc_lnet_portal_rotor,
- },
- {
- }
-};
-
-void lnet_router_debugfs_init(void)
-{
- lustre_insert_debugfs(lnet_table, NULL);
-}
-
-void lnet_router_debugfs_fini(void)
-{
-}
diff --git a/drivers/staging/lustre/lnet/selftest/Makefile b/drivers/staging/lustre/lnet/selftest/Makefile
deleted file mode 100644
index 3ccc8966b566..000000000000
--- a/drivers/staging/lustre/lnet/selftest/Makefile
+++ /dev/null
@@ -1,7 +0,0 @@
-subdir-ccflags-y += -I$(srctree)/drivers/staging/lustre/include
-subdir-ccflags-y += -I$(srctree)/drivers/staging/lustre/lustre/include
-
-obj-$(CONFIG_LNET_SELFTEST) := lnet_selftest.o
-
-lnet_selftest-y := console.o conrpc.o conctl.o framework.o timer.o rpc.o \
- module.o ping_test.o brw_test.o
diff --git a/drivers/staging/lustre/lnet/selftest/brw_test.c b/drivers/staging/lustre/lnet/selftest/brw_test.c
deleted file mode 100644
index f1ee219bc8f3..000000000000
--- a/drivers/staging/lustre/lnet/selftest/brw_test.c
+++ /dev/null
@@ -1,526 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/*
- * GPL HEADER START
- *
- * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 only,
- * as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * General Public License version 2 for more details (a copy is included
- * in the LICENSE file that accompanied this code).
- *
- * You should have received a copy of the GNU General Public License
- * version 2 along with this program; If not, see
- * http://www.gnu.org/licenses/gpl-2.0.html
- *
- * GPL HEADER END
- */
-/*
- * Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved.
- * Use is subject to license terms.
- *
- * Copyright (c) 2011, 2015, Intel Corporation.
- */
-/*
- * This file is part of Lustre, http://www.lustre.org/
- * Lustre is a trademark of Sun Microsystems, Inc.
- *
- * lnet/selftest/brw_test.c
- *
- * Author: Isaac Huang <isaac@clusterfs.com>
- */
-
-#include "selftest.h"
-
-static int brw_srv_workitems = SFW_TEST_WI_MAX;
-module_param(brw_srv_workitems, int, 0644);
-MODULE_PARM_DESC(brw_srv_workitems, "# BRW server workitems");
-
-static int brw_inject_errors;
-module_param(brw_inject_errors, int, 0644);
-MODULE_PARM_DESC(brw_inject_errors, "# data errors to inject randomly, zero by default");
-
-#define BRW_POISON 0xbeefbeefbeefbeefULL
-#define BRW_MAGIC 0xeeb0eeb1eeb2eeb3ULL
-#define BRW_MSIZE sizeof(u64)
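-
-/*
- * Writers fill bulk pages with BRW_MAGIC; readers pre-fill theirs with
- * BRW_POISON so that data the server failed to overwrite is caught by
- * brw_check_bulk() on completion.
- */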
-
-static void
-brw_client_fini(struct sfw_test_instance *tsi)
-{
- struct srpc_bulk *bulk;
- struct sfw_test_unit *tsu;
-
- LASSERT(tsi->tsi_is_client);
-
- list_for_each_entry(tsu, &tsi->tsi_units, tsu_list) {
- bulk = tsu->tsu_private;
- if (!bulk)
- continue;
-
- srpc_free_bulk(bulk);
- tsu->tsu_private = NULL;
- }
-}
-
-static int
-brw_client_init(struct sfw_test_instance *tsi)
-{
- struct sfw_session *sn = tsi->tsi_batch->bat_session;
- int flags;
- int off;
- int npg;
- int len;
- int opc;
- struct srpc_bulk *bulk;
- struct sfw_test_unit *tsu;
-
- LASSERT(sn);
- LASSERT(tsi->tsi_is_client);
-
- if (!(sn->sn_features & LST_FEAT_BULK_LEN)) {
- struct test_bulk_req *breq = &tsi->tsi_u.bulk_v0;
-
- opc = breq->blk_opc;
- flags = breq->blk_flags;
- npg = breq->blk_npg;
- /*
-		 * NB: this is not going to work for variable page sizes,
- * but we have to keep it for compatibility
- */
- len = npg * PAGE_SIZE;
- off = 0;
- } else {
- struct test_bulk_req_v1 *breq = &tsi->tsi_u.bulk_v1;
-
- /*
-		 * I should never get to this step with an unknown feature
-		 * because make_session will reject unknown features
- */
- LASSERT(!(sn->sn_features & ~LST_FEATS_MASK));
-
- opc = breq->blk_opc;
- flags = breq->blk_flags;
- len = breq->blk_len;
- off = breq->blk_offset & ~PAGE_MASK;
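-		/*
-		 * E.g. (hypothetical numbers): blk_offset = 512 and
-		 * blk_len = 8192 with 4K pages gives
-		 * npg = (512 + 8192 + 4095) >> 12 = 3.
-		 */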
- npg = (off + len + PAGE_SIZE - 1) >> PAGE_SHIFT;
- }
-
- if (off % BRW_MSIZE)
- return -EINVAL;
-
- if (npg > LNET_MAX_IOV || npg <= 0)
- return -EINVAL;
-
- if (opc != LST_BRW_READ && opc != LST_BRW_WRITE)
- return -EINVAL;
-
- if (flags != LST_BRW_CHECK_NONE &&
- flags != LST_BRW_CHECK_FULL && flags != LST_BRW_CHECK_SIMPLE)
- return -EINVAL;
-
- list_for_each_entry(tsu, &tsi->tsi_units, tsu_list) {
- bulk = srpc_alloc_bulk(lnet_cpt_of_nid(tsu->tsu_dest.nid),
- off, npg, len, opc == LST_BRW_READ);
- if (!bulk) {
- brw_client_fini(tsi);
- return -ENOMEM;
- }
-
- tsu->tsu_private = bulk;
- }
-
- return 0;
-}
-
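-/*
- * Consume one error from the brw_inject_errors budget on roughly half
- * of the calls (keyed off the parity of the microsecond clock); a
- * non-zero return makes brw_fill_page() perturb the magic value.
- */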
-static int brw_inject_one_error(void)
-{
- struct timespec64 ts;
-
- if (brw_inject_errors <= 0)
- return 0;
-
- ktime_get_ts64(&ts);
-
- if (!((ts.tv_nsec / NSEC_PER_USEC) & 1))
- return 0;
-
- return brw_inject_errors--;
-}
-
-static void
-brw_fill_page(struct page *pg, int off, int len, int pattern, __u64 magic)
-{
- char *addr = page_address(pg) + off;
- int i;
-
- LASSERT(addr);
- LASSERT(!(off % BRW_MSIZE) && !(len % BRW_MSIZE));
-
- if (pattern == LST_BRW_CHECK_NONE)
- return;
-
- if (magic == BRW_MAGIC)
- magic += brw_inject_one_error();
-
- if (pattern == LST_BRW_CHECK_SIMPLE) {
- memcpy(addr, &magic, BRW_MSIZE);
- if (len > BRW_MSIZE) {
- addr += PAGE_SIZE - BRW_MSIZE;
- memcpy(addr, &magic, BRW_MSIZE);
- }
- return;
- }
-
- if (pattern == LST_BRW_CHECK_FULL) {
- for (i = 0; i < len; i += BRW_MSIZE)
- memcpy(addr + i, &magic, BRW_MSIZE);
- return;
- }
-
- LBUG();
-}
-
-static int
-brw_check_page(struct page *pg, int off, int len, int pattern, __u64 magic)
-{
- char *addr = page_address(pg) + off;
- __u64 data = 0; /* make compiler happy */
- int i;
-
- LASSERT(addr);
- LASSERT(!(off % BRW_MSIZE) && !(len % BRW_MSIZE));
-
- if (pattern == LST_BRW_CHECK_NONE)
- return 0;
-
- if (pattern == LST_BRW_CHECK_SIMPLE) {
- data = *((__u64 *)addr);
- if (data != magic)
- goto bad_data;
-
- if (len > BRW_MSIZE) {
- addr += PAGE_SIZE - BRW_MSIZE;
- data = *((__u64 *)addr);
- if (data != magic)
- goto bad_data;
- }
- return 0;
- }
-
- if (pattern == LST_BRW_CHECK_FULL) {
- for (i = 0; i < len; i += BRW_MSIZE) {
- data = *(u64 *)(addr + i);
- if (data != magic)
- goto bad_data;
- }
- return 0;
- }
-
- LBUG();
-
-bad_data:
- CERROR("Bad data in page %p: %#llx, %#llx expected\n",
- pg, data, magic);
- return 1;
-}
-
-static void
-brw_fill_bulk(struct srpc_bulk *bk, int pattern, __u64 magic)
-{
- int i;
- struct page *pg;
-
- for (i = 0; i < bk->bk_niov; i++) {
- int off, len;
-
- pg = bk->bk_iovs[i].bv_page;
- off = bk->bk_iovs[i].bv_offset;
- len = bk->bk_iovs[i].bv_len;
- brw_fill_page(pg, off, len, pattern, magic);
- }
-}
-
-static int
-brw_check_bulk(struct srpc_bulk *bk, int pattern, __u64 magic)
-{
- int i;
- struct page *pg;
-
- for (i = 0; i < bk->bk_niov; i++) {
- int off, len;
-
- pg = bk->bk_iovs[i].bv_page;
- off = bk->bk_iovs[i].bv_offset;
- len = bk->bk_iovs[i].bv_len;
- if (brw_check_page(pg, off, len, pattern, magic)) {
- CERROR("Bulk page %p (%d/%d) is corrupted!\n",
- pg, i, bk->bk_niov);
- return 1;
- }
- }
-
- return 0;
-}
-
-static int
-brw_client_prep_rpc(struct sfw_test_unit *tsu, struct lnet_process_id dest,
- struct srpc_client_rpc **rpcpp)
-{
- struct srpc_bulk *bulk = tsu->tsu_private;
- struct sfw_test_instance *tsi = tsu->tsu_instance;
- struct sfw_session *sn = tsi->tsi_batch->bat_session;
- struct srpc_client_rpc *rpc;
- struct srpc_brw_reqst *req;
- int flags;
- int npg;
- int len;
- int opc;
- int rc;
-
- LASSERT(sn);
- LASSERT(bulk);
-
- if (!(sn->sn_features & LST_FEAT_BULK_LEN)) {
- struct test_bulk_req *breq = &tsi->tsi_u.bulk_v0;
-
- opc = breq->blk_opc;
- flags = breq->blk_flags;
- npg = breq->blk_npg;
- len = npg * PAGE_SIZE;
- } else {
- struct test_bulk_req_v1 *breq = &tsi->tsi_u.bulk_v1;
- int off;
-
-		/*
-		 * We should never reach this point with an unknown feature,
-		 * because make_session rejects unknown features.
-		 */
- LASSERT(!(sn->sn_features & ~LST_FEATS_MASK));
-
- opc = breq->blk_opc;
- flags = breq->blk_flags;
- len = breq->blk_len;
- off = breq->blk_offset;
- npg = (off + len + PAGE_SIZE - 1) >> PAGE_SHIFT;
- }
-
- rc = sfw_create_test_rpc(tsu, dest, sn->sn_features, npg, len, &rpc);
- if (rc)
- return rc;
-
- memcpy(&rpc->crpc_bulk, bulk, offsetof(struct srpc_bulk, bk_iovs[npg]));
- if (opc == LST_BRW_WRITE)
- brw_fill_bulk(&rpc->crpc_bulk, flags, BRW_MAGIC);
- else
- brw_fill_bulk(&rpc->crpc_bulk, flags, BRW_POISON);
-
- req = &rpc->crpc_reqstmsg.msg_body.brw_reqst;
- req->brw_flags = flags;
- req->brw_rw = opc;
- req->brw_len = len;
-
- *rpcpp = rpc;
- return 0;
-}
-
-static void
-brw_client_done_rpc(struct sfw_test_unit *tsu, struct srpc_client_rpc *rpc)
-{
- __u64 magic = BRW_MAGIC;
- struct sfw_test_instance *tsi = tsu->tsu_instance;
- struct sfw_session *sn = tsi->tsi_batch->bat_session;
- struct srpc_msg *msg = &rpc->crpc_replymsg;
- struct srpc_brw_reply *reply = &msg->msg_body.brw_reply;
- struct srpc_brw_reqst *reqst = &rpc->crpc_reqstmsg.msg_body.brw_reqst;
-
- LASSERT(sn);
-
- if (rpc->crpc_status) {
- CERROR("BRW RPC to %s failed with %d\n",
- libcfs_id2str(rpc->crpc_dest), rpc->crpc_status);
- if (!tsi->tsi_stopping) /* rpc could have been aborted */
- atomic_inc(&sn->sn_brw_errors);
- return;
- }
-
- if (msg->msg_magic != SRPC_MSG_MAGIC) {
- __swab64s(&magic);
- __swab32s(&reply->brw_status);
- }
-
- CDEBUG(reply->brw_status ? D_WARNING : D_NET,
- "BRW RPC to %s finished with brw_status: %d\n",
- libcfs_id2str(rpc->crpc_dest), reply->brw_status);
-
- if (reply->brw_status) {
- atomic_inc(&sn->sn_brw_errors);
- rpc->crpc_status = -(int)reply->brw_status;
- return;
- }
-
- if (reqst->brw_rw == LST_BRW_WRITE)
- return;
-
- if (brw_check_bulk(&rpc->crpc_bulk, reqst->brw_flags, magic)) {
- CERROR("Bulk data from %s is corrupted!\n",
- libcfs_id2str(rpc->crpc_dest));
- atomic_inc(&sn->sn_brw_errors);
- rpc->crpc_status = -EBADMSG;
- }
-}
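When msg_magic does not match SRPC_MSG_MAGIC, the peer has the opposite
endianness, so the local copy of the magic is byte-swapped before the bulk
is verified: comparing data the peer wrote in its native byte order against
the swabbed magic is equivalent to swabbing every word of the bulk. A
compact sketch of the selection, using a GCC/Clang builtin as a stand-in
for the kernel's __swab64s():

#include <stdint.h>

/* choose the comparison magic based on the peer's wire magic */
static uint64_t reply_magic(uint32_t msg_magic, uint32_t local_magic,
			    uint64_t brw_magic)
{
	if (msg_magic == local_magic)
		return brw_magic;		/* same endianness */
	return __builtin_bswap64(brw_magic);	/* peer is byte-swapped */
}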
-
-static void
-brw_server_rpc_done(struct srpc_server_rpc *rpc)
-{
- struct srpc_bulk *blk = rpc->srpc_bulk;
-
- if (!blk)
- return;
-
- if (rpc->srpc_status)
- CERROR("Bulk transfer %s %s has failed: %d\n",
- blk->bk_sink ? "from" : "to",
- libcfs_id2str(rpc->srpc_peer), rpc->srpc_status);
- else
- CDEBUG(D_NET, "Transferred %d pages bulk data %s %s\n",
- blk->bk_niov, blk->bk_sink ? "from" : "to",
- libcfs_id2str(rpc->srpc_peer));
-
- sfw_free_pages(rpc);
-}
-
-static int
-brw_bulk_ready(struct srpc_server_rpc *rpc, int status)
-{
- __u64 magic = BRW_MAGIC;
- struct srpc_brw_reply *reply = &rpc->srpc_replymsg.msg_body.brw_reply;
- struct srpc_brw_reqst *reqst;
- struct srpc_msg *reqstmsg;
-
- LASSERT(rpc->srpc_bulk);
- LASSERT(rpc->srpc_reqstbuf);
-
- reqstmsg = &rpc->srpc_reqstbuf->buf_msg;
- reqst = &reqstmsg->msg_body.brw_reqst;
-
- if (status) {
- CERROR("BRW bulk %s failed for RPC from %s: %d\n",
- reqst->brw_rw == LST_BRW_READ ? "READ" : "WRITE",
- libcfs_id2str(rpc->srpc_peer), status);
- return -EIO;
- }
-
- if (reqst->brw_rw == LST_BRW_READ)
- return 0;
-
- if (reqstmsg->msg_magic != SRPC_MSG_MAGIC)
- __swab64s(&magic);
-
- if (brw_check_bulk(rpc->srpc_bulk, reqst->brw_flags, magic)) {
- CERROR("Bulk data from %s is corrupted!\n",
- libcfs_id2str(rpc->srpc_peer));
- reply->brw_status = EBADMSG;
- }
-
- return 0;
-}
-
-static int
-brw_server_handle(struct srpc_server_rpc *rpc)
-{
- struct srpc_service *sv = rpc->srpc_scd->scd_svc;
- struct srpc_msg *replymsg = &rpc->srpc_replymsg;
- struct srpc_msg *reqstmsg = &rpc->srpc_reqstbuf->buf_msg;
- struct srpc_brw_reply *reply = &replymsg->msg_body.brw_reply;
- struct srpc_brw_reqst *reqst = &reqstmsg->msg_body.brw_reqst;
- int npg;
- int rc;
-
- LASSERT(sv->sv_id == SRPC_SERVICE_BRW);
-
- if (reqstmsg->msg_magic != SRPC_MSG_MAGIC) {
- LASSERT(reqstmsg->msg_magic == __swab32(SRPC_MSG_MAGIC));
-
- __swab32s(&reqst->brw_rw);
- __swab32s(&reqst->brw_len);
- __swab32s(&reqst->brw_flags);
- __swab64s(&reqst->brw_rpyid);
- __swab64s(&reqst->brw_bulkid);
- }
- LASSERT(reqstmsg->msg_type == (__u32)srpc_service2request(sv->sv_id));
-
- reply->brw_status = 0;
- rpc->srpc_done = brw_server_rpc_done;
-
- if ((reqst->brw_rw != LST_BRW_READ && reqst->brw_rw != LST_BRW_WRITE) ||
- (reqst->brw_flags != LST_BRW_CHECK_NONE &&
- reqst->brw_flags != LST_BRW_CHECK_FULL &&
- reqst->brw_flags != LST_BRW_CHECK_SIMPLE)) {
- reply->brw_status = EINVAL;
- return 0;
- }
-
- if (reqstmsg->msg_ses_feats & ~LST_FEATS_MASK) {
- replymsg->msg_ses_feats = LST_FEATS_MASK;
- reply->brw_status = EPROTO;
- return 0;
- }
-
- if (!(reqstmsg->msg_ses_feats & LST_FEAT_BULK_LEN)) {
- /* compat with old version */
- if (reqst->brw_len & ~PAGE_MASK) {
- reply->brw_status = EINVAL;
- return 0;
- }
- npg = reqst->brw_len >> PAGE_SHIFT;
-
- } else {
- npg = (reqst->brw_len + PAGE_SIZE - 1) >> PAGE_SHIFT;
- }
-
- replymsg->msg_ses_feats = reqstmsg->msg_ses_feats;
-
- if (!reqst->brw_len || npg > LNET_MAX_IOV) {
- reply->brw_status = EINVAL;
- return 0;
- }
-
- rc = sfw_alloc_pages(rpc, rpc->srpc_scd->scd_cpt, npg,
- reqst->brw_len,
- reqst->brw_rw == LST_BRW_WRITE);
- if (rc)
- return rc;
-
- if (reqst->brw_rw == LST_BRW_READ)
- brw_fill_bulk(rpc->srpc_bulk, reqst->brw_flags, BRW_MAGIC);
- else
- brw_fill_bulk(rpc->srpc_bulk, reqst->brw_flags, BRW_POISON);
-
- return 0;
-}
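Note the two length-to-page-count rules: without LST_FEAT_BULK_LEN the
legacy protocol insists that brw_len be an exact multiple of the page size,
whereas with the feature the length is simply rounded up to whole pages.
A standalone restatement (the PAGE_SHIFT value is an illustrative
assumption):

#include <errno.h>

#define PAGE_SHIFT	12
#define PAGE_SIZE	(1L << PAGE_SHIFT)

static long brw_len_to_npg(long len, int has_bulk_len_feature)
{
	if (len <= 0)
		return -EINVAL;

	if (!has_bulk_len_feature) {
		if (len & (PAGE_SIZE - 1))	/* legacy: whole pages only */
			return -EINVAL;
		return len >> PAGE_SHIFT;
	}

	return (len + PAGE_SIZE - 1) >> PAGE_SHIFT;	/* round up */
}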
-
-struct sfw_test_client_ops brw_test_client;
-
-void brw_init_test_client(void)
-{
- brw_test_client.tso_init = brw_client_init;
- brw_test_client.tso_fini = brw_client_fini;
- brw_test_client.tso_prep_rpc = brw_client_prep_rpc;
- brw_test_client.tso_done_rpc = brw_client_done_rpc;
-}
-
-struct srpc_service brw_test_service;
-
-void brw_init_test_service(void)
-{
- brw_test_service.sv_id = SRPC_SERVICE_BRW;
- brw_test_service.sv_name = "brw_test";
- brw_test_service.sv_handler = brw_server_handle;
- brw_test_service.sv_bulk_ready = brw_bulk_ready;
- brw_test_service.sv_wi_total = brw_srv_workitems;
-}
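These two helpers only populate the ops tables; the selftest framework is
presumably the one that registers them at module load. A hedged sketch of
what such a caller might look like; the registration call named in the
comment is an assumption, not taken from this patch:

static int __init brw_test_setup(void)
{
	brw_init_test_client();
	brw_init_test_service();

	/* hypothetical: hand both tables to the framework, e.g.
	 * sfw_register_test(&brw_test_service, &brw_test_client)
	 */
	return 0;
}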
diff --git a/drivers/staging/lustre/lnet/selftest/conctl.c b/drivers/staging/lustre/lnet/selftest/conctl.c
deleted file mode 100644
index a2d8092bdeb7..000000000000
--- a/drivers/staging/lustre/lnet/selftest/conctl.c
+++ /dev/null
@@ -1,799 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/*
- * GPL HEADER START
- *
- * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 only,
- * as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * General Public License version 2 for more details (a copy is included
- * in the LICENSE file that accompanied this code).
- *
- * You should have received a copy of the GNU General Public License
- * version 2 along with this program; If not, see
- * http://www.gnu.org/licenses/gpl-2.0.html
- *
- * GPL HEADER END
- */
-/*
- * Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved.
- * Use is subject to license terms.
- *
- * Copyright (c) 2012, Intel Corporation.
- */
-/*
- * This file is part of Lustre, http://www.lustre.org/
- * Lustre is a trademark of Sun Microsystems, Inc.
- *
- * lnet/selftest/conctl.c
- *
- * ioctl handling in the kernel
- *
- * Author: Liang Zhen <liangzhen@clusterfs.com>
- */
-
-#include <linux/libcfs/libcfs.h>
-#include <linux/lnet/lib-lnet.h>
-#include <uapi/linux/lnet/lnetst.h>
-#include "console.h"
-
-static int
-lst_session_new_ioctl(struct lstio_session_new_args *args)
-{
- char name[LST_NAME_SIZE + 1];
- int rc;
-
- if (!args->lstio_ses_idp || /* address for output sid */
- !args->lstio_ses_key || /* no key is specified */
- !args->lstio_ses_namep || /* session name */
- args->lstio_ses_nmlen <= 0 ||
- args->lstio_ses_nmlen > LST_NAME_SIZE)
- return -EINVAL;
-
- if (copy_from_user(name, args->lstio_ses_namep,
- args->lstio_ses_nmlen)) {
- return -EFAULT;
- }
-
- name[args->lstio_ses_nmlen] = 0;
-
- rc = lstcon_session_new(name,
- args->lstio_ses_key,
- args->lstio_ses_feats,
- args->lstio_ses_timeout,
- args->lstio_ses_force,
- args->lstio_ses_idp);
-
- return rc;
-}
-
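Almost every handler in this file repeats the same three-step name import:
validate the user pointer and length, copy_from_user() into a stack buffer,
then NUL-terminate (the user data is not guaranteed to be terminated). A
hypothetical helper capturing the pattern; lst_copy_name is an invented
name:

static int lst_copy_name(char *buf, const char __user *namep, int nmlen)
{
	if (!namep || nmlen <= 0 || nmlen > LST_NAME_SIZE)
		return -EINVAL;

	if (copy_from_user(buf, namep, nmlen))
		return -EFAULT;

	buf[nmlen] = 0;		/* terminate: user data may not be */
	return 0;
}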
-static int
-lst_session_end_ioctl(struct lstio_session_end_args *args)
-{
- if (args->lstio_ses_key != console_session.ses_key)
- return -EACCES;
-
- return lstcon_session_end();
-}
-
-static int
-lst_session_info_ioctl(struct lstio_session_info_args *args)
-{
- /* no checking of key */
-
- if (!args->lstio_ses_idp || /* address for output sid */
- !args->lstio_ses_keyp || /* address for output key */
- !args->lstio_ses_featp || /* address for output features */
- !args->lstio_ses_ndinfo || /* address for output ndinfo */
- !args->lstio_ses_namep || /* address for output name */
- args->lstio_ses_nmlen <= 0 ||
- args->lstio_ses_nmlen > LST_NAME_SIZE)
- return -EINVAL;
-
- return lstcon_session_info(args->lstio_ses_idp,
- args->lstio_ses_keyp,
- args->lstio_ses_featp,
- args->lstio_ses_ndinfo,
- args->lstio_ses_namep,
- args->lstio_ses_nmlen);
-}
-
-static int
-lst_debug_ioctl(struct lstio_debug_args *args)
-{
- char name[LST_NAME_SIZE + 1];
- int client = 1;
- int rc;
-
- if (args->lstio_dbg_key != console_session.ses_key)
- return -EACCES;
-
- if (!args->lstio_dbg_resultp)
- return -EINVAL;
-
- if (args->lstio_dbg_namep && /* name of batch/group */
- (args->lstio_dbg_nmlen <= 0 ||
- args->lstio_dbg_nmlen > LST_NAME_SIZE))
- return -EINVAL;
-
- if (args->lstio_dbg_namep) {
-
- if (copy_from_user(name, args->lstio_dbg_namep,
- args->lstio_dbg_nmlen))
- return -EFAULT;
-
- name[args->lstio_dbg_nmlen] = 0;
- }
-
- rc = -EINVAL;
-
- switch (args->lstio_dbg_type) {
- case LST_OPC_SESSION:
- rc = lstcon_session_debug(args->lstio_dbg_timeout,
- args->lstio_dbg_resultp);
- break;
-
- case LST_OPC_BATCHSRV:
- client = 0;
- /* fall through */
- case LST_OPC_BATCHCLI:
- if (!args->lstio_dbg_namep)
- goto out;
-
- rc = lstcon_batch_debug(args->lstio_dbg_timeout,
- name, client, args->lstio_dbg_resultp);
- break;
-
- case LST_OPC_GROUP:
- if (!args->lstio_dbg_namep)
- goto out;
-
- rc = lstcon_group_debug(args->lstio_dbg_timeout,
- name, args->lstio_dbg_resultp);
- break;
-
- case LST_OPC_NODES:
- if (args->lstio_dbg_count <= 0 ||
- !args->lstio_dbg_idsp)
- goto out;
-
- rc = lstcon_nodes_debug(args->lstio_dbg_timeout,
- args->lstio_dbg_count,
- args->lstio_dbg_idsp,
- args->lstio_dbg_resultp);
- break;
-
- default:
- break;
- }
-
-out:
- return rc;
-}
-
-static int
-lst_group_add_ioctl(struct lstio_group_add_args *args)
-{
- char name[LST_NAME_SIZE + 1];
- int rc;
-
- if (args->lstio_grp_key != console_session.ses_key)
- return -EACCES;
-
- if (!args->lstio_grp_namep ||
- args->lstio_grp_nmlen <= 0 ||
- args->lstio_grp_nmlen > LST_NAME_SIZE)
- return -EINVAL;
-
- if (copy_from_user(name, args->lstio_grp_namep,
- args->lstio_grp_nmlen))
- return -EFAULT;
-
- name[args->lstio_grp_nmlen] = 0;
-
- rc = lstcon_group_add(name);
-
- return rc;
-}
-
-static int
-lst_group_del_ioctl(struct lstio_group_del_args *args)
-{
- int rc;
- char name[LST_NAME_SIZE + 1];
-
- if (args->lstio_grp_key != console_session.ses_key)
- return -EACCES;
-
- if (!args->lstio_grp_namep ||
- args->lstio_grp_nmlen <= 0 ||
- args->lstio_grp_nmlen > LST_NAME_SIZE)
- return -EINVAL;
-
- if (copy_from_user(name, args->lstio_grp_namep,
- args->lstio_grp_nmlen))
- return -EFAULT;
-
- name[args->lstio_grp_nmlen] = 0;
-
- rc = lstcon_group_del(name);
-
- return rc;
-}
-
-static int
-lst_group_update_ioctl(struct lstio_group_update_args *args)
-{
- int rc;
- char name[LST_NAME_SIZE + 1];
-
- if (args->lstio_grp_key != console_session.ses_key)
- return -EACCES;
-
- if (!args->lstio_grp_resultp ||
- !args->lstio_grp_namep ||
- args->lstio_grp_nmlen <= 0 ||
- args->lstio_grp_nmlen > LST_NAME_SIZE)
- return -EINVAL;
-
- if (copy_from_user(name, args->lstio_grp_namep,
- args->lstio_grp_nmlen))
- return -EFAULT;
-
- name[args->lstio_grp_nmlen] = 0;
-
- switch (args->lstio_grp_opc) {
- case LST_GROUP_CLEAN:
- rc = lstcon_group_clean(name, args->lstio_grp_args);
- break;
-
- case LST_GROUP_REFRESH:
- rc = lstcon_group_refresh(name, args->lstio_grp_resultp);
- break;
-
- case LST_GROUP_RMND:
- if (args->lstio_grp_count <= 0 ||
- !args->lstio_grp_idsp) {
- rc = -EINVAL;
- break;
- }
- rc = lstcon_nodes_remove(name, args->lstio_grp_count,
- args->lstio_grp_idsp,
- args->lstio_grp_resultp);
- break;
-
- default:
- rc = -EINVAL;
- break;
- }
-
- return rc;
-}
-
-static int
-lst_nodes_add_ioctl(struct lstio_group_nodes_args *args)
-{
- unsigned int feats;
- int rc;
- char name[LST_NAME_SIZE + 1];
-
- if (args->lstio_grp_key != console_session.ses_key)
- return -EACCES;
-
- if (!args->lstio_grp_idsp || /* array of ids */
- args->lstio_grp_count <= 0 ||
- !args->lstio_grp_resultp ||
- !args->lstio_grp_featp ||
- !args->lstio_grp_namep ||
- args->lstio_grp_nmlen <= 0 ||
- args->lstio_grp_nmlen > LST_NAME_SIZE)
- return -EINVAL;
-
- if (copy_from_user(name, args->lstio_grp_namep,
- args->lstio_grp_nmlen))
- return -EFAULT;
-
- name[args->lstio_grp_nmlen] = 0;
-
- rc = lstcon_nodes_add(name, args->lstio_grp_count,
- args->lstio_grp_idsp, &feats,
- args->lstio_grp_resultp);
-
- if (!rc &&
- copy_to_user(args->lstio_grp_featp, &feats, sizeof(feats))) {
- return -EINVAL;
- }
-
- return rc;
-}
-
-static int
-lst_group_list_ioctl(struct lstio_group_list_args *args)
-{
- if (args->lstio_grp_key != console_session.ses_key)
- return -EACCES;
-
- if (args->lstio_grp_idx < 0 ||
- !args->lstio_grp_namep ||
- args->lstio_grp_nmlen <= 0 ||
- args->lstio_grp_nmlen > LST_NAME_SIZE)
- return -EINVAL;
-
- return lstcon_group_list(args->lstio_grp_idx,
- args->lstio_grp_nmlen,
- args->lstio_grp_namep);
-}
-
-static int
-lst_group_info_ioctl(struct lstio_group_info_args *args)
-{
- char name[LST_NAME_SIZE + 1];
- int ndent;
- int index;
- int rc;
-
- if (args->lstio_grp_key != console_session.ses_key)
- return -EACCES;
-
- if (!args->lstio_grp_namep ||
- args->lstio_grp_nmlen <= 0 ||
- args->lstio_grp_nmlen > LST_NAME_SIZE)
- return -EINVAL;
-
- if (!args->lstio_grp_entp && /* output: group entry */
- !args->lstio_grp_dentsp) /* output: node entry */
- return -EINVAL;
-
- if (args->lstio_grp_dentsp) { /* have node entry */
- if (!args->lstio_grp_idxp || /* node index */
- !args->lstio_grp_ndentp) /* # of node entry */
- return -EINVAL;
-
- if (copy_from_user(&ndent, args->lstio_grp_ndentp,
- sizeof(ndent)) ||
- copy_from_user(&index, args->lstio_grp_idxp,
- sizeof(index)))
- return -EFAULT;
-
- if (ndent <= 0 || index < 0)
- return -EINVAL;
- }
-
- if (copy_from_user(name, args->lstio_grp_namep,
- args->lstio_grp_nmlen))
- return -EFAULT;
-
- name[args->lstio_grp_nmlen] = 0;
-
- rc = lstcon_group_info(name, args->lstio_grp_entp,
- &index, &ndent, args->lstio_grp_dentsp);
-
- if (rc)
- return rc;
-
- if (args->lstio_grp_dentsp &&
- (copy_to_user(args->lstio_grp_idxp, &index, sizeof(index)) ||
- copy_to_user(args->lstio_grp_ndentp, &ndent, sizeof(ndent))))
- return -EFAULT;
-
- return 0;
-}
-
-static int
-lst_batch_add_ioctl(struct lstio_batch_add_args *args)
-{
- int rc;
- char name[LST_NAME_SIZE + 1];
-
- if (args->lstio_bat_key != console_session.ses_key)
- return -EACCES;
-
- if (!args->lstio_bat_namep ||
- args->lstio_bat_nmlen <= 0 ||
- args->lstio_bat_nmlen > LST_NAME_SIZE)
- return -EINVAL;
-
- if (copy_from_user(name, args->lstio_bat_namep,
- args->lstio_bat_nmlen))
- return -EFAULT;
-
- name[args->lstio_bat_nmlen] = 0;
-
- rc = lstcon_batch_add(name);
-
- return rc;
-}
-
-static int
-lst_batch_run_ioctl(struct lstio_batch_run_args *args)
-{
- int rc;
- char name[LST_NAME_SIZE + 1];
-
- if (args->lstio_bat_key != console_session.ses_key)
- return -EACCES;
-
- if (!args->lstio_bat_namep ||
- args->lstio_bat_nmlen <= 0 ||
- args->lstio_bat_nmlen > LST_NAME_SIZE)
- return -EINVAL;
-
- if (copy_from_user(name, args->lstio_bat_namep,
- args->lstio_bat_nmlen))
- return -EFAULT;
-
- name[args->lstio_bat_nmlen] = 0;
-
- rc = lstcon_batch_run(name, args->lstio_bat_timeout,
- args->lstio_bat_resultp);
-
- return rc;
-}
-
-static int
-lst_batch_stop_ioctl(struct lstio_batch_stop_args *args)
-{
- int rc;
- char name[LST_NAME_SIZE + 1];
-
- if (args->lstio_bat_key != console_session.ses_key)
- return -EACCES;
-
- if (!args->lstio_bat_resultp ||
- !args->lstio_bat_namep ||
- args->lstio_bat_nmlen <= 0 ||
- args->lstio_bat_nmlen > LST_NAME_SIZE)
- return -EINVAL;
-
- if (copy_from_user(name, args->lstio_bat_namep,
- args->lstio_bat_nmlen))
- return -EFAULT;
-
- name[args->lstio_bat_nmlen] = 0;
-
- rc = lstcon_batch_stop(name, args->lstio_bat_force,
- args->lstio_bat_resultp);
-
- return rc;
-}
-
-static int
-lst_batch_query_ioctl(struct lstio_batch_query_args *args)
-{
- char name[LST_NAME_SIZE + 1];
- int rc;
-
- if (args->lstio_bat_key != console_session.ses_key)
- return -EACCES;
-
- if (!args->lstio_bat_resultp ||
- !args->lstio_bat_namep ||
- args->lstio_bat_nmlen <= 0 ||
- args->lstio_bat_nmlen > LST_NAME_SIZE)
- return -EINVAL;
-
- if (args->lstio_bat_testidx < 0)
- return -EINVAL;
-
- if (copy_from_user(name, args->lstio_bat_namep,
- args->lstio_bat_nmlen))
- return -EFAULT;
-
- name[args->lstio_bat_nmlen] = 0;
-
- rc = lstcon_test_batch_query(name,
- args->lstio_bat_testidx,
- args->lstio_bat_client,
- args->lstio_bat_timeout,
- args->lstio_bat_resultp);
-
- return rc;
-}
-
-static int
-lst_batch_list_ioctl(struct lstio_batch_list_args *args)
-{
- if (args->lstio_bat_key != console_session.ses_key)
- return -EACCES;
-
- if (args->lstio_bat_idx < 0 ||
- !args->lstio_bat_namep ||
- args->lstio_bat_nmlen <= 0 ||
- args->lstio_bat_nmlen > LST_NAME_SIZE)
- return -EINVAL;
-
- return lstcon_batch_list(args->lstio_bat_idx,
- args->lstio_bat_nmlen,
- args->lstio_bat_namep);
-}
-
-static int
-lst_batch_info_ioctl(struct lstio_batch_info_args *args)
-{
- char name[LST_NAME_SIZE + 1];
- int rc;
- int index;
- int ndent;
-
- if (args->lstio_bat_key != console_session.ses_key)
- return -EACCES;
-
- if (!args->lstio_bat_namep || /* batch name */
- args->lstio_bat_nmlen <= 0 ||
- args->lstio_bat_nmlen > LST_NAME_SIZE)
- return -EINVAL;
-
- if (!args->lstio_bat_entp && /* output: batch entry */
- !args->lstio_bat_dentsp) /* output: node entry */
- return -EINVAL;
-
- if (args->lstio_bat_dentsp) { /* have node entry */
- if (!args->lstio_bat_idxp || /* node index */
- !args->lstio_bat_ndentp) /* # of node entry */
- return -EINVAL;
-
- if (copy_from_user(&index, args->lstio_bat_idxp,
- sizeof(index)) ||
- copy_from_user(&ndent, args->lstio_bat_ndentp,
- sizeof(ndent)))
- return -EFAULT;
-
- if (ndent <= 0 || index < 0)
- return -EINVAL;
- }
-
- if (copy_from_user(name, args->lstio_bat_namep,
- args->lstio_bat_nmlen))
- return -EFAULT;
-
- name[args->lstio_bat_nmlen] = 0;
-
- rc = lstcon_batch_info(name, args->lstio_bat_entp,
- args->lstio_bat_server, args->lstio_bat_testidx,
- &index, &ndent, args->lstio_bat_dentsp);
-
- if (rc)
- return rc;
-
- if (args->lstio_bat_dentsp &&
- (copy_to_user(args->lstio_bat_idxp, &index, sizeof(index)) ||
- copy_to_user(args->lstio_bat_ndentp, &ndent, sizeof(ndent))))
- rc = -EFAULT;
-
- return rc;
-}
-
-static int
-lst_stat_query_ioctl(struct lstio_stat_args *args)
-{
- int rc;
- char name[LST_NAME_SIZE + 1];
-
- /* TODO: not finished */
- if (args->lstio_sta_key != console_session.ses_key)
- return -EACCES;
-
- if (!args->lstio_sta_resultp)
- return -EINVAL;
-
- if (args->lstio_sta_idsp) {
- if (args->lstio_sta_count <= 0)
- return -EINVAL;
-
- rc = lstcon_nodes_stat(args->lstio_sta_count,
- args->lstio_sta_idsp,
- args->lstio_sta_timeout,
- args->lstio_sta_resultp);
- } else if (args->lstio_sta_namep) {
- if (args->lstio_sta_nmlen <= 0 ||
- args->lstio_sta_nmlen > LST_NAME_SIZE)
- return -EINVAL;
-
- rc = copy_from_user(name, args->lstio_sta_namep,
- args->lstio_sta_nmlen);
- if (!rc)
- rc = lstcon_group_stat(name, args->lstio_sta_timeout,
- args->lstio_sta_resultp);
- else
- rc = -EFAULT;
- } else {
- rc = -EINVAL;
- }
-
- return rc;
-}
-
-static int lst_test_add_ioctl(struct lstio_test_args *args)
-{
- char batch_name[LST_NAME_SIZE + 1];
- char src_name[LST_NAME_SIZE + 1];
- char dst_name[LST_NAME_SIZE + 1];
- void *param = NULL;
- int ret = 0;
- int rc = -ENOMEM;
-
- if (!args->lstio_tes_resultp ||
- !args->lstio_tes_retp ||
- !args->lstio_tes_bat_name || /* no specified batch */
- args->lstio_tes_bat_nmlen <= 0 ||
- args->lstio_tes_bat_nmlen > LST_NAME_SIZE ||
- !args->lstio_tes_sgrp_name || /* no source group */
- args->lstio_tes_sgrp_nmlen <= 0 ||
- args->lstio_tes_sgrp_nmlen > LST_NAME_SIZE ||
- !args->lstio_tes_dgrp_name || /* no target group */
- args->lstio_tes_dgrp_nmlen <= 0 ||
- args->lstio_tes_dgrp_nmlen > LST_NAME_SIZE)
- return -EINVAL;
-
- if (!args->lstio_tes_loop || /* negative is infinite */
- args->lstio_tes_concur <= 0 ||
- args->lstio_tes_dist <= 0 ||
- args->lstio_tes_span <= 0)
- return -EINVAL;
-
- /* have parameter, check if parameter length is valid */
- if (args->lstio_tes_param &&
- (args->lstio_tes_param_len <= 0 ||
- args->lstio_tes_param_len >
- PAGE_SIZE - sizeof(struct lstcon_test)))
- return -EINVAL;
-
- /* Enforce zero parameter length if there's no parameter */
- if (!args->lstio_tes_param && args->lstio_tes_param_len)
- return -EINVAL;
-
- if (args->lstio_tes_param) {
- param = memdup_user(args->lstio_tes_param,
- args->lstio_tes_param_len);
- if (IS_ERR(param))
- return PTR_ERR(param);
- }
-
- rc = -EFAULT;
- if (copy_from_user(batch_name, args->lstio_tes_bat_name,
- args->lstio_tes_bat_nmlen) ||
- copy_from_user(src_name, args->lstio_tes_sgrp_name,
- args->lstio_tes_sgrp_nmlen) ||
- copy_from_user(dst_name, args->lstio_tes_dgrp_name,
- args->lstio_tes_dgrp_nmlen))
- goto out;
-
- rc = lstcon_test_add(batch_name, args->lstio_tes_type,
- args->lstio_tes_loop, args->lstio_tes_concur,
- args->lstio_tes_dist, args->lstio_tes_span,
- src_name, dst_name, param,
- args->lstio_tes_param_len,
- &ret, args->lstio_tes_resultp);
-
- if (!rc && ret)
- rc = (copy_to_user(args->lstio_tes_retp, &ret,
- sizeof(ret))) ? -EFAULT : 0;
-out:
- kfree(param);
-
- return rc;
-}
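memdup_user() above is the kernel's allocate-and-copy helper: on failure it
returns an error pointer (-ENOMEM or -EFAULT), never NULL, so the result
must be tested with IS_ERR()/PTR_ERR() as done here. The pattern in
isolation (a fragment; user_ptr and len are placeholders):

	void *param;

	param = memdup_user(user_ptr, len);	/* kmalloc() + copy_from_user() */
	if (IS_ERR(param))
		return PTR_ERR(param);		/* -ENOMEM or -EFAULT */

	/* ... use param ... */
	kfree(param);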
-
-int
-lstcon_ioctl_entry(unsigned int cmd, struct libcfs_ioctl_hdr *hdr)
-{
- char *buf;
- struct libcfs_ioctl_data *data;
- int opc;
- int rc;
-
- if (cmd != IOC_LIBCFS_LNETST)
- return -EINVAL;
-
- data = container_of(hdr, struct libcfs_ioctl_data, ioc_hdr);
-
- opc = data->ioc_u32[0];
-
- if (data->ioc_plen1 > PAGE_SIZE)
- return -EINVAL;
-
- buf = kmalloc(data->ioc_plen1, GFP_KERNEL);
- if (!buf)
- return -ENOMEM;
-
- /* copy in parameter */
- if (copy_from_user(buf, data->ioc_pbuf1, data->ioc_plen1)) {
- kfree(buf);
- return -EFAULT;
- }
-
- mutex_lock(&console_session.ses_mutex);
-
- console_session.ses_laststamp = ktime_get_real_seconds();
-
- if (console_session.ses_shutdown) {
- rc = -ESHUTDOWN;
- goto out;
- }
-
- if (console_session.ses_expired)
- lstcon_session_end();
-
- if (opc != LSTIO_SESSION_NEW &&
- console_session.ses_state == LST_SESSION_NONE) {
- CDEBUG(D_NET, "LST no active session\n");
- rc = -ESRCH;
- goto out;
- }
-
- memset(&console_session.ses_trans_stat, 0, sizeof(struct lstcon_trans_stat));
-
- switch (opc) {
- case LSTIO_SESSION_NEW:
- rc = lst_session_new_ioctl((struct lstio_session_new_args *)buf);
- break;
- case LSTIO_SESSION_END:
- rc = lst_session_end_ioctl((struct lstio_session_end_args *)buf);
- break;
- case LSTIO_SESSION_INFO:
- rc = lst_session_info_ioctl((struct lstio_session_info_args *)buf);
- break;
- case LSTIO_DEBUG:
- rc = lst_debug_ioctl((struct lstio_debug_args *)buf);
- break;
- case LSTIO_GROUP_ADD:
- rc = lst_group_add_ioctl((struct lstio_group_add_args *)buf);
- break;
- case LSTIO_GROUP_DEL:
- rc = lst_group_del_ioctl((struct lstio_group_del_args *)buf);
- break;
- case LSTIO_GROUP_UPDATE:
- rc = lst_group_update_ioctl((struct lstio_group_update_args *)buf);
- break;
- case LSTIO_NODES_ADD:
- rc = lst_nodes_add_ioctl((struct lstio_group_nodes_args *)buf);
- break;
- case LSTIO_GROUP_LIST:
- rc = lst_group_list_ioctl((struct lstio_group_list_args *)buf);
- break;
- case LSTIO_GROUP_INFO:
- rc = lst_group_info_ioctl((struct lstio_group_info_args *)buf);
- break;
- case LSTIO_BATCH_ADD:
- rc = lst_batch_add_ioctl((struct lstio_batch_add_args *)buf);
- break;
- case LSTIO_BATCH_START:
- rc = lst_batch_run_ioctl((struct lstio_batch_run_args *)buf);
- break;
- case LSTIO_BATCH_STOP:
- rc = lst_batch_stop_ioctl((struct lstio_batch_stop_args *)buf);
- break;
- case LSTIO_BATCH_QUERY:
- rc = lst_batch_query_ioctl((struct lstio_batch_query_args *)buf);
- break;
- case LSTIO_BATCH_LIST:
- rc = lst_batch_list_ioctl((struct lstio_batch_list_args *)buf);
- break;
- case LSTIO_BATCH_INFO:
- rc = lst_batch_info_ioctl((struct lstio_batch_info_args *)buf);
- break;
- case LSTIO_TEST_ADD:
- rc = lst_test_add_ioctl((struct lstio_test_args *)buf);
- break;
- case LSTIO_STAT_QUERY:
- rc = lst_stat_query_ioctl((struct lstio_stat_args *)buf);
- break;
- default:
- rc = -EINVAL;
- }
-
- if (copy_to_user(data->ioc_pbuf2, &console_session.ses_trans_stat,
- sizeof(struct lstcon_trans_stat)))
- rc = -EFAULT;
-out:
- mutex_unlock(&console_session.ses_mutex);
-
- kfree(buf);
-
- return rc;
-}
diff --git a/drivers/staging/lustre/lnet/selftest/conrpc.c b/drivers/staging/lustre/lnet/selftest/conrpc.c
deleted file mode 100644
index 6dcc966b293b..000000000000
--- a/drivers/staging/lustre/lnet/selftest/conrpc.c
+++ /dev/null
@@ -1,1397 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/*
- * GPL HEADER START
- *
- * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 only,
- * as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * General Public License version 2 for more details (a copy is included
- * in the LICENSE file that accompanied this code).
- *
- * You should have received a copy of the GNU General Public License
- * version 2 along with this program; If not, see
- * http://www.gnu.org/licenses/gpl-2.0.html
- *
- * GPL HEADER END
- */
-/*
- * Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved.
- * Use is subject to license terms.
- *
- * Copyright (c) 2011, 2012, Intel Corporation.
- */
-/*
- * This file is part of Lustre, http://www.lustre.org/
- * Lustre is a trademark of Sun Microsystems, Inc.
- *
- * lnet/selftest/conrpc.c
- *
- * Console framework RPCs
- *
- * Author: Liang Zhen <liang@whamcloud.com>
- */
-
-#include <linux/libcfs/libcfs.h>
-#include <linux/lnet/lib-lnet.h>
-#include "timer.h"
-#include "conrpc.h"
-#include "console.h"
-
-void lstcon_rpc_stat_reply(struct lstcon_rpc_trans *, struct srpc_msg *,
- struct lstcon_node *, struct lstcon_trans_stat *);
-
-static void
-lstcon_rpc_done(struct srpc_client_rpc *rpc)
-{
- struct lstcon_rpc *crpc = (struct lstcon_rpc *)rpc->crpc_priv;
-
- LASSERT(crpc && rpc == crpc->crp_rpc);
- LASSERT(crpc->crp_posted && !crpc->crp_finished);
-
- spin_lock(&rpc->crpc_lock);
-
- if (!crpc->crp_trans) {
-		/*
-		 * An orphan RPC is not attached to any transaction,
-		 * so there is no waiter to wake up.
-		 */
- spin_unlock(&rpc->crpc_lock);
-
- /* release it */
- lstcon_rpc_put(crpc);
- return;
- }
-
- /* not an orphan RPC */
- crpc->crp_finished = 1;
-
- if (!crpc->crp_stamp) {
- /* not aborted */
- LASSERT(!crpc->crp_status);
-
- crpc->crp_stamp = cfs_time_current();
- crpc->crp_status = rpc->crpc_status;
- }
-
-	/* wake up the transaction thread if this is the last RPC in it */
- if (atomic_dec_and_test(&crpc->crp_trans->tas_remaining))
- wake_up(&crpc->crp_trans->tas_waitq);
-
- spin_unlock(&rpc->crpc_lock);
-}
-
-static int
-lstcon_rpc_init(struct lstcon_node *nd, int service, unsigned int feats,
- int bulk_npg, int bulk_len, int embedded,
- struct lstcon_rpc *crpc)
-{
- crpc->crp_rpc = sfw_create_rpc(nd->nd_id, service,
- feats, bulk_npg, bulk_len,
- lstcon_rpc_done, (void *)crpc);
- if (!crpc->crp_rpc)
- return -ENOMEM;
-
- crpc->crp_trans = NULL;
- crpc->crp_node = nd;
- crpc->crp_posted = 0;
- crpc->crp_finished = 0;
- crpc->crp_unpacked = 0;
- crpc->crp_status = 0;
- crpc->crp_stamp = 0;
- crpc->crp_embedded = embedded;
- INIT_LIST_HEAD(&crpc->crp_link);
-
- atomic_inc(&console_session.ses_rpc_counter);
-
- return 0;
-}
-
-static int
-lstcon_rpc_prep(struct lstcon_node *nd, int service, unsigned int feats,
- int bulk_npg, int bulk_len, struct lstcon_rpc **crpcpp)
-{
- struct lstcon_rpc *crpc = NULL;
- int rc;
-
- spin_lock(&console_session.ses_rpc_lock);
-
- crpc = list_first_entry_or_null(&console_session.ses_rpc_freelist,
- struct lstcon_rpc, crp_link);
- if (crpc)
- list_del_init(&crpc->crp_link);
-
- spin_unlock(&console_session.ses_rpc_lock);
-
- if (!crpc) {
- crpc = kzalloc(sizeof(*crpc), GFP_NOFS);
- if (!crpc)
- return -ENOMEM;
- }
-
- rc = lstcon_rpc_init(nd, service, feats, bulk_npg, bulk_len, 0, crpc);
- if (!rc) {
- *crpcpp = crpc;
- return 0;
- }
-
- kfree(crpc);
-
- return rc;
-}
-
-void
-lstcon_rpc_put(struct lstcon_rpc *crpc)
-{
- struct srpc_bulk *bulk = &crpc->crp_rpc->crpc_bulk;
- int i;
-
- LASSERT(list_empty(&crpc->crp_link));
-
- for (i = 0; i < bulk->bk_niov; i++) {
- if (!bulk->bk_iovs[i].bv_page)
- continue;
-
- __free_page(bulk->bk_iovs[i].bv_page);
- }
-
- srpc_client_rpc_decref(crpc->crp_rpc);
-
- if (crpc->crp_embedded) {
- /* embedded RPC, don't recycle it */
- memset(crpc, 0, sizeof(*crpc));
- crpc->crp_embedded = 1;
-
- } else {
- spin_lock(&console_session.ses_rpc_lock);
-
- list_add(&crpc->crp_link,
- &console_session.ses_rpc_freelist);
-
- spin_unlock(&console_session.ses_rpc_lock);
- }
-
- /* RPC is not alive now */
- atomic_dec(&console_session.ses_rpc_counter);
-}
-
-static void
-lstcon_rpc_post(struct lstcon_rpc *crpc)
-{
- struct lstcon_rpc_trans *trans = crpc->crp_trans;
-
- LASSERT(trans);
-
- atomic_inc(&trans->tas_remaining);
- crpc->crp_posted = 1;
-
- sfw_post_rpc(crpc->crp_rpc);
-}
-
-static char *
-lstcon_rpc_trans_name(int transop)
-{
- if (transop == LST_TRANS_SESNEW)
- return "SESNEW";
-
- if (transop == LST_TRANS_SESEND)
- return "SESEND";
-
- if (transop == LST_TRANS_SESQRY)
- return "SESQRY";
-
- if (transop == LST_TRANS_SESPING)
- return "SESPING";
-
- if (transop == LST_TRANS_TSBCLIADD)
- return "TSBCLIADD";
-
- if (transop == LST_TRANS_TSBSRVADD)
- return "TSBSRVADD";
-
- if (transop == LST_TRANS_TSBRUN)
- return "TSBRUN";
-
- if (transop == LST_TRANS_TSBSTOP)
- return "TSBSTOP";
-
- if (transop == LST_TRANS_TSBCLIQRY)
- return "TSBCLIQRY";
-
- if (transop == LST_TRANS_TSBSRVQRY)
- return "TSBSRVQRY";
-
- if (transop == LST_TRANS_STATQRY)
- return "STATQRY";
-
- return "Unknown";
-}
-
-int
-lstcon_rpc_trans_prep(struct list_head *translist, int transop,
- struct lstcon_rpc_trans **transpp)
-{
- struct lstcon_rpc_trans *trans;
-
- if (translist) {
- list_for_each_entry(trans, translist, tas_link) {
-			/*
-			 * Can't enqueue two private transactions on
-			 * the same object
-			 */
- if ((trans->tas_opc & transop) == LST_TRANS_PRIVATE)
- return -EPERM;
- }
- }
-
- /* create a trans group */
- trans = kzalloc(sizeof(*trans), GFP_NOFS);
- if (!trans)
- return -ENOMEM;
-
- trans->tas_opc = transop;
-
- if (!translist)
- INIT_LIST_HEAD(&trans->tas_olink);
- else
- list_add_tail(&trans->tas_olink, translist);
-
- list_add_tail(&trans->tas_link, &console_session.ses_trans_list);
-
- INIT_LIST_HEAD(&trans->tas_rpcs_list);
- atomic_set(&trans->tas_remaining, 0);
- init_waitqueue_head(&trans->tas_waitq);
-
- spin_lock(&console_session.ses_rpc_lock);
- trans->tas_features = console_session.ses_features;
- spin_unlock(&console_session.ses_rpc_lock);
-
- *transpp = trans;
- return 0;
-}
-
-void
-lstcon_rpc_trans_addreq(struct lstcon_rpc_trans *trans, struct lstcon_rpc *crpc)
-{
- list_add_tail(&crpc->crp_link, &trans->tas_rpcs_list);
- crpc->crp_trans = trans;
-}
-
-void
-lstcon_rpc_trans_abort(struct lstcon_rpc_trans *trans, int error)
-{
- struct srpc_client_rpc *rpc;
- struct lstcon_rpc *crpc;
- struct lstcon_node *nd;
-
- list_for_each_entry(crpc, &trans->tas_rpcs_list, crp_link) {
- rpc = crpc->crp_rpc;
-
- spin_lock(&rpc->crpc_lock);
-
- if (!crpc->crp_posted || /* not posted */
- crpc->crp_stamp) { /* rpc done or aborted already */
- if (!crpc->crp_stamp) {
- crpc->crp_stamp = cfs_time_current();
- crpc->crp_status = -EINTR;
- }
- spin_unlock(&rpc->crpc_lock);
- continue;
- }
-
- crpc->crp_stamp = cfs_time_current();
- crpc->crp_status = error;
-
- spin_unlock(&rpc->crpc_lock);
-
- sfw_abort_rpc(rpc);
-
- if (error != -ETIMEDOUT)
- continue;
-
- nd = crpc->crp_node;
- if (cfs_time_after(nd->nd_stamp, crpc->crp_stamp))
- continue;
-
- nd->nd_stamp = crpc->crp_stamp;
- nd->nd_state = LST_NODE_DOWN;
- }
-}
-
-static int
-lstcon_rpc_trans_check(struct lstcon_rpc_trans *trans)
-{
- if (console_session.ses_shutdown &&
- !list_empty(&trans->tas_olink)) /* Not an end session RPC */
- return 1;
-
- return !atomic_read(&trans->tas_remaining) ? 1 : 0;
-}
-
-int
-lstcon_rpc_trans_postwait(struct lstcon_rpc_trans *trans, int timeout)
-{
- struct lstcon_rpc *crpc;
- int rc;
-
- if (list_empty(&trans->tas_rpcs_list))
- return 0;
-
- if (timeout < LST_TRANS_MIN_TIMEOUT)
- timeout = LST_TRANS_MIN_TIMEOUT;
-
- CDEBUG(D_NET, "Transaction %s started\n",
- lstcon_rpc_trans_name(trans->tas_opc));
-
- /* post all requests */
- list_for_each_entry(crpc, &trans->tas_rpcs_list, crp_link) {
- LASSERT(!crpc->crp_posted);
-
- lstcon_rpc_post(crpc);
- }
-
- mutex_unlock(&console_session.ses_mutex);
-
- rc = wait_event_interruptible_timeout(trans->tas_waitq,
- lstcon_rpc_trans_check(trans),
- timeout * HZ);
- rc = (rc > 0) ? 0 : ((rc < 0) ? -EINTR : -ETIMEDOUT);
-
- mutex_lock(&console_session.ses_mutex);
-
- if (console_session.ses_shutdown)
- rc = -ESHUTDOWN;
-
- if (rc || atomic_read(&trans->tas_remaining)) {
- /* treat short timeout as canceled */
- if (rc == -ETIMEDOUT && timeout < LST_TRANS_MIN_TIMEOUT * 2)
- rc = -EINTR;
-
- lstcon_rpc_trans_abort(trans, rc);
- }
-
- CDEBUG(D_NET, "Transaction %s stopped: %d\n",
- lstcon_rpc_trans_name(trans->tas_opc), rc);
-
- lstcon_rpc_trans_stat(trans, lstcon_trans_stat());
-
- return rc;
-}
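The rc folding above relies on the contract of
wait_event_interruptible_timeout(): it returns the remaining time (positive)
when the condition became true, 0 if the timeout expired first, and a
negative value if a signal arrived. Restated on its own:

#include <errno.h>

static int fold_wait_rc(long rc)
{
	if (rc > 0)
		return 0;		/* condition met before the timeout */
	if (rc < 0)
		return -EINTR;		/* interrupted by a signal */
	return -ETIMEDOUT;		/* timer expired first */
}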
-
-static int
-lstcon_rpc_get_reply(struct lstcon_rpc *crpc, struct srpc_msg **msgpp)
-{
- struct lstcon_node *nd = crpc->crp_node;
- struct srpc_client_rpc *rpc = crpc->crp_rpc;
- struct srpc_generic_reply *rep;
-
- LASSERT(nd && rpc);
- LASSERT(crpc->crp_stamp);
-
- if (crpc->crp_status) {
- *msgpp = NULL;
- return crpc->crp_status;
- }
-
- *msgpp = &rpc->crpc_replymsg;
- if (!crpc->crp_unpacked) {
- sfw_unpack_message(*msgpp);
- crpc->crp_unpacked = 1;
- }
-
- if (cfs_time_after(nd->nd_stamp, crpc->crp_stamp))
- return 0;
-
- nd->nd_stamp = crpc->crp_stamp;
- rep = &(*msgpp)->msg_body.reply;
-
- if (rep->sid.ses_nid == LNET_NID_ANY)
- nd->nd_state = LST_NODE_UNKNOWN;
- else if (lstcon_session_match(rep->sid))
- nd->nd_state = LST_NODE_ACTIVE;
- else
- nd->nd_state = LST_NODE_BUSY;
-
- return 0;
-}
-
-void
-lstcon_rpc_trans_stat(struct lstcon_rpc_trans *trans, struct lstcon_trans_stat *stat)
-{
- struct lstcon_rpc *crpc;
- struct srpc_msg *rep;
- int error;
-
- LASSERT(stat);
-
- memset(stat, 0, sizeof(*stat));
-
- list_for_each_entry(crpc, &trans->tas_rpcs_list, crp_link) {
- lstcon_rpc_stat_total(stat, 1);
-
- LASSERT(crpc->crp_stamp);
-
- error = lstcon_rpc_get_reply(crpc, &rep);
- if (error) {
- lstcon_rpc_stat_failure(stat, 1);
- if (!stat->trs_rpc_errno)
- stat->trs_rpc_errno = -error;
-
- continue;
- }
-
- lstcon_rpc_stat_success(stat, 1);
-
- lstcon_rpc_stat_reply(trans, rep, crpc->crp_node, stat);
- }
-
- if (trans->tas_opc == LST_TRANS_SESNEW && !stat->trs_fwk_errno) {
- stat->trs_fwk_errno =
- lstcon_session_feats_check(trans->tas_features);
- }
-
- CDEBUG(D_NET, "transaction %s : success %d, failure %d, total %d, RPC error(%d), Framework error(%d)\n",
- lstcon_rpc_trans_name(trans->tas_opc),
- lstcon_rpc_stat_success(stat, 0),
- lstcon_rpc_stat_failure(stat, 0),
- lstcon_rpc_stat_total(stat, 0),
- stat->trs_rpc_errno, stat->trs_fwk_errno);
-}
-
-int
-lstcon_rpc_trans_interpreter(struct lstcon_rpc_trans *trans,
- struct list_head __user *head_up,
- lstcon_rpc_readent_func_t readent)
-{
- struct list_head tmp;
- struct list_head __user *next;
- struct lstcon_rpc_ent *ent;
- struct srpc_generic_reply *rep;
- struct lstcon_rpc *crpc;
- struct srpc_msg *msg;
- struct lstcon_node *nd;
- long dur;
- struct timeval tv;
- int error;
-
- LASSERT(head_up);
-
- next = head_up;
-
- list_for_each_entry(crpc, &trans->tas_rpcs_list, crp_link) {
- if (copy_from_user(&tmp, next,
- sizeof(struct list_head)))
- return -EFAULT;
-
- next = tmp.next;
- if (next == head_up)
- return 0;
-
- ent = list_entry(next, struct lstcon_rpc_ent, rpe_link);
-
- LASSERT(crpc->crp_stamp);
-
- error = lstcon_rpc_get_reply(crpc, &msg);
-
- nd = crpc->crp_node;
-
- dur = (long)cfs_time_sub(crpc->crp_stamp,
- (unsigned long)console_session.ses_id.ses_stamp);
- jiffies_to_timeval(dur, &tv);
-
- if (copy_to_user(&ent->rpe_peer, &nd->nd_id,
- sizeof(struct lnet_process_id)) ||
- copy_to_user(&ent->rpe_stamp, &tv, sizeof(tv)) ||
- copy_to_user(&ent->rpe_state, &nd->nd_state,
- sizeof(nd->nd_state)) ||
- copy_to_user(&ent->rpe_rpc_errno, &error,
- sizeof(error)))
- return -EFAULT;
-
- if (error)
- continue;
-
- /* RPC is done */
- rep = (struct srpc_generic_reply *)&msg->msg_body.reply;
-
- if (copy_to_user(&ent->rpe_sid, &rep->sid, sizeof(rep->sid)) ||
- copy_to_user(&ent->rpe_fwk_errno, &rep->status,
- sizeof(rep->status)))
- return -EFAULT;
-
- if (!readent)
- continue;
-
- error = readent(trans->tas_opc, msg, ent);
- if (error)
- return error;
- }
-
- return 0;
-}
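The traversal above is unusual in that the rpe_link list lives in user
memory: each next pointer must itself be fetched with copy_from_user()
before the containing lstcon_rpc_ent can be located, and every field is
then written back with copy_to_user(). The skeleton of the walk, stripped
of the payload copies (a sketch of the loop above):

	struct list_head tmp;
	struct list_head __user *next = head_up;

	for (;;) {
		if (copy_from_user(&tmp, next, sizeof(tmp)))
			return -EFAULT;

		next = tmp.next;	/* still a user-space pointer */
		if (next == head_up)
			break;		/* circular list: back at the head */

		/*
		 * list_entry(next, ...) is only pointer arithmetic, so the
		 * resulting entry pointer stays a user pointer and every
		 * access must go through copy_to_user()/copy_from_user().
		 */
	}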
-
-void
-lstcon_rpc_trans_destroy(struct lstcon_rpc_trans *trans)
-{
- struct srpc_client_rpc *rpc;
- struct lstcon_rpc *crpc;
- struct lstcon_rpc *tmp;
- int count = 0;
-
- list_for_each_entry_safe(crpc, tmp, &trans->tas_rpcs_list, crp_link) {
- rpc = crpc->crp_rpc;
-
- spin_lock(&rpc->crpc_lock);
-
- /* free it if not posted or finished already */
- if (!crpc->crp_posted || crpc->crp_finished) {
- spin_unlock(&rpc->crpc_lock);
-
- list_del_init(&crpc->crp_link);
- lstcon_rpc_put(crpc);
-
- continue;
- }
-
-		/*
-		 * RPCs may still be awaiting their callbacks (even after
-		 * LNetMDUnlink is called) because of the huge timeout on an
-		 * inaccessible network; don't make the user wait for them,
-		 * just abandon them and let the callbacks recycle them.
-		 */
- LASSERT(crpc->crp_status);
-
- crpc->crp_node = NULL;
- crpc->crp_trans = NULL;
- list_del_init(&crpc->crp_link);
- count++;
-
- spin_unlock(&rpc->crpc_lock);
-
- atomic_dec(&trans->tas_remaining);
- }
-
- LASSERT(!atomic_read(&trans->tas_remaining));
-
- list_del(&trans->tas_link);
- if (!list_empty(&trans->tas_olink))
- list_del(&trans->tas_olink);
-
- CDEBUG(D_NET, "Transaction %s destroyed with %d pending RPCs\n",
- lstcon_rpc_trans_name(trans->tas_opc), count);
-
- kfree(trans);
-}
-
-int
-lstcon_sesrpc_prep(struct lstcon_node *nd, int transop,
- unsigned int feats, struct lstcon_rpc **crpc)
-{
- struct srpc_mksn_reqst *msrq;
- struct srpc_rmsn_reqst *rsrq;
- int rc;
-
- switch (transop) {
- case LST_TRANS_SESNEW:
- rc = lstcon_rpc_prep(nd, SRPC_SERVICE_MAKE_SESSION,
- feats, 0, 0, crpc);
- if (rc)
- return rc;
-
- msrq = &(*crpc)->crp_rpc->crpc_reqstmsg.msg_body.mksn_reqst;
- msrq->mksn_sid = console_session.ses_id;
- msrq->mksn_force = console_session.ses_force;
- strlcpy(msrq->mksn_name, console_session.ses_name,
- sizeof(msrq->mksn_name));
- break;
-
- case LST_TRANS_SESEND:
- rc = lstcon_rpc_prep(nd, SRPC_SERVICE_REMOVE_SESSION,
- feats, 0, 0, crpc);
- if (rc)
- return rc;
-
- rsrq = &(*crpc)->crp_rpc->crpc_reqstmsg.msg_body.rmsn_reqst;
- rsrq->rmsn_sid = console_session.ses_id;
- break;
-
- default:
- LBUG();
- }
-
- return 0;
-}
-
-int
-lstcon_dbgrpc_prep(struct lstcon_node *nd, unsigned int feats,
- struct lstcon_rpc **crpc)
-{
- struct srpc_debug_reqst *drq;
- int rc;
-
- rc = lstcon_rpc_prep(nd, SRPC_SERVICE_DEBUG, feats, 0, 0, crpc);
- if (rc)
- return rc;
-
- drq = &(*crpc)->crp_rpc->crpc_reqstmsg.msg_body.dbg_reqst;
-
- drq->dbg_sid = console_session.ses_id;
- drq->dbg_flags = 0;
-
- return rc;
-}
-
-int
-lstcon_batrpc_prep(struct lstcon_node *nd, int transop, unsigned int feats,
- struct lstcon_tsb_hdr *tsb, struct lstcon_rpc **crpc)
-{
- struct lstcon_batch *batch;
- struct srpc_batch_reqst *brq;
- int rc;
-
- rc = lstcon_rpc_prep(nd, SRPC_SERVICE_BATCH, feats, 0, 0, crpc);
- if (rc)
- return rc;
-
- brq = &(*crpc)->crp_rpc->crpc_reqstmsg.msg_body.bat_reqst;
-
- brq->bar_sid = console_session.ses_id;
- brq->bar_bid = tsb->tsb_id;
- brq->bar_testidx = tsb->tsb_index;
- brq->bar_opc = transop == LST_TRANS_TSBRUN ? SRPC_BATCH_OPC_RUN :
- (transop == LST_TRANS_TSBSTOP ? SRPC_BATCH_OPC_STOP :
- SRPC_BATCH_OPC_QUERY);
-
- if (transop != LST_TRANS_TSBRUN &&
- transop != LST_TRANS_TSBSTOP)
- return 0;
-
- LASSERT(!tsb->tsb_index);
-
- batch = (struct lstcon_batch *)tsb;
- brq->bar_arg = batch->bat_arg;
-
- return 0;
-}
-
-int
-lstcon_statrpc_prep(struct lstcon_node *nd, unsigned int feats,
- struct lstcon_rpc **crpc)
-{
- struct srpc_stat_reqst *srq;
- int rc;
-
- rc = lstcon_rpc_prep(nd, SRPC_SERVICE_QUERY_STAT, feats, 0, 0, crpc);
- if (rc)
- return rc;
-
- srq = &(*crpc)->crp_rpc->crpc_reqstmsg.msg_body.stat_reqst;
-
- srq->str_sid = console_session.ses_id;
- srq->str_type = 0; /* XXX remove it */
-
- return 0;
-}
-
-static struct lnet_process_id_packed *
-lstcon_next_id(int idx, int nkiov, struct bio_vec *kiov)
-{
- struct lnet_process_id_packed *pid;
- int i;
-
- i = idx / SFW_ID_PER_PAGE;
-
- LASSERT(i < nkiov);
-
- pid = (struct lnet_process_id_packed *)page_address(kiov[i].bv_page);
-
- return &pid[idx % SFW_ID_PER_PAGE];
-}
-
-static int
-lstcon_dstnodes_prep(struct lstcon_group *grp, int idx,
- int dist, int span, int nkiov, struct bio_vec *kiov)
-{
- struct lnet_process_id_packed *pid;
- struct lstcon_ndlink *ndl;
- struct lstcon_node *nd;
- int start;
- int end;
- int i = 0;
-
- LASSERT(dist >= 1);
- LASSERT(span >= 1);
- LASSERT(grp->grp_nnode >= 1);
-
- if (span > grp->grp_nnode)
- return -EINVAL;
-
- start = ((idx / dist) * span) % grp->grp_nnode;
- end = ((idx / dist) * span + span - 1) % grp->grp_nnode;
-
- list_for_each_entry(ndl, &grp->grp_ndl_list, ndl_link) {
- nd = ndl->ndl_node;
- if (i < start) {
- i++;
- continue;
- }
-
- if (i > (end >= start ? end : grp->grp_nnode))
- break;
-
- pid = lstcon_next_id((i - start), nkiov, kiov);
- pid->nid = nd->nd_id.nid;
- pid->pid = nd->nd_id.pid;
- i++;
- }
-
- if (start <= end) /* done */
- return 0;
-
- list_for_each_entry(ndl, &grp->grp_ndl_list, ndl_link) {
- if (i > grp->grp_nnode + end)
- break;
-
- nd = ndl->ndl_node;
- pid = lstcon_next_id((i - start), nkiov, kiov);
- pid->nid = nd->nd_id.nid;
- pid->pid = nd->nd_id.pid;
- i++;
- }
-
- return 0;
-}
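The dist/span arithmetic maps every group of dist client indices onto a
window of span destination nodes, wrapping modulo the group size; the
second list pass above handles exactly the wrapped case (start > end).
A standalone illustration of the window computation:

#include <stdio.h>

static void dst_window(int idx, int dist, int span, int nnode)
{
	int start = ((idx / dist) * span) % nnode;
	int end = ((idx / dist) * span + span - 1) % nnode;

	printf("client %d -> nodes [%d..%d]%s\n",
	       idx, start, end, start <= end ? "" : " (wraps)");
}

int main(void)
{
	/* 6 destination nodes, dist = 1, span = 4 */
	dst_window(0, 1, 4, 6);		/* [0..3] */
	dst_window(1, 1, 4, 6);		/* [4..1], wraps around */
	return 0;
}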
-
-static int
-lstcon_pingrpc_prep(struct lst_test_ping_param *param, struct srpc_test_reqst *req)
-{
- struct test_ping_req *prq = &req->tsr_u.ping;
-
- prq->png_size = param->png_size;
- prq->png_flags = param->png_flags;
- /* TODO dest */
- return 0;
-}
-
-static int
-lstcon_bulkrpc_v0_prep(struct lst_test_bulk_param *param,
- struct srpc_test_reqst *req)
-{
- struct test_bulk_req *brq = &req->tsr_u.bulk_v0;
-
- brq->blk_opc = param->blk_opc;
- brq->blk_npg = DIV_ROUND_UP(param->blk_size, PAGE_SIZE);
- brq->blk_flags = param->blk_flags;
-
- return 0;
-}
-
-static int
-lstcon_bulkrpc_v1_prep(struct lst_test_bulk_param *param, bool is_client,
- struct srpc_test_reqst *req)
-{
- struct test_bulk_req_v1 *brq = &req->tsr_u.bulk_v1;
-
- brq->blk_opc = param->blk_opc;
- brq->blk_flags = param->blk_flags;
- brq->blk_len = param->blk_size;
- brq->blk_offset = is_client ? param->blk_cli_off : param->blk_srv_off;
-
- return 0;
-}
-
-int
-lstcon_testrpc_prep(struct lstcon_node *nd, int transop, unsigned int feats,
- struct lstcon_test *test, struct lstcon_rpc **crpc)
-{
- struct lstcon_group *sgrp = test->tes_src_grp;
- struct lstcon_group *dgrp = test->tes_dst_grp;
- struct srpc_test_reqst *trq;
- struct srpc_bulk *bulk;
- int i;
- int npg = 0;
- int nob = 0;
- int rc = 0;
-
- if (transop == LST_TRANS_TSBCLIADD) {
- npg = sfw_id_pages(test->tes_span);
- nob = !(feats & LST_FEAT_BULK_LEN) ?
- npg * PAGE_SIZE :
- sizeof(struct lnet_process_id_packed) * test->tes_span;
- }
-
- rc = lstcon_rpc_prep(nd, SRPC_SERVICE_TEST, feats, npg, nob, crpc);
- if (rc)
- return rc;
-
- trq = &(*crpc)->crp_rpc->crpc_reqstmsg.msg_body.tes_reqst;
-
- if (transop == LST_TRANS_TSBSRVADD) {
- int ndist = DIV_ROUND_UP(sgrp->grp_nnode, test->tes_dist);
- int nspan = DIV_ROUND_UP(dgrp->grp_nnode, test->tes_span);
- int nmax = DIV_ROUND_UP(ndist, nspan);
-
- trq->tsr_ndest = 0;
- trq->tsr_loop = nmax * test->tes_dist * test->tes_concur;
- } else {
- bulk = &(*crpc)->crp_rpc->crpc_bulk;
-
- for (i = 0; i < npg; i++) {
- int len;
-
- LASSERT(nob > 0);
-
- len = !(feats & LST_FEAT_BULK_LEN) ?
- PAGE_SIZE :
- min_t(int, nob, PAGE_SIZE);
- nob -= len;
-
- bulk->bk_iovs[i].bv_offset = 0;
- bulk->bk_iovs[i].bv_len = len;
- bulk->bk_iovs[i].bv_page = alloc_page(GFP_KERNEL);
-
- if (!bulk->bk_iovs[i].bv_page) {
- lstcon_rpc_put(*crpc);
- return -ENOMEM;
- }
- }
-
- bulk->bk_sink = 0;
-
- LASSERT(transop == LST_TRANS_TSBCLIADD);
-
- rc = lstcon_dstnodes_prep(test->tes_dst_grp,
- test->tes_cliidx++,
- test->tes_dist,
- test->tes_span,
- npg, &bulk->bk_iovs[0]);
- if (rc) {
- lstcon_rpc_put(*crpc);
- return rc;
- }
-
- trq->tsr_ndest = test->tes_span;
- trq->tsr_loop = test->tes_loop;
- }
-
- trq->tsr_sid = console_session.ses_id;
- trq->tsr_bid = test->tes_hdr.tsb_id;
- trq->tsr_concur = test->tes_concur;
- trq->tsr_is_client = (transop == LST_TRANS_TSBCLIADD) ? 1 : 0;
- trq->tsr_stop_onerr = !!test->tes_stop_onerr;
-
- switch (test->tes_type) {
- case LST_TEST_PING:
- trq->tsr_service = SRPC_SERVICE_PING;
- rc = lstcon_pingrpc_prep((struct lst_test_ping_param *)
- &test->tes_param[0], trq);
- break;
-
- case LST_TEST_BULK:
- trq->tsr_service = SRPC_SERVICE_BRW;
- if (!(feats & LST_FEAT_BULK_LEN)) {
- rc = lstcon_bulkrpc_v0_prep((struct lst_test_bulk_param *)
- &test->tes_param[0], trq);
- } else {
- rc = lstcon_bulkrpc_v1_prep((struct lst_test_bulk_param *)
- &test->tes_param[0],
- trq->tsr_is_client, trq);
- }
-
- break;
- default:
- LBUG();
- break;
- }
-
- return rc;
-}
-
-static int
-lstcon_sesnew_stat_reply(struct lstcon_rpc_trans *trans,
- struct lstcon_node *nd, struct srpc_msg *reply)
-{
- struct srpc_mksn_reply *mksn_rep = &reply->msg_body.mksn_reply;
- int status = mksn_rep->mksn_status;
-
- if (!status &&
- (reply->msg_ses_feats & ~LST_FEATS_MASK)) {
- mksn_rep->mksn_status = EPROTO;
- status = EPROTO;
- }
-
- if (status == EPROTO) {
- CNETERR("session protocol error from %s: %u\n",
- libcfs_nid2str(nd->nd_id.nid),
- reply->msg_ses_feats);
- }
-
- if (status)
- return status;
-
- if (!trans->tas_feats_updated) {
- spin_lock(&console_session.ses_rpc_lock);
- if (!trans->tas_feats_updated) { /* recheck with lock */
- trans->tas_feats_updated = 1;
- trans->tas_features = reply->msg_ses_feats;
- }
- spin_unlock(&console_session.ses_rpc_lock);
- }
-
- if (reply->msg_ses_feats != trans->tas_features) {
-		CNETERR("Framework features %x from %s differ from this transaction's features: %x\n",
- reply->msg_ses_feats, libcfs_nid2str(nd->nd_id.nid),
- trans->tas_features);
- mksn_rep->mksn_status = EPROTO;
- status = EPROTO;
- }
-
- if (!status) {
- /* session timeout on remote node */
- nd->nd_timeout = mksn_rep->mksn_timeout;
- }
-
- return status;
-}
-
-void
-lstcon_rpc_stat_reply(struct lstcon_rpc_trans *trans, struct srpc_msg *msg,
- struct lstcon_node *nd, struct lstcon_trans_stat *stat)
-{
- struct srpc_rmsn_reply *rmsn_rep;
- struct srpc_debug_reply *dbg_rep;
- struct srpc_batch_reply *bat_rep;
- struct srpc_test_reply *test_rep;
- struct srpc_stat_reply *stat_rep;
- int rc = 0;
-
- switch (trans->tas_opc) {
- case LST_TRANS_SESNEW:
- rc = lstcon_sesnew_stat_reply(trans, nd, msg);
- if (!rc) {
- lstcon_sesop_stat_success(stat, 1);
- return;
- }
-
- lstcon_sesop_stat_failure(stat, 1);
- break;
-
- case LST_TRANS_SESEND:
- rmsn_rep = &msg->msg_body.rmsn_reply;
- /* ESRCH is not an error for end session */
- if (!rmsn_rep->rmsn_status ||
- rmsn_rep->rmsn_status == ESRCH) {
- lstcon_sesop_stat_success(stat, 1);
- return;
- }
-
- lstcon_sesop_stat_failure(stat, 1);
- rc = rmsn_rep->rmsn_status;
- break;
-
- case LST_TRANS_SESQRY:
- case LST_TRANS_SESPING:
- dbg_rep = &msg->msg_body.dbg_reply;
-
- if (dbg_rep->dbg_status == ESRCH) {
- lstcon_sesqry_stat_unknown(stat, 1);
- return;
- }
-
- if (lstcon_session_match(dbg_rep->dbg_sid))
- lstcon_sesqry_stat_active(stat, 1);
- else
- lstcon_sesqry_stat_busy(stat, 1);
- return;
-
- case LST_TRANS_TSBRUN:
- case LST_TRANS_TSBSTOP:
- bat_rep = &msg->msg_body.bat_reply;
-
- if (!bat_rep->bar_status) {
- lstcon_tsbop_stat_success(stat, 1);
- return;
- }
-
- if (bat_rep->bar_status == EPERM &&
- trans->tas_opc == LST_TRANS_TSBSTOP) {
- lstcon_tsbop_stat_success(stat, 1);
- return;
- }
-
- lstcon_tsbop_stat_failure(stat, 1);
- rc = bat_rep->bar_status;
- break;
-
- case LST_TRANS_TSBCLIQRY:
- case LST_TRANS_TSBSRVQRY:
- bat_rep = &msg->msg_body.bat_reply;
-
- if (bat_rep->bar_active)
- lstcon_tsbqry_stat_run(stat, 1);
- else
- lstcon_tsbqry_stat_idle(stat, 1);
-
- if (!bat_rep->bar_status)
- return;
-
- lstcon_tsbqry_stat_failure(stat, 1);
- rc = bat_rep->bar_status;
- break;
-
- case LST_TRANS_TSBCLIADD:
- case LST_TRANS_TSBSRVADD:
- test_rep = &msg->msg_body.tes_reply;
-
- if (!test_rep->tsr_status) {
- lstcon_tsbop_stat_success(stat, 1);
- return;
- }
-
- lstcon_tsbop_stat_failure(stat, 1);
- rc = test_rep->tsr_status;
- break;
-
- case LST_TRANS_STATQRY:
- stat_rep = &msg->msg_body.stat_reply;
-
- if (!stat_rep->str_status) {
- lstcon_statqry_stat_success(stat, 1);
- return;
- }
-
- lstcon_statqry_stat_failure(stat, 1);
- rc = stat_rep->str_status;
- break;
-
- default:
- LBUG();
- }
-
- if (!stat->trs_fwk_errno)
- stat->trs_fwk_errno = rc;
-}
-
-int
-lstcon_rpc_trans_ndlist(struct list_head *ndlist,
- struct list_head *translist, int transop,
- void *arg, lstcon_rpc_cond_func_t condition,
- struct lstcon_rpc_trans **transpp)
-{
- struct lstcon_rpc_trans *trans;
- struct lstcon_ndlink *ndl;
- struct lstcon_node *nd;
- struct lstcon_rpc *rpc;
- unsigned int feats;
- int rc;
-
-	/* Create session RPCs for a list of nodes */
-
- rc = lstcon_rpc_trans_prep(translist, transop, &trans);
- if (rc) {
- CERROR("Can't create transaction %d: %d\n", transop, rc);
- return rc;
- }
-
- feats = trans->tas_features;
- list_for_each_entry(ndl, ndlist, ndl_link) {
- rc = !condition ? 1 :
- condition(transop, ndl->ndl_node, arg);
-
- if (!rc)
- continue;
-
- if (rc < 0) {
- CDEBUG(D_NET, "Condition error while creating RPC for transaction %d: %d\n",
- transop, rc);
- break;
- }
-
- nd = ndl->ndl_node;
-
- switch (transop) {
- case LST_TRANS_SESNEW:
- case LST_TRANS_SESEND:
- rc = lstcon_sesrpc_prep(nd, transop, feats, &rpc);
- break;
- case LST_TRANS_SESQRY:
- case LST_TRANS_SESPING:
- rc = lstcon_dbgrpc_prep(nd, feats, &rpc);
- break;
- case LST_TRANS_TSBCLIADD:
- case LST_TRANS_TSBSRVADD:
- rc = lstcon_testrpc_prep(nd, transop, feats,
- (struct lstcon_test *)arg,
- &rpc);
- break;
- case LST_TRANS_TSBRUN:
- case LST_TRANS_TSBSTOP:
- case LST_TRANS_TSBCLIQRY:
- case LST_TRANS_TSBSRVQRY:
- rc = lstcon_batrpc_prep(nd, transop, feats,
- (struct lstcon_tsb_hdr *)arg,
- &rpc);
- break;
- case LST_TRANS_STATQRY:
- rc = lstcon_statrpc_prep(nd, feats, &rpc);
- break;
- default:
- rc = -EINVAL;
- break;
- }
-
- if (rc) {
- CERROR("Failed to create RPC for transaction %s: %d\n",
- lstcon_rpc_trans_name(transop), rc);
- break;
- }
-
- lstcon_rpc_trans_addreq(trans, rpc);
- }
-
- if (!rc) {
- *transpp = trans;
- return 0;
- }
-
- lstcon_rpc_trans_destroy(trans);
-
- return rc;
-}
-
-static void
-lstcon_rpc_pinger(void *arg)
-{
- struct stt_timer *ptimer = (struct stt_timer *)arg;
- struct lstcon_rpc_trans *trans;
- struct lstcon_rpc *crpc;
- struct srpc_msg *rep;
- struct srpc_debug_reqst *drq;
- struct lstcon_ndlink *ndl;
- struct lstcon_node *nd;
- int intv;
- int count = 0;
- int rc;
-
-	/*
-	 * The RPC pinger is a special case of transaction; it is
-	 * invoked by a timer every LST_PING_INTERVAL (8) seconds.
-	 */
- mutex_lock(&console_session.ses_mutex);
-
- if (console_session.ses_shutdown || console_session.ses_expired) {
- mutex_unlock(&console_session.ses_mutex);
- return;
- }
-
- if (!console_session.ses_expired &&
- ktime_get_real_seconds() - console_session.ses_laststamp >
- (time64_t)console_session.ses_timeout)
- console_session.ses_expired = 1;
-
- trans = console_session.ses_ping;
-
- LASSERT(trans);
-
- list_for_each_entry(ndl, &console_session.ses_ndl_list, ndl_link) {
- nd = ndl->ndl_node;
-
- if (console_session.ses_expired) {
- /* idle console, end session on all nodes */
- if (nd->nd_state != LST_NODE_ACTIVE)
- continue;
-
- rc = lstcon_sesrpc_prep(nd, LST_TRANS_SESEND,
- trans->tas_features, &crpc);
- if (rc) {
- CERROR("Out of memory\n");
- break;
- }
-
- lstcon_rpc_trans_addreq(trans, crpc);
- lstcon_rpc_post(crpc);
-
- continue;
- }
-
- crpc = &nd->nd_ping;
-
- if (crpc->crp_rpc) {
- LASSERT(crpc->crp_trans == trans);
- LASSERT(!list_empty(&crpc->crp_link));
-
- spin_lock(&crpc->crp_rpc->crpc_lock);
-
- LASSERT(crpc->crp_posted);
-
- if (!crpc->crp_finished) {
- /* in flight */
- spin_unlock(&crpc->crp_rpc->crpc_lock);
- continue;
- }
-
- spin_unlock(&crpc->crp_rpc->crpc_lock);
-
- lstcon_rpc_get_reply(crpc, &rep);
-
- list_del_init(&crpc->crp_link);
-
- lstcon_rpc_put(crpc);
- }
-
- if (nd->nd_state != LST_NODE_ACTIVE)
- continue;
-
- intv = (jiffies - nd->nd_stamp) / msecs_to_jiffies(MSEC_PER_SEC);
- if (intv < nd->nd_timeout / 2)
- continue;
-
- rc = lstcon_rpc_init(nd, SRPC_SERVICE_DEBUG,
- trans->tas_features, 0, 0, 1, crpc);
- if (rc) {
- CERROR("Out of memory\n");
- break;
- }
-
- drq = &crpc->crp_rpc->crpc_reqstmsg.msg_body.dbg_reqst;
-
- drq->dbg_sid = console_session.ses_id;
- drq->dbg_flags = 0;
-
- lstcon_rpc_trans_addreq(trans, crpc);
- lstcon_rpc_post(crpc);
-
- count++;
- }
-
- if (console_session.ses_expired) {
- mutex_unlock(&console_session.ses_mutex);
- return;
- }
-
- CDEBUG(D_NET, "Ping %d nodes in session\n", count);
-
- ptimer->stt_expires = ktime_get_real_seconds() + LST_PING_INTERVAL;
- stt_add_timer(ptimer);
-
- mutex_unlock(&console_session.ses_mutex);
-}
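Two clocks drive the pinger: the whole session expires once ses_timeout
seconds pass without console activity, and an individual node is re-probed
once more than half of its own session timeout has elapsed since the last
contact. The per-node predicate, restated with times in seconds:

static int needs_ping(long now, long last_contact, int nd_timeout)
{
	return now - last_contact >= nd_timeout / 2;
}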
-
-int
-lstcon_rpc_pinger_start(void)
-{
- struct stt_timer *ptimer;
- int rc;
-
- LASSERT(list_empty(&console_session.ses_rpc_freelist));
- LASSERT(!atomic_read(&console_session.ses_rpc_counter));
-
- rc = lstcon_rpc_trans_prep(NULL, LST_TRANS_SESPING,
- &console_session.ses_ping);
- if (rc) {
- CERROR("Failed to create console pinger\n");
- return rc;
- }
-
- ptimer = &console_session.ses_ping_timer;
- ptimer->stt_expires = ktime_get_real_seconds() + LST_PING_INTERVAL;
-
- stt_add_timer(ptimer);
-
- return 0;
-}
-
-void
-lstcon_rpc_pinger_stop(void)
-{
- LASSERT(console_session.ses_shutdown);
-
- stt_del_timer(&console_session.ses_ping_timer);
-
- lstcon_rpc_trans_abort(console_session.ses_ping, -ESHUTDOWN);
- lstcon_rpc_trans_stat(console_session.ses_ping, lstcon_trans_stat());
- lstcon_rpc_trans_destroy(console_session.ses_ping);
-
- memset(lstcon_trans_stat(), 0, sizeof(struct lstcon_trans_stat));
-
- console_session.ses_ping = NULL;
-}
-
-void
-lstcon_rpc_cleanup_wait(void)
-{
- struct lstcon_rpc_trans *trans;
- struct lstcon_rpc *crpc;
- struct lstcon_rpc *temp;
- struct list_head *pacer;
- struct list_head zlist;
-
-	/* Called while holding the global console mutex */
-
- LASSERT(console_session.ses_shutdown);
-
- while (!list_empty(&console_session.ses_trans_list)) {
- list_for_each(pacer, &console_session.ses_trans_list) {
- trans = list_entry(pacer, struct lstcon_rpc_trans,
- tas_link);
-
- CDEBUG(D_NET, "Session closed, wakeup transaction %s\n",
- lstcon_rpc_trans_name(trans->tas_opc));
-
- wake_up(&trans->tas_waitq);
- }
-
- mutex_unlock(&console_session.ses_mutex);
-
- CWARN("Session is shutting down, waiting for termination of transactions\n");
- set_current_state(TASK_UNINTERRUPTIBLE);
- schedule_timeout(HZ);
-
- mutex_lock(&console_session.ses_mutex);
- }
-
- spin_lock(&console_session.ses_rpc_lock);
-
- lst_wait_until(!atomic_read(&console_session.ses_rpc_counter),
- console_session.ses_rpc_lock,
-		       "Network is not accessible or target is down, waiting for %d console RPCs to be recycled\n",
- atomic_read(&console_session.ses_rpc_counter));
-
- list_add(&zlist, &console_session.ses_rpc_freelist);
- list_del_init(&console_session.ses_rpc_freelist);
-
- spin_unlock(&console_session.ses_rpc_lock);
-
- list_for_each_entry_safe(crpc, temp, &zlist, crp_link) {
- list_del(&crpc->crp_link);
- kfree(crpc);
- }
-}
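
The two list calls above (list_add() of the local zlist head followed by list_del_init() of the shared head) are a hand-rolled list_splice_init(): the whole freelist is stolen onto a local head while ses_rpc_lock is held, so the orphan RPCs can be freed after the lock is dropped. A self-contained userspace sketch of the same two moves (assumption: a minimal hand-rolled list rather than <linux/list.h>):

#include <stdio.h>
#include <stdlib.h>

struct list_head { struct list_head *next, *prev; };

static void list_init(struct list_head *h)
{
	h->next = h->prev = h;
}

static void list_add(struct list_head *n, struct list_head *h)
{
	n->next = h->next;
	n->prev = h;
	h->next->prev = n;
	h->next = n;
}

static void list_del_init(struct list_head *n)
{
	n->prev->next = n->next;
	n->next->prev = n->prev;
	list_init(n);
}

struct rpc { struct list_head link; int id; };	/* link must come first */

int main(void)
{
	struct list_head freelist, zlist, *pos, *tmp;
	int i;

	list_init(&freelist);
	for (i = 0; i < 3; i++) {
		struct rpc *r = malloc(sizeof(*r));

		r->id = i;
		list_add(&r->link, &freelist);
	}

	/* done under ses_rpc_lock in the console code */
	list_add(&zlist, &freelist);	/* zlist takes over the chain */
	list_del_init(&freelist);	/* freelist becomes empty */

	/* lock dropped: free the stolen list at leisure */
	for (pos = zlist.next; pos != &zlist; pos = tmp) {
		struct rpc *r = (struct rpc *)pos;

		tmp = pos->next;
		printf("freeing rpc %d\n", r->id);
		free(r);
	}
	return 0;
}
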
-
-int
-lstcon_rpc_module_init(void)
-{
- INIT_LIST_HEAD(&console_session.ses_ping_timer.stt_list);
- console_session.ses_ping_timer.stt_func = lstcon_rpc_pinger;
- console_session.ses_ping_timer.stt_data = &console_session.ses_ping_timer;
-
- console_session.ses_ping = NULL;
-
- spin_lock_init(&console_session.ses_rpc_lock);
- atomic_set(&console_session.ses_rpc_counter, 0);
- INIT_LIST_HEAD(&console_session.ses_rpc_freelist);
-
- return 0;
-}
-
-void
-lstcon_rpc_module_fini(void)
-{
- LASSERT(list_empty(&console_session.ses_rpc_freelist));
- LASSERT(!atomic_read(&console_session.ses_rpc_counter));
-}
diff --git a/drivers/staging/lustre/lnet/selftest/conrpc.h b/drivers/staging/lustre/lnet/selftest/conrpc.h
deleted file mode 100644
index 374a5f31ef6f..000000000000
--- a/drivers/staging/lustre/lnet/selftest/conrpc.h
+++ /dev/null
@@ -1,143 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/*
- * GPL HEADER START
- *
- * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 only,
- * as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * General Public License version 2 for more details (a copy is included
- * in the LICENSE file that accompanied this code).
- *
- * You should have received a copy of the GNU General Public License
- * version 2 along with this program; If not, see
- * http://www.gnu.org/licenses/gpl-2.0.html
- *
- * GPL HEADER END
- */
-/*
- * Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved.
- * Use is subject to license terms.
- *
- * Copyright (c) 2011, Intel Corporation.
- */
-/*
- * This file is part of Lustre, http://www.lustre.org/
- * Lustre is a trademark of Sun Microsystems, Inc.
- *
- * lnet/selftest/conrpc.h
- *
- * Console rpc
- *
- * Author: Liang Zhen <liang@whamcloud.com>
- */
-
-#ifndef __LST_CONRPC_H__
-#define __LST_CONRPC_H__
-
-#include <linux/libcfs/libcfs.h>
-#include <linux/lnet/lib-types.h>
-#include <uapi/linux/lnet/lnetst.h>
-#include "rpc.h"
-#include "selftest.h"
-
-/* Console rpc and rpc transaction */
-#define LST_TRANS_TIMEOUT 30
-#define LST_TRANS_MIN_TIMEOUT 3
-
-#define LST_VALIDATE_TIMEOUT(t) min(max(t, LST_TRANS_MIN_TIMEOUT), LST_TRANS_TIMEOUT)
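
LST_VALIDATE_TIMEOUT() clamps a caller-supplied timeout into the [LST_TRANS_MIN_TIMEOUT, LST_TRANS_TIMEOUT] window; a few worked values with the defaults above:

/* LST_VALIDATE_TIMEOUT(1)   ->  3   raised to the minimum
 * LST_VALIDATE_TIMEOUT(10)  -> 10   left unchanged
 * LST_VALIDATE_TIMEOUT(120) -> 30   capped at the maximum */
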
-
-#define LST_PING_INTERVAL 8
-
-struct lstcon_rpc_trans;
-struct lstcon_tsb_hdr;
-struct lstcon_test;
-struct lstcon_node;
-
-struct lstcon_rpc {
- struct list_head crp_link; /* chain on rpc transaction */
- struct srpc_client_rpc *crp_rpc; /* client rpc */
- struct lstcon_node *crp_node; /* destination node */
- struct lstcon_rpc_trans *crp_trans; /* conrpc transaction */
-
- unsigned int crp_posted:1; /* rpc is posted */
- unsigned int crp_finished:1; /* rpc is finished */
- unsigned int crp_unpacked:1; /* reply is unpacked */
-	/** RPC is embedded in another structure and can't be freed on its own */
- unsigned int crp_embedded:1;
- int crp_status; /* console rpc errors */
- unsigned long crp_stamp; /* replied time stamp */
-};
-
-struct lstcon_rpc_trans {
- struct list_head tas_olink; /* link chain on owner list */
- struct list_head tas_link; /* link chain on global list */
- int tas_opc; /* operation code of transaction */
- unsigned int tas_feats_updated; /* features mask is uptodate */
- unsigned int tas_features; /* test features mask */
- wait_queue_head_t tas_waitq; /* wait queue head */
- atomic_t tas_remaining; /* # of un-scheduled rpcs */
- struct list_head tas_rpcs_list; /* queued requests */
-};
-
-#define LST_TRANS_PRIVATE 0x1000
-
-#define LST_TRANS_SESNEW (LST_TRANS_PRIVATE | 0x01)
-#define LST_TRANS_SESEND (LST_TRANS_PRIVATE | 0x02)
-#define LST_TRANS_SESQRY 0x03
-#define LST_TRANS_SESPING 0x04
-
-#define LST_TRANS_TSBCLIADD (LST_TRANS_PRIVATE | 0x11)
-#define LST_TRANS_TSBSRVADD (LST_TRANS_PRIVATE | 0x12)
-#define LST_TRANS_TSBRUN (LST_TRANS_PRIVATE | 0x13)
-#define LST_TRANS_TSBSTOP (LST_TRANS_PRIVATE | 0x14)
-#define LST_TRANS_TSBCLIQRY 0x15
-#define LST_TRANS_TSBSRVQRY 0x16
-
-#define LST_TRANS_STATQRY 0x21
-
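
Note the pattern in the opcode values: every state-changing transaction (session and batch setup or teardown) carries the LST_TRANS_PRIVATE bit, while pure queries (SESQRY, SESPING, TSBCLIQRY, TSBSRVQRY, STATQRY) do not. An illustrative predicate making the split explicit (an assumption for clarity, not part of the original header):

static inline int lst_trans_is_private(int transop)
{
	/* state-changing console transaction, as opposed to a query */
	return (transop & LST_TRANS_PRIVATE) != 0;
}
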
-typedef int (*lstcon_rpc_cond_func_t)(int, struct lstcon_node *, void *);
-typedef int (*lstcon_rpc_readent_func_t)(int, struct srpc_msg *,
- struct lstcon_rpc_ent __user *);
-
-int lstcon_sesrpc_prep(struct lstcon_node *nd, int transop,
- unsigned int version, struct lstcon_rpc **crpc);
-int lstcon_dbgrpc_prep(struct lstcon_node *nd,
- unsigned int version, struct lstcon_rpc **crpc);
-int lstcon_batrpc_prep(struct lstcon_node *nd, int transop,
- unsigned int version, struct lstcon_tsb_hdr *tsb,
- struct lstcon_rpc **crpc);
-int lstcon_testrpc_prep(struct lstcon_node *nd, int transop,
- unsigned int version, struct lstcon_test *test,
- struct lstcon_rpc **crpc);
-int lstcon_statrpc_prep(struct lstcon_node *nd, unsigned int version,
- struct lstcon_rpc **crpc);
-void lstcon_rpc_put(struct lstcon_rpc *crpc);
-int lstcon_rpc_trans_prep(struct list_head *translist,
- int transop, struct lstcon_rpc_trans **transpp);
-int lstcon_rpc_trans_ndlist(struct list_head *ndlist,
- struct list_head *translist, int transop,
- void *arg, lstcon_rpc_cond_func_t condition,
- struct lstcon_rpc_trans **transpp);
-void lstcon_rpc_trans_stat(struct lstcon_rpc_trans *trans,
- struct lstcon_trans_stat *stat);
-int lstcon_rpc_trans_interpreter(struct lstcon_rpc_trans *trans,
- struct list_head __user *head_up,
- lstcon_rpc_readent_func_t readent);
-void lstcon_rpc_trans_abort(struct lstcon_rpc_trans *trans, int error);
-void lstcon_rpc_trans_destroy(struct lstcon_rpc_trans *trans);
-void lstcon_rpc_trans_addreq(struct lstcon_rpc_trans *trans,
- struct lstcon_rpc *req);
-int lstcon_rpc_trans_postwait(struct lstcon_rpc_trans *trans, int timeout);
-int lstcon_rpc_pinger_start(void);
-void lstcon_rpc_pinger_stop(void);
-void lstcon_rpc_cleanup_wait(void);
-int lstcon_rpc_module_init(void);
-void lstcon_rpc_module_fini(void);
-
-#endif
diff --git a/drivers/staging/lustre/lnet/selftest/console.c b/drivers/staging/lustre/lnet/selftest/console.c
deleted file mode 100644
index 1acd5cb324b1..000000000000
--- a/drivers/staging/lustre/lnet/selftest/console.c
+++ /dev/null
@@ -1,2101 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/*
- * GPL HEADER START
- *
- * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 only,
- * as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * General Public License version 2 for more details (a copy is included
- * in the LICENSE file that accompanied this code).
- *
- * You should have received a copy of the GNU General Public License
- * version 2 along with this program; If not, see
- * http://www.gnu.org/licenses/gpl-2.0.html
- *
- * GPL HEADER END
- */
-/*
- * Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved.
- * Use is subject to license terms.
- *
- * Copyright (c) 2012, Intel Corporation.
- */
-/*
- * This file is part of Lustre, http://www.lustre.org/
- * Lustre is a trademark of Sun Microsystems, Inc.
- *
- * lnet/selftest/console.c
- *
- * Infrastructure of LST console
- *
- * Author: Liang Zhen <liangzhen@clusterfs.com>
- */
-
-#include <linux/libcfs/libcfs.h>
-#include <linux/lnet/lib-lnet.h>
-#include "console.h"
-#include "conrpc.h"
-
-#define LST_NODE_STATE_COUNTER(nd, p) \
-do { \
- if ((nd)->nd_state == LST_NODE_ACTIVE) \
- (p)->nle_nactive++; \
- else if ((nd)->nd_state == LST_NODE_BUSY) \
- (p)->nle_nbusy++; \
- else if ((nd)->nd_state == LST_NODE_DOWN) \
- (p)->nle_ndown++; \
- else \
- (p)->nle_nunknown++; \
- (p)->nle_nnode++; \
-} while (0)
-
-struct lstcon_session console_session;
-
-static void
-lstcon_node_get(struct lstcon_node *nd)
-{
- LASSERT(nd->nd_ref >= 1);
-
- nd->nd_ref++;
-}
-
-static int
-lstcon_node_find(struct lnet_process_id id, struct lstcon_node **ndpp,
- int create)
-{
- struct lstcon_ndlink *ndl;
- unsigned int idx = LNET_NIDADDR(id.nid) % LST_GLOBAL_HASHSIZE;
-
- LASSERT(id.nid != LNET_NID_ANY);
-
- list_for_each_entry(ndl, &console_session.ses_ndl_hash[idx],
- ndl_hlink) {
- if (ndl->ndl_node->nd_id.nid != id.nid ||
- ndl->ndl_node->nd_id.pid != id.pid)
- continue;
-
- lstcon_node_get(ndl->ndl_node);
- *ndpp = ndl->ndl_node;
- return 0;
- }
-
- if (!create)
- return -ENOENT;
-
- *ndpp = kzalloc(sizeof(**ndpp) + sizeof(*ndl), GFP_KERNEL);
- if (!*ndpp)
- return -ENOMEM;
-
- ndl = (struct lstcon_ndlink *)(*ndpp + 1);
-
- ndl->ndl_node = *ndpp;
-
- ndl->ndl_node->nd_ref = 1;
- ndl->ndl_node->nd_id = id;
- ndl->ndl_node->nd_stamp = cfs_time_current();
- ndl->ndl_node->nd_state = LST_NODE_UNKNOWN;
- ndl->ndl_node->nd_timeout = 0;
- memset(&ndl->ndl_node->nd_ping, 0, sizeof(struct lstcon_rpc));
-
- /*
-	 * queued in the global hash & list; neither takes a refcount,
-	 * so the node is released as soon as the caller drops its
-	 * reference
- */
- list_add_tail(&ndl->ndl_hlink, &console_session.ses_ndl_hash[idx]);
- list_add_tail(&ndl->ndl_link, &console_session.ses_ndl_list);
-
- return 0;
-}
-
-static void
-lstcon_node_put(struct lstcon_node *nd)
-{
- struct lstcon_ndlink *ndl;
-
- LASSERT(nd->nd_ref > 0);
-
- if (--nd->nd_ref > 0)
- return;
-
- ndl = (struct lstcon_ndlink *)(nd + 1);
-
- LASSERT(!list_empty(&ndl->ndl_link));
- LASSERT(!list_empty(&ndl->ndl_hlink));
-
- /* remove from session */
- list_del(&ndl->ndl_link);
- list_del(&ndl->ndl_hlink);
-
- kfree(nd);
-}
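
lstcon_node_find() and lstcon_node_put() depend on the node and its session ndlink being carved from one allocation: the link sits immediately after the node, is recovered with pointer arithmetic, and both vanish with a single kfree(nd). A userspace sketch of the trick (illustrative names; calloc in place of kzalloc):

#include <stdlib.h>

struct node   { long ref; };		/* keeps (nd + 1) pointer-aligned */
struct ndlink { struct node *node; };

static struct node *node_alloc(void)
{
	/* one block holds the node followed by its link */
	struct node *nd = calloc(1, sizeof(struct node) +
				    sizeof(struct ndlink));
	struct ndlink *ndl;

	if (!nd)
		return NULL;

	ndl = (struct ndlink *)(nd + 1);	/* link lives after the node */
	ndl->node = nd;
	nd->ref = 1;
	return nd;
}

static void node_free(struct node *nd)
{
	free(nd);	/* releases the node and its link together */
}

int main(void)
{
	struct node *nd = node_alloc();

	if (nd)
		node_free(nd);
	return 0;
}
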
-
-static int
-lstcon_ndlink_find(struct list_head *hash, struct lnet_process_id id,
- struct lstcon_ndlink **ndlpp, int create)
-{
- unsigned int idx = LNET_NIDADDR(id.nid) % LST_NODE_HASHSIZE;
- struct lstcon_ndlink *ndl;
- struct lstcon_node *nd;
- int rc;
-
- if (id.nid == LNET_NID_ANY)
- return -EINVAL;
-
- /* search in hash */
- list_for_each_entry(ndl, &hash[idx], ndl_hlink) {
- if (ndl->ndl_node->nd_id.nid != id.nid ||
- ndl->ndl_node->nd_id.pid != id.pid)
- continue;
-
- *ndlpp = ndl;
- return 0;
- }
-
- if (!create)
- return -ENOENT;
-
- /* find or create in session hash */
- rc = lstcon_node_find(id, &nd, (create == 1) ? 1 : 0);
- if (rc)
- return rc;
-
- ndl = kzalloc(sizeof(struct lstcon_ndlink), GFP_NOFS);
- if (!ndl) {
- lstcon_node_put(nd);
- return -ENOMEM;
- }
-
- *ndlpp = ndl;
-
- ndl->ndl_node = nd;
- INIT_LIST_HEAD(&ndl->ndl_link);
- list_add_tail(&ndl->ndl_hlink, &hash[idx]);
-
- return 0;
-}
-
-static void
-lstcon_ndlink_release(struct lstcon_ndlink *ndl)
-{
- LASSERT(list_empty(&ndl->ndl_link));
- LASSERT(!list_empty(&ndl->ndl_hlink));
-
- list_del(&ndl->ndl_hlink); /* delete from hash */
- lstcon_node_put(ndl->ndl_node);
-
- kfree(ndl);
-}
-
-static int
-lstcon_group_alloc(char *name, struct lstcon_group **grpp)
-{
- struct lstcon_group *grp;
- int i;
-
- grp = kmalloc(offsetof(struct lstcon_group,
- grp_ndl_hash[LST_NODE_HASHSIZE]),
- GFP_KERNEL);
- if (!grp)
- return -ENOMEM;
-
- grp->grp_ref = 1;
- if (name) {
- if (strlen(name) > sizeof(grp->grp_name) - 1) {
- kfree(grp);
- return -E2BIG;
- }
- strncpy(grp->grp_name, name, sizeof(grp->grp_name));
- }
-
- INIT_LIST_HEAD(&grp->grp_link);
- INIT_LIST_HEAD(&grp->grp_ndl_list);
- INIT_LIST_HEAD(&grp->grp_trans_list);
-
- for (i = 0; i < LST_NODE_HASHSIZE; i++)
- INIT_LIST_HEAD(&grp->grp_ndl_hash[i]);
-
- *grpp = grp;
-
- return 0;
-}
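
The offsetof() sizing above is this file's standard idiom for variable-length objects (lstcon_test_add() later uses the same trick for tes_param[]): one allocation covers the fixed header plus all trailing hash buckets. A self-contained sketch, assuming a C99 flexible array member where the original declares a zero-length grp_ndl_hash[0]:

#include <stddef.h>
#include <stdlib.h>

#define NODE_HASHSIZE 239	/* LST_NODE_HASHSIZE in the original */

struct list_head { struct list_head *next, *prev; };

struct group {
	int nnode;
	struct list_head ndl_hash[];	/* trailing hash buckets */
};

static struct group *group_alloc(void)
{
	struct group *grp = malloc(offsetof(struct group,
					    ndl_hash[NODE_HASHSIZE]));
	int i;

	if (!grp)
		return NULL;

	grp->nnode = 0;
	for (i = 0; i < NODE_HASHSIZE; i++)	/* empty circular buckets */
		grp->ndl_hash[i].next = grp->ndl_hash[i].prev =
			&grp->ndl_hash[i];
	return grp;
}

int main(void)
{
	free(group_alloc());	/* free(NULL) is a no-op if alloc failed */
	return 0;
}
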
-
-static void
-lstcon_group_addref(struct lstcon_group *grp)
-{
- grp->grp_ref++;
-}
-
-static void lstcon_group_ndlink_release(struct lstcon_group *,
- struct lstcon_ndlink *);
-
-static void
-lstcon_group_drain(struct lstcon_group *grp, int keep)
-{
- struct lstcon_ndlink *ndl;
- struct lstcon_ndlink *tmp;
-
- list_for_each_entry_safe(ndl, tmp, &grp->grp_ndl_list, ndl_link) {
- if (!(ndl->ndl_node->nd_state & keep))
- lstcon_group_ndlink_release(grp, ndl);
- }
-}
-
-static void
-lstcon_group_decref(struct lstcon_group *grp)
-{
- int i;
-
- if (--grp->grp_ref > 0)
- return;
-
- if (!list_empty(&grp->grp_link))
- list_del(&grp->grp_link);
-
- lstcon_group_drain(grp, 0);
-
- for (i = 0; i < LST_NODE_HASHSIZE; i++)
- LASSERT(list_empty(&grp->grp_ndl_hash[i]));
-
- kfree(grp);
-}
-
-static int
-lstcon_group_find(const char *name, struct lstcon_group **grpp)
-{
- struct lstcon_group *grp;
-
- list_for_each_entry(grp, &console_session.ses_grp_list, grp_link) {
- if (strncmp(grp->grp_name, name, LST_NAME_SIZE))
- continue;
-
- lstcon_group_addref(grp); /* +1 ref for caller */
- *grpp = grp;
- return 0;
- }
-
- return -ENOENT;
-}
-
-static int
-lstcon_group_ndlink_find(struct lstcon_group *grp, struct lnet_process_id id,
- struct lstcon_ndlink **ndlpp, int create)
-{
- int rc;
-
- rc = lstcon_ndlink_find(&grp->grp_ndl_hash[0], id, ndlpp, create);
- if (rc)
- return rc;
-
- if (!list_empty(&(*ndlpp)->ndl_link))
- return 0;
-
- list_add_tail(&(*ndlpp)->ndl_link, &grp->grp_ndl_list);
- grp->grp_nnode++;
-
- return 0;
-}
-
-static void
-lstcon_group_ndlink_release(struct lstcon_group *grp, struct lstcon_ndlink *ndl)
-{
- list_del_init(&ndl->ndl_link);
- lstcon_ndlink_release(ndl);
- grp->grp_nnode--;
-}
-
-static void
-lstcon_group_ndlink_move(struct lstcon_group *old,
- struct lstcon_group *new, struct lstcon_ndlink *ndl)
-{
- unsigned int idx = LNET_NIDADDR(ndl->ndl_node->nd_id.nid) %
- LST_NODE_HASHSIZE;
-
- list_del(&ndl->ndl_hlink);
- list_del(&ndl->ndl_link);
- old->grp_nnode--;
-
- list_add_tail(&ndl->ndl_hlink, &new->grp_ndl_hash[idx]);
- list_add_tail(&ndl->ndl_link, &new->grp_ndl_list);
- new->grp_nnode++;
-}
-
-static void
-lstcon_group_move(struct lstcon_group *old, struct lstcon_group *new)
-{
- struct lstcon_ndlink *ndl;
-
- while (!list_empty(&old->grp_ndl_list)) {
- ndl = list_entry(old->grp_ndl_list.next,
- struct lstcon_ndlink, ndl_link);
- lstcon_group_ndlink_move(old, new, ndl);
- }
-}
-
-static int
-lstcon_sesrpc_condition(int transop, struct lstcon_node *nd, void *arg)
-{
- struct lstcon_group *grp = (struct lstcon_group *)arg;
-
- switch (transop) {
- case LST_TRANS_SESNEW:
- if (nd->nd_state == LST_NODE_ACTIVE)
- return 0;
- break;
-
- case LST_TRANS_SESEND:
- if (nd->nd_state != LST_NODE_ACTIVE)
- return 0;
-
- if (grp && nd->nd_ref > 1)
- return 0;
- break;
-
- case LST_TRANS_SESQRY:
- break;
-
- default:
- LBUG();
- }
-
- return 1;
-}
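
Callbacks of this shape are what lstcon_rpc_trans_ndlist() walks the node list with: returning 1 includes the node in the transaction, 0 silently skips it, and a negative errno (as lstcon_batrpc_condition() and lstcon_testrpc_condition() return later in this file) aborts building the transaction with that error. A deliberately trivial, hypothetical example of the contract:

static int lstcon_active_only_condition(int transop, struct lstcon_node *nd,
					void *arg)
{
	/* 1 = include this node, 0 = skip it quietly */
	return nd->nd_state == LST_NODE_ACTIVE ? 1 : 0;
}
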
-
-static int
-lstcon_sesrpc_readent(int transop, struct srpc_msg *msg,
- struct lstcon_rpc_ent __user *ent_up)
-{
- struct srpc_debug_reply *rep;
-
- switch (transop) {
- case LST_TRANS_SESNEW:
- case LST_TRANS_SESEND:
- return 0;
-
- case LST_TRANS_SESQRY:
- rep = &msg->msg_body.dbg_reply;
-
- if (copy_to_user(&ent_up->rpe_priv[0],
- &rep->dbg_timeout, sizeof(int)) ||
- copy_to_user(&ent_up->rpe_payload[0],
- &rep->dbg_name, LST_NAME_SIZE))
- return -EFAULT;
-
- return 0;
-
- default:
- LBUG();
- }
-
- return 0;
-}
-
-static int
-lstcon_group_nodes_add(struct lstcon_group *grp,
- int count, struct lnet_process_id __user *ids_up,
- unsigned int *featp,
- struct list_head __user *result_up)
-{
- struct lstcon_rpc_trans *trans;
- struct lstcon_ndlink *ndl;
- struct lstcon_group *tmp;
- struct lnet_process_id id;
- int i;
- int rc;
-
- rc = lstcon_group_alloc(NULL, &tmp);
- if (rc) {
- CERROR("Out of memory\n");
- return -ENOMEM;
- }
-
-	for (i = 0; i < count; i++) {
- if (copy_from_user(&id, &ids_up[i], sizeof(id))) {
- rc = -EFAULT;
- break;
- }
-
- /* skip if it's in this group already */
- rc = lstcon_group_ndlink_find(grp, id, &ndl, 0);
- if (!rc)
- continue;
-
- /* add to tmp group */
- rc = lstcon_group_ndlink_find(tmp, id, &ndl, 1);
- if (rc) {
- CERROR("Can't create ndlink, out of memory\n");
- break;
- }
- }
-
- if (rc) {
- lstcon_group_decref(tmp);
- return rc;
- }
-
- rc = lstcon_rpc_trans_ndlist(&tmp->grp_ndl_list,
- &tmp->grp_trans_list, LST_TRANS_SESNEW,
- tmp, lstcon_sesrpc_condition, &trans);
- if (rc) {
- CERROR("Can't create transaction: %d\n", rc);
- lstcon_group_decref(tmp);
- return rc;
- }
-
- /* post all RPCs */
- lstcon_rpc_trans_postwait(trans, LST_TRANS_TIMEOUT);
-
- rc = lstcon_rpc_trans_interpreter(trans, result_up,
- lstcon_sesrpc_readent);
- *featp = trans->tas_features;
-
-	/* destroy all RPCs */
- lstcon_rpc_trans_destroy(trans);
-
- lstcon_group_move(tmp, grp);
- lstcon_group_decref(tmp);
-
- return rc;
-}
-
-static int
-lstcon_group_nodes_remove(struct lstcon_group *grp,
- int count, struct lnet_process_id __user *ids_up,
- struct list_head __user *result_up)
-{
- struct lstcon_rpc_trans *trans;
- struct lstcon_ndlink *ndl;
- struct lstcon_group *tmp;
- struct lnet_process_id id;
- int rc;
- int i;
-
- /* End session and remove node from the group */
-
- rc = lstcon_group_alloc(NULL, &tmp);
- if (rc) {
- CERROR("Out of memory\n");
- return -ENOMEM;
- }
-
- for (i = 0; i < count; i++) {
- if (copy_from_user(&id, &ids_up[i], sizeof(id))) {
- rc = -EFAULT;
- goto error;
- }
-
- /* move node to tmp group */
- if (!lstcon_group_ndlink_find(grp, id, &ndl, 0))
- lstcon_group_ndlink_move(grp, tmp, ndl);
- }
-
- rc = lstcon_rpc_trans_ndlist(&tmp->grp_ndl_list,
- &tmp->grp_trans_list, LST_TRANS_SESEND,
- tmp, lstcon_sesrpc_condition, &trans);
- if (rc) {
- CERROR("Can't create transaction: %d\n", rc);
- goto error;
- }
-
- lstcon_rpc_trans_postwait(trans, LST_TRANS_TIMEOUT);
-
- rc = lstcon_rpc_trans_interpreter(trans, result_up, NULL);
-
- lstcon_rpc_trans_destroy(trans);
-	/* release nodes anyway, because we can't roll back their status */
- lstcon_group_decref(tmp);
-
- return rc;
-error:
- lstcon_group_move(tmp, grp);
- lstcon_group_decref(tmp);
-
- return rc;
-}
-
-int
-lstcon_group_add(char *name)
-{
- struct lstcon_group *grp;
- int rc;
-
- rc = lstcon_group_find(name, &grp) ? 0 : -EEXIST;
- if (rc) {
-		/* found a group with the same name */
- lstcon_group_decref(grp);
- return rc;
- }
-
- rc = lstcon_group_alloc(name, &grp);
- if (rc) {
- CERROR("Can't allocate descriptor for group %s\n", name);
- return -ENOMEM;
- }
-
- list_add_tail(&grp->grp_link, &console_session.ses_grp_list);
-
- return rc;
-}
-
-int
-lstcon_nodes_add(char *name, int count, struct lnet_process_id __user *ids_up,
- unsigned int *featp, struct list_head __user *result_up)
-{
- struct lstcon_group *grp;
- int rc;
-
- LASSERT(count > 0);
- LASSERT(ids_up);
-
- rc = lstcon_group_find(name, &grp);
- if (rc) {
- CDEBUG(D_NET, "Can't find group %s\n", name);
- return rc;
- }
-
- if (grp->grp_ref > 2) {
- /* referred by other threads or test */
- CDEBUG(D_NET, "Group %s is busy\n", name);
- lstcon_group_decref(grp);
-
- return -EBUSY;
- }
-
- rc = lstcon_group_nodes_add(grp, count, ids_up, featp, result_up);
-
- lstcon_group_decref(grp);
-
- return rc;
-}
-
-int
-lstcon_group_del(char *name)
-{
- struct lstcon_rpc_trans *trans;
- struct lstcon_group *grp;
- int rc;
-
- rc = lstcon_group_find(name, &grp);
- if (rc) {
- CDEBUG(D_NET, "Can't find group: %s\n", name);
- return rc;
- }
-
- if (grp->grp_ref > 2) {
-		/* referred by other threads or tests */
- CDEBUG(D_NET, "Group %s is busy\n", name);
- lstcon_group_decref(grp);
- return -EBUSY;
- }
-
- rc = lstcon_rpc_trans_ndlist(&grp->grp_ndl_list,
- &grp->grp_trans_list, LST_TRANS_SESEND,
- grp, lstcon_sesrpc_condition, &trans);
- if (rc) {
- CERROR("Can't create transaction: %d\n", rc);
- lstcon_group_decref(grp);
- return rc;
- }
-
- lstcon_rpc_trans_postwait(trans, LST_TRANS_TIMEOUT);
-
- lstcon_rpc_trans_destroy(trans);
-
- lstcon_group_decref(grp);
- /*
-	 * drop the session's ref: the session is destroyed and its
-	 * status can't be rolled back, so destroy the group anyway
- */
- lstcon_group_decref(grp);
-
- return rc;
-}
-
-int
-lstcon_group_clean(char *name, int args)
-{
- struct lstcon_group *grp = NULL;
- int rc;
-
- rc = lstcon_group_find(name, &grp);
- if (rc) {
- CDEBUG(D_NET, "Can't find group %s\n", name);
- return rc;
- }
-
- if (grp->grp_ref > 2) {
- /* referred by test */
- CDEBUG(D_NET, "Group %s is busy\n", name);
- lstcon_group_decref(grp);
- return -EBUSY;
- }
-
- args = (LST_NODE_ACTIVE | LST_NODE_BUSY |
- LST_NODE_DOWN | LST_NODE_UNKNOWN) & ~args;
-
- lstcon_group_drain(grp, args);
-
- lstcon_group_decref(grp);
- /* release empty group */
- if (list_empty(&grp->grp_ndl_list))
- lstcon_group_decref(grp);
-
- return 0;
-}
-
-int
-lstcon_nodes_remove(char *name, int count,
- struct lnet_process_id __user *ids_up,
- struct list_head __user *result_up)
-{
- struct lstcon_group *grp = NULL;
- int rc;
-
- rc = lstcon_group_find(name, &grp);
- if (rc) {
- CDEBUG(D_NET, "Can't find group: %s\n", name);
- return rc;
- }
-
- if (grp->grp_ref > 2) {
- /* referred by test */
- CDEBUG(D_NET, "Group %s is busy\n", name);
- lstcon_group_decref(grp);
- return -EBUSY;
- }
-
- rc = lstcon_group_nodes_remove(grp, count, ids_up, result_up);
-
- lstcon_group_decref(grp);
- /* release empty group */
- if (list_empty(&grp->grp_ndl_list))
- lstcon_group_decref(grp);
-
- return rc;
-}
-
-int
-lstcon_group_refresh(char *name, struct list_head __user *result_up)
-{
- struct lstcon_rpc_trans *trans;
- struct lstcon_group *grp;
- int rc;
-
- rc = lstcon_group_find(name, &grp);
- if (rc) {
- CDEBUG(D_NET, "Can't find group: %s\n", name);
- return rc;
- }
-
- if (grp->grp_ref > 2) {
- /* referred by test */
- CDEBUG(D_NET, "Group %s is busy\n", name);
- lstcon_group_decref(grp);
- return -EBUSY;
- }
-
-	/* re-invite all inactive nodes in the group */
- rc = lstcon_rpc_trans_ndlist(&grp->grp_ndl_list,
- &grp->grp_trans_list, LST_TRANS_SESNEW,
- grp, lstcon_sesrpc_condition, &trans);
- if (rc) {
- /* local error, return */
- CDEBUG(D_NET, "Can't create transaction: %d\n", rc);
- lstcon_group_decref(grp);
- return rc;
- }
-
- lstcon_rpc_trans_postwait(trans, LST_TRANS_TIMEOUT);
-
- rc = lstcon_rpc_trans_interpreter(trans, result_up, NULL);
-
- lstcon_rpc_trans_destroy(trans);
- /* -ref for me */
- lstcon_group_decref(grp);
-
- return rc;
-}
-
-int
-lstcon_group_list(int index, int len, char __user *name_up)
-{
- struct lstcon_group *grp;
-
- LASSERT(index >= 0);
- LASSERT(name_up);
-
- list_for_each_entry(grp, &console_session.ses_grp_list, grp_link) {
- if (!index--) {
- return copy_to_user(name_up, grp->grp_name, len) ?
- -EFAULT : 0;
- }
- }
-
- return -ENOENT;
-}
-
-static int
-lstcon_nodes_getent(struct list_head *head, int *index_p,
- int *count_p, struct lstcon_node_ent __user *dents_up)
-{
- struct lstcon_ndlink *ndl;
- struct lstcon_node *nd;
- int count = 0;
- int index = 0;
-
- LASSERT(index_p && count_p);
- LASSERT(dents_up);
- LASSERT(*index_p >= 0);
- LASSERT(*count_p > 0);
-
- list_for_each_entry(ndl, head, ndl_link) {
-		if (count >= *count_p)
-			break;
-
-		if (index++ < *index_p)
-			continue;
-
- nd = ndl->ndl_node;
- if (copy_to_user(&dents_up[count].nde_id,
- &nd->nd_id, sizeof(nd->nd_id)) ||
- copy_to_user(&dents_up[count].nde_state,
- &nd->nd_state, sizeof(nd->nd_state)))
- return -EFAULT;
-
- count++;
- }
-
- if (index <= *index_p)
- return -ENOENT;
-
- *count_p = count;
- *index_p = index;
-
- return 0;
-}
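
The *index_p/*count_p pair forms a resumable cursor: on entry *count_p is the room available in dents_up, and on return it holds the number of entries actually copied while *index_p has advanced past them; -ENOENT signals that the cursor is already past the end of the list. A hypothetical caller loop (a sketch only; the real callers come in through the lst ioctl paths with user-space buffers):

int index = 0;
int count;
int rc;

do {
	count = 64;	/* capacity of dents_up */
	rc = lstcon_nodes_getent(head, &index, &count, dents_up);
	/* consume the first count entries of dents_up here */
} while (!rc && count == 64);	/* a short batch means the list ended */
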
-
-int
-lstcon_group_info(char *name, struct lstcon_ndlist_ent __user *gents_p,
- int *index_p, int *count_p,
- struct lstcon_node_ent __user *dents_up)
-{
- struct lstcon_ndlist_ent *gentp;
- struct lstcon_group *grp;
- struct lstcon_ndlink *ndl;
- int rc;
-
- rc = lstcon_group_find(name, &grp);
- if (rc) {
- CDEBUG(D_NET, "Can't find group %s\n", name);
- return rc;
- }
-
- if (dents_up) {
- /* verbose query */
- rc = lstcon_nodes_getent(&grp->grp_ndl_list,
- index_p, count_p, dents_up);
- lstcon_group_decref(grp);
-
- return rc;
- }
-
- /* non-verbose query */
- gentp = kzalloc(sizeof(struct lstcon_ndlist_ent), GFP_NOFS);
- if (!gentp) {
- CERROR("Can't allocate ndlist_ent\n");
- lstcon_group_decref(grp);
-
- return -ENOMEM;
- }
-
- list_for_each_entry(ndl, &grp->grp_ndl_list, ndl_link)
- LST_NODE_STATE_COUNTER(ndl->ndl_node, gentp);
-
- rc = copy_to_user(gents_p, gentp,
- sizeof(struct lstcon_ndlist_ent)) ? -EFAULT : 0;
-
- kfree(gentp);
-
- lstcon_group_decref(grp);
-
- return rc;
-}
-
-static int
-lstcon_batch_find(const char *name, struct lstcon_batch **batpp)
-{
- struct lstcon_batch *bat;
-
- list_for_each_entry(bat, &console_session.ses_bat_list, bat_link) {
- if (!strncmp(bat->bat_name, name, LST_NAME_SIZE)) {
- *batpp = bat;
- return 0;
- }
- }
-
- return -ENOENT;
-}
-
-int
-lstcon_batch_add(char *name)
-{
- struct lstcon_batch *bat;
- int i;
- int rc;
-
- rc = !lstcon_batch_find(name, &bat) ? -EEXIST : 0;
- if (rc) {
- CDEBUG(D_NET, "Batch %s already exists\n", name);
- return rc;
- }
-
- bat = kzalloc(sizeof(struct lstcon_batch), GFP_NOFS);
- if (!bat) {
- CERROR("Can't allocate descriptor for batch %s\n", name);
- return -ENOMEM;
- }
-
- bat->bat_cli_hash = kmalloc(sizeof(struct list_head) * LST_NODE_HASHSIZE,
- GFP_KERNEL);
- if (!bat->bat_cli_hash) {
- CERROR("Can't allocate hash for batch %s\n", name);
- kfree(bat);
-
- return -ENOMEM;
- }
-
- bat->bat_srv_hash = kmalloc(sizeof(struct list_head) * LST_NODE_HASHSIZE,
- GFP_KERNEL);
- if (!bat->bat_srv_hash) {
- CERROR("Can't allocate hash for batch %s\n", name);
- kfree(bat->bat_cli_hash);
- kfree(bat);
-
- return -ENOMEM;
- }
-
- if (strlen(name) > sizeof(bat->bat_name) - 1) {
- kfree(bat->bat_srv_hash);
- kfree(bat->bat_cli_hash);
- kfree(bat);
- return -E2BIG;
- }
- strncpy(bat->bat_name, name, sizeof(bat->bat_name));
- bat->bat_hdr.tsb_index = 0;
- bat->bat_hdr.tsb_id.bat_id = ++console_session.ses_id_cookie;
-
- bat->bat_ntest = 0;
- bat->bat_state = LST_BATCH_IDLE;
-
- INIT_LIST_HEAD(&bat->bat_cli_list);
- INIT_LIST_HEAD(&bat->bat_srv_list);
- INIT_LIST_HEAD(&bat->bat_test_list);
- INIT_LIST_HEAD(&bat->bat_trans_list);
-
- for (i = 0; i < LST_NODE_HASHSIZE; i++) {
- INIT_LIST_HEAD(&bat->bat_cli_hash[i]);
- INIT_LIST_HEAD(&bat->bat_srv_hash[i]);
- }
-
- list_add_tail(&bat->bat_link, &console_session.ses_bat_list);
-
- return rc;
-}
-
-int
-lstcon_batch_list(int index, int len, char __user *name_up)
-{
- struct lstcon_batch *bat;
-
- LASSERT(name_up);
- LASSERT(index >= 0);
-
- list_for_each_entry(bat, &console_session.ses_bat_list, bat_link) {
- if (!index--) {
- return copy_to_user(name_up, bat->bat_name, len) ?
- -EFAULT : 0;
- }
- }
-
- return -ENOENT;
-}
-
-int
-lstcon_batch_info(char *name, struct lstcon_test_batch_ent __user *ent_up,
- int server, int testidx, int *index_p, int *ndent_p,
- struct lstcon_node_ent __user *dents_up)
-{
- struct lstcon_test_batch_ent *entp;
- struct list_head *clilst;
- struct list_head *srvlst;
- struct lstcon_test *test = NULL;
- struct lstcon_batch *bat;
- struct lstcon_ndlink *ndl;
- int rc;
-
- rc = lstcon_batch_find(name, &bat);
- if (rc) {
- CDEBUG(D_NET, "Can't find batch %s\n", name);
- return -ENOENT;
- }
-
- if (testidx > 0) {
-		/* query a test; test index starts from 1 */
- list_for_each_entry(test, &bat->bat_test_list, tes_link) {
- if (testidx-- == 1)
- break;
- }
-
- if (testidx > 0) {
- CDEBUG(D_NET, "Can't find specified test in batch\n");
- return -ENOENT;
- }
- }
-
- clilst = !test ? &bat->bat_cli_list :
- &test->tes_src_grp->grp_ndl_list;
- srvlst = !test ? &bat->bat_srv_list :
- &test->tes_dst_grp->grp_ndl_list;
-
- if (dents_up) {
- rc = lstcon_nodes_getent((server ? srvlst : clilst),
- index_p, ndent_p, dents_up);
- return rc;
- }
-
- /* non-verbose query */
- entp = kzalloc(sizeof(struct lstcon_test_batch_ent), GFP_NOFS);
- if (!entp)
- return -ENOMEM;
-
- if (!test) {
- entp->u.tbe_batch.bae_ntest = bat->bat_ntest;
- entp->u.tbe_batch.bae_state = bat->bat_state;
- } else {
- entp->u.tbe_test.tse_type = test->tes_type;
- entp->u.tbe_test.tse_loop = test->tes_loop;
- entp->u.tbe_test.tse_concur = test->tes_concur;
- }
-
- list_for_each_entry(ndl, clilst, ndl_link)
- LST_NODE_STATE_COUNTER(ndl->ndl_node, &entp->tbe_cli_nle);
-
- list_for_each_entry(ndl, srvlst, ndl_link)
- LST_NODE_STATE_COUNTER(ndl->ndl_node, &entp->tbe_srv_nle);
-
- rc = copy_to_user(ent_up, entp,
- sizeof(struct lstcon_test_batch_ent)) ? -EFAULT : 0;
-
- kfree(entp);
-
- return rc;
-}
-
-static int
-lstcon_batrpc_condition(int transop, struct lstcon_node *nd, void *arg)
-{
- switch (transop) {
- case LST_TRANS_TSBRUN:
- if (nd->nd_state != LST_NODE_ACTIVE)
- return -ENETDOWN;
- break;
-
- case LST_TRANS_TSBSTOP:
- if (nd->nd_state != LST_NODE_ACTIVE)
- return 0;
- break;
-
- case LST_TRANS_TSBCLIQRY:
- case LST_TRANS_TSBSRVQRY:
- break;
- }
-
- return 1;
-}
-
-static int
-lstcon_batch_op(struct lstcon_batch *bat, int transop,
- struct list_head __user *result_up)
-{
- struct lstcon_rpc_trans *trans;
- int rc;
-
- rc = lstcon_rpc_trans_ndlist(&bat->bat_cli_list,
- &bat->bat_trans_list, transop,
- bat, lstcon_batrpc_condition, &trans);
- if (rc) {
- CERROR("Can't create transaction: %d\n", rc);
- return rc;
- }
-
- lstcon_rpc_trans_postwait(trans, LST_TRANS_TIMEOUT);
-
- rc = lstcon_rpc_trans_interpreter(trans, result_up, NULL);
-
- lstcon_rpc_trans_destroy(trans);
-
- return rc;
-}
-
-int
-lstcon_batch_run(char *name, int timeout, struct list_head __user *result_up)
-{
- struct lstcon_batch *bat;
- int rc;
-
- if (lstcon_batch_find(name, &bat)) {
- CDEBUG(D_NET, "Can't find batch %s\n", name);
- return -ENOENT;
- }
-
- bat->bat_arg = timeout;
-
- rc = lstcon_batch_op(bat, LST_TRANS_TSBRUN, result_up);
-
-	/* mark batch as running if it started on any node */
- if (lstcon_tsbop_stat_success(lstcon_trans_stat(), 0))
- bat->bat_state = LST_BATCH_RUNNING;
-
- return rc;
-}
-
-int
-lstcon_batch_stop(char *name, int force, struct list_head __user *result_up)
-{
- struct lstcon_batch *bat;
- int rc;
-
- if (lstcon_batch_find(name, &bat)) {
- CDEBUG(D_NET, "Can't find batch %s\n", name);
- return -ENOENT;
- }
-
- bat->bat_arg = force;
-
- rc = lstcon_batch_op(bat, LST_TRANS_TSBSTOP, result_up);
-
- /* mark batch as stopped if all RPCs finished */
- if (!lstcon_tsbop_stat_failure(lstcon_trans_stat(), 0))
- bat->bat_state = LST_BATCH_IDLE;
-
- return rc;
-}
-
-static void
-lstcon_batch_destroy(struct lstcon_batch *bat)
-{
- struct lstcon_ndlink *ndl;
- struct lstcon_test *test;
- int i;
-
- list_del(&bat->bat_link);
-
- while (!list_empty(&bat->bat_test_list)) {
- test = list_entry(bat->bat_test_list.next,
- struct lstcon_test, tes_link);
- LASSERT(list_empty(&test->tes_trans_list));
-
- list_del(&test->tes_link);
-
- lstcon_group_decref(test->tes_src_grp);
- lstcon_group_decref(test->tes_dst_grp);
-
- kfree(test);
- }
-
- LASSERT(list_empty(&bat->bat_trans_list));
-
- while (!list_empty(&bat->bat_cli_list)) {
- ndl = list_entry(bat->bat_cli_list.next,
- struct lstcon_ndlink, ndl_link);
- list_del_init(&ndl->ndl_link);
-
- lstcon_ndlink_release(ndl);
- }
-
- while (!list_empty(&bat->bat_srv_list)) {
- ndl = list_entry(bat->bat_srv_list.next,
- struct lstcon_ndlink, ndl_link);
- list_del_init(&ndl->ndl_link);
-
- lstcon_ndlink_release(ndl);
- }
-
- for (i = 0; i < LST_NODE_HASHSIZE; i++) {
- LASSERT(list_empty(&bat->bat_cli_hash[i]));
- LASSERT(list_empty(&bat->bat_srv_hash[i]));
- }
-
- kfree(bat->bat_cli_hash);
- kfree(bat->bat_srv_hash);
- kfree(bat);
-}
-
-static int
-lstcon_testrpc_condition(int transop, struct lstcon_node *nd, void *arg)
-{
- struct lstcon_test *test;
- struct lstcon_batch *batch;
- struct lstcon_ndlink *ndl;
- struct list_head *hash;
- struct list_head *head;
-
- test = (struct lstcon_test *)arg;
- LASSERT(test);
-
- batch = test->tes_batch;
- LASSERT(batch);
-
- if (test->tes_oneside &&
- transop == LST_TRANS_TSBSRVADD)
- return 0;
-
- if (nd->nd_state != LST_NODE_ACTIVE)
- return -ENETDOWN;
-
- if (transop == LST_TRANS_TSBCLIADD) {
- hash = batch->bat_cli_hash;
- head = &batch->bat_cli_list;
-
- } else {
- LASSERT(transop == LST_TRANS_TSBSRVADD);
-
- hash = batch->bat_srv_hash;
- head = &batch->bat_srv_list;
- }
-
- LASSERT(nd->nd_id.nid != LNET_NID_ANY);
-
- if (lstcon_ndlink_find(hash, nd->nd_id, &ndl, 1))
- return -ENOMEM;
-
- if (list_empty(&ndl->ndl_link))
- list_add_tail(&ndl->ndl_link, head);
-
- return 1;
-}
-
-static int
-lstcon_test_nodes_add(struct lstcon_test *test,
- struct list_head __user *result_up)
-{
- struct lstcon_rpc_trans *trans;
- struct lstcon_group *grp;
- int transop;
- int rc;
-
- LASSERT(test->tes_src_grp);
- LASSERT(test->tes_dst_grp);
-
- transop = LST_TRANS_TSBSRVADD;
- grp = test->tes_dst_grp;
-again:
- rc = lstcon_rpc_trans_ndlist(&grp->grp_ndl_list,
- &test->tes_trans_list, transop,
- test, lstcon_testrpc_condition, &trans);
- if (rc) {
- CERROR("Can't create transaction: %d\n", rc);
- return rc;
- }
-
- lstcon_rpc_trans_postwait(trans, LST_TRANS_TIMEOUT);
-
- if (lstcon_trans_stat()->trs_rpc_errno ||
- lstcon_trans_stat()->trs_fwk_errno) {
- lstcon_rpc_trans_interpreter(trans, result_up, NULL);
-
- lstcon_rpc_trans_destroy(trans);
- /* return if any error */
- CDEBUG(D_NET, "Failed to add test %s, RPC error %d, framework error %d\n",
- transop == LST_TRANS_TSBCLIADD ? "client" : "server",
- lstcon_trans_stat()->trs_rpc_errno,
- lstcon_trans_stat()->trs_fwk_errno);
-
- return rc;
- }
-
- lstcon_rpc_trans_destroy(trans);
-
- if (transop == LST_TRANS_TSBCLIADD)
- return rc;
-
- transop = LST_TRANS_TSBCLIADD;
- grp = test->tes_src_grp;
- test->tes_cliidx = 0;
-
- /* requests to test clients */
- goto again;
-}
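
The goto encodes a fixed two-pass order: the server side (destination group) is provisioned first, and the client side is only added once that pass completes without RPC or framework errors, so the targets exist before any traffic starts. The same control flow written as a loop, reusing only calls that appear in the function above (a sketch of the shape, not a drop-in replacement):

static const int passes[] = { LST_TRANS_TSBSRVADD, LST_TRANS_TSBCLIADD };
struct lstcon_group *grps[] = { test->tes_dst_grp, test->tes_src_grp };
int i;

for (i = 0; i < 2; i++) {
	rc = lstcon_rpc_trans_ndlist(&grps[i]->grp_ndl_list,
				     &test->tes_trans_list, passes[i],
				     test, lstcon_testrpc_condition, &trans);
	if (rc)
		return rc;

	lstcon_rpc_trans_postwait(trans, LST_TRANS_TIMEOUT);

	if (lstcon_trans_stat()->trs_rpc_errno ||
	    lstcon_trans_stat()->trs_fwk_errno) {
		lstcon_rpc_trans_interpreter(trans, result_up, NULL);
		lstcon_rpc_trans_destroy(trans);
		return rc;	/* rc is 0 here, as in the original */
	}

	lstcon_rpc_trans_destroy(trans);
	test->tes_cliidx = 0;	/* reset before the client pass */
}
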
-
-static int
-lstcon_verify_batch(const char *name, struct lstcon_batch **batch)
-{
- int rc;
-
- rc = lstcon_batch_find(name, batch);
- if (rc) {
- CDEBUG(D_NET, "Can't find batch %s\n", name);
- return rc;
- }
-
- if ((*batch)->bat_state != LST_BATCH_IDLE) {
- CDEBUG(D_NET, "Can't change running batch %s\n", name);
- return -EINVAL;
- }
-
- return 0;
-}
-
-static int
-lstcon_verify_group(const char *name, struct lstcon_group **grp)
-{
- int rc;
- struct lstcon_ndlink *ndl;
-
- rc = lstcon_group_find(name, grp);
- if (rc) {
- CDEBUG(D_NET, "can't find group %s\n", name);
- return rc;
- }
-
- list_for_each_entry(ndl, &(*grp)->grp_ndl_list, ndl_link) {
- if (ndl->ndl_node->nd_state == LST_NODE_ACTIVE)
- return 0;
- }
-
- CDEBUG(D_NET, "Group %s has no ACTIVE nodes\n", name);
-
- return -EINVAL;
-}
-
-int
-lstcon_test_add(char *batch_name, int type, int loop,
- int concur, int dist, int span,
- char *src_name, char *dst_name,
- void *param, int paramlen, int *retp,
- struct list_head __user *result_up)
-{
- struct lstcon_test *test = NULL;
- int rc;
- struct lstcon_group *src_grp = NULL;
- struct lstcon_group *dst_grp = NULL;
- struct lstcon_batch *batch = NULL;
-
- /*
- * verify that a batch of the given name exists, and the groups
- * that will be part of the batch exist and have at least one
- * active node
- */
- rc = lstcon_verify_batch(batch_name, &batch);
- if (rc)
- goto out;
-
- rc = lstcon_verify_group(src_name, &src_grp);
- if (rc)
- goto out;
-
- rc = lstcon_verify_group(dst_name, &dst_grp);
- if (rc)
- goto out;
-
- if (dst_grp->grp_userland)
- *retp = 1;
-
- test = kzalloc(offsetof(struct lstcon_test, tes_param[paramlen]),
- GFP_KERNEL);
- if (!test) {
- CERROR("Can't allocate test descriptor\n");
- rc = -ENOMEM;
-
- goto out;
- }
-
- test->tes_hdr.tsb_id = batch->bat_hdr.tsb_id;
- test->tes_batch = batch;
- test->tes_type = type;
- test->tes_oneside = 0; /* TODO */
- test->tes_loop = loop;
- test->tes_concur = concur;
- test->tes_stop_onerr = 1; /* TODO */
- test->tes_span = span;
- test->tes_dist = dist;
- test->tes_cliidx = 0; /* just used for creating RPC */
- test->tes_src_grp = src_grp;
- test->tes_dst_grp = dst_grp;
- INIT_LIST_HEAD(&test->tes_trans_list);
-
- if (param) {
- test->tes_paramlen = paramlen;
- memcpy(&test->tes_param[0], param, paramlen);
- }
-
- rc = lstcon_test_nodes_add(test, result_up);
-
- if (rc)
- goto out;
-
- if (lstcon_trans_stat()->trs_rpc_errno ||
- lstcon_trans_stat()->trs_fwk_errno)
- CDEBUG(D_NET, "Failed to add test %d to batch %s\n", type,
- batch_name);
-
- /* add to test list anyway, so user can check what's going on */
- list_add_tail(&test->tes_link, &batch->bat_test_list);
-
- batch->bat_ntest++;
- test->tes_hdr.tsb_index = batch->bat_ntest;
-
- /* hold groups so nobody can change them */
- return rc;
-out:
- kfree(test);
-
- if (dst_grp)
- lstcon_group_decref(dst_grp);
-
- if (src_grp)
- lstcon_group_decref(src_grp);
-
- return rc;
-}
-
-static int
-lstcon_test_find(struct lstcon_batch *batch, int idx,
- struct lstcon_test **testpp)
-{
- struct lstcon_test *test;
-
- list_for_each_entry(test, &batch->bat_test_list, tes_link) {
- if (idx == test->tes_hdr.tsb_index) {
- *testpp = test;
- return 0;
- }
- }
-
- return -ENOENT;
-}
-
-static int
-lstcon_tsbrpc_readent(int transop, struct srpc_msg *msg,
- struct lstcon_rpc_ent __user *ent_up)
-{
- struct srpc_batch_reply *rep = &msg->msg_body.bat_reply;
-
- LASSERT(transop == LST_TRANS_TSBCLIQRY ||
- transop == LST_TRANS_TSBSRVQRY);
-
- /* positive errno, framework error code */
- if (copy_to_user(&ent_up->rpe_priv[0], &rep->bar_active,
- sizeof(rep->bar_active)))
- return -EFAULT;
-
- return 0;
-}
-
-int
-lstcon_test_batch_query(char *name, int testidx, int client,
- int timeout, struct list_head __user *result_up)
-{
- struct lstcon_rpc_trans *trans;
- struct list_head *translist;
- struct list_head *ndlist;
- struct lstcon_tsb_hdr *hdr;
- struct lstcon_batch *batch;
- struct lstcon_test *test = NULL;
- int transop;
- int rc;
-
- rc = lstcon_batch_find(name, &batch);
- if (rc) {
- CDEBUG(D_NET, "Can't find batch: %s\n", name);
- return rc;
- }
-
- if (!testidx) {
- translist = &batch->bat_trans_list;
- ndlist = &batch->bat_cli_list;
- hdr = &batch->bat_hdr;
- } else {
- /* query specified test only */
- rc = lstcon_test_find(batch, testidx, &test);
- if (rc) {
- CDEBUG(D_NET, "Can't find test: %d\n", testidx);
- return rc;
- }
-
- translist = &test->tes_trans_list;
- ndlist = &test->tes_src_grp->grp_ndl_list;
- hdr = &test->tes_hdr;
- }
-
- transop = client ? LST_TRANS_TSBCLIQRY : LST_TRANS_TSBSRVQRY;
-
- rc = lstcon_rpc_trans_ndlist(ndlist, translist, transop, hdr,
- lstcon_batrpc_condition, &trans);
- if (rc) {
- CERROR("Can't create transaction: %d\n", rc);
- return rc;
- }
-
- lstcon_rpc_trans_postwait(trans, timeout);
-
- /* query a batch, not a test */
- if (!testidx &&
- !lstcon_rpc_stat_failure(lstcon_trans_stat(), 0) &&
- !lstcon_tsbqry_stat_run(lstcon_trans_stat(), 0)) {
- /* all RPCs finished, and no active test */
- batch->bat_state = LST_BATCH_IDLE;
- }
-
- rc = lstcon_rpc_trans_interpreter(trans, result_up,
- lstcon_tsbrpc_readent);
- lstcon_rpc_trans_destroy(trans);
-
- return rc;
-}
-
-static int
-lstcon_statrpc_readent(int transop, struct srpc_msg *msg,
- struct lstcon_rpc_ent __user *ent_up)
-{
- struct srpc_stat_reply *rep = &msg->msg_body.stat_reply;
- struct sfw_counters __user *sfwk_stat;
- struct srpc_counters __user *srpc_stat;
- struct lnet_counters __user *lnet_stat;
-
- if (rep->str_status)
- return 0;
-
- sfwk_stat = (struct sfw_counters __user *)&ent_up->rpe_payload[0];
- srpc_stat = (struct srpc_counters __user *)(sfwk_stat + 1);
- lnet_stat = (struct lnet_counters __user *)(srpc_stat + 1);
-
- if (copy_to_user(sfwk_stat, &rep->str_fw, sizeof(*sfwk_stat)) ||
- copy_to_user(srpc_stat, &rep->str_rpc, sizeof(*srpc_stat)) ||
- copy_to_user(lnet_stat, &rep->str_lnet, sizeof(*lnet_stat)))
- return -EFAULT;
-
- return 0;
-}
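
lstcon_statrpc_readent() lays the three counter blocks out back to back at the start of the entry payload. The matching userland view is a fragment like this (a sketch; the struct names mirror the kernel side, and ent is assumed to be a filled struct lstcon_rpc_ent):

struct sfw_counters  *sfwk = (struct sfw_counters *)&ent->rpe_payload[0];
struct srpc_counters *rpc  = (struct srpc_counters *)(sfwk + 1);
struct lnet_counters *lnet = (struct lnet_counters *)(rpc + 1);
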
-
-static int
-lstcon_ndlist_stat(struct list_head *ndlist,
- int timeout, struct list_head __user *result_up)
-{
- struct list_head head;
- struct lstcon_rpc_trans *trans;
- int rc;
-
- INIT_LIST_HEAD(&head);
-
- rc = lstcon_rpc_trans_ndlist(ndlist, &head,
- LST_TRANS_STATQRY, NULL, NULL, &trans);
- if (rc) {
- CERROR("Can't create transaction: %d\n", rc);
- return rc;
- }
-
- lstcon_rpc_trans_postwait(trans, LST_VALIDATE_TIMEOUT(timeout));
-
- rc = lstcon_rpc_trans_interpreter(trans, result_up,
- lstcon_statrpc_readent);
- lstcon_rpc_trans_destroy(trans);
-
- return rc;
-}
-
-int
-lstcon_group_stat(char *grp_name, int timeout,
- struct list_head __user *result_up)
-{
- struct lstcon_group *grp;
- int rc;
-
- rc = lstcon_group_find(grp_name, &grp);
- if (rc) {
- CDEBUG(D_NET, "Can't find group %s\n", grp_name);
- return rc;
- }
-
- rc = lstcon_ndlist_stat(&grp->grp_ndl_list, timeout, result_up);
-
- lstcon_group_decref(grp);
-
- return rc;
-}
-
-int
-lstcon_nodes_stat(int count, struct lnet_process_id __user *ids_up,
- int timeout, struct list_head __user *result_up)
-{
- struct lstcon_ndlink *ndl;
- struct lstcon_group *tmp;
- struct lnet_process_id id;
- int i;
- int rc;
-
- rc = lstcon_group_alloc(NULL, &tmp);
- if (rc) {
- CERROR("Out of memory\n");
- return -ENOMEM;
- }
-
-	for (i = 0; i < count; i++) {
- if (copy_from_user(&id, &ids_up[i], sizeof(id))) {
- rc = -EFAULT;
- break;
- }
-
- /* add to tmp group */
- rc = lstcon_group_ndlink_find(tmp, id, &ndl, 2);
- if (rc) {
- CDEBUG((rc == -ENOMEM) ? D_ERROR : D_NET,
- "Failed to find or create %s: %d\n",
- libcfs_id2str(id), rc);
- break;
- }
- }
-
- if (rc) {
- lstcon_group_decref(tmp);
- return rc;
- }
-
- rc = lstcon_ndlist_stat(&tmp->grp_ndl_list, timeout, result_up);
-
- lstcon_group_decref(tmp);
-
- return rc;
-}
-
-static int
-lstcon_debug_ndlist(struct list_head *ndlist,
- struct list_head *translist,
- int timeout, struct list_head __user *result_up)
-{
- struct lstcon_rpc_trans *trans;
- int rc;
-
- rc = lstcon_rpc_trans_ndlist(ndlist, translist, LST_TRANS_SESQRY,
- NULL, lstcon_sesrpc_condition, &trans);
- if (rc) {
- CERROR("Can't create transaction: %d\n", rc);
- return rc;
- }
-
- lstcon_rpc_trans_postwait(trans, LST_VALIDATE_TIMEOUT(timeout));
-
- rc = lstcon_rpc_trans_interpreter(trans, result_up,
- lstcon_sesrpc_readent);
- lstcon_rpc_trans_destroy(trans);
-
- return rc;
-}
-
-int
-lstcon_session_debug(int timeout, struct list_head __user *result_up)
-{
- return lstcon_debug_ndlist(&console_session.ses_ndl_list,
- NULL, timeout, result_up);
-}
-
-int
-lstcon_batch_debug(int timeout, char *name,
- int client, struct list_head __user *result_up)
-{
- struct lstcon_batch *bat;
- int rc;
-
- rc = lstcon_batch_find(name, &bat);
- if (rc)
- return -ENOENT;
-
- rc = lstcon_debug_ndlist(client ? &bat->bat_cli_list :
- &bat->bat_srv_list,
- NULL, timeout, result_up);
-
- return rc;
-}
-
-int
-lstcon_group_debug(int timeout, char *name,
- struct list_head __user *result_up)
-{
- struct lstcon_group *grp;
- int rc;
-
- rc = lstcon_group_find(name, &grp);
- if (rc)
- return -ENOENT;
-
- rc = lstcon_debug_ndlist(&grp->grp_ndl_list, NULL,
- timeout, result_up);
- lstcon_group_decref(grp);
-
- return rc;
-}
-
-int
-lstcon_nodes_debug(int timeout, int count,
- struct lnet_process_id __user *ids_up,
- struct list_head __user *result_up)
-{
- struct lnet_process_id id;
- struct lstcon_ndlink *ndl;
- struct lstcon_group *grp;
- int i;
- int rc;
-
- rc = lstcon_group_alloc(NULL, &grp);
- if (rc) {
- CDEBUG(D_NET, "Out of memory\n");
- return rc;
- }
-
- for (i = 0; i < count; i++) {
- if (copy_from_user(&id, &ids_up[i], sizeof(id))) {
- rc = -EFAULT;
- break;
- }
-
- /* node is added to tmp group */
- rc = lstcon_group_ndlink_find(grp, id, &ndl, 1);
- if (rc) {
- CERROR("Can't create node link\n");
- break;
- }
- }
-
- if (rc) {
- lstcon_group_decref(grp);
- return rc;
- }
-
- rc = lstcon_debug_ndlist(&grp->grp_ndl_list, NULL,
- timeout, result_up);
-
- lstcon_group_decref(grp);
-
- return rc;
-}
-
-int
-lstcon_session_match(struct lst_sid sid)
-{
- return (console_session.ses_id.ses_nid == sid.ses_nid &&
- console_session.ses_id.ses_stamp == sid.ses_stamp) ? 1 : 0;
-}
-
-static void
-lstcon_new_session_id(struct lst_sid *sid)
-{
- struct lnet_process_id id;
-
- LASSERT(console_session.ses_state == LST_SESSION_NONE);
-
- LNetGetId(1, &id);
- sid->ses_nid = id.nid;
- sid->ses_stamp = cfs_time_current();
-}
-
-int
-lstcon_session_new(char *name, int key, unsigned int feats,
- int timeout, int force, struct lst_sid __user *sid_up)
-{
- int rc = 0;
- int i;
-
- if (console_session.ses_state != LST_SESSION_NONE) {
- /* session exists */
- if (!force) {
- CNETERR("Session %s already exists\n",
- console_session.ses_name);
- return -EEXIST;
- }
-
- rc = lstcon_session_end();
-
-		/* lstcon_session_end() only returns local errors */
- if (rc)
- return rc;
- }
-
- if (feats & ~LST_FEATS_MASK) {
- CNETERR("Unknown session features %x\n",
- (feats & ~LST_FEATS_MASK));
- return -EINVAL;
- }
-
- for (i = 0; i < LST_GLOBAL_HASHSIZE; i++)
- LASSERT(list_empty(&console_session.ses_ndl_hash[i]));
-
- lstcon_new_session_id(&console_session.ses_id);
-
- console_session.ses_key = key;
- console_session.ses_state = LST_SESSION_ACTIVE;
- console_session.ses_force = !!force;
- console_session.ses_features = feats;
- console_session.ses_feats_updated = 0;
- console_session.ses_timeout = (timeout <= 0) ?
- LST_CONSOLE_TIMEOUT : timeout;
-
- if (strlen(name) > sizeof(console_session.ses_name) - 1)
- return -E2BIG;
- strlcpy(console_session.ses_name, name,
- sizeof(console_session.ses_name));
-
- rc = lstcon_batch_add(LST_DEFAULT_BATCH);
- if (rc)
- return rc;
-
- rc = lstcon_rpc_pinger_start();
- if (rc) {
- struct lstcon_batch *bat = NULL;
-
- lstcon_batch_find(LST_DEFAULT_BATCH, &bat);
- lstcon_batch_destroy(bat);
-
- return rc;
- }
-
- if (!copy_to_user(sid_up, &console_session.ses_id,
- sizeof(struct lst_sid)))
- return rc;
-
- lstcon_session_end();
-
- return -EFAULT;
-}
-
-int
-lstcon_session_info(struct lst_sid __user *sid_up, int __user *key_up,
- unsigned __user *featp,
- struct lstcon_ndlist_ent __user *ndinfo_up,
- char __user *name_up, int len)
-{
- struct lstcon_ndlist_ent *entp;
- struct lstcon_ndlink *ndl;
- int rc = 0;
-
- if (console_session.ses_state != LST_SESSION_ACTIVE)
- return -ESRCH;
-
- entp = kzalloc(sizeof(*entp), GFP_NOFS);
- if (!entp)
- return -ENOMEM;
-
- list_for_each_entry(ndl, &console_session.ses_ndl_list, ndl_link)
- LST_NODE_STATE_COUNTER(ndl->ndl_node, entp);
-
- if (copy_to_user(sid_up, &console_session.ses_id,
- sizeof(*sid_up)) ||
- copy_to_user(key_up, &console_session.ses_key,
- sizeof(*key_up)) ||
- copy_to_user(featp, &console_session.ses_features,
- sizeof(*featp)) ||
- copy_to_user(ndinfo_up, entp, sizeof(*entp)) ||
- copy_to_user(name_up, console_session.ses_name, len))
- rc = -EFAULT;
-
- kfree(entp);
-
- return rc;
-}
-
-int
-lstcon_session_end(void)
-{
- struct lstcon_rpc_trans *trans;
- struct lstcon_group *grp;
- struct lstcon_batch *bat;
- int rc = 0;
-
- LASSERT(console_session.ses_state == LST_SESSION_ACTIVE);
-
- rc = lstcon_rpc_trans_ndlist(&console_session.ses_ndl_list,
- NULL, LST_TRANS_SESEND, NULL,
- lstcon_sesrpc_condition, &trans);
- if (rc) {
- CERROR("Can't create transaction: %d\n", rc);
- return rc;
- }
-
- console_session.ses_shutdown = 1;
-
- lstcon_rpc_pinger_stop();
-
- lstcon_rpc_trans_postwait(trans, LST_TRANS_TIMEOUT);
-
- lstcon_rpc_trans_destroy(trans);
-	/* User can do nothing even if the RPC failed, so carry on */
-
- /* waiting for orphan rpcs to die */
- lstcon_rpc_cleanup_wait();
-
- console_session.ses_id = LST_INVALID_SID;
- console_session.ses_state = LST_SESSION_NONE;
- console_session.ses_key = 0;
- console_session.ses_force = 0;
- console_session.ses_feats_updated = 0;
-
- /* destroy all batches */
- while (!list_empty(&console_session.ses_bat_list)) {
- bat = list_entry(console_session.ses_bat_list.next,
- struct lstcon_batch, bat_link);
-
- lstcon_batch_destroy(bat);
- }
-
- /* destroy all groups */
- while (!list_empty(&console_session.ses_grp_list)) {
- grp = list_entry(console_session.ses_grp_list.next,
- struct lstcon_group, grp_link);
- LASSERT(grp->grp_ref == 1);
-
- lstcon_group_decref(grp);
- }
-
- /* all nodes should be released */
- LASSERT(list_empty(&console_session.ses_ndl_list));
-
- console_session.ses_shutdown = 0;
- console_session.ses_expired = 0;
-
- return rc;
-}
-
-int
-lstcon_session_feats_check(unsigned int feats)
-{
- int rc = 0;
-
- if (feats & ~LST_FEATS_MASK) {
- CERROR("Can't support these features: %x\n",
- (feats & ~LST_FEATS_MASK));
- return -EPROTO;
- }
-
- spin_lock(&console_session.ses_rpc_lock);
-
- if (!console_session.ses_feats_updated) {
- console_session.ses_feats_updated = 1;
- console_session.ses_features = feats;
- }
-
- if (console_session.ses_features != feats)
- rc = -EPROTO;
-
- spin_unlock(&console_session.ses_rpc_lock);
-
- if (rc) {
-		CERROR("remote features %x do not match the console's session features %x\n",
- feats, console_session.ses_features);
- }
-
- return rc;
-}
-
-static int
-lstcon_acceptor_handle(struct srpc_server_rpc *rpc)
-{
- struct srpc_msg *rep = &rpc->srpc_replymsg;
- struct srpc_msg *req = &rpc->srpc_reqstbuf->buf_msg;
- struct srpc_join_reqst *jreq = &req->msg_body.join_reqst;
- struct srpc_join_reply *jrep = &rep->msg_body.join_reply;
- struct lstcon_group *grp = NULL;
- struct lstcon_ndlink *ndl;
- int rc = 0;
-
- sfw_unpack_message(req);
-
- mutex_lock(&console_session.ses_mutex);
-
- jrep->join_sid = console_session.ses_id;
-
- if (console_session.ses_id.ses_nid == LNET_NID_ANY) {
- jrep->join_status = ESRCH;
- goto out;
- }
-
- if (lstcon_session_feats_check(req->msg_ses_feats)) {
- jrep->join_status = EPROTO;
- goto out;
- }
-
- if (jreq->join_sid.ses_nid != LNET_NID_ANY &&
- !lstcon_session_match(jreq->join_sid)) {
- jrep->join_status = EBUSY;
- goto out;
- }
-
- if (lstcon_group_find(jreq->join_group, &grp)) {
- rc = lstcon_group_alloc(jreq->join_group, &grp);
- if (rc) {
- CERROR("Out of memory\n");
- goto out;
- }
-
- list_add_tail(&grp->grp_link,
- &console_session.ses_grp_list);
- lstcon_group_addref(grp);
- }
-
- if (grp->grp_ref > 2) {
-		/* Group is in use */
- jrep->join_status = EBUSY;
- goto out;
- }
-
- rc = lstcon_group_ndlink_find(grp, rpc->srpc_peer, &ndl, 0);
- if (!rc) {
- jrep->join_status = EEXIST;
- goto out;
- }
-
- rc = lstcon_group_ndlink_find(grp, rpc->srpc_peer, &ndl, 1);
- if (rc) {
- CERROR("Out of memory\n");
- goto out;
- }
-
- ndl->ndl_node->nd_state = LST_NODE_ACTIVE;
- ndl->ndl_node->nd_timeout = console_session.ses_timeout;
-
- if (!grp->grp_userland)
- grp->grp_userland = 1;
-
- strlcpy(jrep->join_session, console_session.ses_name,
- sizeof(jrep->join_session));
- jrep->join_timeout = console_session.ses_timeout;
- jrep->join_status = 0;
-
-out:
- rep->msg_ses_feats = console_session.ses_features;
- if (grp)
- lstcon_group_decref(grp);
-
- mutex_unlock(&console_session.ses_mutex);
-
- return rc;
-}
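
Note the error convention in the handler above: wire-visible refusals travel as positive errnos in jrep->join_status (the RPC itself still succeeds), while the function's return value only reflects local failures such as allocation errors. An illustrative client-side check (hypothetical code, mirroring the reply fields set above):

if (jrep->join_status) {
	/* remote console refused: ESRCH, EPROTO, EBUSY or EEXIST above */
	return -jrep->join_status;	/* convert to a local errno */
}
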
-
-static struct srpc_service lstcon_acceptor_service;
-
-static void lstcon_init_acceptor_service(void)
-{
- /* initialize selftest console acceptor service table */
- lstcon_acceptor_service.sv_name = "join session";
- lstcon_acceptor_service.sv_handler = lstcon_acceptor_handle;
- lstcon_acceptor_service.sv_id = SRPC_SERVICE_JOIN;
- lstcon_acceptor_service.sv_wi_total = SFW_FRWK_WI_MAX;
-}
-
-static DECLARE_IOCTL_HANDLER(lstcon_ioctl_handler, lstcon_ioctl_entry);
-
-/* initialize console */
-int
-lstcon_console_init(void)
-{
- int i;
- int rc;
-
- memset(&console_session, 0, sizeof(struct lstcon_session));
-
- console_session.ses_id = LST_INVALID_SID;
- console_session.ses_state = LST_SESSION_NONE;
- console_session.ses_timeout = 0;
- console_session.ses_force = 0;
- console_session.ses_expired = 0;
- console_session.ses_feats_updated = 0;
- console_session.ses_features = LST_FEATS_MASK;
- console_session.ses_laststamp = ktime_get_real_seconds();
-
- mutex_init(&console_session.ses_mutex);
-
- INIT_LIST_HEAD(&console_session.ses_ndl_list);
- INIT_LIST_HEAD(&console_session.ses_grp_list);
- INIT_LIST_HEAD(&console_session.ses_bat_list);
- INIT_LIST_HEAD(&console_session.ses_trans_list);
-
- console_session.ses_ndl_hash =
- kmalloc(sizeof(struct list_head) * LST_GLOBAL_HASHSIZE, GFP_KERNEL);
- if (!console_session.ses_ndl_hash)
- return -ENOMEM;
-
- for (i = 0; i < LST_GLOBAL_HASHSIZE; i++)
- INIT_LIST_HEAD(&console_session.ses_ndl_hash[i]);
-
- /* initialize acceptor service table */
- lstcon_init_acceptor_service();
-
- rc = srpc_add_service(&lstcon_acceptor_service);
- LASSERT(rc != -EBUSY);
- if (rc) {
- kfree(console_session.ses_ndl_hash);
- return rc;
- }
-
- rc = srpc_service_add_buffers(&lstcon_acceptor_service,
- lstcon_acceptor_service.sv_wi_total);
- if (rc) {
- rc = -ENOMEM;
- goto out;
- }
-
- rc = libcfs_register_ioctl(&lstcon_ioctl_handler);
-
- if (!rc) {
- lstcon_rpc_module_init();
- return 0;
- }
-
-out:
- srpc_shutdown_service(&lstcon_acceptor_service);
- srpc_remove_service(&lstcon_acceptor_service);
-
- kfree(console_session.ses_ndl_hash);
-
- srpc_wait_service_shutdown(&lstcon_acceptor_service);
-
- return rc;
-}
-
-int
-lstcon_console_fini(void)
-{
- int i;
-
- libcfs_deregister_ioctl(&lstcon_ioctl_handler);
-
- mutex_lock(&console_session.ses_mutex);
-
- srpc_shutdown_service(&lstcon_acceptor_service);
- srpc_remove_service(&lstcon_acceptor_service);
-
- if (console_session.ses_state != LST_SESSION_NONE)
- lstcon_session_end();
-
- lstcon_rpc_module_fini();
-
- mutex_unlock(&console_session.ses_mutex);
-
- LASSERT(list_empty(&console_session.ses_ndl_list));
- LASSERT(list_empty(&console_session.ses_grp_list));
- LASSERT(list_empty(&console_session.ses_bat_list));
- LASSERT(list_empty(&console_session.ses_trans_list));
-
-	for (i = 0; i < LST_GLOBAL_HASHSIZE; i++)
- LASSERT(list_empty(&console_session.ses_ndl_hash[i]));
-
- kfree(console_session.ses_ndl_hash);
-
- srpc_wait_service_shutdown(&lstcon_acceptor_service);
-
- return 0;
-}
diff --git a/drivers/staging/lustre/lnet/selftest/console.h b/drivers/staging/lustre/lnet/selftest/console.h
deleted file mode 100644
index 3933ed4cca93..000000000000
--- a/drivers/staging/lustre/lnet/selftest/console.h
+++ /dev/null
@@ -1,244 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/*
- * GPL HEADER START
- *
- * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 only,
- * as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * General Public License version 2 for more details (a copy is included
- * in the LICENSE file that accompanied this code).
- *
- * You should have received a copy of the GNU General Public License
- * version 2 along with this program; If not, see
- * http://www.gnu.org/licenses/gpl-2.0.html
- *
- * GPL HEADER END
- */
-/*
- * Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved.
- * Use is subject to license terms.
- *
- * Copyright (c) 2011, 2012, Intel Corporation.
- */
-/*
- * This file is part of Lustre, http://www.lustre.org/
- * Lustre is a trademark of Sun Microsystems, Inc.
- *
- * lnet/selftest/console.h
- *
- * kernel structure for LST console
- *
- * Author: Liang Zhen <liangzhen@clusterfs.com>
- */
-
-#ifndef __LST_CONSOLE_H__
-#define __LST_CONSOLE_H__
-
-#include <linux/libcfs/libcfs.h>
-#include <linux/lnet/lib-types.h>
-#include <uapi/linux/lnet/lnetst.h>
-#include "selftest.h"
-#include "conrpc.h"
-
-/* node descriptor */
-struct lstcon_node {
- struct lnet_process_id nd_id; /* id of the node */
- int nd_ref; /* reference count */
- int nd_state; /* state of the node */
- int nd_timeout; /* session timeout */
- unsigned long nd_stamp; /* timestamp of last replied RPC */
- struct lstcon_rpc nd_ping; /* ping rpc */
-};
-
-/* node link descriptor */
-struct lstcon_ndlink {
- struct list_head ndl_link; /* chain on list */
- struct list_head ndl_hlink; /* chain on hash */
- struct lstcon_node *ndl_node; /* pointer to node */
-};
-
-/* (alias of nodes) group descriptor */
-struct lstcon_group {
- struct list_head grp_link; /* chain on global group list
- */
- int grp_ref; /* reference count */
- int grp_userland; /* has userland nodes */
- int grp_nnode; /* # of nodes */
- char grp_name[LST_NAME_SIZE]; /* group name */
-
- struct list_head grp_trans_list; /* transaction list */
- struct list_head grp_ndl_list; /* nodes list */
- struct list_head grp_ndl_hash[0]; /* hash table for nodes */
-};
-
-#define LST_BATCH_IDLE 0xB0 /* idle batch */
-#define LST_BATCH_RUNNING 0xB1 /* running batch */
-
-struct lstcon_tsb_hdr {
- struct lst_bid tsb_id; /* batch ID */
- int tsb_index; /* test index */
-};
-
-/* (tests ) batch descriptor */
-struct lstcon_batch {
- struct lstcon_tsb_hdr bat_hdr; /* test_batch header */
- struct list_head bat_link; /* chain on session's batches list */
- int bat_ntest; /* # of tests */
- int bat_state; /* state of the batch */
- int bat_arg; /* parameter for run|stop, timeout
- * for run, force for stop
- */
- char bat_name[LST_NAME_SIZE];/* name of batch */
-
- struct list_head bat_test_list; /* list head of tests (struct lstcon_test)
- */
- struct list_head bat_trans_list; /* list head of transaction */
- struct list_head bat_cli_list; /* list head of client nodes
- * (struct lstcon_node)
- */
- struct list_head *bat_cli_hash; /* hash table of client nodes */
- struct list_head bat_srv_list; /* list head of server nodes */
- struct list_head *bat_srv_hash; /* hash table of server nodes */
-};
-
-/* a single test descriptor */
-struct lstcon_test {
- struct lstcon_tsb_hdr tes_hdr; /* test batch header */
- struct list_head tes_link; /* chain on batch's tests list */
- struct lstcon_batch *tes_batch; /* pointer to batch */
-
- int tes_type; /* type of the test, e.g. bulk, ping */
- int tes_stop_onerr; /* stop on error */
- int tes_oneside; /* one-sided test */
- int tes_concur; /* concurrency */
- int tes_loop; /* loop count */
- int tes_dist; /* nodes distribution of target group */
- int tes_span; /* nodes span of target group */
- int tes_cliidx; /* client index, used for RPC creation */
-
- struct list_head tes_trans_list; /* transaction list */
- struct lstcon_group *tes_src_grp; /* group running the test */
- struct lstcon_group *tes_dst_grp; /* target group */
-
- int tes_paramlen; /* test parameter length */
- char tes_param[0]; /* test parameter */
-};
-
-#define LST_GLOBAL_HASHSIZE 503 /* global nodes hash table size */
-#define LST_NODE_HASHSIZE 239 /* node hash table (for batch or group) */
-
-#define LST_SESSION_NONE 0x0 /* no session */
-#define LST_SESSION_ACTIVE 0x1 /* working session */
-
-#define LST_CONSOLE_TIMEOUT 300 /* default console timeout */
-
-struct lstcon_session {
- struct mutex ses_mutex; /* only 1 thread in session */
- struct lst_sid ses_id; /* global session id */
- int ses_key; /* local session key */
- int ses_state; /* state of session */
- int ses_timeout; /* timeout in seconds */
- time64_t ses_laststamp; /* last operation stamp (seconds)
- */
- unsigned int ses_features; /* test features of the session
- */
- unsigned int ses_feats_updated:1; /* features are synced with
- * remote test nodes
- */
- unsigned int ses_force:1; /* force session creation */
- unsigned int ses_shutdown:1; /* session is shutting down */
- unsigned int ses_expired:1; /* console has timed out */
- __u64 ses_id_cookie; /* batch id cookie */
- char ses_name[LST_NAME_SIZE];/* session name */
- struct lstcon_rpc_trans *ses_ping; /* session pinger */
- struct stt_timer ses_ping_timer; /* timer for pinger */
- struct lstcon_trans_stat ses_trans_stat; /* transaction stats */
-
- struct list_head ses_trans_list; /* global list of transactions */
- struct list_head ses_grp_list; /* global list of groups */
- struct list_head ses_bat_list; /* global list of batches */
- struct list_head ses_ndl_list; /* global list of nodes */
- struct list_head *ses_ndl_hash; /* hash table of nodes */
-
- spinlock_t ses_rpc_lock; /* serialize */
- atomic_t ses_rpc_counter; /* # of initialized RPCs */
- struct list_head ses_rpc_freelist; /* idle console rpc */
-}; /* session descriptor */
-
-extern struct lstcon_session console_session;
-
-static inline struct lstcon_trans_stat *
-lstcon_trans_stat(void)
-{
- return &console_session.ses_trans_stat;
-}
-
-static inline struct list_head *
-lstcon_id2hash(struct lnet_process_id id, struct list_head *hash)
-{
- unsigned int idx = LNET_NIDADDR(id.nid) % LST_NODE_HASHSIZE;
-
- return &hash[idx];
-}
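
A minimal userspace sketch of the bucket selection in lstcon_id2hash() above; the low-32-bit address mask mirrors what LNET_NIDADDR() does in LNet, and the sample NID value is illustrative:

#include <stdint.h>
#include <stdio.h>

#define LST_NODE_HASHSIZE 239           /* matches the definition above */

/* LNET packs the network address into the low 32 bits of the NID */
static unsigned int nid_addr(uint64_t nid)
{
        return (unsigned int)(nid & 0xffffffffU);
}

static unsigned int node_hash_idx(uint64_t nid)
{
        return nid_addr(nid) % LST_NODE_HASHSIZE;
}

int main(void)
{
        uint64_t nid = 0x20000c0a80101ULL;      /* illustrative NID value */

        printf("nid %#llx -> bucket %u\n",
               (unsigned long long)nid, node_hash_idx(nid));
        return 0;
}
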
-
-int lstcon_ioctl_entry(unsigned int cmd, struct libcfs_ioctl_hdr *hdr);
-int lstcon_console_init(void);
-int lstcon_console_fini(void);
-int lstcon_session_match(struct lst_sid sid);
-int lstcon_session_new(char *name, int key, unsigned int version,
- int timeout, int flags, struct lst_sid __user *sid_up);
-int lstcon_session_info(struct lst_sid __user *sid_up, int __user *key,
- unsigned __user *verp, struct lstcon_ndlist_ent __user *entp,
- char __user *name_up, int len);
-int lstcon_session_end(void);
-int lstcon_session_debug(int timeout, struct list_head __user *result_up);
-int lstcon_session_feats_check(unsigned int feats);
-int lstcon_batch_debug(int timeout, char *name,
- int client, struct list_head __user *result_up);
-int lstcon_group_debug(int timeout, char *name,
- struct list_head __user *result_up);
-int lstcon_nodes_debug(int timeout, int nnd,
- struct lnet_process_id __user *nds_up,
- struct list_head __user *result_up);
-int lstcon_group_add(char *name);
-int lstcon_group_del(char *name);
-int lstcon_group_clean(char *name, int args);
-int lstcon_group_refresh(char *name, struct list_head __user *result_up);
-int lstcon_nodes_add(char *name, int nnd, struct lnet_process_id __user *nds_up,
- unsigned int *featp, struct list_head __user *result_up);
-int lstcon_nodes_remove(char *name, int nnd,
- struct lnet_process_id __user *nds_up,
- struct list_head __user *result_up);
-int lstcon_group_info(char *name, struct lstcon_ndlist_ent __user *gent_up,
- int *index_p, int *ndent_p,
- struct lstcon_node_ent __user *ndents_up);
-int lstcon_group_list(int idx, int len, char __user *name_up);
-int lstcon_batch_add(char *name);
-int lstcon_batch_run(char *name, int timeout,
- struct list_head __user *result_up);
-int lstcon_batch_stop(char *name, int force,
- struct list_head __user *result_up);
-int lstcon_test_batch_query(char *name, int testidx,
- int client, int timeout,
- struct list_head __user *result_up);
-int lstcon_batch_del(char *name);
-int lstcon_batch_list(int idx, int namelen, char __user *name_up);
-int lstcon_batch_info(char *name, struct lstcon_test_batch_ent __user *ent_up,
- int server, int testidx, int *index_p,
- int *ndent_p, struct lstcon_node_ent __user *dents_up);
-int lstcon_group_stat(char *grp_name, int timeout,
- struct list_head __user *result_up);
-int lstcon_nodes_stat(int count, struct lnet_process_id __user *ids_up,
- int timeout, struct list_head __user *result_up);
-int lstcon_test_add(char *batch_name, int type, int loop,
- int concur, int dist, int span,
- char *src_name, char *dst_name,
- void *param, int paramlen, int *retp,
- struct list_head __user *result_up);
-#endif
diff --git a/drivers/staging/lustre/lnet/selftest/framework.c b/drivers/staging/lustre/lnet/selftest/framework.c
deleted file mode 100644
index 0ca1e3a780ca..000000000000
--- a/drivers/staging/lustre/lnet/selftest/framework.c
+++ /dev/null
@@ -1,1786 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/*
- * GPL HEADER START
- *
- * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 only,
- * as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * General Public License version 2 for more details (a copy is included
- * in the LICENSE file that accompanied this code).
- *
- * You should have received a copy of the GNU General Public License
- * version 2 along with this program; If not, see
- * http://www.gnu.org/licenses/gpl-2.0.html
- *
- * GPL HEADER END
- */
-/*
- * Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved.
- * Use is subject to license terms.
- *
- * Copyright (c) 2012, Intel Corporation.
- */
-/*
- * This file is part of Lustre, http://www.lustre.org/
- * Lustre is a trademark of Sun Microsystems, Inc.
- *
- * lnet/selftest/framework.c
- *
- * Author: Isaac Huang <isaac@clusterfs.com>
- * Author: Liang Zhen <liangzhen@clusterfs.com>
- */
-
-#define DEBUG_SUBSYSTEM S_LNET
-
-#include "selftest.h"
-
-struct lst_sid LST_INVALID_SID = {LNET_NID_ANY, -1};
-
-static int session_timeout = 100;
-module_param(session_timeout, int, 0444);
-MODULE_PARM_DESC(session_timeout, "test session timeout in seconds (100 by default, 0 == never)");
-
-static int rpc_timeout = 64;
-module_param(rpc_timeout, int, 0644);
-MODULE_PARM_DESC(rpc_timeout, "rpc timeout in seconds (64 by default, 0 == never)");
-
-#define sfw_unpack_id(id) \
-do { \
- __swab64s(&(id).nid); \
- __swab32s(&(id).pid); \
-} while (0)
-
-#define sfw_unpack_sid(sid) \
-do { \
- __swab64s(&(sid).ses_nid); \
- __swab64s(&(sid).ses_stamp); \
-} while (0)
-
-#define sfw_unpack_fw_counters(fc) \
-do { \
- __swab32s(&(fc).running_ms); \
- __swab32s(&(fc).active_batches); \
- __swab32s(&(fc).zombie_sessions); \
- __swab32s(&(fc).brw_errors); \
- __swab32s(&(fc).ping_errors); \
-} while (0)
-
-#define sfw_unpack_rpc_counters(rc) \
-do { \
- __swab32s(&(rc).errors); \
- __swab32s(&(rc).rpcs_sent); \
- __swab32s(&(rc).rpcs_rcvd); \
- __swab32s(&(rc).rpcs_dropped); \
- __swab32s(&(rc).rpcs_expired); \
- __swab64s(&(rc).bulk_get); \
- __swab64s(&(rc).bulk_put); \
-} while (0)
-
-#define sfw_unpack_lnet_counters(lc) \
-do { \
- __swab32s(&(lc).errors); \
- __swab32s(&(lc).msgs_max); \
- __swab32s(&(lc).msgs_alloc); \
- __swab32s(&(lc).send_count); \
- __swab32s(&(lc).recv_count); \
- __swab32s(&(lc).drop_count); \
- __swab32s(&(lc).route_count); \
- __swab64s(&(lc).send_length); \
- __swab64s(&(lc).recv_length); \
- __swab64s(&(lc).drop_length); \
- __swab64s(&(lc).route_length); \
-} while (0)
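
The sfw_unpack_* macros above implement the standard cross-endian fixup: if the sender's magic arrives byte-swapped, every multi-byte field of the message is swabbed in place. A standalone sketch of the same pattern; the struct layout and magic value are illustrative, not the real srpc wire format:

#include <stdint.h>

#define MSG_MAGIC 0xbabeface            /* illustrative magic */

static uint32_t swab32(uint32_t v)
{
        return (v >> 24) | ((v >> 8) & 0x0000ff00U) |
               ((v << 8) & 0x00ff0000U) | (v << 24);
}

struct wire_counters {                  /* illustrative payload */
        uint32_t magic;
        uint32_t errors;
        uint32_t rpcs_sent;
};

static void unpack_counters(struct wire_counters *c)
{
        if (c->magic == MSG_MAGIC)
                return;         /* sender has our byte order, nothing to do */

        c->magic = swab32(c->magic);
        c->errors = swab32(c->errors);
        c->rpcs_sent = swab32(c->rpcs_sent);
}
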
-
-#define sfw_test_active(t) (atomic_read(&(t)->tsi_nactive))
-#define sfw_batch_active(b) (atomic_read(&(b)->bat_nactive))
-
-static struct smoketest_framework {
- struct list_head fw_zombie_rpcs; /* RPCs to be recycled */
- struct list_head fw_zombie_sessions; /* stopping sessions */
- struct list_head fw_tests; /* registered test cases */
- atomic_t fw_nzombies; /* # zombie sessions */
- spinlock_t fw_lock; /* serialise */
- struct sfw_session *fw_session; /* _the_ session */
- int fw_shuttingdown; /* shutdown in progress */
- struct srpc_server_rpc *fw_active_srpc;/* running RPC */
-} sfw_data;
-
-/* forward declarations */
-int sfw_stop_batch(struct sfw_batch *tsb, int force);
-void sfw_destroy_session(struct sfw_session *sn);
-
-static inline struct sfw_test_case *
-sfw_find_test_case(int id)
-{
- struct sfw_test_case *tsc;
-
- LASSERT(id <= SRPC_SERVICE_MAX_ID);
- LASSERT(id > SRPC_FRAMEWORK_SERVICE_MAX_ID);
-
- list_for_each_entry(tsc, &sfw_data.fw_tests, tsc_list) {
- if (tsc->tsc_srv_service->sv_id == id)
- return tsc;
- }
-
- return NULL;
-}
-
-static int
-sfw_register_test(struct srpc_service *service,
- struct sfw_test_client_ops *cliops)
-{
- struct sfw_test_case *tsc;
-
- if (sfw_find_test_case(service->sv_id)) {
- CERROR("Failed to register test %s (%d)\n",
- service->sv_name, service->sv_id);
- return -EEXIST;
- }
-
- tsc = kzalloc(sizeof(struct sfw_test_case), GFP_NOFS);
- if (!tsc)
- return -ENOMEM;
-
- tsc->tsc_cli_ops = cliops;
- tsc->tsc_srv_service = service;
-
- list_add_tail(&tsc->tsc_list, &sfw_data.fw_tests);
- return 0;
-}
-
-static void
-sfw_add_session_timer(void)
-{
- struct sfw_session *sn = sfw_data.fw_session;
- struct stt_timer *timer = &sn->sn_timer;
-
- LASSERT(!sfw_data.fw_shuttingdown);
-
- if (!sn || !sn->sn_timeout)
- return;
-
- LASSERT(!sn->sn_timer_active);
-
- sn->sn_timer_active = 1;
- timer->stt_expires = ktime_get_real_seconds() + sn->sn_timeout;
- stt_add_timer(timer);
-}
-
-static int
-sfw_del_session_timer(void)
-{
- struct sfw_session *sn = sfw_data.fw_session;
-
- if (!sn || !sn->sn_timer_active)
- return 0;
-
- LASSERT(sn->sn_timeout);
-
- if (stt_del_timer(&sn->sn_timer)) { /* timer defused */
- sn->sn_timer_active = 0;
- return 0;
- }
-
- return -EBUSY; /* racing with sfw_session_expired() */
-}
-
-static void
-sfw_deactivate_session(void)
-__must_hold(&sfw_data.fw_lock)
-{
- struct sfw_session *sn = sfw_data.fw_session;
- int nactive = 0;
- struct sfw_batch *tsb;
- struct sfw_test_case *tsc;
-
- if (!sn)
- return;
-
- LASSERT(!sn->sn_timer_active);
-
- sfw_data.fw_session = NULL;
- atomic_inc(&sfw_data.fw_nzombies);
- list_add(&sn->sn_list, &sfw_data.fw_zombie_sessions);
-
- spin_unlock(&sfw_data.fw_lock);
-
- list_for_each_entry(tsc, &sfw_data.fw_tests, tsc_list) {
- srpc_abort_service(tsc->tsc_srv_service);
- }
-
- spin_lock(&sfw_data.fw_lock);
-
- list_for_each_entry(tsb, &sn->sn_batches, bat_list) {
- if (sfw_batch_active(tsb)) {
- nactive++;
- sfw_stop_batch(tsb, 1);
- }
- }
-
- if (nactive)
- return; /* wait for active batches to stop */
-
- list_del_init(&sn->sn_list);
- spin_unlock(&sfw_data.fw_lock);
-
- sfw_destroy_session(sn);
-
- spin_lock(&sfw_data.fw_lock);
-}
-
-static void
-sfw_session_expired(void *data)
-{
- struct sfw_session *sn = data;
-
- spin_lock(&sfw_data.fw_lock);
-
- LASSERT(sn->sn_timer_active);
- LASSERT(sn == sfw_data.fw_session);
-
- CWARN("Session expired! sid: %s-%llu, name: %s\n",
- libcfs_nid2str(sn->sn_id.ses_nid),
- sn->sn_id.ses_stamp, &sn->sn_name[0]);
-
- sn->sn_timer_active = 0;
- sfw_deactivate_session();
-
- spin_unlock(&sfw_data.fw_lock);
-}
-
-static inline void
-sfw_init_session(struct sfw_session *sn, struct lst_sid sid,
- unsigned int features, const char *name)
-{
- struct stt_timer *timer = &sn->sn_timer;
-
- memset(sn, 0, sizeof(struct sfw_session));
- INIT_LIST_HEAD(&sn->sn_list);
- INIT_LIST_HEAD(&sn->sn_batches);
- atomic_set(&sn->sn_refcount, 1); /* +1 for caller */
- atomic_set(&sn->sn_brw_errors, 0);
- atomic_set(&sn->sn_ping_errors, 0);
- strlcpy(&sn->sn_name[0], name, sizeof(sn->sn_name));
-
- sn->sn_timer_active = 0;
- sn->sn_id = sid;
- sn->sn_features = features;
- sn->sn_timeout = session_timeout;
- sn->sn_started = cfs_time_current();
-
- timer->stt_data = sn;
- timer->stt_func = sfw_session_expired;
- INIT_LIST_HEAD(&timer->stt_list);
-}
-
-/* completion handler for incoming framework RPCs */
-static void
-sfw_server_rpc_done(struct srpc_server_rpc *rpc)
-{
- struct srpc_service *sv = rpc->srpc_scd->scd_svc;
- int status = rpc->srpc_status;
-
- CDEBUG(D_NET, "Incoming framework RPC done: service %s, peer %s, status %s:%d\n",
- sv->sv_name, libcfs_id2str(rpc->srpc_peer),
- swi_state2str(rpc->srpc_wi.swi_state),
- status);
-
- if (rpc->srpc_bulk)
- sfw_free_pages(rpc);
-}
-
-static void
-sfw_client_rpc_fini(struct srpc_client_rpc *rpc)
-{
- LASSERT(!rpc->crpc_bulk.bk_niov);
- LASSERT(list_empty(&rpc->crpc_list));
- LASSERT(!atomic_read(&rpc->crpc_refcount));
-
- CDEBUG(D_NET, "Outgoing framework RPC done: service %d, peer %s, status %s:%d:%d\n",
- rpc->crpc_service, libcfs_id2str(rpc->crpc_dest),
- swi_state2str(rpc->crpc_wi.swi_state),
- rpc->crpc_aborted, rpc->crpc_status);
-
- spin_lock(&sfw_data.fw_lock);
-
- /* my callers must finish all RPCs before shutting me down */
- LASSERT(!sfw_data.fw_shuttingdown);
- list_add(&rpc->crpc_list, &sfw_data.fw_zombie_rpcs);
-
- spin_unlock(&sfw_data.fw_lock);
-}
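
sfw_client_rpc_fini() above does not free the finished RPC: it parks it on fw_zombie_rpcs so a later sfw_create_rpc() can recycle it. A minimal sketch of that lock-protected freelist, with illustrative types standing in for struct srpc_client_rpc and sfw_data.fw_lock:

#include <pthread.h>
#include <stdlib.h>

struct rpc {
        struct rpc *next;
        /* ... request/reply state ... */
};

static struct rpc *zombie_rpcs;
static pthread_mutex_t fw_lock = PTHREAD_MUTEX_INITIALIZER;

static void rpc_fini(struct rpc *r)     /* like sfw_client_rpc_fini() */
{
        pthread_mutex_lock(&fw_lock);
        r->next = zombie_rpcs;          /* park, don't free */
        zombie_rpcs = r;
        pthread_mutex_unlock(&fw_lock);
}

static struct rpc *rpc_get(void)        /* like sfw_create_rpc() */
{
        struct rpc *r;

        pthread_mutex_lock(&fw_lock);
        r = zombie_rpcs;
        if (r)
                zombie_rpcs = r->next;  /* reuse a zombie if one exists */
        pthread_mutex_unlock(&fw_lock);

        return r ? r : calloc(1, sizeof(*r));
}
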
-
-static struct sfw_batch *
-sfw_find_batch(struct lst_bid bid)
-{
- struct sfw_session *sn = sfw_data.fw_session;
- struct sfw_batch *bat;
-
- LASSERT(sn);
-
- list_for_each_entry(bat, &sn->sn_batches, bat_list) {
- if (bat->bat_id.bat_id == bid.bat_id)
- return bat;
- }
-
- return NULL;
-}
-
-static struct sfw_batch *
-sfw_bid2batch(struct lst_bid bid)
-{
- struct sfw_session *sn = sfw_data.fw_session;
- struct sfw_batch *bat;
-
- LASSERT(sn);
-
- bat = sfw_find_batch(bid);
- if (bat)
- return bat;
-
- bat = kzalloc(sizeof(struct sfw_batch), GFP_NOFS);
- if (!bat)
- return NULL;
-
- bat->bat_error = 0;
- bat->bat_session = sn;
- bat->bat_id = bid;
- atomic_set(&bat->bat_nactive, 0);
- INIT_LIST_HEAD(&bat->bat_tests);
-
- list_add_tail(&bat->bat_list, &sn->sn_batches);
- return bat;
-}
-
-static int
-sfw_get_stats(struct srpc_stat_reqst *request, struct srpc_stat_reply *reply)
-{
- struct sfw_session *sn = sfw_data.fw_session;
- struct sfw_counters *cnt = &reply->str_fw;
- struct sfw_batch *bat;
-
- reply->str_sid = !sn ? LST_INVALID_SID : sn->sn_id;
-
- if (request->str_sid.ses_nid == LNET_NID_ANY) {
- reply->str_status = EINVAL;
- return 0;
- }
-
- if (!sn || !sfw_sid_equal(request->str_sid, sn->sn_id)) {
- reply->str_status = ESRCH;
- return 0;
- }
-
- lnet_counters_get(&reply->str_lnet);
- srpc_get_counters(&reply->str_rpc);
-
- /*
- * report the milliseconds elapsed since the session started;
- * with only 32 bits to send, this wraps after ~49 days
- */
- cnt->running_ms = jiffies_to_msecs(jiffies - sn->sn_started);
- cnt->brw_errors = atomic_read(&sn->sn_brw_errors);
- cnt->ping_errors = atomic_read(&sn->sn_ping_errors);
- cnt->zombie_sessions = atomic_read(&sfw_data.fw_nzombies);
-
- cnt->active_batches = 0;
- list_for_each_entry(bat, &sn->sn_batches, bat_list) {
- if (atomic_read(&bat->bat_nactive) > 0)
- cnt->active_batches++;
- }
-
- reply->str_status = 0;
- return 0;
-}
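
The "~49 days" figure above follows from running_ms being a 32-bit millisecond counter: it wraps after 2^32 ms. A one-liner to check the arithmetic:

#include <stdio.h>

int main(void)
{
        double days = 4294967296.0 / 1000 / 60 / 60 / 24;

        printf("32-bit ms counter wraps after %.1f days\n", days); /* ~49.7 */
        return 0;
}
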
-
-int
-sfw_make_session(struct srpc_mksn_reqst *request, struct srpc_mksn_reply *reply)
-{
- struct sfw_session *sn = sfw_data.fw_session;
- struct srpc_msg *msg = container_of(request, struct srpc_msg,
- msg_body.mksn_reqst);
- int cplen = 0;
-
- if (request->mksn_sid.ses_nid == LNET_NID_ANY) {
- reply->mksn_sid = !sn ? LST_INVALID_SID : sn->sn_id;
- reply->mksn_status = EINVAL;
- return 0;
- }
-
- if (sn) {
- reply->mksn_status = 0;
- reply->mksn_sid = sn->sn_id;
- reply->mksn_timeout = sn->sn_timeout;
-
- if (sfw_sid_equal(request->mksn_sid, sn->sn_id)) {
- atomic_inc(&sn->sn_refcount);
- return 0;
- }
-
- if (!request->mksn_force) {
- reply->mksn_status = EBUSY;
- cplen = strlcpy(&reply->mksn_name[0], &sn->sn_name[0],
- sizeof(reply->mksn_name));
- if (cplen >= sizeof(reply->mksn_name))
- return -E2BIG;
- return 0;
- }
- }
-
- /*
- * reject the request if it requires unknown features
- * NB: old versions always accept all features because they are not
- * aware of srpc_msg::msg_ses_feats. That is a defect, but a harmless
- * one: such a node reports zero features to the console, and it is
- * the console's responsibility to make sure all nodes in a session
- * share the same feature mask.
- */
- if (msg->msg_ses_feats & ~LST_FEATS_MASK) {
- reply->mksn_status = EPROTO;
- return 0;
- }
-
- /* brand new session, or re-creation forced by the client */
- sn = kzalloc(sizeof(struct sfw_session), GFP_NOFS);
- if (!sn) {
- CERROR("dropping RPC mksn under memory pressure\n");
- return -ENOMEM;
- }
-
- sfw_init_session(sn, request->mksn_sid,
- msg->msg_ses_feats, &request->mksn_name[0]);
-
- spin_lock(&sfw_data.fw_lock);
-
- sfw_deactivate_session();
- LASSERT(!sfw_data.fw_session);
- sfw_data.fw_session = sn;
-
- spin_unlock(&sfw_data.fw_lock);
-
- reply->mksn_status = 0;
- reply->mksn_sid = sn->sn_id;
- reply->mksn_timeout = sn->sn_timeout;
- return 0;
-}
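
The feature handling in sfw_make_session() reduces to one mask test: any bit outside LST_FEATS_MASK is a feature this node does not understand, so the request is refused with a positive EPROTO status, as the reply convention above uses. A sketch with an illustrative mask value:

#include <errno.h>

#define LST_FEATS_MASK 0x3u     /* illustrative: feature bits this node knows */

static int check_session_features(unsigned int feats)
{
        if (feats & ~LST_FEATS_MASK)
                return EPROTO;  /* requester wants an unknown feature */
        return 0;
}
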
-
-static int
-sfw_remove_session(struct srpc_rmsn_reqst *request,
- struct srpc_rmsn_reply *reply)
-{
- struct sfw_session *sn = sfw_data.fw_session;
-
- reply->rmsn_sid = !sn ? LST_INVALID_SID : sn->sn_id;
-
- if (request->rmsn_sid.ses_nid == LNET_NID_ANY) {
- reply->rmsn_status = EINVAL;
- return 0;
- }
-
- if (!sn || !sfw_sid_equal(request->rmsn_sid, sn->sn_id)) {
- reply->rmsn_status = !sn ? ESRCH : EBUSY;
- return 0;
- }
-
- if (!atomic_dec_and_test(&sn->sn_refcount)) {
- reply->rmsn_status = 0;
- return 0;
- }
-
- spin_lock(&sfw_data.fw_lock);
- sfw_deactivate_session();
- spin_unlock(&sfw_data.fw_lock);
-
- reply->rmsn_status = 0;
- reply->rmsn_sid = LST_INVALID_SID;
- LASSERT(!sfw_data.fw_session);
- return 0;
-}
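
sfw_remove_session() above is the classic dec-and-test idiom: of all callers dropping references, exactly the one that reaches zero tears the session down. The same rule rendered with C11 atomics:

#include <stdatomic.h>
#include <stdbool.h>

static bool put_ref(atomic_int *refcount)
{
        /* fetch_sub returns the old value, so exactly one caller --
         * the one that saw 1 -- gets true and performs the teardown */
        return atomic_fetch_sub(refcount, 1) == 1;
}
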
-
-static int
-sfw_debug_session(struct srpc_debug_reqst *request,
- struct srpc_debug_reply *reply)
-{
- struct sfw_session *sn = sfw_data.fw_session;
-
- if (!sn) {
- reply->dbg_status = ESRCH;
- reply->dbg_sid = LST_INVALID_SID;
- return 0;
- }
-
- reply->dbg_status = 0;
- reply->dbg_sid = sn->sn_id;
- reply->dbg_timeout = sn->sn_timeout;
- if (strlcpy(reply->dbg_name, &sn->sn_name[0], sizeof(reply->dbg_name))
- >= sizeof(reply->dbg_name))
- return -E2BIG;
-
- return 0;
-}
-
-static void
-sfw_test_rpc_fini(struct srpc_client_rpc *rpc)
-{
- struct sfw_test_unit *tsu = rpc->crpc_priv;
- struct sfw_test_instance *tsi = tsu->tsu_instance;
-
- /* Called with hold of tsi->tsi_lock */
- LASSERT(list_empty(&rpc->crpc_list));
- list_add(&rpc->crpc_list, &tsi->tsi_free_rpcs);
-}
-
-static inline int
-sfw_test_buffers(struct sfw_test_instance *tsi)
-{
- struct sfw_test_case *tsc;
- struct srpc_service *svc;
- int nbuf;
-
- LASSERT(tsi);
- tsc = sfw_find_test_case(tsi->tsi_service);
- LASSERT(tsc);
- svc = tsc->tsc_srv_service;
- LASSERT(svc);
-
- nbuf = min(svc->sv_wi_total, tsi->tsi_loop) / svc->sv_ncpts;
- return max(SFW_TEST_WI_MIN, nbuf + SFW_TEST_WI_EXTRA);
-}
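
The sizing rule in sfw_test_buffers() spelled out: take the smaller of the service's total work items and the test's loop count, divide it across CPU partitions, then clamp to a floor plus fixed headroom. A standalone rendering, with illustrative constants standing in for SFW_TEST_WI_MIN and SFW_TEST_WI_EXTRA:

#define TEST_WI_MIN   16        /* illustrative floor */
#define TEST_WI_EXTRA 8         /* illustrative headroom */

static int test_buffers(int wi_total, int loop, int ncpts)
{
        int nbuf = (wi_total < loop ? wi_total : loop) / ncpts;
        int want = nbuf + TEST_WI_EXTRA;

        return want > TEST_WI_MIN ? want : TEST_WI_MIN;
}
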
-
-static int
-sfw_load_test(struct sfw_test_instance *tsi)
-{
- struct sfw_test_case *tsc;
- struct srpc_service *svc;
- int nbuf;
- int rc;
-
- LASSERT(tsi);
- tsc = sfw_find_test_case(tsi->tsi_service);
- nbuf = sfw_test_buffers(tsi);
- LASSERT(tsc);
- svc = tsc->tsc_srv_service;
-
- if (tsi->tsi_is_client) {
- tsi->tsi_ops = tsc->tsc_cli_ops;
- return 0;
- }
-
- rc = srpc_service_add_buffers(svc, nbuf);
- if (rc) {
- CWARN("Failed to reserve enough buffers: service %s, %d needed: %d\n",
- svc->sv_name, nbuf, rc);
- /*
- * NB: this error handler is not strictly correct, because
- * it may release more buffers than were actually allocated,
- * but that doesn't matter: the request portal should be a
- * lazy portal, which will grow its buffers as necessary.
- */
- srpc_service_remove_buffers(svc, nbuf);
- return -ENOMEM;
- }
-
- CDEBUG(D_NET, "Reserved %d buffers for test %s\n",
- nbuf * (srpc_serv_is_framework(svc) ?
- 2 : cfs_cpt_number(cfs_cpt_table)), svc->sv_name);
- return 0;
-}
-
-static void
-sfw_unload_test(struct sfw_test_instance *tsi)
-{
- struct sfw_test_case *tsc;
-
- LASSERT(tsi);
- tsc = sfw_find_test_case(tsi->tsi_service);
- LASSERT(tsc);
-
- if (tsi->tsi_is_client)
- return;
-
- /*
- * shrink buffers: the request portal is a lazy portal that
- * can grow its buffers at runtime, so we may leave some
- * buffers behind, but never mind...
- */
- srpc_service_remove_buffers(tsc->tsc_srv_service,
- sfw_test_buffers(tsi));
-}
-
-static void
-sfw_destroy_test_instance(struct sfw_test_instance *tsi)
-{
- struct srpc_client_rpc *rpc;
- struct sfw_test_unit *tsu;
-
- if (!tsi->tsi_is_client)
- goto clean;
-
- tsi->tsi_ops->tso_fini(tsi);
-
- LASSERT(!tsi->tsi_stopping);
- LASSERT(list_empty(&tsi->tsi_active_rpcs));
- LASSERT(!sfw_test_active(tsi));
-
- while (!list_empty(&tsi->tsi_units)) {
- tsu = list_entry(tsi->tsi_units.next,
- struct sfw_test_unit, tsu_list);
- list_del(&tsu->tsu_list);
- kfree(tsu);
- }
-
- while (!list_empty(&tsi->tsi_free_rpcs)) {
- rpc = list_entry(tsi->tsi_free_rpcs.next,
- struct srpc_client_rpc, crpc_list);
- list_del(&rpc->crpc_list);
- kfree(rpc);
- }
-
-clean:
- sfw_unload_test(tsi);
- kfree(tsi);
-}
-
-static void
-sfw_destroy_batch(struct sfw_batch *tsb)
-{
- struct sfw_test_instance *tsi;
-
- LASSERT(!sfw_batch_active(tsb));
- LASSERT(list_empty(&tsb->bat_list));
-
- while (!list_empty(&tsb->bat_tests)) {
- tsi = list_entry(tsb->bat_tests.next,
- struct sfw_test_instance, tsi_list);
- list_del_init(&tsi->tsi_list);
- sfw_destroy_test_instance(tsi);
- }
-
- kfree(tsb);
-}
-
-void
-sfw_destroy_session(struct sfw_session *sn)
-{
- struct sfw_batch *batch;
-
- LASSERT(list_empty(&sn->sn_list));
- LASSERT(sn != sfw_data.fw_session);
-
- while (!list_empty(&sn->sn_batches)) {
- batch = list_entry(sn->sn_batches.next,
- struct sfw_batch, bat_list);
- list_del_init(&batch->bat_list);
- sfw_destroy_batch(batch);
- }
-
- kfree(sn);
- atomic_dec(&sfw_data.fw_nzombies);
-}
-
-static void
-sfw_unpack_addtest_req(struct srpc_msg *msg)
-{
- struct srpc_test_reqst *req = &msg->msg_body.tes_reqst;
-
- LASSERT(msg->msg_type == SRPC_MSG_TEST_REQST);
- LASSERT(req->tsr_is_client);
-
- if (msg->msg_magic == SRPC_MSG_MAGIC)
- return; /* no flipping needed */
-
- LASSERT(msg->msg_magic == __swab32(SRPC_MSG_MAGIC));
-
- if (req->tsr_service == SRPC_SERVICE_BRW) {
- if (!(msg->msg_ses_feats & LST_FEAT_BULK_LEN)) {
- struct test_bulk_req *bulk = &req->tsr_u.bulk_v0;
-
- __swab32s(&bulk->blk_opc);
- __swab32s(&bulk->blk_npg);
- __swab32s(&bulk->blk_flags);
-
- } else {
- struct test_bulk_req_v1 *bulk = &req->tsr_u.bulk_v1;
-
- __swab16s(&bulk->blk_opc);
- __swab16s(&bulk->blk_flags);
- __swab32s(&bulk->blk_offset);
- __swab32s(&bulk->blk_len);
- }
-
- return;
- }
-
- if (req->tsr_service == SRPC_SERVICE_PING) {
- struct test_ping_req *ping = &req->tsr_u.ping;
-
- __swab32s(&ping->png_size);
- __swab32s(&ping->png_flags);
- return;
- }
-
- LBUG();
-}
-
-static int
-sfw_add_test_instance(struct sfw_batch *tsb, struct srpc_server_rpc *rpc)
-{
- struct srpc_msg *msg = &rpc->srpc_reqstbuf->buf_msg;
- struct srpc_test_reqst *req = &msg->msg_body.tes_reqst;
- struct srpc_bulk *bk = rpc->srpc_bulk;
- int ndest = req->tsr_ndest;
- struct sfw_test_unit *tsu;
- struct sfw_test_instance *tsi;
- int i;
- int rc;
-
- tsi = kzalloc(sizeof(*tsi), GFP_NOFS);
- if (!tsi) {
- CERROR("Can't allocate test instance for batch: %llu\n",
- tsb->bat_id.bat_id);
- return -ENOMEM;
- }
-
- spin_lock_init(&tsi->tsi_lock);
- atomic_set(&tsi->tsi_nactive, 0);
- INIT_LIST_HEAD(&tsi->tsi_units);
- INIT_LIST_HEAD(&tsi->tsi_free_rpcs);
- INIT_LIST_HEAD(&tsi->tsi_active_rpcs);
-
- tsi->tsi_stopping = 0;
- tsi->tsi_batch = tsb;
- tsi->tsi_loop = req->tsr_loop;
- tsi->tsi_concur = req->tsr_concur;
- tsi->tsi_service = req->tsr_service;
- tsi->tsi_is_client = !!(req->tsr_is_client);
- tsi->tsi_stoptsu_onerr = !!(req->tsr_stop_onerr);
-
- rc = sfw_load_test(tsi);
- if (rc) {
- kfree(tsi);
- return rc;
- }
-
- LASSERT(!sfw_batch_active(tsb));
-
- if (!tsi->tsi_is_client) {
- /* it's test server, just add it to tsb */
- list_add_tail(&tsi->tsi_list, &tsb->bat_tests);
- return 0;
- }
-
- LASSERT(bk);
- LASSERT(bk->bk_niov * SFW_ID_PER_PAGE >= (unsigned int)ndest);
- LASSERT((unsigned int)bk->bk_len >=
- sizeof(struct lnet_process_id_packed) * ndest);
-
- sfw_unpack_addtest_req(msg);
- memcpy(&tsi->tsi_u, &req->tsr_u, sizeof(tsi->tsi_u));
-
- for (i = 0; i < ndest; i++) {
- struct lnet_process_id_packed *dests;
- struct lnet_process_id_packed id;
- int j;
-
- dests = page_address(bk->bk_iovs[i / SFW_ID_PER_PAGE].bv_page);
- LASSERT(dests); /* my pages are always within kernel VM */
- id = dests[i % SFW_ID_PER_PAGE];
- if (msg->msg_magic != SRPC_MSG_MAGIC)
- sfw_unpack_id(id);
-
- for (j = 0; j < tsi->tsi_concur; j++) {
- tsu = kzalloc(sizeof(struct sfw_test_unit), GFP_NOFS);
- if (!tsu) {
- rc = -ENOMEM;
- CERROR("Can't allocate tsu for %d\n",
- tsi->tsi_service);
- goto error;
- }
-
- tsu->tsu_dest.nid = id.nid;
- tsu->tsu_dest.pid = id.pid;
- tsu->tsu_instance = tsi;
- tsu->tsu_private = NULL;
- list_add_tail(&tsu->tsu_list, &tsi->tsi_units);
- }
- }
-
- rc = tsi->tsi_ops->tso_init(tsi);
- if (!rc) {
- list_add_tail(&tsi->tsi_list, &tsb->bat_tests);
- return 0;
- }
-
-error:
- LASSERT(rc);
- sfw_destroy_test_instance(tsi);
- return rc;
-}
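
sfw_add_test_instance() above pulls destination IDs out of bulk pages with the index pair (i / SFW_ID_PER_PAGE, i % SFW_ID_PER_PAGE). The same two-level indexing over a plain array of pages; the per-page count and the ID layout are illustrative:

#include <stdint.h>

#define IDS_PER_PAGE 512        /* illustrative: PAGE_SIZE / sizeof(id) */

struct packed_id { uint64_t nid; uint32_t pid; };

static struct packed_id
get_dest(struct packed_id **pages, int i)
{
        /* pages[n] is the n-th "page" of destination IDs */
        return pages[i / IDS_PER_PAGE][i % IDS_PER_PAGE];
}
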
-
-static void
-sfw_test_unit_done(struct sfw_test_unit *tsu)
-{
- struct sfw_test_instance *tsi = tsu->tsu_instance;
- struct sfw_batch *tsb = tsi->tsi_batch;
- struct sfw_session *sn = tsb->bat_session;
-
- LASSERT(sfw_test_active(tsi));
-
- if (!atomic_dec_and_test(&tsi->tsi_nactive))
- return;
-
- /* the test instance is done */
- spin_lock(&tsi->tsi_lock);
-
- tsi->tsi_stopping = 0;
-
- spin_unlock(&tsi->tsi_lock);
-
- spin_lock(&sfw_data.fw_lock);
-
- if (!atomic_dec_and_test(&tsb->bat_nactive) || /* tsb still active */
- sn == sfw_data.fw_session) { /* sn also active */
- spin_unlock(&sfw_data.fw_lock);
- return;
- }
-
- LASSERT(!list_empty(&sn->sn_list)); /* I'm a zombie! */
-
- list_for_each_entry(tsb, &sn->sn_batches, bat_list) {
- if (sfw_batch_active(tsb)) {
- spin_unlock(&sfw_data.fw_lock);
- return;
- }
- }
-
- list_del_init(&sn->sn_list);
- spin_unlock(&sfw_data.fw_lock);
-
- sfw_destroy_session(sn);
-}
-
-static void
-sfw_test_rpc_done(struct srpc_client_rpc *rpc)
-{
- struct sfw_test_unit *tsu = rpc->crpc_priv;
- struct sfw_test_instance *tsi = tsu->tsu_instance;
- int done = 0;
-
- tsi->tsi_ops->tso_done_rpc(tsu, rpc);
-
- spin_lock(&tsi->tsi_lock);
-
- LASSERT(sfw_test_active(tsi));
- LASSERT(!list_empty(&rpc->crpc_list));
-
- list_del_init(&rpc->crpc_list);
-
- /* batch is stopping or loop is done or get error */
- if (tsi->tsi_stopping || !tsu->tsu_loop ||
- (rpc->crpc_status && tsi->tsi_stoptsu_onerr))
- done = 1;
-
- /* dec ref for poster */
- srpc_client_rpc_decref(rpc);
-
- spin_unlock(&tsi->tsi_lock);
-
- if (!done) {
- swi_schedule_workitem(&tsu->tsu_worker);
- return;
- }
-
- sfw_test_unit_done(tsu);
-}
-
-int
-sfw_create_test_rpc(struct sfw_test_unit *tsu, struct lnet_process_id peer,
- unsigned int features, int nblk, int blklen,
- struct srpc_client_rpc **rpcpp)
-{
- struct srpc_client_rpc *rpc = NULL;
- struct sfw_test_instance *tsi = tsu->tsu_instance;
-
- spin_lock(&tsi->tsi_lock);
-
- LASSERT(sfw_test_active(tsi));
- /* pick request from buffer */
- rpc = list_first_entry_or_null(&tsi->tsi_free_rpcs,
- struct srpc_client_rpc, crpc_list);
- if (rpc) {
- LASSERT(nblk == rpc->crpc_bulk.bk_niov);
- list_del_init(&rpc->crpc_list);
- }
-
- spin_unlock(&tsi->tsi_lock);
-
- if (!rpc) {
- rpc = srpc_create_client_rpc(peer, tsi->tsi_service, nblk,
- blklen, sfw_test_rpc_done,
- sfw_test_rpc_fini, tsu);
- } else {
- srpc_init_client_rpc(rpc, peer, tsi->tsi_service, nblk,
- blklen, sfw_test_rpc_done,
- sfw_test_rpc_fini, tsu);
- }
-
- if (!rpc) {
- CERROR("Can't create rpc for test %d\n", tsi->tsi_service);
- return -ENOMEM;
- }
-
- rpc->crpc_reqstmsg.msg_ses_feats = features;
- *rpcpp = rpc;
-
- return 0;
-}
-
-static void
-sfw_run_test(struct swi_workitem *wi)
-{
- struct sfw_test_unit *tsu = container_of(wi, struct sfw_test_unit, tsu_worker);
- struct sfw_test_instance *tsi = tsu->tsu_instance;
- struct srpc_client_rpc *rpc = NULL;
-
- if (tsi->tsi_ops->tso_prep_rpc(tsu, tsu->tsu_dest, &rpc)) {
- LASSERT(!rpc);
- goto test_done;
- }
-
- LASSERT(rpc);
-
- spin_lock(&tsi->tsi_lock);
-
- if (tsi->tsi_stopping) {
- list_add(&rpc->crpc_list, &tsi->tsi_free_rpcs);
- spin_unlock(&tsi->tsi_lock);
- goto test_done;
- }
-
- if (tsu->tsu_loop > 0)
- tsu->tsu_loop--;
-
- list_add_tail(&rpc->crpc_list, &tsi->tsi_active_rpcs);
- spin_unlock(&tsi->tsi_lock);
-
- spin_lock(&rpc->crpc_lock);
- rpc->crpc_timeout = rpc_timeout;
- srpc_post_rpc(rpc);
- spin_unlock(&rpc->crpc_lock);
- return;
-
-test_done:
- /*
- * No one can schedule me now since:
- * - the previous RPC, if any, has completed;
- * - no new RPC has been initiated;
- * - my batch is still active, so no one can run it again yet.
- * Cancel pending schedules and prevent future schedule attempts:
- */
- sfw_test_unit_done(tsu);
-}
-
-static int
-sfw_run_batch(struct sfw_batch *tsb)
-{
- struct swi_workitem *wi;
- struct sfw_test_unit *tsu;
- struct sfw_test_instance *tsi;
-
- if (sfw_batch_active(tsb)) {
- CDEBUG(D_NET, "Batch already active: %llu (%d)\n",
- tsb->bat_id.bat_id, atomic_read(&tsb->bat_nactive));
- return 0;
- }
-
- list_for_each_entry(tsi, &tsb->bat_tests, tsi_list) {
- if (!tsi->tsi_is_client) /* skip server instances */
- continue;
-
- LASSERT(!tsi->tsi_stopping);
- LASSERT(!sfw_test_active(tsi));
-
- atomic_inc(&tsb->bat_nactive);
-
- list_for_each_entry(tsu, &tsi->tsi_units, tsu_list) {
- atomic_inc(&tsi->tsi_nactive);
- tsu->tsu_loop = tsi->tsi_loop;
- wi = &tsu->tsu_worker;
- swi_init_workitem(wi, sfw_run_test,
- lst_test_wq[lnet_cpt_of_nid(tsu->tsu_dest.nid)]);
- swi_schedule_workitem(wi);
- }
- }
-
- return 0;
-}
-
-int
-sfw_stop_batch(struct sfw_batch *tsb, int force)
-{
- struct sfw_test_instance *tsi;
- struct srpc_client_rpc *rpc;
-
- if (!sfw_batch_active(tsb)) {
- CDEBUG(D_NET, "Batch %llu inactive\n", tsb->bat_id.bat_id);
- return 0;
- }
-
- list_for_each_entry(tsi, &tsb->bat_tests, tsi_list) {
- spin_lock(&tsi->tsi_lock);
-
- if (!tsi->tsi_is_client ||
- !sfw_test_active(tsi) || tsi->tsi_stopping) {
- spin_unlock(&tsi->tsi_lock);
- continue;
- }
-
- tsi->tsi_stopping = 1;
-
- if (!force) {
- spin_unlock(&tsi->tsi_lock);
- continue;
- }
-
- /* abort launched rpcs in the test */
- list_for_each_entry(rpc, &tsi->tsi_active_rpcs, crpc_list) {
- spin_lock(&rpc->crpc_lock);
-
- srpc_abort_rpc(rpc, -EINTR);
-
- spin_unlock(&rpc->crpc_lock);
- }
-
- spin_unlock(&tsi->tsi_lock);
- }
-
- return 0;
-}
-
-static int
-sfw_query_batch(struct sfw_batch *tsb, int testidx,
- struct srpc_batch_reply *reply)
-{
- struct sfw_test_instance *tsi;
-
- if (testidx < 0)
- return -EINVAL;
-
- if (!testidx) {
- reply->bar_active = atomic_read(&tsb->bat_nactive);
- return 0;
- }
-
- list_for_each_entry(tsi, &tsb->bat_tests, tsi_list) {
- if (testidx-- > 1)
- continue;
-
- reply->bar_active = atomic_read(&tsi->tsi_nactive);
- return 0;
- }
-
- return -ENOENT;
-}
-
-void
-sfw_free_pages(struct srpc_server_rpc *rpc)
-{
- srpc_free_bulk(rpc->srpc_bulk);
- rpc->srpc_bulk = NULL;
-}
-
-int
-sfw_alloc_pages(struct srpc_server_rpc *rpc, int cpt, int npages, int len,
- int sink)
-{
- LASSERT(!rpc->srpc_bulk);
- LASSERT(npages > 0 && npages <= LNET_MAX_IOV);
-
- rpc->srpc_bulk = srpc_alloc_bulk(cpt, 0, npages, len, sink);
- if (!rpc->srpc_bulk)
- return -ENOMEM;
-
- return 0;
-}
-
-static int
-sfw_add_test(struct srpc_server_rpc *rpc)
-{
- struct sfw_session *sn = sfw_data.fw_session;
- struct srpc_test_reply *reply = &rpc->srpc_replymsg.msg_body.tes_reply;
- struct srpc_test_reqst *request;
- int rc;
- struct sfw_batch *bat;
-
- request = &rpc->srpc_reqstbuf->buf_msg.msg_body.tes_reqst;
- reply->tsr_sid = !sn ? LST_INVALID_SID : sn->sn_id;
-
- if (!request->tsr_loop ||
- !request->tsr_concur ||
- request->tsr_sid.ses_nid == LNET_NID_ANY ||
- request->tsr_ndest > SFW_MAX_NDESTS ||
- (request->tsr_is_client && !request->tsr_ndest) ||
- request->tsr_concur > SFW_MAX_CONCUR ||
- request->tsr_service > SRPC_SERVICE_MAX_ID ||
- request->tsr_service <= SRPC_FRAMEWORK_SERVICE_MAX_ID) {
- reply->tsr_status = EINVAL;
- return 0;
- }
-
- if (!sn || !sfw_sid_equal(request->tsr_sid, sn->sn_id) ||
- !sfw_find_test_case(request->tsr_service)) {
- reply->tsr_status = ENOENT;
- return 0;
- }
-
- bat = sfw_bid2batch(request->tsr_bid);
- if (!bat) {
- CERROR("dropping RPC %s from %s under memory pressure\n",
- rpc->srpc_scd->scd_svc->sv_name,
- libcfs_id2str(rpc->srpc_peer));
- return -ENOMEM;
- }
-
- if (sfw_batch_active(bat)) {
- reply->tsr_status = EBUSY;
- return 0;
- }
-
- if (request->tsr_is_client && !rpc->srpc_bulk) {
- /* rpc will be resumed later in sfw_bulk_ready */
- int npg = sfw_id_pages(request->tsr_ndest);
- int len;
-
- if (!(sn->sn_features & LST_FEAT_BULK_LEN)) {
- len = npg * PAGE_SIZE;
-
- } else {
- len = sizeof(struct lnet_process_id_packed) *
- request->tsr_ndest;
- }
-
- return sfw_alloc_pages(rpc, CFS_CPT_ANY, npg, len, 1);
- }
-
- rc = sfw_add_test_instance(bat, rpc);
- CDEBUG(!rc ? D_NET : D_WARNING,
- "%s test: sv %d %s, loop %d, concur %d, ndest %d\n",
- !rc ? "Added" : "Failed to add", request->tsr_service,
- request->tsr_is_client ? "client" : "server",
- request->tsr_loop, request->tsr_concur, request->tsr_ndest);
-
- reply->tsr_status = (rc < 0) ? -rc : rc;
- return 0;
-}
-
-static int
-sfw_control_batch(struct srpc_batch_reqst *request,
- struct srpc_batch_reply *reply)
-{
- struct sfw_session *sn = sfw_data.fw_session;
- int rc = 0;
- struct sfw_batch *bat;
-
- reply->bar_sid = !sn ? LST_INVALID_SID : sn->sn_id;
-
- if (!sn || !sfw_sid_equal(request->bar_sid, sn->sn_id)) {
- reply->bar_status = ESRCH;
- return 0;
- }
-
- bat = sfw_find_batch(request->bar_bid);
- if (!bat) {
- reply->bar_status = ENOENT;
- return 0;
- }
-
- switch (request->bar_opc) {
- case SRPC_BATCH_OPC_RUN:
- rc = sfw_run_batch(bat);
- break;
-
- case SRPC_BATCH_OPC_STOP:
- rc = sfw_stop_batch(bat, request->bar_arg);
- break;
-
- case SRPC_BATCH_OPC_QUERY:
- rc = sfw_query_batch(bat, request->bar_testidx, reply);
- break;
-
- default:
- return -EINVAL; /* drop it */
- }
-
- reply->bar_status = (rc < 0) ? -rc : rc;
- return 0;
-}
-
-static int
-sfw_handle_server_rpc(struct srpc_server_rpc *rpc)
-{
- struct srpc_service *sv = rpc->srpc_scd->scd_svc;
- struct srpc_msg *reply = &rpc->srpc_replymsg;
- struct srpc_msg *request = &rpc->srpc_reqstbuf->buf_msg;
- unsigned int features = LST_FEATS_MASK;
- int rc = 0;
-
- LASSERT(!sfw_data.fw_active_srpc);
- LASSERT(sv->sv_id <= SRPC_FRAMEWORK_SERVICE_MAX_ID);
-
- spin_lock(&sfw_data.fw_lock);
-
- if (sfw_data.fw_shuttingdown) {
- spin_unlock(&sfw_data.fw_lock);
- return -ESHUTDOWN;
- }
-
- /* Remove timer to avoid racing with it or expiring active session */
- if (sfw_del_session_timer()) {
- CERROR("dropping RPC %s from %s: racing with expiry timer\n",
- sv->sv_name, libcfs_id2str(rpc->srpc_peer));
- spin_unlock(&sfw_data.fw_lock);
- return -EAGAIN;
- }
-
- sfw_data.fw_active_srpc = rpc;
- spin_unlock(&sfw_data.fw_lock);
-
- sfw_unpack_message(request);
- LASSERT(request->msg_type == srpc_service2request(sv->sv_id));
-
- /* rpc module should have checked this */
- LASSERT(request->msg_version == SRPC_MSG_VERSION);
-
- if (sv->sv_id != SRPC_SERVICE_MAKE_SESSION &&
- sv->sv_id != SRPC_SERVICE_DEBUG) {
- struct sfw_session *sn = sfw_data.fw_session;
-
- if (sn &&
- sn->sn_features != request->msg_ses_feats) {
- CNETERR("Features of framework RPC don't match features of current session: %x/%x\n",
- request->msg_ses_feats, sn->sn_features);
- reply->msg_body.reply.status = EPROTO;
- reply->msg_body.reply.sid = sn->sn_id;
- goto out;
- }
-
- } else if (request->msg_ses_feats & ~LST_FEATS_MASK) {
- /*
- * NB: at this point, old version will ignore features and
- * create new session anyway, so console should be able
- * to handle this
- */
- reply->msg_body.reply.status = EPROTO;
- goto out;
- }
-
- switch (sv->sv_id) {
- default:
- LBUG();
- case SRPC_SERVICE_TEST:
- rc = sfw_add_test(rpc);
- break;
-
- case SRPC_SERVICE_BATCH:
- rc = sfw_control_batch(&request->msg_body.bat_reqst,
- &reply->msg_body.bat_reply);
- break;
-
- case SRPC_SERVICE_QUERY_STAT:
- rc = sfw_get_stats(&request->msg_body.stat_reqst,
- &reply->msg_body.stat_reply);
- break;
-
- case SRPC_SERVICE_DEBUG:
- rc = sfw_debug_session(&request->msg_body.dbg_reqst,
- &reply->msg_body.dbg_reply);
- break;
-
- case SRPC_SERVICE_MAKE_SESSION:
- rc = sfw_make_session(&request->msg_body.mksn_reqst,
- &reply->msg_body.mksn_reply);
- break;
-
- case SRPC_SERVICE_REMOVE_SESSION:
- rc = sfw_remove_session(&request->msg_body.rmsn_reqst,
- &reply->msg_body.rmsn_reply);
- break;
- }
-
- if (sfw_data.fw_session)
- features = sfw_data.fw_session->sn_features;
- out:
- reply->msg_ses_feats = features;
- rpc->srpc_done = sfw_server_rpc_done;
- spin_lock(&sfw_data.fw_lock);
-
- if (!sfw_data.fw_shuttingdown)
- sfw_add_session_timer();
-
- sfw_data.fw_active_srpc = NULL;
- spin_unlock(&sfw_data.fw_lock);
- return rc;
-}
-
-static int
-sfw_bulk_ready(struct srpc_server_rpc *rpc, int status)
-{
- struct srpc_service *sv = rpc->srpc_scd->scd_svc;
- int rc;
-
- LASSERT(rpc->srpc_bulk);
- LASSERT(sv->sv_id == SRPC_SERVICE_TEST);
- LASSERT(!sfw_data.fw_active_srpc);
- LASSERT(rpc->srpc_reqstbuf->buf_msg.msg_body.tes_reqst.tsr_is_client);
-
- spin_lock(&sfw_data.fw_lock);
-
- if (status) {
- CERROR("Bulk transfer failed for RPC: service %s, peer %s, status %d\n",
- sv->sv_name, libcfs_id2str(rpc->srpc_peer), status);
- spin_unlock(&sfw_data.fw_lock);
- return -EIO;
- }
-
- if (sfw_data.fw_shuttingdown) {
- spin_unlock(&sfw_data.fw_lock);
- return -ESHUTDOWN;
- }
-
- if (sfw_del_session_timer()) {
- CERROR("dropping RPC %s from %s: racing with expiry timer\n",
- sv->sv_name, libcfs_id2str(rpc->srpc_peer));
- spin_unlock(&sfw_data.fw_lock);
- return -EAGAIN;
- }
-
- sfw_data.fw_active_srpc = rpc;
- spin_unlock(&sfw_data.fw_lock);
-
- rc = sfw_add_test(rpc);
-
- spin_lock(&sfw_data.fw_lock);
-
- if (!sfw_data.fw_shuttingdown)
- sfw_add_session_timer();
-
- sfw_data.fw_active_srpc = NULL;
- spin_unlock(&sfw_data.fw_lock);
- return rc;
-}
-
-struct srpc_client_rpc *
-sfw_create_rpc(struct lnet_process_id peer, int service,
- unsigned int features, int nbulkiov, int bulklen,
- void (*done)(struct srpc_client_rpc *), void *priv)
-{
- struct srpc_client_rpc *rpc = NULL;
-
- spin_lock(&sfw_data.fw_lock);
-
- LASSERT(!sfw_data.fw_shuttingdown);
- LASSERT(service <= SRPC_FRAMEWORK_SERVICE_MAX_ID);
-
- if (!nbulkiov && !list_empty(&sfw_data.fw_zombie_rpcs)) {
- rpc = list_entry(sfw_data.fw_zombie_rpcs.next,
- struct srpc_client_rpc, crpc_list);
- list_del(&rpc->crpc_list);
-
- srpc_init_client_rpc(rpc, peer, service, 0, 0,
- done, sfw_client_rpc_fini, priv);
- }
-
- spin_unlock(&sfw_data.fw_lock);
-
- if (!rpc) {
- rpc = srpc_create_client_rpc(peer, service,
- nbulkiov, bulklen, done,
- nbulkiov ? NULL :
- sfw_client_rpc_fini,
- priv);
- }
-
- if (rpc) /* "session" is a framework-level concept */
- rpc->crpc_reqstmsg.msg_ses_feats = features;
-
- return rpc;
-}
-
-void
-sfw_unpack_message(struct srpc_msg *msg)
-{
- if (msg->msg_magic == SRPC_MSG_MAGIC)
- return; /* no flipping needed */
-
- /* the srpc module should guarantee the message is sane */
- LASSERT(msg->msg_magic == __swab32(SRPC_MSG_MAGIC));
-
- if (msg->msg_type == SRPC_MSG_STAT_REQST) {
- struct srpc_stat_reqst *req = &msg->msg_body.stat_reqst;
-
- __swab32s(&req->str_type);
- __swab64s(&req->str_rpyid);
- sfw_unpack_sid(req->str_sid);
- return;
- }
-
- if (msg->msg_type == SRPC_MSG_STAT_REPLY) {
- struct srpc_stat_reply *rep = &msg->msg_body.stat_reply;
-
- __swab32s(&rep->str_status);
- sfw_unpack_sid(rep->str_sid);
- sfw_unpack_fw_counters(rep->str_fw);
- sfw_unpack_rpc_counters(rep->str_rpc);
- sfw_unpack_lnet_counters(rep->str_lnet);
- return;
- }
-
- if (msg->msg_type == SRPC_MSG_MKSN_REQST) {
- struct srpc_mksn_reqst *req = &msg->msg_body.mksn_reqst;
-
- __swab64s(&req->mksn_rpyid);
- __swab32s(&req->mksn_force);
- sfw_unpack_sid(req->mksn_sid);
- return;
- }
-
- if (msg->msg_type == SRPC_MSG_MKSN_REPLY) {
- struct srpc_mksn_reply *rep = &msg->msg_body.mksn_reply;
-
- __swab32s(&rep->mksn_status);
- __swab32s(&rep->mksn_timeout);
- sfw_unpack_sid(rep->mksn_sid);
- return;
- }
-
- if (msg->msg_type == SRPC_MSG_RMSN_REQST) {
- struct srpc_rmsn_reqst *req = &msg->msg_body.rmsn_reqst;
-
- __swab64s(&req->rmsn_rpyid);
- sfw_unpack_sid(req->rmsn_sid);
- return;
- }
-
- if (msg->msg_type == SRPC_MSG_RMSN_REPLY) {
- struct srpc_rmsn_reply *rep = &msg->msg_body.rmsn_reply;
-
- __swab32s(&rep->rmsn_status);
- sfw_unpack_sid(rep->rmsn_sid);
- return;
- }
-
- if (msg->msg_type == SRPC_MSG_DEBUG_REQST) {
- struct srpc_debug_reqst *req = &msg->msg_body.dbg_reqst;
-
- __swab64s(&req->dbg_rpyid);
- __swab32s(&req->dbg_flags);
- sfw_unpack_sid(req->dbg_sid);
- return;
- }
-
- if (msg->msg_type == SRPC_MSG_DEBUG_REPLY) {
- struct srpc_debug_reply *rep = &msg->msg_body.dbg_reply;
-
- __swab32s(&rep->dbg_nbatch);
- __swab32s(&rep->dbg_timeout);
- sfw_unpack_sid(rep->dbg_sid);
- return;
- }
-
- if (msg->msg_type == SRPC_MSG_BATCH_REQST) {
- struct srpc_batch_reqst *req = &msg->msg_body.bat_reqst;
-
- __swab32s(&req->bar_opc);
- __swab64s(&req->bar_rpyid);
- __swab32s(&req->bar_testidx);
- __swab32s(&req->bar_arg);
- sfw_unpack_sid(req->bar_sid);
- __swab64s(&req->bar_bid.bat_id);
- return;
- }
-
- if (msg->msg_type == SRPC_MSG_BATCH_REPLY) {
- struct srpc_batch_reply *rep = &msg->msg_body.bat_reply;
-
- __swab32s(&rep->bar_status);
- sfw_unpack_sid(rep->bar_sid);
- return;
- }
-
- if (msg->msg_type == SRPC_MSG_TEST_REQST) {
- struct srpc_test_reqst *req = &msg->msg_body.tes_reqst;
-
- __swab64s(&req->tsr_rpyid);
- __swab64s(&req->tsr_bulkid);
- __swab32s(&req->tsr_loop);
- __swab32s(&req->tsr_ndest);
- __swab32s(&req->tsr_concur);
- __swab32s(&req->tsr_service);
- sfw_unpack_sid(req->tsr_sid);
- __swab64s(&req->tsr_bid.bat_id);
- return;
- }
-
- if (msg->msg_type == SRPC_MSG_TEST_REPLY) {
- struct srpc_test_reply *rep = &msg->msg_body.tes_reply;
-
- __swab32s(&rep->tsr_status);
- sfw_unpack_sid(rep->tsr_sid);
- return;
- }
-
- if (msg->msg_type == SRPC_MSG_JOIN_REQST) {
- struct srpc_join_reqst *req = &msg->msg_body.join_reqst;
-
- __swab64s(&req->join_rpyid);
- sfw_unpack_sid(req->join_sid);
- return;
- }
-
- if (msg->msg_type == SRPC_MSG_JOIN_REPLY) {
- struct srpc_join_reply *rep = &msg->msg_body.join_reply;
-
- __swab32s(&rep->join_status);
- __swab32s(&rep->join_timeout);
- sfw_unpack_sid(rep->join_sid);
- return;
- }
-
- LBUG();
-}
-
-void
-sfw_abort_rpc(struct srpc_client_rpc *rpc)
-{
- LASSERT(atomic_read(&rpc->crpc_refcount) > 0);
- LASSERT(rpc->crpc_service <= SRPC_FRAMEWORK_SERVICE_MAX_ID);
-
- spin_lock(&rpc->crpc_lock);
- srpc_abort_rpc(rpc, -EINTR);
- spin_unlock(&rpc->crpc_lock);
-}
-
-void
-sfw_post_rpc(struct srpc_client_rpc *rpc)
-{
- spin_lock(&rpc->crpc_lock);
-
- LASSERT(!rpc->crpc_closed);
- LASSERT(!rpc->crpc_aborted);
- LASSERT(list_empty(&rpc->crpc_list));
- LASSERT(!sfw_data.fw_shuttingdown);
-
- rpc->crpc_timeout = rpc_timeout;
- srpc_post_rpc(rpc);
-
- spin_unlock(&rpc->crpc_lock);
-}
-
-static struct srpc_service sfw_services[] = {
- {
- /* sv_id */ SRPC_SERVICE_DEBUG,
- /* sv_name */ "debug",
- 0
- },
- {
- /* sv_id */ SRPC_SERVICE_QUERY_STAT,
- /* sv_name */ "query stats",
- 0
- },
- {
- /* sv_id */ SRPC_SERVICE_MAKE_SESSION,
- /* sv_name */ "make session",
- 0
- },
- {
- /* sv_id */ SRPC_SERVICE_REMOVE_SESSION,
- /* sv_name */ "remove session",
- 0
- },
- {
- /* sv_id */ SRPC_SERVICE_BATCH,
- /* sv_name */ "batch service",
- 0
- },
- {
- /* sv_id */ SRPC_SERVICE_TEST,
- /* sv_name */ "test service",
- 0
- },
- {
- /* sv_id */ 0,
- /* sv_name */ NULL,
- 0
- }
-};
-
-int
-sfw_startup(void)
-{
- int i;
- int rc;
- int error;
- struct srpc_service *sv;
- struct sfw_test_case *tsc;
-
- if (session_timeout < 0) {
- CERROR("Session timeout must be non-negative: %d\n",
- session_timeout);
- return -EINVAL;
- }
-
- if (rpc_timeout < 0) {
- CERROR("RPC timeout must be non-negative: %d\n",
- rpc_timeout);
- return -EINVAL;
- }
-
- if (!session_timeout)
- CWARN("Zero session_timeout specified - test sessions never expire.\n");
-
- if (!rpc_timeout)
- CWARN("Zero rpc_timeout specified - test RPC never expire.\n");
-
- memset(&sfw_data, 0, sizeof(struct smoketest_framework));
-
- sfw_data.fw_session = NULL;
- sfw_data.fw_active_srpc = NULL;
- spin_lock_init(&sfw_data.fw_lock);
- atomic_set(&sfw_data.fw_nzombies, 0);
- INIT_LIST_HEAD(&sfw_data.fw_tests);
- INIT_LIST_HEAD(&sfw_data.fw_zombie_rpcs);
- INIT_LIST_HEAD(&sfw_data.fw_zombie_sessions);
-
- brw_init_test_client();
- brw_init_test_service();
- rc = sfw_register_test(&brw_test_service, &brw_test_client);
- LASSERT(!rc);
-
- ping_init_test_client();
- ping_init_test_service();
- rc = sfw_register_test(&ping_test_service, &ping_test_client);
- LASSERT(!rc);
-
- error = 0;
- list_for_each_entry(tsc, &sfw_data.fw_tests, tsc_list) {
- sv = tsc->tsc_srv_service;
-
- rc = srpc_add_service(sv);
- LASSERT(rc != -EBUSY);
- if (rc) {
- CWARN("Failed to add %s service: %d\n",
- sv->sv_name, rc);
- error = rc;
- }
- }
-
- for (i = 0; ; i++) {
- sv = &sfw_services[i];
- if (!sv->sv_name)
- break;
-
- sv->sv_bulk_ready = NULL;
- sv->sv_handler = sfw_handle_server_rpc;
- sv->sv_wi_total = SFW_FRWK_WI_MAX;
- if (sv->sv_id == SRPC_SERVICE_TEST)
- sv->sv_bulk_ready = sfw_bulk_ready;
-
- rc = srpc_add_service(sv);
- LASSERT(rc != -EBUSY);
- if (rc) {
- CWARN("Failed to add %s service: %d\n",
- sv->sv_name, rc);
- error = rc;
- }
-
- /* about to call sfw_shutdown(), no need to add buffers */
- if (error)
- continue;
-
- rc = srpc_service_add_buffers(sv, sv->sv_wi_total);
- if (rc) {
- CWARN("Failed to reserve enough buffers: service %s, %d needed: %d\n",
- sv->sv_name, sv->sv_wi_total, rc);
- error = -ENOMEM;
- }
- }
-
- if (error)
- sfw_shutdown();
- return error;
-}
-
-void
-sfw_shutdown(void)
-{
- struct srpc_service *sv;
- struct sfw_test_case *tsc;
- int i;
-
- spin_lock(&sfw_data.fw_lock);
-
- sfw_data.fw_shuttingdown = 1;
- lst_wait_until(!sfw_data.fw_active_srpc, sfw_data.fw_lock,
- "waiting for active RPC to finish.\n");
-
- if (sfw_del_session_timer())
- lst_wait_until(!sfw_data.fw_session, sfw_data.fw_lock,
- "waiting for session timer to explode.\n");
-
- sfw_deactivate_session();
- lst_wait_until(!atomic_read(&sfw_data.fw_nzombies),
- sfw_data.fw_lock,
- "waiting for %d zombie sessions to die.\n",
- atomic_read(&sfw_data.fw_nzombies));
-
- spin_unlock(&sfw_data.fw_lock);
-
- for (i = 0; ; i++) {
- sv = &sfw_services[i];
- if (!sv->sv_name)
- break;
-
- srpc_shutdown_service(sv);
- srpc_remove_service(sv);
- }
-
- list_for_each_entry(tsc, &sfw_data.fw_tests, tsc_list) {
- sv = tsc->tsc_srv_service;
- srpc_shutdown_service(sv);
- srpc_remove_service(sv);
- }
-
- while (!list_empty(&sfw_data.fw_zombie_rpcs)) {
- struct srpc_client_rpc *rpc;
-
- rpc = list_entry(sfw_data.fw_zombie_rpcs.next,
- struct srpc_client_rpc, crpc_list);
- list_del(&rpc->crpc_list);
-
- kfree(rpc);
- }
-
- for (i = 0; ; i++) {
- sv = &sfw_services[i];
- if (!sv->sv_name)
- break;
-
- srpc_wait_service_shutdown(sv);
- }
-
- while (!list_empty(&sfw_data.fw_tests)) {
- tsc = list_entry(sfw_data.fw_tests.next,
- struct sfw_test_case, tsc_list);
-
- srpc_wait_service_shutdown(tsc->tsc_srv_service);
-
- list_del(&tsc->tsc_list);
- kfree(tsc);
- }
-}
diff --git a/drivers/staging/lustre/lnet/selftest/module.c b/drivers/staging/lustre/lnet/selftest/module.c
deleted file mode 100644
index 7359aa56d9b3..000000000000
--- a/drivers/staging/lustre/lnet/selftest/module.c
+++ /dev/null
@@ -1,165 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/*
- * GPL HEADER START
- *
- * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 only,
- * as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * General Public License version 2 for more details (a copy is included
- * in the LICENSE file that accompanied this code).
- *
- * You should have received a copy of the GNU General Public License
- * version 2 along with this program; If not, see
- * http://www.gnu.org/licenses/gpl-2.0.html
- *
- * GPL HEADER END
- */
-/*
- * Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved.
- * Use is subject to license terms.
- *
- * Copyright (c) 2012, Intel Corporation.
- */
-/*
- * This file is part of Lustre, http://www.lustre.org/
- * Lustre is a trademark of Sun Microsystems, Inc.
- */
-
-#define DEBUG_SUBSYSTEM S_LNET
-
-#include "selftest.h"
-#include "console.h"
-
-enum {
- LST_INIT_NONE = 0,
- LST_INIT_WI_SERIAL,
- LST_INIT_WI_TEST,
- LST_INIT_RPC,
- LST_INIT_FW,
- LST_INIT_CONSOLE
-};
-
-static int lst_init_step = LST_INIT_NONE;
-
-struct workqueue_struct *lst_serial_wq;
-struct workqueue_struct **lst_test_wq;
-
-static void
-lnet_selftest_exit(void)
-{
- int i;
-
- switch (lst_init_step) {
- case LST_INIT_CONSOLE:
- lstcon_console_fini();
- /* fall through */
- case LST_INIT_FW:
- sfw_shutdown();
- /* fall through */
- case LST_INIT_RPC:
- srpc_shutdown();
- /* fall through */
- case LST_INIT_WI_TEST:
- for (i = 0;
- i < cfs_cpt_number(lnet_cpt_table()); i++) {
- if (!lst_test_wq[i])
- continue;
- destroy_workqueue(lst_test_wq[i]);
- }
- kvfree(lst_test_wq);
- lst_test_wq = NULL;
- /* fall through */
- case LST_INIT_WI_SERIAL:
- destroy_workqueue(lst_serial_wq);
- lst_serial_wq = NULL;
- /* fall through */
- case LST_INIT_NONE:
- break;
- default:
- LBUG();
- }
-}
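
lnet_selftest_exit() above pairs a step counter with a fall-through switch so that teardown undoes exactly the steps that succeeded, in reverse order. A compact sketch of the pattern:

enum { STEP_NONE, STEP_A, STEP_B };

static int step = STEP_NONE;

static void do_exit(void)
{
        switch (step) {
        case STEP_B:
                /* undo B */
                /* fall through */
        case STEP_A:
                /* undo A */
                /* fall through */
        case STEP_NONE:
                break;
        }
}

static int do_init(void)
{
        /* set up A ... */
        step = STEP_A;
        /* set up B ... */
        step = STEP_B;
        return 0;       /* on any failure: do_exit(); return rc; */
}
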
-
-static int
-lnet_selftest_init(void)
-{
- int nscheds;
- int rc = -ENOMEM;
- int i;
-
- lst_serial_wq = alloc_ordered_workqueue("lst_s", 0);
- if (!lst_serial_wq) {
- CERROR("Failed to create serial WI scheduler for LST\n");
- return -ENOMEM;
- }
- lst_init_step = LST_INIT_WI_SERIAL;
-
- nscheds = cfs_cpt_number(lnet_cpt_table());
- lst_test_wq = kvmalloc_array(nscheds, sizeof(lst_test_wq[0]),
- GFP_KERNEL | __GFP_ZERO);
- if (!lst_test_wq) {
- rc = -ENOMEM;
- goto error;
- }
-
- lst_init_step = LST_INIT_WI_TEST;
- for (i = 0; i < nscheds; i++) {
- int nthrs = cfs_cpt_weight(lnet_cpt_table(), i);
- struct workqueue_attrs attrs = {0};
- cpumask_var_t *mask = cfs_cpt_cpumask(lnet_cpt_table(), i);
-
- /* reserve at least one CPU for LND */
- nthrs = max(nthrs - 1, 1);
- lst_test_wq[i] = alloc_workqueue("lst_t", WQ_UNBOUND, nthrs);
- if (!lst_test_wq[i]) {
- CWARN("Failed to create CPU partition affinity WI scheduler %d for LST\n",
- i);
- rc = -ENOMEM;
- goto error;
- }
-
- if (mask && alloc_cpumask_var(&attrs.cpumask, GFP_KERNEL)) {
- cpumask_copy(attrs.cpumask, *mask);
- apply_workqueue_attrs(lst_test_wq[i], &attrs);
- free_cpumask_var(attrs.cpumask);
- }
- }
-
- rc = srpc_startup();
- if (rc) {
- CERROR("LST can't startup rpc\n");
- goto error;
- }
- lst_init_step = LST_INIT_RPC;
-
- rc = sfw_startup();
- if (rc) {
- CERROR("LST can't startup framework\n");
- goto error;
- }
- lst_init_step = LST_INIT_FW;
-
- rc = lstcon_console_init();
- if (rc) {
- CERROR("LST can't startup console\n");
- goto error;
- }
- lst_init_step = LST_INIT_CONSOLE;
- return 0;
-error:
- lnet_selftest_exit();
- return rc;
-}
-
-MODULE_AUTHOR("OpenSFS, Inc. <http://www.lustre.org/>");
-MODULE_DESCRIPTION("LNet Selftest");
-MODULE_VERSION("2.7.0");
-MODULE_LICENSE("GPL");
-
-module_init(lnet_selftest_init);
-module_exit(lnet_selftest_exit);
diff --git a/drivers/staging/lustre/lnet/selftest/ping_test.c b/drivers/staging/lustre/lnet/selftest/ping_test.c
deleted file mode 100644
index f54bd630dbf8..000000000000
--- a/drivers/staging/lustre/lnet/selftest/ping_test.c
+++ /dev/null
@@ -1,228 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/*
- * GPL HEADER START
- *
- * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 only,
- * as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * General Public License version 2 for more details (a copy is included
- * in the LICENSE file that accompanied this code).
- *
- * You should have received a copy of the GNU General Public License
- * version 2 along with this program; If not, see
- * http://www.gnu.org/licenses/gpl-2.0.html
- *
- * GPL HEADER END
- */
-/*
- * Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved.
- * Use is subject to license terms.
- *
- * Copyright (c) 2012, Intel Corporation.
- */
-/*
- * This file is part of Lustre, http://www.lustre.org/
- * Lustre is a trademark of Sun Microsystems, Inc.
- *
- * lnet/selftest/ping_test.c
- *
- * Test client & server
- *
- * Author: Liang Zhen <liangzhen@clusterfs.com>
- */
-
-#include "selftest.h"
-
-#define LST_PING_TEST_MAGIC 0xbabeface
-
-static int ping_srv_workitems = SFW_TEST_WI_MAX;
-module_param(ping_srv_workitems, int, 0644);
-MODULE_PARM_DESC(ping_srv_workitems, "# PING server workitems");
-
-struct lst_ping_data {
- spinlock_t pnd_lock; /* serialize */
- int pnd_counter; /* sequence counter */
-};
-
-static struct lst_ping_data lst_ping_data;
-
-static int
-ping_client_init(struct sfw_test_instance *tsi)
-{
- struct sfw_session *sn = tsi->tsi_batch->bat_session;
-
- LASSERT(tsi->tsi_is_client);
- LASSERT(sn && !(sn->sn_features & ~LST_FEATS_MASK));
-
- spin_lock_init(&lst_ping_data.pnd_lock);
- lst_ping_data.pnd_counter = 0;
-
- return 0;
-}
-
-static void
-ping_client_fini(struct sfw_test_instance *tsi)
-{
- struct sfw_session *sn = tsi->tsi_batch->bat_session;
- int errors;
-
- LASSERT(sn);
- LASSERT(tsi->tsi_is_client);
-
- errors = atomic_read(&sn->sn_ping_errors);
- if (errors)
- CWARN("%d pings have failed.\n", errors);
- else
- CDEBUG(D_NET, "Ping test finished OK.\n");
-}
-
-static int
-ping_client_prep_rpc(struct sfw_test_unit *tsu, struct lnet_process_id dest,
- struct srpc_client_rpc **rpc)
-{
- struct srpc_ping_reqst *req;
- struct sfw_test_instance *tsi = tsu->tsu_instance;
- struct sfw_session *sn = tsi->tsi_batch->bat_session;
- struct timespec64 ts;
- int rc;
-
- LASSERT(sn);
- LASSERT(!(sn->sn_features & ~LST_FEATS_MASK));
-
- rc = sfw_create_test_rpc(tsu, dest, sn->sn_features, 0, 0, rpc);
- if (rc)
- return rc;
-
- req = &(*rpc)->crpc_reqstmsg.msg_body.ping_reqst;
-
- req->pnr_magic = LST_PING_TEST_MAGIC;
-
- spin_lock(&lst_ping_data.pnd_lock);
- req->pnr_seq = lst_ping_data.pnd_counter++;
- spin_unlock(&lst_ping_data.pnd_lock);
-
- ktime_get_real_ts64(&ts);
- req->pnr_time_sec = ts.tv_sec;
- req->pnr_time_usec = ts.tv_nsec / NSEC_PER_USEC;
-
- return rc;
-}
-
-static void
-ping_client_done_rpc(struct sfw_test_unit *tsu, struct srpc_client_rpc *rpc)
-{
- struct sfw_test_instance *tsi = tsu->tsu_instance;
- struct sfw_session *sn = tsi->tsi_batch->bat_session;
- struct srpc_ping_reqst *reqst = &rpc->crpc_reqstmsg.msg_body.ping_reqst;
- struct srpc_ping_reply *reply = &rpc->crpc_replymsg.msg_body.ping_reply;
- struct timespec64 ts;
-
- LASSERT(sn);
-
- if (rpc->crpc_status) {
- if (!tsi->tsi_stopping) /* rpc could have been aborted */
- atomic_inc(&sn->sn_ping_errors);
- CERROR("Unable to ping %s (%d): %d\n",
- libcfs_id2str(rpc->crpc_dest),
- reqst->pnr_seq, rpc->crpc_status);
- return;
- }
-
- if (rpc->crpc_replymsg.msg_magic != SRPC_MSG_MAGIC) {
- __swab32s(&reply->pnr_seq);
- __swab32s(&reply->pnr_magic);
- __swab32s(&reply->pnr_status);
- }
-
- if (reply->pnr_magic != LST_PING_TEST_MAGIC) {
- rpc->crpc_status = -EBADMSG;
- atomic_inc(&sn->sn_ping_errors);
- CERROR("Bad magic %u from %s, %u expected.\n",
- reply->pnr_magic, libcfs_id2str(rpc->crpc_dest),
- LST_PING_TEST_MAGIC);
- return;
- }
-
- if (reply->pnr_seq != reqst->pnr_seq) {
- rpc->crpc_status = -EBADMSG;
- atomic_inc(&sn->sn_ping_errors);
- CERROR("Bad seq %u from %s, %u expected.\n",
- reply->pnr_seq, libcfs_id2str(rpc->crpc_dest),
- reqst->pnr_seq);
- return;
- }
-
- ktime_get_real_ts64(&ts);
- CDEBUG(D_NET, "%d reply in %u usec\n", reply->pnr_seq,
- (unsigned int)((ts.tv_sec - reqst->pnr_time_sec) * 1000000 +
- (ts.tv_nsec / NSEC_PER_USEC - reqst->pnr_time_usec)));
-}
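-
-/*
- * Worked example of the round-trip arithmetic above (hypothetical
- * numbers): a request stamped at 1000 s + 999900 us whose reply lands
- * at 1001 s + 150 us gives (1001 - 1000) * 1000000 + (150 - 999900) =
- * 250 usec; the second term can be negative, which is why both terms
- * are summed before the cast to unsigned int.
- */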
-
-static int
-ping_server_handle(struct srpc_server_rpc *rpc)
-{
- struct srpc_service *sv = rpc->srpc_scd->scd_svc;
- struct srpc_msg *reqstmsg = &rpc->srpc_reqstbuf->buf_msg;
- struct srpc_msg *replymsg = &rpc->srpc_replymsg;
- struct srpc_ping_reqst *req = &reqstmsg->msg_body.ping_reqst;
- struct srpc_ping_reply *rep = &rpc->srpc_replymsg.msg_body.ping_reply;
-
- LASSERT(sv->sv_id == SRPC_SERVICE_PING);
-
- if (reqstmsg->msg_magic != SRPC_MSG_MAGIC) {
- LASSERT(reqstmsg->msg_magic == __swab32(SRPC_MSG_MAGIC));
-
- __swab32s(&req->pnr_seq);
- __swab32s(&req->pnr_magic);
- __swab64s(&req->pnr_time_sec);
- __swab64s(&req->pnr_time_usec);
- }
- LASSERT(reqstmsg->msg_type == srpc_service2request(sv->sv_id));
-
- if (req->pnr_magic != LST_PING_TEST_MAGIC) {
- CERROR("Unexpected magic %08x from %s\n",
- req->pnr_magic, libcfs_id2str(rpc->srpc_peer));
- return -EINVAL;
- }
-
- rep->pnr_seq = req->pnr_seq;
- rep->pnr_magic = LST_PING_TEST_MAGIC;
-
- if (reqstmsg->msg_ses_feats & ~LST_FEATS_MASK) {
- replymsg->msg_ses_feats = LST_FEATS_MASK;
- rep->pnr_status = EPROTO;
- return 0;
- }
-
- replymsg->msg_ses_feats = reqstmsg->msg_ses_feats;
-
- CDEBUG(D_NET, "Get ping %d from %s\n",
- req->pnr_seq, libcfs_id2str(rpc->srpc_peer));
- return 0;
-}
-
-struct sfw_test_client_ops ping_test_client;
-
-void ping_init_test_client(void)
-{
- ping_test_client.tso_init = ping_client_init;
- ping_test_client.tso_fini = ping_client_fini;
- ping_test_client.tso_prep_rpc = ping_client_prep_rpc;
- ping_test_client.tso_done_rpc = ping_client_done_rpc;
-}
-
-struct srpc_service ping_test_service;
-
-void ping_init_test_service(void)
-{
- ping_test_service.sv_id = SRPC_SERVICE_PING;
- ping_test_service.sv_name = "ping_test";
- ping_test_service.sv_handler = ping_server_handle;
- ping_test_service.sv_wi_total = ping_srv_workitems;
-}
diff --git a/drivers/staging/lustre/lnet/selftest/rpc.c b/drivers/staging/lustre/lnet/selftest/rpc.c
deleted file mode 100644
index 9613b0a77007..000000000000
--- a/drivers/staging/lustre/lnet/selftest/rpc.c
+++ /dev/null
@@ -1,1682 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/*
- * GPL HEADER START
- *
- * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 only,
- * as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * General Public License version 2 for more details (a copy is included
- * in the LICENSE file that accompanied this code).
- *
- * You should have received a copy of the GNU General Public License
- * version 2 along with this program; If not, see
- * http://www.gnu.org/licenses/gpl-2.0.html
- *
- * GPL HEADER END
- */
-/*
- * Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved.
- * Use is subject to license terms.
- *
- * Copyright (c) 2012, 2015, Intel Corporation.
- */
-/*
- * This file is part of Lustre, http://www.lustre.org/
- * Lustre is a trademark of Sun Microsystems, Inc.
- *
- * lnet/selftest/rpc.c
- *
- * Author: Isaac Huang <isaac@clusterfs.com>
- *
- * 2012-05-13: Liang Zhen <liang@whamcloud.com>
- * - percpt data for service to improve smp performance
- * - code cleanup
- */
-
-#define DEBUG_SUBSYSTEM S_LNET
-
-#include "selftest.h"
-
-enum srpc_state {
- SRPC_STATE_NONE,
- SRPC_STATE_NI_INIT,
- SRPC_STATE_EQ_INIT,
- SRPC_STATE_RUNNING,
- SRPC_STATE_STOPPING,
-};
-
-static struct smoketest_rpc {
- spinlock_t rpc_glock; /* global lock */
- struct srpc_service *rpc_services[SRPC_SERVICE_MAX_ID + 1];
- struct lnet_handle_eq rpc_lnet_eq; /* _the_ LNet event queue */
- enum srpc_state rpc_state;
- struct srpc_counters rpc_counters;
- __u64 rpc_matchbits; /* matchbits counter */
-} srpc_data;
-
-static inline int
-srpc_serv_portal(int svc_id)
-{
- return svc_id < SRPC_FRAMEWORK_SERVICE_MAX_ID ?
- SRPC_FRAMEWORK_REQUEST_PORTAL : SRPC_REQUEST_PORTAL;
-}
-
-/* forward refs */
-void srpc_handle_rpc(struct swi_workitem *wi);
-
-void srpc_get_counters(struct srpc_counters *cnt)
-{
- spin_lock(&srpc_data.rpc_glock);
- *cnt = srpc_data.rpc_counters;
- spin_unlock(&srpc_data.rpc_glock);
-}
-
-void srpc_set_counters(const struct srpc_counters *cnt)
-{
- spin_lock(&srpc_data.rpc_glock);
- srpc_data.rpc_counters = *cnt;
- spin_unlock(&srpc_data.rpc_glock);
-}
-
-static int
-srpc_add_bulk_page(struct srpc_bulk *bk, struct page *pg, int i, int off,
- int nob)
-{
- LASSERT(off < PAGE_SIZE);
- LASSERT(nob > 0 && nob <= PAGE_SIZE);
-
- bk->bk_iovs[i].bv_offset = off;
- bk->bk_iovs[i].bv_page = pg;
- bk->bk_iovs[i].bv_len = nob;
- return nob;
-}
-
-void
-srpc_free_bulk(struct srpc_bulk *bk)
-{
- int i;
- struct page *pg;
-
- LASSERT(bk);
-
- for (i = 0; i < bk->bk_niov; i++) {
- pg = bk->bk_iovs[i].bv_page;
- if (!pg)
- break;
-
- __free_page(pg);
- }
-
- kfree(bk);
-}
-
-struct srpc_bulk *
-srpc_alloc_bulk(int cpt, unsigned int bulk_off, unsigned int bulk_npg,
- unsigned int bulk_len, int sink)
-{
- struct srpc_bulk *bk;
- int i;
-
- LASSERT(bulk_npg > 0 && bulk_npg <= LNET_MAX_IOV);
-
- bk = kzalloc_cpt(offsetof(struct srpc_bulk, bk_iovs[bulk_npg]),
- GFP_KERNEL, cpt);
- if (!bk) {
- CERROR("Can't allocate descriptor for %d pages\n", bulk_npg);
- return NULL;
- }
-
- memset(bk, 0, offsetof(struct srpc_bulk, bk_iovs[bulk_npg]));
- bk->bk_sink = sink;
- bk->bk_len = bulk_len;
- bk->bk_niov = bulk_npg;
-
- for (i = 0; i < bulk_npg; i++) {
- struct page *pg;
- int nob;
-
- pg = alloc_pages_node(cfs_cpt_spread_node(lnet_cpt_table(), cpt),
- GFP_KERNEL, 0);
- if (!pg) {
- CERROR("Can't allocate page %d of %d\n", i, bulk_npg);
- srpc_free_bulk(bk);
- return NULL;
- }
-
- nob = min_t(unsigned int, bulk_off + bulk_len, PAGE_SIZE) -
- bulk_off;
- srpc_add_bulk_page(bk, pg, i, bulk_off, nob);
- bulk_len -= nob;
- bulk_off = 0;
- }
-
- return bk;
-}
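-
-/*
- * Illustration of the page-slicing arithmetic above (hypothetical
- * values): with PAGE_SIZE = 4096, bulk_off = 1024 and bulk_len = 6144,
- * the first iov covers min(1024 + 6144, 4096) - 1024 = 3072 bytes at
- * offset 1024; the remaining 3072 bytes start at offset 0 of the
- * second page.
- */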
-
-static inline __u64
-srpc_next_id(void)
-{
- __u64 id;
-
- spin_lock(&srpc_data.rpc_glock);
- id = srpc_data.rpc_matchbits++;
- spin_unlock(&srpc_data.rpc_glock);
- return id;
-}
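-
-/*
- * NB: srpc_startup() seeds rpc_matchbits with ktime_get_real_seconds()
- * shifted left by 48 bits, so only the low 16 bits of the timestamp
- * survive; the one-second pause taken there is what keeps two quick
- * restarts from reusing the same seed.
- */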
-
-static void
-srpc_init_server_rpc(struct srpc_server_rpc *rpc,
- struct srpc_service_cd *scd,
- struct srpc_buffer *buffer)
-{
- memset(rpc, 0, sizeof(*rpc));
- swi_init_workitem(&rpc->srpc_wi, srpc_handle_rpc,
- srpc_serv_is_framework(scd->scd_svc) ?
- lst_serial_wq : lst_test_wq[scd->scd_cpt]);
-
- rpc->srpc_ev.ev_fired = 1; /* no event expected now */
-
- rpc->srpc_scd = scd;
- rpc->srpc_reqstbuf = buffer;
- rpc->srpc_peer = buffer->buf_peer;
- rpc->srpc_self = buffer->buf_self;
- LNetInvalidateMDHandle(&rpc->srpc_replymdh);
-}
-
-static void
-srpc_service_fini(struct srpc_service *svc)
-{
- struct srpc_service_cd *scd;
- struct srpc_server_rpc *rpc;
- struct srpc_buffer *buf;
- struct list_head *q;
- int i;
-
- if (!svc->sv_cpt_data)
- return;
-
- cfs_percpt_for_each(scd, i, svc->sv_cpt_data) {
- while (1) {
- if (!list_empty(&scd->scd_buf_posted))
- q = &scd->scd_buf_posted;
- else if (!list_empty(&scd->scd_buf_blocked))
- q = &scd->scd_buf_blocked;
- else
- break;
-
- while (!list_empty(q)) {
- buf = list_entry(q->next, struct srpc_buffer,
- buf_list);
- list_del(&buf->buf_list);
- kfree(buf);
- }
- }
-
- LASSERT(list_empty(&scd->scd_rpc_active));
-
- while (!list_empty(&scd->scd_rpc_free)) {
- rpc = list_entry(scd->scd_rpc_free.next,
- struct srpc_server_rpc,
- srpc_list);
- list_del(&rpc->srpc_list);
- kfree(rpc);
- }
- }
-
- cfs_percpt_free(svc->sv_cpt_data);
- svc->sv_cpt_data = NULL;
-}
-
-static int
-srpc_service_nrpcs(struct srpc_service *svc)
-{
- int nrpcs = svc->sv_wi_total / svc->sv_ncpts;
-
- return srpc_serv_is_framework(svc) ?
- max(nrpcs, SFW_FRWK_WI_MIN) : max(nrpcs, SFW_TEST_WI_MIN);
-}
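-
-/*
- * Example with hypothetical numbers: a test service with
- * sv_wi_total = 1024 on a node with 4 CPTs gets 1024 / 4 = 256
- * workitems per partition, subject to the SFW_*_WI_MIN floor.
- */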
-
-void srpc_add_buffer(struct swi_workitem *wi);
-
-static int
-srpc_service_init(struct srpc_service *svc)
-{
- struct srpc_service_cd *scd;
- struct srpc_server_rpc *rpc;
- int nrpcs;
- int i;
- int j;
-
- svc->sv_shuttingdown = 0;
-
- svc->sv_cpt_data = cfs_percpt_alloc(lnet_cpt_table(),
- sizeof(**svc->sv_cpt_data));
- if (!svc->sv_cpt_data)
- return -ENOMEM;
-
- svc->sv_ncpts = srpc_serv_is_framework(svc) ?
- 1 : cfs_cpt_number(lnet_cpt_table());
- nrpcs = srpc_service_nrpcs(svc);
-
- cfs_percpt_for_each(scd, i, svc->sv_cpt_data) {
- scd->scd_cpt = i;
- scd->scd_svc = svc;
- spin_lock_init(&scd->scd_lock);
- INIT_LIST_HEAD(&scd->scd_rpc_free);
- INIT_LIST_HEAD(&scd->scd_rpc_active);
- INIT_LIST_HEAD(&scd->scd_buf_posted);
- INIT_LIST_HEAD(&scd->scd_buf_blocked);
-
- scd->scd_ev.ev_data = scd;
- scd->scd_ev.ev_type = SRPC_REQUEST_RCVD;
-
- /*
- * NB: don't use lst_serial_wq for adding buffers;
- * see details in srpc_service_add_buffers()
- */
- swi_init_workitem(&scd->scd_buf_wi,
- srpc_add_buffer, lst_test_wq[i]);
-
- if (i && srpc_serv_is_framework(svc)) {
- /*
- * NB: a framework service only needs srpc_service_cd for
- * one partition, but we allocate one for every partition
- * to keep the implementation simple; this wastes a
- * little memory, but nobody should care.
- */
- continue;
- }
-
- for (j = 0; j < nrpcs; j++) {
- rpc = kzalloc_cpt(sizeof(*rpc), GFP_NOFS, i);
- if (!rpc) {
- srpc_service_fini(svc);
- return -ENOMEM;
- }
- list_add(&rpc->srpc_list, &scd->scd_rpc_free);
- }
- }
-
- return 0;
-}
-
-int
-srpc_add_service(struct srpc_service *sv)
-{
- int id = sv->sv_id;
-
- LASSERT(0 <= id && id <= SRPC_SERVICE_MAX_ID);
-
- if (srpc_service_init(sv))
- return -ENOMEM;
-
- spin_lock(&srpc_data.rpc_glock);
-
- LASSERT(srpc_data.rpc_state == SRPC_STATE_RUNNING);
-
- if (srpc_data.rpc_services[id]) {
- spin_unlock(&srpc_data.rpc_glock);
- goto failed;
- }
-
- srpc_data.rpc_services[id] = sv;
- spin_unlock(&srpc_data.rpc_glock);
-
- CDEBUG(D_NET, "Adding service: id %d, name %s\n", id, sv->sv_name);
- return 0;
-
- failed:
- srpc_service_fini(sv);
- return -EBUSY;
-}
-
-int
-srpc_remove_service(struct srpc_service *sv)
-{
- int id = sv->sv_id;
-
- spin_lock(&srpc_data.rpc_glock);
-
- if (srpc_data.rpc_services[id] != sv) {
- spin_unlock(&srpc_data.rpc_glock);
- return -ENOENT;
- }
-
- srpc_data.rpc_services[id] = NULL;
- spin_unlock(&srpc_data.rpc_glock);
- return 0;
-}
-
-static int
-srpc_post_passive_rdma(int portal, int local, __u64 matchbits, void *buf,
- int len, int options, struct lnet_process_id peer,
- struct lnet_handle_md *mdh, struct srpc_event *ev)
-{
- int rc;
- struct lnet_md md;
- struct lnet_handle_me meh;
-
- rc = LNetMEAttach(portal, peer, matchbits, 0, LNET_UNLINK,
- local ? LNET_INS_LOCAL : LNET_INS_AFTER, &meh);
- if (rc) {
- CERROR("LNetMEAttach failed: %d\n", rc);
- LASSERT(rc == -ENOMEM);
- return -ENOMEM;
- }
-
- md.threshold = 1;
- md.user_ptr = ev;
- md.start = buf;
- md.length = len;
- md.options = options;
- md.eq_handle = srpc_data.rpc_lnet_eq;
-
- rc = LNetMDAttach(meh, md, LNET_UNLINK, mdh);
- if (rc) {
- CERROR("LNetMDAttach failed: %d\n", rc);
- LASSERT(rc == -ENOMEM);
-
- rc = LNetMEUnlink(meh);
- LASSERT(!rc);
- return -ENOMEM;
- }
-
- CDEBUG(D_NET, "Posted passive RDMA: peer %s, portal %d, matchbits %#llx\n",
- libcfs_id2str(peer), portal, matchbits);
- return 0;
-}
-
-static int
-srpc_post_active_rdma(int portal, __u64 matchbits, void *buf, int len,
- int options, struct lnet_process_id peer,
- lnet_nid_t self, struct lnet_handle_md *mdh,
- struct srpc_event *ev)
-{
- int rc;
- struct lnet_md md;
-
- md.user_ptr = ev;
- md.start = buf;
- md.length = len;
- md.eq_handle = srpc_data.rpc_lnet_eq;
- md.threshold = options & LNET_MD_OP_GET ? 2 : 1;
- md.options = options & ~(LNET_MD_OP_PUT | LNET_MD_OP_GET);
-
- rc = LNetMDBind(md, LNET_UNLINK, mdh);
- if (rc) {
- CERROR("LNetMDBind failed: %d\n", rc);
- LASSERT(rc == -ENOMEM);
- return -ENOMEM;
- }
-
- /*
- * This is kind of an abuse of the LNET_MD_OP_{PUT,GET} options;
- * they're only meaningful for MDs attached to an ME (i.e. passive
- * buffers).
- */
- if (options & LNET_MD_OP_PUT) {
- rc = LNetPut(self, *mdh, LNET_NOACK_REQ, peer,
- portal, matchbits, 0, 0);
- } else {
- LASSERT(options & LNET_MD_OP_GET);
-
- rc = LNetGet(self, *mdh, peer, portal, matchbits, 0);
- }
-
- if (rc) {
- CERROR("LNet%s(%s, %d, %lld) failed: %d\n",
- options & LNET_MD_OP_PUT ? "Put" : "Get",
- libcfs_id2str(peer), portal, matchbits, rc);
-
- /*
- * The forthcoming unlink event will complete this operation
- * with failure, so fall through and return success here.
- */
- rc = LNetMDUnlink(*mdh);
- LASSERT(!rc);
- } else {
- CDEBUG(D_NET, "Posted active RDMA: peer %s, portal %u, matchbits %#llx\n",
- libcfs_id2str(peer), portal, matchbits);
- }
- return 0;
-}
-
-static int
-srpc_post_passive_rqtbuf(int service, int local, void *buf, int len,
- struct lnet_handle_md *mdh, struct srpc_event *ev)
-{
- struct lnet_process_id any = { 0 };
-
- any.nid = LNET_NID_ANY;
- any.pid = LNET_PID_ANY;
-
- return srpc_post_passive_rdma(srpc_serv_portal(service),
- local, service, buf, len,
- LNET_MD_OP_PUT, any, mdh, ev);
-}
-
-static int
-srpc_service_post_buffer(struct srpc_service_cd *scd, struct srpc_buffer *buf)
-__must_hold(&scd->scd_lock)
-{
- struct srpc_service *sv = scd->scd_svc;
- struct srpc_msg *msg = &buf->buf_msg;
- int rc;
-
- LNetInvalidateMDHandle(&buf->buf_mdh);
- list_add(&buf->buf_list, &scd->scd_buf_posted);
- scd->scd_buf_nposted++;
- spin_unlock(&scd->scd_lock);
-
- rc = srpc_post_passive_rqtbuf(sv->sv_id,
- !srpc_serv_is_framework(sv),
- msg, sizeof(*msg), &buf->buf_mdh,
- &scd->scd_ev);
-
- /*
- * At this point, an RPC (new or delayed) may have arrived in
- * msg and its event handler may already have run; that is why
- * buf was added to scd_buf_posted _before_ scd_lock was
- * dropped above.
- */
- spin_lock(&scd->scd_lock);
-
- if (!rc) {
- if (!sv->sv_shuttingdown)
- return 0;
-
- spin_unlock(&scd->scd_lock);
- /*
- * srpc_shutdown_service might have tried to unlink me
- * when my buf_mdh was still invalid
- */
- LNetMDUnlink(buf->buf_mdh);
- spin_lock(&scd->scd_lock);
- return 0;
- }
-
- scd->scd_buf_nposted--;
- if (sv->sv_shuttingdown)
- return rc; /* don't allow changes to scd_buf_posted */
-
- list_del(&buf->buf_list);
- spin_unlock(&scd->scd_lock);
-
- kfree(buf);
-
- spin_lock(&scd->scd_lock);
- return rc;
-}
-
-void
-srpc_add_buffer(struct swi_workitem *wi)
-{
- struct srpc_service_cd *scd = container_of(wi, struct srpc_service_cd, scd_buf_wi);
- struct srpc_buffer *buf;
- int rc = 0;
-
- /*
- * This is called by workitem scheduler threads; these threads
- * have CPT affinity set, so buffers will be posted on the
- * CPT-local list of the portal.
- */
- spin_lock(&scd->scd_lock);
-
- while (scd->scd_buf_adjust > 0 &&
- !scd->scd_svc->sv_shuttingdown) {
- scd->scd_buf_adjust--; /* consume it */
- scd->scd_buf_posting++;
-
- spin_unlock(&scd->scd_lock);
-
- buf = kzalloc(sizeof(*buf), GFP_NOFS);
- if (!buf) {
- CERROR("Failed to add new buf to service: %s\n",
- scd->scd_svc->sv_name);
- spin_lock(&scd->scd_lock);
- rc = -ENOMEM;
- break;
- }
-
- spin_lock(&scd->scd_lock);
- if (scd->scd_svc->sv_shuttingdown) {
- spin_unlock(&scd->scd_lock);
- kfree(buf);
-
- spin_lock(&scd->scd_lock);
- rc = -ESHUTDOWN;
- break;
- }
-
- rc = srpc_service_post_buffer(scd, buf);
- if (rc)
- break; /* buf has been freed inside */
-
- LASSERT(scd->scd_buf_posting > 0);
- scd->scd_buf_posting--;
- scd->scd_buf_total++;
- scd->scd_buf_low = max(2, scd->scd_buf_total / 4);
- }
-
- if (rc) {
- scd->scd_buf_err_stamp = ktime_get_real_seconds();
- scd->scd_buf_err = rc;
-
- LASSERT(scd->scd_buf_posting > 0);
- scd->scd_buf_posting--;
- }
-
- spin_unlock(&scd->scd_lock);
-}
-
-int
-srpc_service_add_buffers(struct srpc_service *sv, int nbuffer)
-{
- struct srpc_service_cd *scd;
- int rc = 0;
- int i;
-
- LASSERTF(nbuffer > 0, "nbuffer must be positive: %d\n", nbuffer);
-
- cfs_percpt_for_each(scd, i, sv->sv_cpt_data) {
- spin_lock(&scd->scd_lock);
-
- scd->scd_buf_err = 0;
- scd->scd_buf_err_stamp = 0;
- scd->scd_buf_posting = 0;
- scd->scd_buf_adjust = nbuffer;
- /* start to post buffers */
- swi_schedule_workitem(&scd->scd_buf_wi);
- spin_unlock(&scd->scd_lock);
-
- /* a framework service only posts buffers for one partition */
- if (srpc_serv_is_framework(sv))
- break;
- }
-
- cfs_percpt_for_each(scd, i, sv->sv_cpt_data) {
- spin_lock(&scd->scd_lock);
- /*
- * NB: srpc_service_add_buffers() can be called from the
- * thread context of lst_serial_wq, and we don't normally
- * allow sleeping in the thread context of a WI scheduler,
- * because that blocks the current scheduler thread from
- * doing anything else; even worse, it could deadlock while
- * waiting on the result of another WI of the same scheduler.
- * However, it's safe here because scd_buf_wi is scheduled
- * by a thread of a different WI scheduler (lst_test_wq),
- * so there is no risk of deadlock, though this may block
- * all WIs pending on lst_serial_wq for a moment, which is
- * not good but not fatal.
- */
- lst_wait_until(scd->scd_buf_err ||
- (!scd->scd_buf_adjust &&
- !scd->scd_buf_posting),
- scd->scd_lock, "waiting for adding buffer\n");
-
- if (scd->scd_buf_err && !rc)
- rc = scd->scd_buf_err;
-
- spin_unlock(&scd->scd_lock);
- }
-
- return rc;
-}
-
-void
-srpc_service_remove_buffers(struct srpc_service *sv, int nbuffer)
-{
- struct srpc_service_cd *scd;
- int num;
- int i;
-
- LASSERT(!sv->sv_shuttingdown);
-
- cfs_percpt_for_each(scd, i, sv->sv_cpt_data) {
- spin_lock(&scd->scd_lock);
-
- num = scd->scd_buf_total + scd->scd_buf_posting;
- scd->scd_buf_adjust -= min(nbuffer, num);
-
- spin_unlock(&scd->scd_lock);
- }
-}
-
-/* returns 1 if sv has finished, otherwise 0 */
-int
-srpc_finish_service(struct srpc_service *sv)
-{
- struct srpc_service_cd *scd;
- struct srpc_server_rpc *rpc;
- int i;
-
- LASSERT(sv->sv_shuttingdown); /* srpc_shutdown_service called */
-
- cfs_percpt_for_each(scd, i, sv->sv_cpt_data) {
- swi_cancel_workitem(&scd->scd_buf_wi);
-
- spin_lock(&scd->scd_lock);
-
- if (scd->scd_buf_nposted > 0) {
- CDEBUG(D_NET, "waiting for %d posted buffers to unlink\n",
- scd->scd_buf_nposted);
- spin_unlock(&scd->scd_lock);
- return 0;
- }
-
- if (list_empty(&scd->scd_rpc_active)) {
- spin_unlock(&scd->scd_lock);
- continue;
- }
-
- rpc = list_entry(scd->scd_rpc_active.next,
- struct srpc_server_rpc, srpc_list);
- CNETERR("Active RPC %p on shutdown: sv %s, peer %s, wi %s, ev fired %d type %d status %d lnet %d\n",
- rpc, sv->sv_name, libcfs_id2str(rpc->srpc_peer),
- swi_state2str(rpc->srpc_wi.swi_state),
- rpc->srpc_ev.ev_fired, rpc->srpc_ev.ev_type,
- rpc->srpc_ev.ev_status, rpc->srpc_ev.ev_lnet);
- spin_unlock(&scd->scd_lock);
- return 0;
- }
-
- /* no lock needed from now on */
- srpc_service_fini(sv);
- return 1;
-}
-
-/* called with scd->scd_lock held */
-static void
-srpc_service_recycle_buffer(struct srpc_service_cd *scd,
- struct srpc_buffer *buf)
-__must_hold(&scd->scd_lock)
-{
- if (!scd->scd_svc->sv_shuttingdown && scd->scd_buf_adjust >= 0) {
- if (srpc_service_post_buffer(scd, buf)) {
- CWARN("Failed to post %s buffer\n",
- scd->scd_svc->sv_name);
- }
- return;
- }
-
- /* service is shutting down, or we want to recycle some buffers */
- scd->scd_buf_total--;
-
- if (scd->scd_buf_adjust < 0) {
- scd->scd_buf_adjust++;
- if (scd->scd_buf_adjust < 0 &&
- !scd->scd_buf_total && !scd->scd_buf_posting) {
- CDEBUG(D_INFO,
- "Try to recycle %d buffers but nothing left\n",
- scd->scd_buf_adjust);
- scd->scd_buf_adjust = 0;
- }
- }
-
- spin_unlock(&scd->scd_lock);
- kfree(buf);
- spin_lock(&scd->scd_lock);
-}
-
-void
-srpc_abort_service(struct srpc_service *sv)
-{
- struct srpc_service_cd *scd;
- struct srpc_server_rpc *rpc;
- int i;
-
- CDEBUG(D_NET, "Aborting service: id %d, name %s\n",
- sv->sv_id, sv->sv_name);
-
- cfs_percpt_for_each(scd, i, sv->sv_cpt_data) {
- spin_lock(&scd->scd_lock);
-
- /*
- * Schedule in-flight RPCs to notice the abort. NB: this
- * races with incoming RPCs; a complete fix would make test
- * RPCs carry the session ID in their headers.
- */
- list_for_each_entry(rpc, &scd->scd_rpc_active, srpc_list) {
- rpc->srpc_aborted = 1;
- swi_schedule_workitem(&rpc->srpc_wi);
- }
-
- spin_unlock(&scd->scd_lock);
- }
-}
-
-void
-srpc_shutdown_service(struct srpc_service *sv)
-{
- struct srpc_service_cd *scd;
- struct srpc_server_rpc *rpc;
- struct srpc_buffer *buf;
- int i;
-
- CDEBUG(D_NET, "Shutting down service: id %d, name %s\n",
- sv->sv_id, sv->sv_name);
-
- cfs_percpt_for_each(scd, i, sv->sv_cpt_data)
- spin_lock(&scd->scd_lock);
-
- sv->sv_shuttingdown = 1; /* i.e. no new active RPC */
-
- cfs_percpt_for_each(scd, i, sv->sv_cpt_data)
- spin_unlock(&scd->scd_lock);
-
- cfs_percpt_for_each(scd, i, sv->sv_cpt_data) {
- spin_lock(&scd->scd_lock);
-
- /* schedule in-flight RPCs to notice the shutdown */
- list_for_each_entry(rpc, &scd->scd_rpc_active, srpc_list)
- swi_schedule_workitem(&rpc->srpc_wi);
-
- spin_unlock(&scd->scd_lock);
-
- /*
- * OK to traverse scd_buf_posted without lock, since no one
- * touches scd_buf_posted now
- */
- list_for_each_entry(buf, &scd->scd_buf_posted, buf_list)
- LNetMDUnlink(buf->buf_mdh);
- }
-}
-
-static int
-srpc_send_request(struct srpc_client_rpc *rpc)
-{
- struct srpc_event *ev = &rpc->crpc_reqstev;
- int rc;
-
- ev->ev_fired = 0;
- ev->ev_data = rpc;
- ev->ev_type = SRPC_REQUEST_SENT;
-
- rc = srpc_post_active_rdma(srpc_serv_portal(rpc->crpc_service),
- rpc->crpc_service, &rpc->crpc_reqstmsg,
- sizeof(struct srpc_msg), LNET_MD_OP_PUT,
- rpc->crpc_dest, LNET_NID_ANY,
- &rpc->crpc_reqstmdh, ev);
- if (rc) {
- LASSERT(rc == -ENOMEM);
- ev->ev_fired = 1; /* no more event expected */
- }
- return rc;
-}
-
-static int
-srpc_prepare_reply(struct srpc_client_rpc *rpc)
-{
- struct srpc_event *ev = &rpc->crpc_replyev;
- __u64 *id = &rpc->crpc_reqstmsg.msg_body.reqst.rpyid;
- int rc;
-
- ev->ev_fired = 0;
- ev->ev_data = rpc;
- ev->ev_type = SRPC_REPLY_RCVD;
-
- *id = srpc_next_id();
-
- rc = srpc_post_passive_rdma(SRPC_RDMA_PORTAL, 0, *id,
- &rpc->crpc_replymsg,
- sizeof(struct srpc_msg),
- LNET_MD_OP_PUT, rpc->crpc_dest,
- &rpc->crpc_replymdh, ev);
- if (rc) {
- LASSERT(rc == -ENOMEM);
- ev->ev_fired = 1; /* no more event expected */
- }
- return rc;
-}
-
-static int
-srpc_prepare_bulk(struct srpc_client_rpc *rpc)
-{
- struct srpc_bulk *bk = &rpc->crpc_bulk;
- struct srpc_event *ev = &rpc->crpc_bulkev;
- __u64 *id = &rpc->crpc_reqstmsg.msg_body.reqst.bulkid;
- int rc;
- int opt;
-
- LASSERT(bk->bk_niov <= LNET_MAX_IOV);
-
- if (!bk->bk_niov)
- return 0; /* nothing to do */
-
- opt = bk->bk_sink ? LNET_MD_OP_PUT : LNET_MD_OP_GET;
- opt |= LNET_MD_KIOV;
-
- ev->ev_fired = 0;
- ev->ev_data = rpc;
- ev->ev_type = SRPC_BULK_REQ_RCVD;
-
- *id = srpc_next_id();
-
- rc = srpc_post_passive_rdma(SRPC_RDMA_PORTAL, 0, *id,
- &bk->bk_iovs[0], bk->bk_niov, opt,
- rpc->crpc_dest, &bk->bk_mdh, ev);
- if (rc) {
- LASSERT(rc == -ENOMEM);
- ev->ev_fired = 1; /* no more event expected */
- }
- return rc;
-}
-
-static int
-srpc_do_bulk(struct srpc_server_rpc *rpc)
-{
- struct srpc_event *ev = &rpc->srpc_ev;
- struct srpc_bulk *bk = rpc->srpc_bulk;
- __u64 id = rpc->srpc_reqstbuf->buf_msg.msg_body.reqst.bulkid;
- int rc;
- int opt;
-
- LASSERT(bk);
-
- opt = bk->bk_sink ? LNET_MD_OP_GET : LNET_MD_OP_PUT;
- opt |= LNET_MD_KIOV;
-
- ev->ev_fired = 0;
- ev->ev_data = rpc;
- ev->ev_type = bk->bk_sink ? SRPC_BULK_GET_RPLD : SRPC_BULK_PUT_SENT;
-
- rc = srpc_post_active_rdma(SRPC_RDMA_PORTAL, id,
- &bk->bk_iovs[0], bk->bk_niov, opt,
- rpc->srpc_peer, rpc->srpc_self,
- &bk->bk_mdh, ev);
- if (rc)
- ev->ev_fired = 1; /* no more event expected */
- return rc;
-}
-
-/* only called from srpc_handle_rpc */
-static void
-srpc_server_rpc_done(struct srpc_server_rpc *rpc, int status)
-{
- struct srpc_service_cd *scd = rpc->srpc_scd;
- struct srpc_service *sv = scd->scd_svc;
- struct srpc_buffer *buffer;
-
- LASSERT(status || rpc->srpc_wi.swi_state == SWI_STATE_DONE);
-
- rpc->srpc_status = status;
-
- CDEBUG_LIMIT(!status ? D_NET : D_NETERROR,
- "Server RPC %p done: service %s, peer %s, status %s:%d\n",
- rpc, sv->sv_name, libcfs_id2str(rpc->srpc_peer),
- swi_state2str(rpc->srpc_wi.swi_state), status);
-
- if (status) {
- spin_lock(&srpc_data.rpc_glock);
- srpc_data.rpc_counters.rpcs_dropped++;
- spin_unlock(&srpc_data.rpc_glock);
- }
-
- if (rpc->srpc_done)
- (*rpc->srpc_done)(rpc);
- LASSERT(!rpc->srpc_bulk);
-
- spin_lock(&scd->scd_lock);
-
- if (rpc->srpc_reqstbuf) {
- /*
- * NB: srpc_service_recycle_buffer might drop scd_lock, but
- * sv won't go away, since scd_rpc_active is not empty.
- */
- srpc_service_recycle_buffer(scd, rpc->srpc_reqstbuf);
- rpc->srpc_reqstbuf = NULL;
- }
-
- list_del(&rpc->srpc_list); /* from scd->scd_rpc_active */
-
- /*
- * No one can schedule me now, since:
- * - I'm not on scd_rpc_active;
- * - all LNet events have been fired;
- * so it is safe to finish this RPC.
- */
- LASSERT(rpc->srpc_ev.ev_fired);
-
- if (!sv->sv_shuttingdown && !list_empty(&scd->scd_buf_blocked)) {
- buffer = list_entry(scd->scd_buf_blocked.next,
- struct srpc_buffer, buf_list);
- list_del(&buffer->buf_list);
-
- srpc_init_server_rpc(rpc, scd, buffer);
- list_add_tail(&rpc->srpc_list, &scd->scd_rpc_active);
- swi_schedule_workitem(&rpc->srpc_wi);
- } else {
- list_add(&rpc->srpc_list, &scd->scd_rpc_free);
- }
-
- spin_unlock(&scd->scd_lock);
-}
-
-/* handles an incoming RPC */
-void
-srpc_handle_rpc(struct swi_workitem *wi)
-{
- struct srpc_server_rpc *rpc = container_of(wi, struct srpc_server_rpc, srpc_wi);
- struct srpc_service_cd *scd = rpc->srpc_scd;
- struct srpc_service *sv = scd->scd_svc;
- struct srpc_event *ev = &rpc->srpc_ev;
- int rc = 0;
-
- LASSERT(wi == &rpc->srpc_wi);
-
- spin_lock(&scd->scd_lock);
-
- if (sv->sv_shuttingdown || rpc->srpc_aborted) {
- spin_unlock(&scd->scd_lock);
-
- if (rpc->srpc_bulk)
- LNetMDUnlink(rpc->srpc_bulk->bk_mdh);
- LNetMDUnlink(rpc->srpc_replymdh);
-
- if (ev->ev_fired) { /* no more event, OK to finish */
- srpc_server_rpc_done(rpc, -ESHUTDOWN);
- }
- return;
- }
-
- spin_unlock(&scd->scd_lock);
-
- switch (wi->swi_state) {
- default:
- LBUG();
- case SWI_STATE_NEWBORN: {
- struct srpc_msg *msg;
- struct srpc_generic_reply *reply;
-
- msg = &rpc->srpc_reqstbuf->buf_msg;
- reply = &rpc->srpc_replymsg.msg_body.reply;
-
- if (!msg->msg_magic) {
- /* moaned already in srpc_lnet_ev_handler */
- srpc_server_rpc_done(rpc, EBADMSG);
- return;
- }
-
- srpc_unpack_msg_hdr(msg);
- if (msg->msg_version != SRPC_MSG_VERSION) {
- CWARN("Version mismatch: %u, %u expected, from %s\n",
- msg->msg_version, SRPC_MSG_VERSION,
- libcfs_id2str(rpc->srpc_peer));
- reply->status = EPROTO;
- /* drop through and send reply */
- } else {
- reply->status = 0;
- rc = (*sv->sv_handler)(rpc);
- LASSERT(!reply->status || !rpc->srpc_bulk);
- if (rc) {
- srpc_server_rpc_done(rpc, rc);
- return;
- }
- }
-
- wi->swi_state = SWI_STATE_BULK_STARTED;
-
- if (rpc->srpc_bulk) {
- rc = srpc_do_bulk(rpc);
- if (!rc)
- return; /* wait for bulk */
-
- LASSERT(ev->ev_fired);
- ev->ev_status = rc;
- }
- }
- /* fall through */
- case SWI_STATE_BULK_STARTED:
- LASSERT(!rpc->srpc_bulk || ev->ev_fired);
-
- if (rpc->srpc_bulk) {
- rc = ev->ev_status;
-
- if (sv->sv_bulk_ready)
- rc = (*sv->sv_bulk_ready)(rpc, rc);
-
- if (rc) {
- srpc_server_rpc_done(rpc, rc);
- return;
- }
- }
-
- wi->swi_state = SWI_STATE_REPLY_SUBMITTED;
- rc = srpc_send_reply(rpc);
- if (!rc)
- return; /* wait for reply */
- srpc_server_rpc_done(rpc, rc);
- return;
-
- case SWI_STATE_REPLY_SUBMITTED:
- if (!ev->ev_fired) {
- CERROR("RPC %p: bulk %p, service %d\n",
- rpc, rpc->srpc_bulk, sv->sv_id);
- CERROR("Event: status %d, type %d, lnet %d\n",
- ev->ev_status, ev->ev_type, ev->ev_lnet);
- LASSERT(ev->ev_fired);
- }
-
- wi->swi_state = SWI_STATE_DONE;
- srpc_server_rpc_done(rpc, ev->ev_status);
- return;
- }
-}
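-
-/*
- * Summary of the server-side state machine driven above:
- *
- * SWI_STATE_NEWBORN -> SWI_STATE_BULK_STARTED
- * (request unpacked, sv_handler run, optional bulk started)
- * SWI_STATE_BULK_STARTED -> SWI_STATE_REPLY_SUBMITTED
- * (bulk completed, reply posted via srpc_send_reply())
- * SWI_STATE_REPLY_SUBMITTED -> SWI_STATE_DONE
- * (reply event fired, srpc_server_rpc_done() called)
- */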
-
-static void
-srpc_client_rpc_expired(void *data)
-{
- struct srpc_client_rpc *rpc = data;
-
- CWARN("Client RPC expired: service %d, peer %s, timeout %d.\n",
- rpc->crpc_service, libcfs_id2str(rpc->crpc_dest),
- rpc->crpc_timeout);
-
- spin_lock(&rpc->crpc_lock);
-
- rpc->crpc_timeout = 0;
- srpc_abort_rpc(rpc, -ETIMEDOUT);
-
- spin_unlock(&rpc->crpc_lock);
-
- spin_lock(&srpc_data.rpc_glock);
- srpc_data.rpc_counters.rpcs_expired++;
- spin_unlock(&srpc_data.rpc_glock);
-}
-
-static void
-srpc_add_client_rpc_timer(struct srpc_client_rpc *rpc)
-{
- struct stt_timer *timer = &rpc->crpc_timer;
-
- if (!rpc->crpc_timeout)
- return;
-
- INIT_LIST_HEAD(&timer->stt_list);
- timer->stt_data = rpc;
- timer->stt_func = srpc_client_rpc_expired;
- timer->stt_expires = ktime_get_real_seconds() + rpc->crpc_timeout;
- stt_add_timer(timer);
-}
-
-/*
- * Called with rpc->crpc_lock held.
- *
- * Upon exit the RPC expiry timer is not queued and the handler is not
- * running on any CPU.
- */
-static void
-srpc_del_client_rpc_timer(struct srpc_client_rpc *rpc)
-{
- /* timer not planted or already exploded */
- if (!rpc->crpc_timeout)
- return;
-
- /* timer successfully defused */
- if (stt_del_timer(&rpc->crpc_timer))
- return;
-
- /* timer detonated, wait for it to explode */
- while (rpc->crpc_timeout) {
- spin_unlock(&rpc->crpc_lock);
-
- schedule();
-
- spin_lock(&rpc->crpc_lock);
- }
-}
-
-static void
-srpc_client_rpc_done(struct srpc_client_rpc *rpc, int status)
-{
- struct swi_workitem *wi = &rpc->crpc_wi;
-
- LASSERT(status || wi->swi_state == SWI_STATE_DONE);
-
- spin_lock(&rpc->crpc_lock);
-
- rpc->crpc_closed = 1;
- if (!rpc->crpc_status)
- rpc->crpc_status = status;
-
- srpc_del_client_rpc_timer(rpc);
-
- CDEBUG_LIMIT(!status ? D_NET : D_NETERROR,
- "Client RPC done: service %d, peer %s, status %s:%d:%d\n",
- rpc->crpc_service, libcfs_id2str(rpc->crpc_dest),
- swi_state2str(wi->swi_state), rpc->crpc_aborted, status);
-
- /*
- * No one can schedule me now, since:
- * - the RPC timer has been defused;
- * - all LNet events have been fired;
- * - crpc_closed has been set, preventing srpc_abort_rpc from
- * scheduling me;
- * so it is safe to finish this RPC.
- */
- LASSERT(!srpc_event_pending(rpc));
-
- spin_unlock(&rpc->crpc_lock);
-
- (*rpc->crpc_done)(rpc);
-}
-
-/* sends an outgoing RPC */
-void
-srpc_send_rpc(struct swi_workitem *wi)
-{
- int rc = 0;
- struct srpc_client_rpc *rpc;
- struct srpc_msg *reply;
- int do_bulk;
-
- LASSERT(wi);
-
- rpc = container_of(wi, struct srpc_client_rpc, crpc_wi);
-
- LASSERT(rpc);
- LASSERT(wi == &rpc->crpc_wi);
-
- reply = &rpc->crpc_replymsg;
- do_bulk = rpc->crpc_bulk.bk_niov > 0;
-
- spin_lock(&rpc->crpc_lock);
-
- if (rpc->crpc_aborted) {
- spin_unlock(&rpc->crpc_lock);
- goto abort;
- }
-
- spin_unlock(&rpc->crpc_lock);
-
- switch (wi->swi_state) {
- default:
- LBUG();
- case SWI_STATE_NEWBORN:
- LASSERT(!srpc_event_pending(rpc));
-
- rc = srpc_prepare_reply(rpc);
- if (rc) {
- srpc_client_rpc_done(rpc, rc);
- return;
- }
-
- rc = srpc_prepare_bulk(rpc);
- if (rc)
- break;
-
- wi->swi_state = SWI_STATE_REQUEST_SUBMITTED;
- rc = srpc_send_request(rpc);
- break;
-
- case SWI_STATE_REQUEST_SUBMITTED:
- /*
- * CAVEAT EMPTOR: rqtev, rpyev, and bulkev may come in any
- * order; however, they're processed in a strict order:
- * rqt, rpy, and bulk.
- */
- if (!rpc->crpc_reqstev.ev_fired)
- break;
-
- rc = rpc->crpc_reqstev.ev_status;
- if (rc)
- break;
-
- wi->swi_state = SWI_STATE_REQUEST_SENT;
- /* perhaps more events */
- /* fall through */
- case SWI_STATE_REQUEST_SENT: {
- enum srpc_msg_type type = srpc_service2reply(rpc->crpc_service);
-
- if (!rpc->crpc_replyev.ev_fired)
- break;
-
- rc = rpc->crpc_replyev.ev_status;
- if (rc)
- break;
-
- srpc_unpack_msg_hdr(reply);
- if (reply->msg_type != type ||
- (reply->msg_magic != SRPC_MSG_MAGIC &&
- reply->msg_magic != __swab32(SRPC_MSG_MAGIC))) {
- CWARN("Bad message from %s: type %u (%d expected), magic %u (%d expected).\n",
- libcfs_id2str(rpc->crpc_dest),
- reply->msg_type, type,
- reply->msg_magic, SRPC_MSG_MAGIC);
- rc = -EBADMSG;
- break;
- }
-
- if (do_bulk && reply->msg_body.reply.status) {
- CWARN("Remote error %d at %s, unlink bulk buffer in case peer didn't initiate bulk transfer\n",
- reply->msg_body.reply.status,
- libcfs_id2str(rpc->crpc_dest));
- LNetMDUnlink(rpc->crpc_bulk.bk_mdh);
- }
-
- wi->swi_state = SWI_STATE_REPLY_RECEIVED;
- }
- /* fall through */
- case SWI_STATE_REPLY_RECEIVED:
- if (do_bulk && !rpc->crpc_bulkev.ev_fired)
- break;
-
- rc = do_bulk ? rpc->crpc_bulkev.ev_status : 0;
-
- /*
- * Bulk buffer was unlinked due to remote error. Clear error
- * since reply buffer still contains valid data.
- * NB rpc->crpc_done shouldn't look into bulk data in case of
- * remote error.
- */
- if (do_bulk && rpc->crpc_bulkev.ev_lnet == LNET_EVENT_UNLINK &&
- !rpc->crpc_status && reply->msg_body.reply.status)
- rc = 0;
-
- wi->swi_state = SWI_STATE_DONE;
- srpc_client_rpc_done(rpc, rc);
- return;
- }
-
- if (rc) {
- spin_lock(&rpc->crpc_lock);
- srpc_abort_rpc(rpc, rc);
- spin_unlock(&rpc->crpc_lock);
- }
-
-abort:
- if (rpc->crpc_aborted) {
- LNetMDUnlink(rpc->crpc_reqstmdh);
- LNetMDUnlink(rpc->crpc_replymdh);
- LNetMDUnlink(rpc->crpc_bulk.bk_mdh);
-
- if (!srpc_event_pending(rpc)) {
- srpc_client_rpc_done(rpc, -EINTR);
- return;
- }
- }
-}
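-
-/*
- * Summary of the client-side state machine driven above:
- *
- * SWI_STATE_NEWBORN -> SWI_STATE_REQUEST_SUBMITTED
- * (reply/bulk buffers posted, request PUT issued)
- * SWI_STATE_REQUEST_SUBMITTED -> SWI_STATE_REQUEST_SENT
- * (reqstev fired successfully)
- * SWI_STATE_REQUEST_SENT -> SWI_STATE_REPLY_RECEIVED
- * (replyev fired, reply header validated)
- * SWI_STATE_REPLY_RECEIVED -> SWI_STATE_DONE
- * (bulkev fired if bulk was used, srpc_client_rpc_done() called)
- */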
-
-struct srpc_client_rpc *
-srpc_create_client_rpc(struct lnet_process_id peer, int service,
- int nbulkiov, int bulklen,
- void (*rpc_done)(struct srpc_client_rpc *),
- void (*rpc_fini)(struct srpc_client_rpc *), void *priv)
-{
- struct srpc_client_rpc *rpc;
-
- rpc = kzalloc(offsetof(struct srpc_client_rpc,
- crpc_bulk.bk_iovs[nbulkiov]), GFP_KERNEL);
- if (!rpc)
- return NULL;
-
- srpc_init_client_rpc(rpc, peer, service, nbulkiov,
- bulklen, rpc_done, rpc_fini, priv);
- return rpc;
-}
-
-/* called with rpc->crpc_lock held */
-void
-srpc_abort_rpc(struct srpc_client_rpc *rpc, int why)
-{
- LASSERT(why);
-
- if (rpc->crpc_aborted || /* already aborted */
- rpc->crpc_closed) /* callback imminent */
- return;
-
- CDEBUG(D_NET, "Aborting RPC: service %d, peer %s, state %s, why %d\n",
- rpc->crpc_service, libcfs_id2str(rpc->crpc_dest),
- swi_state2str(rpc->crpc_wi.swi_state), why);
-
- rpc->crpc_aborted = 1;
- rpc->crpc_status = why;
- swi_schedule_workitem(&rpc->crpc_wi);
-}
-
-/* called with rpc->crpc_lock held */
-void
-srpc_post_rpc(struct srpc_client_rpc *rpc)
-{
- LASSERT(!rpc->crpc_aborted);
- LASSERT(srpc_data.rpc_state == SRPC_STATE_RUNNING);
-
- CDEBUG(D_NET, "Posting RPC: peer %s, service %d, timeout %d\n",
- libcfs_id2str(rpc->crpc_dest), rpc->crpc_service,
- rpc->crpc_timeout);
-
- srpc_add_client_rpc_timer(rpc);
- swi_schedule_workitem(&rpc->crpc_wi);
-}
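-
-/*
- * Minimal usage sketch for the client-side API above (hypothetical
- * caller; ping_done is an assumed completion callback, not part of
- * this file):
- *
- * rpc = srpc_create_client_rpc(peer, SRPC_SERVICE_PING, 0, 0,
- * ping_done, NULL, NULL);
- * if (rpc) {
- * spin_lock(&rpc->crpc_lock);
- * srpc_post_rpc(rpc);
- * spin_unlock(&rpc->crpc_lock);
- * }
- */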
-
-int
-srpc_send_reply(struct srpc_server_rpc *rpc)
-{
- struct srpc_event *ev = &rpc->srpc_ev;
- struct srpc_msg *msg = &rpc->srpc_replymsg;
- struct srpc_buffer *buffer = rpc->srpc_reqstbuf;
- struct srpc_service_cd *scd = rpc->srpc_scd;
- struct srpc_service *sv = scd->scd_svc;
- __u64 rpyid;
- int rc;
-
- LASSERT(buffer);
- rpyid = buffer->buf_msg.msg_body.reqst.rpyid;
-
- spin_lock(&scd->scd_lock);
-
- if (!sv->sv_shuttingdown && !srpc_serv_is_framework(sv)) {
- /*
- * Repost buffer before replying since test client
- * might send me another RPC once it gets the reply
- */
- if (srpc_service_post_buffer(scd, buffer))
- CWARN("Failed to repost %s buffer\n", sv->sv_name);
- rpc->srpc_reqstbuf = NULL;
- }
-
- spin_unlock(&scd->scd_lock);
-
- ev->ev_fired = 0;
- ev->ev_data = rpc;
- ev->ev_type = SRPC_REPLY_SENT;
-
- msg->msg_magic = SRPC_MSG_MAGIC;
- msg->msg_version = SRPC_MSG_VERSION;
- msg->msg_type = srpc_service2reply(sv->sv_id);
-
- rc = srpc_post_active_rdma(SRPC_RDMA_PORTAL, rpyid, msg,
- sizeof(*msg), LNET_MD_OP_PUT,
- rpc->srpc_peer, rpc->srpc_self,
- &rpc->srpc_replymdh, ev);
- if (rc)
- ev->ev_fired = 1; /* no more event expected */
- return rc;
-}
-
-/* in the kernel, always called with LNET_LOCK() held and in thread context */
-static void
-srpc_lnet_ev_handler(struct lnet_event *ev)
-{
- struct srpc_service_cd *scd;
- struct srpc_event *rpcev = ev->md.user_ptr;
- struct srpc_client_rpc *crpc;
- struct srpc_server_rpc *srpc;
- struct srpc_buffer *buffer;
- struct srpc_service *sv;
- struct srpc_msg *msg;
- enum srpc_msg_type type;
-
- LASSERT(!in_interrupt());
-
- if (ev->status) {
- __u32 errors;
-
- spin_lock(&srpc_data.rpc_glock);
- if (ev->status != -ECANCELED) /* cancellation is not error */
- srpc_data.rpc_counters.errors++;
- errors = srpc_data.rpc_counters.errors;
- spin_unlock(&srpc_data.rpc_glock);
-
- CNETERR("LNet event status %d type %d, RPC errors %u\n",
- ev->status, ev->type, errors);
- }
-
- rpcev->ev_lnet = ev->type;
-
- switch (rpcev->ev_type) {
- default:
- CERROR("Unknown event: status %d, type %d, lnet %d\n",
- rpcev->ev_status, rpcev->ev_type, rpcev->ev_lnet);
- LBUG();
- case SRPC_REQUEST_SENT:
- if (!ev->status && ev->type != LNET_EVENT_UNLINK) {
- spin_lock(&srpc_data.rpc_glock);
- srpc_data.rpc_counters.rpcs_sent++;
- spin_unlock(&srpc_data.rpc_glock);
- }
- /* fall through */
- case SRPC_REPLY_RCVD:
- case SRPC_BULK_REQ_RCVD:
- crpc = rpcev->ev_data;
-
- if (rpcev != &crpc->crpc_reqstev &&
- rpcev != &crpc->crpc_replyev &&
- rpcev != &crpc->crpc_bulkev) {
- CERROR("rpcev %p, crpc %p, reqstev %p, replyev %p, bulkev %p\n",
- rpcev, crpc, &crpc->crpc_reqstev,
- &crpc->crpc_replyev, &crpc->crpc_bulkev);
- CERROR("Bad event: status %d, type %d, lnet %d\n",
- rpcev->ev_status, rpcev->ev_type, rpcev->ev_lnet);
- LBUG();
- }
-
- spin_lock(&crpc->crpc_lock);
-
- LASSERT(!rpcev->ev_fired);
- rpcev->ev_fired = 1;
- rpcev->ev_status = (ev->type == LNET_EVENT_UNLINK) ?
- -EINTR : ev->status;
- swi_schedule_workitem(&crpc->crpc_wi);
-
- spin_unlock(&crpc->crpc_lock);
- break;
-
- case SRPC_REQUEST_RCVD:
- scd = rpcev->ev_data;
- sv = scd->scd_svc;
-
- LASSERT(rpcev == &scd->scd_ev);
-
- spin_lock(&scd->scd_lock);
-
- LASSERT(ev->unlinked);
- LASSERT(ev->type == LNET_EVENT_PUT ||
- ev->type == LNET_EVENT_UNLINK);
- LASSERT(ev->type != LNET_EVENT_UNLINK ||
- sv->sv_shuttingdown);
-
- buffer = container_of(ev->md.start, struct srpc_buffer, buf_msg);
- buffer->buf_peer = ev->initiator;
- buffer->buf_self = ev->target.nid;
-
- LASSERT(scd->scd_buf_nposted > 0);
- scd->scd_buf_nposted--;
-
- if (sv->sv_shuttingdown) {
- /*
- * Leave the buffer on scd->scd_buf_posted, since
- * the shutdown path still needs to traverse it.
- */
- spin_unlock(&scd->scd_lock);
- break;
- }
-
- if (scd->scd_buf_err_stamp &&
- scd->scd_buf_err_stamp < ktime_get_real_seconds()) {
- /* re-enable adding buffer */
- scd->scd_buf_err_stamp = 0;
- scd->scd_buf_err = 0;
- }
-
- if (!scd->scd_buf_err && /* adding buffer is enabled */
- !scd->scd_buf_adjust &&
- scd->scd_buf_nposted < scd->scd_buf_low) {
- scd->scd_buf_adjust = max(scd->scd_buf_total / 2,
- SFW_TEST_WI_MIN);
- swi_schedule_workitem(&scd->scd_buf_wi);
- }
-
- list_del(&buffer->buf_list); /* from scd->scd_buf_posted */
- msg = &buffer->buf_msg;
- type = srpc_service2request(sv->sv_id);
-
- if (ev->status || ev->mlength != sizeof(*msg) ||
- (msg->msg_type != type &&
- msg->msg_type != __swab32(type)) ||
- (msg->msg_magic != SRPC_MSG_MAGIC &&
- msg->msg_magic != __swab32(SRPC_MSG_MAGIC))) {
- CERROR("Dropping RPC (%s) from %s: status %d mlength %d type %u magic %u.\n",
- sv->sv_name, libcfs_id2str(ev->initiator),
- ev->status, ev->mlength,
- msg->msg_type, msg->msg_magic);
-
- /*
- * NB can't call srpc_service_recycle_buffer here since
- * it may call LNetM[DE]Attach. The invalid magic tells
- * srpc_handle_rpc to drop this RPC
- */
- msg->msg_magic = 0;
- }
-
- if (!list_empty(&scd->scd_rpc_free)) {
- srpc = list_entry(scd->scd_rpc_free.next,
- struct srpc_server_rpc,
- srpc_list);
- list_del(&srpc->srpc_list);
-
- srpc_init_server_rpc(srpc, scd, buffer);
- list_add_tail(&srpc->srpc_list,
- &scd->scd_rpc_active);
- swi_schedule_workitem(&srpc->srpc_wi);
- } else {
- list_add_tail(&buffer->buf_list,
- &scd->scd_buf_blocked);
- }
-
- spin_unlock(&scd->scd_lock);
-
- spin_lock(&srpc_data.rpc_glock);
- srpc_data.rpc_counters.rpcs_rcvd++;
- spin_unlock(&srpc_data.rpc_glock);
- break;
-
- case SRPC_BULK_GET_RPLD:
- LASSERT(ev->type == LNET_EVENT_SEND ||
- ev->type == LNET_EVENT_REPLY ||
- ev->type == LNET_EVENT_UNLINK);
-
- if (!ev->unlinked)
- break; /* wait for final event */
- /* fall through */
- case SRPC_BULK_PUT_SENT:
- if (!ev->status && ev->type != LNET_EVENT_UNLINK) {
- spin_lock(&srpc_data.rpc_glock);
-
- if (rpcev->ev_type == SRPC_BULK_GET_RPLD)
- srpc_data.rpc_counters.bulk_get += ev->mlength;
- else
- srpc_data.rpc_counters.bulk_put += ev->mlength;
-
- spin_unlock(&srpc_data.rpc_glock);
- }
- /* fall through */
- case SRPC_REPLY_SENT:
- srpc = rpcev->ev_data;
- scd = srpc->srpc_scd;
-
- LASSERT(rpcev == &srpc->srpc_ev);
-
- spin_lock(&scd->scd_lock);
-
- rpcev->ev_fired = 1;
- rpcev->ev_status = (ev->type == LNET_EVENT_UNLINK) ?
- -EINTR : ev->status;
- swi_schedule_workitem(&srpc->srpc_wi);
-
- spin_unlock(&scd->scd_lock);
- break;
- }
-}
-
-int
-srpc_startup(void)
-{
- int rc;
-
- memset(&srpc_data, 0, sizeof(struct smoketest_rpc));
- spin_lock_init(&srpc_data.rpc_glock);
-
- /* 1 second pause to avoid timestamp reuse */
- set_current_state(TASK_UNINTERRUPTIBLE);
- schedule_timeout(HZ);
- srpc_data.rpc_matchbits = ((__u64)ktime_get_real_seconds()) << 48;
-
- srpc_data.rpc_state = SRPC_STATE_NONE;
-
- rc = LNetNIInit(LNET_PID_LUSTRE);
- if (rc < 0) {
- CERROR("LNetNIInit() has failed: %d\n", rc);
- return rc;
- }
-
- srpc_data.rpc_state = SRPC_STATE_NI_INIT;
-
- LNetInvalidateEQHandle(&srpc_data.rpc_lnet_eq);
- rc = LNetEQAlloc(0, srpc_lnet_ev_handler, &srpc_data.rpc_lnet_eq);
- if (rc) {
- CERROR("LNetEQAlloc() has failed: %d\n", rc);
- goto bail;
- }
-
- rc = LNetSetLazyPortal(SRPC_FRAMEWORK_REQUEST_PORTAL);
- LASSERT(!rc);
- rc = LNetSetLazyPortal(SRPC_REQUEST_PORTAL);
- LASSERT(!rc);
-
- srpc_data.rpc_state = SRPC_STATE_EQ_INIT;
-
- rc = stt_startup();
-
-bail:
- if (rc)
- srpc_shutdown();
- else
- srpc_data.rpc_state = SRPC_STATE_RUNNING;
-
- return rc;
-}
-
-void
-srpc_shutdown(void)
-{
- int i;
- int rc;
- int state;
-
- state = srpc_data.rpc_state;
- srpc_data.rpc_state = SRPC_STATE_STOPPING;
-
- switch (state) {
- default:
- LBUG();
- case SRPC_STATE_RUNNING:
- spin_lock(&srpc_data.rpc_glock);
-
- for (i = 0; i <= SRPC_SERVICE_MAX_ID; i++) {
- struct srpc_service *sv = srpc_data.rpc_services[i];
-
- LASSERTF(!sv, "service not empty: id %d, name %s\n",
- i, sv->sv_name);
- }
-
- spin_unlock(&srpc_data.rpc_glock);
-
- stt_shutdown();
- /* fall through */
- case SRPC_STATE_EQ_INIT:
- rc = LNetClearLazyPortal(SRPC_FRAMEWORK_REQUEST_PORTAL);
- rc = LNetClearLazyPortal(SRPC_REQUEST_PORTAL);
- LASSERT(!rc);
- rc = LNetEQFree(srpc_data.rpc_lnet_eq);
- LASSERT(!rc); /* the EQ should have no user by now */
- /* fall through */
- case SRPC_STATE_NI_INIT:
- LNetNIFini();
- }
-}
diff --git a/drivers/staging/lustre/lnet/selftest/rpc.h b/drivers/staging/lustre/lnet/selftest/rpc.h
deleted file mode 100644
index 465b5b534423..000000000000
--- a/drivers/staging/lustre/lnet/selftest/rpc.h
+++ /dev/null
@@ -1,295 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/*
- * GPL HEADER START
- *
- * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 only,
- * as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * General Public License version 2 for more details (a copy is included
- * in the LICENSE file that accompanied this code).
- *
- * You should have received a copy of the GNU General Public License
- * version 2 along with this program; If not, see
- * http://www.gnu.org/licenses/gpl-2.0.html
- *
- * GPL HEADER END
- */
-/*
- * Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved.
- * Use is subject to license terms.
- *
- * Copyright (c) 2012, Intel Corporation.
- */
-/*
- * This file is part of Lustre, http://www.lustre.org/
- * Lustre is a trademark of Sun Microsystems, Inc.
- */
-
-#ifndef __SELFTEST_RPC_H__
-#define __SELFTEST_RPC_H__
-
-#include <uapi/linux/lnet/lnetst.h>
-
-/*
- * LST wire structures
- *
- * XXX: each *REPLY message type must equal its matching *REQST type + 1
- */
-enum srpc_msg_type {
- SRPC_MSG_MKSN_REQST = 0,
- SRPC_MSG_MKSN_REPLY = 1,
- SRPC_MSG_RMSN_REQST = 2,
- SRPC_MSG_RMSN_REPLY = 3,
- SRPC_MSG_BATCH_REQST = 4,
- SRPC_MSG_BATCH_REPLY = 5,
- SRPC_MSG_STAT_REQST = 6,
- SRPC_MSG_STAT_REPLY = 7,
- SRPC_MSG_TEST_REQST = 8,
- SRPC_MSG_TEST_REPLY = 9,
- SRPC_MSG_DEBUG_REQST = 10,
- SRPC_MSG_DEBUG_REPLY = 11,
- SRPC_MSG_BRW_REQST = 12,
- SRPC_MSG_BRW_REPLY = 13,
- SRPC_MSG_PING_REQST = 14,
- SRPC_MSG_PING_REPLY = 15,
- SRPC_MSG_JOIN_REQST = 16,
- SRPC_MSG_JOIN_REPLY = 17,
-};
-
-/* CAVEAT EMPTOR:
- * The 1st field of every srpc_*_reqst must be the matchbits of the reply
- * buffer, and the 2nd field the matchbits of the bulk buffer, if any.
- *
- * The 1st field of every srpc_*_reply must be a __u32 status, and the
- * 2nd field the session id, if needed.
- */
-struct srpc_generic_reqst {
- __u64 rpyid; /* reply buffer matchbits */
- __u64 bulkid; /* bulk buffer matchbits */
-} WIRE_ATTR;
-
-struct srpc_generic_reply {
- __u32 status;
- struct lst_sid sid;
-} WIRE_ATTR;
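-
-/*
- * The layout rule above lets generic code read the matchbits of any
- * request through srpc_generic_reqst, e.g.:
- *
- * __u64 rpyid = msg->msg_body.reqst.rpyid;
- *
- * regardless of which srpc_*_reqst the message actually carries.
- */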
-
-/* FRAMEWORK RPCs */
-struct srpc_mksn_reqst {
- __u64 mksn_rpyid; /* reply buffer matchbits */
- struct lst_sid mksn_sid; /* session id */
- __u32 mksn_force; /* use brute force */
- char mksn_name[LST_NAME_SIZE];
-} WIRE_ATTR; /* make session request */
-
-struct srpc_mksn_reply {
- __u32 mksn_status; /* session status */
- struct lst_sid mksn_sid; /* session id */
- __u32 mksn_timeout; /* session timeout */
- char mksn_name[LST_NAME_SIZE];
-} WIRE_ATTR; /* make session reply */
-
-struct srpc_rmsn_reqst {
- __u64 rmsn_rpyid; /* reply buffer matchbits */
- struct lst_sid rmsn_sid; /* session id */
-} WIRE_ATTR; /* remove session request */
-
-struct srpc_rmsn_reply {
- __u32 rmsn_status;
- struct lst_sid rmsn_sid; /* session id */
-} WIRE_ATTR; /* remove session reply */
-
-struct srpc_join_reqst {
- __u64 join_rpyid; /* reply buffer matchbits */
- struct lst_sid join_sid; /* session id to join */
- char join_group[LST_NAME_SIZE]; /* group name */
-} WIRE_ATTR;
-
-struct srpc_join_reply {
- __u32 join_status; /* returned status */
- struct lst_sid join_sid; /* session id */
- __u32 join_timeout; /* # seconds' inactivity to
- * expire
- */
- char join_session[LST_NAME_SIZE]; /* session name */
-} WIRE_ATTR;
-
-struct srpc_debug_reqst {
- __u64 dbg_rpyid; /* reply buffer matchbits */
- struct lst_sid dbg_sid; /* session id */
- __u32 dbg_flags; /* bitmap of debug */
-} WIRE_ATTR;
-
-struct srpc_debug_reply {
- __u32 dbg_status; /* returned code */
- struct lst_sid dbg_sid; /* session id */
- __u32 dbg_timeout; /* session timeout */
- __u32 dbg_nbatch; /* # of batches in the node */
- char dbg_name[LST_NAME_SIZE]; /* session name */
-} WIRE_ATTR;
-
-#define SRPC_BATCH_OPC_RUN 1
-#define SRPC_BATCH_OPC_STOP 2
-#define SRPC_BATCH_OPC_QUERY 3
-
-struct srpc_batch_reqst {
- __u64 bar_rpyid; /* reply buffer matchbits */
- struct lst_sid bar_sid; /* session id */
- struct lst_bid bar_bid; /* batch id */
- __u32 bar_opc; /* create/start/stop batch */
- __u32 bar_testidx; /* index of test */
- __u32 bar_arg; /* parameters */
-} WIRE_ATTR;
-
-struct srpc_batch_reply {
- __u32 bar_status; /* status of request */
- struct lst_sid bar_sid; /* session id */
- __u32 bar_active; /* # of active tests in batch/test */
- __u32 bar_time; /* remained time */
-} WIRE_ATTR;
-
-struct srpc_stat_reqst {
- __u64 str_rpyid; /* reply buffer matchbits */
- struct lst_sid str_sid; /* session id */
- __u32 str_type; /* type of stat */
-} WIRE_ATTR;
-
-struct srpc_stat_reply {
- __u32 str_status;
- struct lst_sid str_sid;
- struct sfw_counters str_fw;
- struct srpc_counters str_rpc;
- struct lnet_counters str_lnet;
-} WIRE_ATTR;
-
-struct test_bulk_req {
- __u32 blk_opc; /* bulk operation code */
- __u32 blk_npg; /* # of pages */
- __u32 blk_flags; /* reserved flags */
-} WIRE_ATTR;
-
-struct test_bulk_req_v1 {
- __u16 blk_opc; /* bulk operation code */
- __u16 blk_flags; /* data check flags */
- __u32 blk_len; /* data length */
- __u32 blk_offset; /* offset */
-} WIRE_ATTR;
-
-struct test_ping_req {
- __u32 png_size; /* size of ping message */
- __u32 png_flags; /* reserved flags */
-} WIRE_ATTR;
-
-struct srpc_test_reqst {
- __u64 tsr_rpyid; /* reply buffer matchbits */
- __u64 tsr_bulkid; /* bulk buffer matchbits */
- struct lst_sid tsr_sid; /* session id */
- struct lst_bid tsr_bid; /* batch id */
- __u32 tsr_service; /* test type: bulk|ping|... */
- __u32 tsr_loop; /* test client loop count or
- * # server buffers needed
- */
- __u32 tsr_concur; /* concurrency of test */
- __u8 tsr_is_client; /* is test client or not */
- __u8 tsr_stop_onerr; /* stop on error */
- __u32 tsr_ndest; /* # of dest nodes */
-
- union {
- struct test_ping_req ping;
- struct test_bulk_req bulk_v0;
- struct test_bulk_req_v1 bulk_v1;
- } tsr_u;
-} WIRE_ATTR;
-
-struct srpc_test_reply {
- __u32 tsr_status; /* returned code */
- struct lst_sid tsr_sid;
-} WIRE_ATTR;
-
-/* TEST RPCs */
-struct srpc_ping_reqst {
- __u64 pnr_rpyid;
- __u32 pnr_magic;
- __u32 pnr_seq;
- __u64 pnr_time_sec;
- __u64 pnr_time_usec;
-} WIRE_ATTR;
-
-struct srpc_ping_reply {
- __u32 pnr_status;
- __u32 pnr_magic;
- __u32 pnr_seq;
-} WIRE_ATTR;
-
-struct srpc_brw_reqst {
- __u64 brw_rpyid; /* reply buffer matchbits */
- __u64 brw_bulkid; /* bulk buffer matchbits */
- __u32 brw_rw; /* read or write */
- __u32 brw_len; /* bulk data len */
- __u32 brw_flags; /* bulk data patterns */
-} WIRE_ATTR; /* bulk r/w request */
-
-struct srpc_brw_reply {
- __u32 brw_status;
-} WIRE_ATTR; /* bulk r/w reply */
-
-#define SRPC_MSG_MAGIC 0xeeb0f00d
-#define SRPC_MSG_VERSION 1
-
-struct srpc_msg {
- __u32 msg_magic; /* magic number */
- __u32 msg_version; /* message version number */
- __u32 msg_type; /* type of message body: srpc_msg_type */
- __u32 msg_reserved0;
- __u32 msg_reserved1;
- __u32 msg_ses_feats; /* test session features */
- union {
- struct srpc_generic_reqst reqst;
- struct srpc_generic_reply reply;
-
- struct srpc_mksn_reqst mksn_reqst;
- struct srpc_mksn_reply mksn_reply;
- struct srpc_rmsn_reqst rmsn_reqst;
- struct srpc_rmsn_reply rmsn_reply;
- struct srpc_debug_reqst dbg_reqst;
- struct srpc_debug_reply dbg_reply;
- struct srpc_batch_reqst bat_reqst;
- struct srpc_batch_reply bat_reply;
- struct srpc_stat_reqst stat_reqst;
- struct srpc_stat_reply stat_reply;
- struct srpc_test_reqst tes_reqst;
- struct srpc_test_reply tes_reply;
- struct srpc_join_reqst join_reqst;
- struct srpc_join_reply join_reply;
-
- struct srpc_ping_reqst ping_reqst;
- struct srpc_ping_reply ping_reply;
- struct srpc_brw_reqst brw_reqst;
- struct srpc_brw_reply brw_reply;
- } msg_body;
-} WIRE_ATTR;
-
-static inline void
-srpc_unpack_msg_hdr(struct srpc_msg *msg)
-{
- if (msg->msg_magic == SRPC_MSG_MAGIC)
- return; /* no flipping needed */
-
- /*
- * We do not swap the magic number here as it is needed to
- * determine whether the body needs to be swapped.
- */
- /* __swab32s(&msg->msg_magic); */
- __swab32s(&msg->msg_type);
- __swab32s(&msg->msg_version);
- __swab32s(&msg->msg_ses_feats);
- __swab32s(&msg->msg_reserved0);
- __swab32s(&msg->msg_reserved1);
-}
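-
-/*
- * A receiver distinguishes the two byte orders by testing the unswapped
- * magic, e.g.:
- *
- * if (msg->msg_magic == __swab32(SRPC_MSG_MAGIC))
- * (message came from a peer of opposite endianness)
- */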
-
-#endif /* __SELFTEST_RPC_H__ */
diff --git a/drivers/staging/lustre/lnet/selftest/selftest.h b/drivers/staging/lustre/lnet/selftest/selftest.h
deleted file mode 100644
index 05466b85e1c0..000000000000
--- a/drivers/staging/lustre/lnet/selftest/selftest.h
+++ /dev/null
@@ -1,623 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/*
- * GPL HEADER START
- *
- * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 only,
- * as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * General Public License version 2 for more details (a copy is included
- * in the LICENSE file that accompanied this code).
- *
- * You should have received a copy of the GNU General Public License
- * version 2 along with this program; If not, see
- * http://www.gnu.org/licenses/gpl-2.0.html
- *
- * GPL HEADER END
- */
-/*
- * Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved.
- * Use is subject to license terms.
- *
- * Copyright (c) 2012, Intel Corporation.
- */
-/*
- * This file is part of Lustre, http://www.lustre.org/
- * Lustre is a trademark of Sun Microsystems, Inc.
- *
- * lnet/selftest/selftest.h
- *
- * Author: Isaac Huang <isaac@clusterfs.com>
- */
-#ifndef __SELFTEST_SELFTEST_H__
-#define __SELFTEST_SELFTEST_H__
-
-#define LNET_ONLY
-
-#include <linux/libcfs/libcfs.h>
-#include <linux/lnet/lib-lnet.h>
-#include <linux/lnet/lib-types.h>
-#include <uapi/linux/lnet/lnetst.h>
-
-#include "rpc.h"
-#include "timer.h"
-
-#ifndef MADE_WITHOUT_COMPROMISE
-#define MADE_WITHOUT_COMPROMISE
-#endif
-
-#define SWI_STATE_NEWBORN 0
-#define SWI_STATE_REPLY_SUBMITTED 1
-#define SWI_STATE_REPLY_SENT 2
-#define SWI_STATE_REQUEST_SUBMITTED 3
-#define SWI_STATE_REQUEST_SENT 4
-#define SWI_STATE_REPLY_RECEIVED 5
-#define SWI_STATE_BULK_STARTED 6
-#define SWI_STATE_DONE 10
-
-/* forward refs */
-struct srpc_service;
-struct srpc_service_cd;
-struct sfw_test_unit;
-struct sfw_test_instance;
-
-/* services below SRPC_FRAMEWORK_SERVICE_MAX_ID are framework
- * services, e.g. create/modify session.
- */
-#define SRPC_SERVICE_DEBUG 0
-#define SRPC_SERVICE_MAKE_SESSION 1
-#define SRPC_SERVICE_REMOVE_SESSION 2
-#define SRPC_SERVICE_BATCH 3
-#define SRPC_SERVICE_TEST 4
-#define SRPC_SERVICE_QUERY_STAT 5
-#define SRPC_SERVICE_JOIN 6
-#define SRPC_FRAMEWORK_SERVICE_MAX_ID 10
-/* other services start from SRPC_FRAMEWORK_SERVICE_MAX_ID+1 */
-#define SRPC_SERVICE_BRW 11
-#define SRPC_SERVICE_PING 12
-#define SRPC_SERVICE_MAX_ID 12
-
-#define SRPC_REQUEST_PORTAL 50
-/* a lazy portal for framework RPC requests */
-#define SRPC_FRAMEWORK_REQUEST_PORTAL 51
-/* all reply/bulk RDMAs go to this portal */
-#define SRPC_RDMA_PORTAL 52
-
-static inline enum srpc_msg_type
-srpc_service2request(int service)
-{
- switch (service) {
- default:
- LBUG();
- case SRPC_SERVICE_DEBUG:
- return SRPC_MSG_DEBUG_REQST;
-
- case SRPC_SERVICE_MAKE_SESSION:
- return SRPC_MSG_MKSN_REQST;
-
- case SRPC_SERVICE_REMOVE_SESSION:
- return SRPC_MSG_RMSN_REQST;
-
- case SRPC_SERVICE_BATCH:
- return SRPC_MSG_BATCH_REQST;
-
- case SRPC_SERVICE_TEST:
- return SRPC_MSG_TEST_REQST;
-
- case SRPC_SERVICE_QUERY_STAT:
- return SRPC_MSG_STAT_REQST;
-
- case SRPC_SERVICE_BRW:
- return SRPC_MSG_BRW_REQST;
-
- case SRPC_SERVICE_PING:
- return SRPC_MSG_PING_REQST;
-
- case SRPC_SERVICE_JOIN:
- return SRPC_MSG_JOIN_REQST;
- }
-}
-
-static inline enum srpc_msg_type
-srpc_service2reply(int service)
-{
- return srpc_service2request(service) + 1;
-}
-
-enum srpc_event_type {
- SRPC_BULK_REQ_RCVD = 1, /* passive bulk request (PUT sink / GET
- * source) received
- */
- SRPC_BULK_PUT_SENT = 2, /* active bulk PUT sent (source) */
- SRPC_BULK_GET_RPLD = 3, /* active bulk GET replied (sink) */
- SRPC_REPLY_RCVD = 4, /* incoming reply received */
- SRPC_REPLY_SENT = 5, /* outgoing reply sent */
- SRPC_REQUEST_RCVD = 6, /* incoming request received */
- SRPC_REQUEST_SENT = 7, /* outgoing request sent */
-};
-
-/* RPC event */
-struct srpc_event {
- enum srpc_event_type ev_type; /* what's up */
- enum lnet_event_kind ev_lnet; /* LNet event type */
- int ev_fired; /* LNet event fired? */
- int ev_status; /* LNet event status */
- void *ev_data; /* owning server/client RPC */
-};
-
-/* bulk descriptor */
-struct srpc_bulk {
- int bk_len; /* len of bulk data */
- struct lnet_handle_md bk_mdh;
- int bk_sink; /* sink/source */
- int bk_niov; /* # iov in bk_iovs */
- struct bio_vec bk_iovs[]; /* flexible array of bulk fragments */
-};
-
-/* message buffer descriptor */
-struct srpc_buffer {
- struct list_head buf_list; /* chain on srpc_service_cd::scd_buf_* */
- struct srpc_msg buf_msg;
- struct lnet_handle_md buf_mdh;
- lnet_nid_t buf_self;
- struct lnet_process_id buf_peer;
-};
-
-struct swi_workitem;
-typedef void (*swi_action_t)(struct swi_workitem *);
-
-struct swi_workitem {
- struct workqueue_struct *swi_wq;
- struct work_struct swi_work;
- swi_action_t swi_action;
- int swi_state;
-};
-
-/* server-side state of an RPC */
-struct srpc_server_rpc {
- /* chain on srpc_service_cd::scd_rpc_free/scd_rpc_active */
- struct list_head srpc_list;
- struct srpc_service_cd *srpc_scd;
- struct swi_workitem srpc_wi;
- struct srpc_event srpc_ev; /* bulk/reply event */
- lnet_nid_t srpc_self;
- struct lnet_process_id srpc_peer;
- struct srpc_msg srpc_replymsg;
- struct lnet_handle_md srpc_replymdh;
- struct srpc_buffer *srpc_reqstbuf;
- struct srpc_bulk *srpc_bulk;
-
- unsigned int srpc_aborted; /* being given up */
- int srpc_status;
- void (*srpc_done)(struct srpc_server_rpc *);
-};
-
-/* client-side state of an RPC */
-struct srpc_client_rpc {
- struct list_head crpc_list; /* chain on user's lists */
- spinlock_t crpc_lock; /* serialize */
- int crpc_service;
- atomic_t crpc_refcount;
- int crpc_timeout; /* # seconds to wait for reply */
- struct stt_timer crpc_timer;
- struct swi_workitem crpc_wi;
- struct lnet_process_id crpc_dest;
-
- void (*crpc_done)(struct srpc_client_rpc *);
- void (*crpc_fini)(struct srpc_client_rpc *);
- int crpc_status; /* completion status */
- void *crpc_priv; /* caller data */
-
- /* state flags */
- unsigned int crpc_aborted:1; /* being given up */
- unsigned int crpc_closed:1; /* completed */
-
- /* RPC events */
- struct srpc_event crpc_bulkev; /* bulk event */
- struct srpc_event crpc_reqstev; /* request event */
- struct srpc_event crpc_replyev; /* reply event */
-
- /* bulk, request (reqst) and reply exchanged on the wire */
- struct srpc_msg crpc_reqstmsg;
- struct srpc_msg crpc_replymsg;
- struct lnet_handle_md crpc_reqstmdh;
- struct lnet_handle_md crpc_replymdh;
- struct srpc_bulk crpc_bulk;
-};
-
-#define srpc_client_rpc_size(rpc) \
-offsetof(struct srpc_client_rpc, crpc_bulk.bk_iovs[(rpc)->crpc_bulk.bk_niov])
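-
-/*
- * srpc_client_rpc_size() yields the true size of a variable-length RPC:
- * the struct plus bk_niov trailing bio_vec slots. A minimal allocation
- * sketch (srpc_create_client_rpc() is the real allocator):
- *
- *	rpc = kmalloc(offsetof(struct srpc_client_rpc,
- *			       crpc_bulk.bk_iovs[nbulkiov]), GFP_KERNEL);
- */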
-
-#define srpc_client_rpc_addref(rpc) \
-do { \
- CDEBUG(D_NET, "RPC[%p] -> %s (%d)++\n", \
- (rpc), libcfs_id2str((rpc)->crpc_dest), \
- atomic_read(&(rpc)->crpc_refcount)); \
- LASSERT(atomic_read(&(rpc)->crpc_refcount) > 0); \
- atomic_inc(&(rpc)->crpc_refcount); \
-} while (0)
-
-#define srpc_client_rpc_decref(rpc) \
-do { \
- CDEBUG(D_NET, "RPC[%p] -> %s (%d)--\n", \
- (rpc), libcfs_id2str((rpc)->crpc_dest), \
- atomic_read(&(rpc)->crpc_refcount)); \
- LASSERT(atomic_read(&(rpc)->crpc_refcount) > 0); \
- if (atomic_dec_and_test(&(rpc)->crpc_refcount)) \
- srpc_destroy_client_rpc(rpc); \
-} while (0)
-
-#define srpc_event_pending(rpc) (!(rpc)->crpc_bulkev.ev_fired || \
- !(rpc)->crpc_reqstev.ev_fired || \
- !(rpc)->crpc_replyev.ev_fired)
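-
-/*
- * Reference discipline: srpc_init_client_rpc() starts crpc_refcount at 1
- * (the caller's reference); every srpc_client_rpc_addref() is paired with
- * a srpc_client_rpc_decref(), and the final decref frees the RPC via
- * srpc_destroy_client_rpc().
- */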
-
-/* CPU partition data of srpc service */
-struct srpc_service_cd {
- /** serialize */
- spinlock_t scd_lock;
- /** backref to service */
- struct srpc_service *scd_svc;
- /** event buffer */
- struct srpc_event scd_ev;
- /** free RPC descriptors */
- struct list_head scd_rpc_free;
- /** in-flight RPCs */
- struct list_head scd_rpc_active;
- /** workitem for posting buffer */
- struct swi_workitem scd_buf_wi;
- /** CPT id */
- int scd_cpt;
- /** error code for scd_buf_wi */
- int scd_buf_err;
- /** timestamp for scd_buf_err */
- time64_t scd_buf_err_stamp;
- /** total # request buffers */
- int scd_buf_total;
- /** # posted request buffers */
- int scd_buf_nposted;
- /** # request buffers currently being posted */
- int scd_buf_posting;
- /** allocate more buffers if scd_buf_nposted < scd_buf_low */
- int scd_buf_low;
- /** # buffers to add (positive) or remove (negative) */
- int scd_buf_adjust;
- /** posted message buffers */
- struct list_head scd_buf_posted;
- /** blocked for RPC descriptor */
- struct list_head scd_buf_blocked;
-};
-
-/* number of server workitems (mini-threads) for a test service */
-#define SFW_TEST_WI_MIN 256
-#define SFW_TEST_WI_MAX 2048
-/* extra buffers for tolerating buggy peers, or unbalanced number
- * of peers between partitions
- */
-#define SFW_TEST_WI_EXTRA 64
-
-/* number of server workitems (mini-threads) for the framework service */
-#define SFW_FRWK_WI_MIN 16
-#define SFW_FRWK_WI_MAX 256
-
-struct srpc_service {
- int sv_id; /* service id */
- const char *sv_name; /* human readable name */
- int sv_wi_total; /* total server workitems */
- int sv_shuttingdown;
- int sv_ncpts;
- /* percpt data for srpc_service */
- struct srpc_service_cd **sv_cpt_data;
- /* Service callbacks:
- * - sv_handler: process incoming RPC request
- * - sv_bulk_ready: notify bulk data
- */
- int (*sv_handler)(struct srpc_server_rpc *);
- int (*sv_bulk_ready)(struct srpc_server_rpc *, int);
-};
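-
-/*
- * A service is normally defined statically, with its callbacks filled in
- * by an init function before srpc_add_service(). A sketch, with
- * hypothetical handler names:
- *
- *	static struct srpc_service my_test_service = {
- *		.sv_id		= SRPC_SERVICE_BRW,
- *		.sv_name	= "my_test",
- *	};
- *
- *	my_test_service.sv_handler    = my_server_handler;
- *	my_test_service.sv_bulk_ready = my_bulk_ready;
- *	rc = srpc_add_service(&my_test_service);
- */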
-
-struct sfw_session {
- struct list_head sn_list; /* chain on fw_zombie_sessions */
- struct lst_sid sn_id; /* unique identifier */
- unsigned int sn_timeout; /* # seconds of inactivity before expiry */
- int sn_timer_active;
- unsigned int sn_features;
- struct stt_timer sn_timer;
- struct list_head sn_batches; /* list of batches */
- char sn_name[LST_NAME_SIZE];
- atomic_t sn_refcount;
- atomic_t sn_brw_errors;
- atomic_t sn_ping_errors;
- unsigned long sn_started;
-};
-
-#define sfw_sid_equal(sid0, sid1) ((sid0).ses_nid == (sid1).ses_nid && \
- (sid0).ses_stamp == (sid1).ses_stamp)
-
-struct sfw_batch {
- struct list_head bat_list; /* chain on sn_batches */
- struct lst_bid bat_id; /* batch id */
- int bat_error; /* error code of batch */
- struct sfw_session *bat_session; /* batch's session */
- atomic_t bat_nactive; /* # of active tests */
- struct list_head bat_tests; /* test instances */
-};
-
-struct sfw_test_client_ops {
- int (*tso_init)(struct sfw_test_instance *tsi); /* initialize test
- * client
- */
- void (*tso_fini)(struct sfw_test_instance *tsi); /* finalize test
- * client
- */
- int (*tso_prep_rpc)(struct sfw_test_unit *tsu,
- struct lnet_process_id dest,
- struct srpc_client_rpc **rpc); /* prepare a test RPC */
- void (*tso_done_rpc)(struct sfw_test_unit *tsu,
- struct srpc_client_rpc *rpc); /* finish a test RPC */
-};
-
-struct sfw_test_instance {
- struct list_head tsi_list; /* chain on batch */
- int tsi_service; /* test type */
- struct sfw_batch *tsi_batch; /* batch */
- struct sfw_test_client_ops *tsi_ops; /* test client operations */
-
- /* public parameters for all test units */
- unsigned int tsi_is_client:1; /* is test client */
- unsigned int tsi_stoptsu_onerr:1; /* stop tsu on error */
- int tsi_concur; /* concurrency */
- int tsi_loop; /* loop count */
-
- /* status of test instance */
- spinlock_t tsi_lock; /* serialize */
- unsigned int tsi_stopping:1; /* test is stopping */
- atomic_t tsi_nactive; /* # of active test units */
- struct list_head tsi_units; /* test units */
- struct list_head tsi_free_rpcs; /* free rpcs */
- struct list_head tsi_active_rpcs; /* active rpcs */
-
- union {
- struct test_ping_req ping; /* ping parameter */
- struct test_bulk_req bulk_v0; /* bulk parameter */
- struct test_bulk_req_v1 bulk_v1; /* bulk v1 parameter */
- } tsi_u;
-};
-
-/*
- * XXX: the trailing (PAGE_SIZE % sizeof(struct lnet_process_id_packed))
- * bytes at the end of each page are unused
- */
-#define SFW_MAX_CONCUR LST_MAX_CONCUR
-#define SFW_ID_PER_PAGE (PAGE_SIZE / sizeof(struct lnet_process_id_packed))
-#define SFW_MAX_NDESTS (LNET_MAX_IOV * SFW_ID_PER_PAGE)
-#define sfw_id_pages(n) (((n) + SFW_ID_PER_PAGE - 1) / SFW_ID_PER_PAGE)
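-
-/*
- * Worked example (assuming 4 KiB pages and a 12-byte
- * struct lnet_process_id_packed): SFW_ID_PER_PAGE = 4096 / 12 = 341,
- * so sfw_id_pages(1000) = (1000 + 340) / 341 = 3 pages of destination IDs.
- */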
-
-struct sfw_test_unit {
- struct list_head tsu_list; /* chain on sfw_test_instance::tsi_units */
- struct lnet_process_id tsu_dest; /* id of dest node */
- int tsu_loop; /* loop count of the test */
- struct sfw_test_instance *tsu_instance; /* pointer to test instance */
- void *tsu_private; /* private data */
- struct swi_workitem tsu_worker; /* workitem of the test unit */
-};
-
-struct sfw_test_case {
- struct list_head tsc_list; /* chain on fw_tests */
- struct srpc_service *tsc_srv_service; /* test service */
- struct sfw_test_client_ops *tsc_cli_ops; /* ops of test client */
-};
-
-struct srpc_client_rpc *
-sfw_create_rpc(struct lnet_process_id peer, int service,
- unsigned int features, int nbulkiov, int bulklen,
- void (*done)(struct srpc_client_rpc *), void *priv);
-int sfw_create_test_rpc(struct sfw_test_unit *tsu,
- struct lnet_process_id peer, unsigned int features,
- int nblk, int blklen, struct srpc_client_rpc **rpc);
-void sfw_abort_rpc(struct srpc_client_rpc *rpc);
-void sfw_post_rpc(struct srpc_client_rpc *rpc);
-void sfw_client_rpc_done(struct srpc_client_rpc *rpc);
-void sfw_unpack_message(struct srpc_msg *msg);
-void sfw_free_pages(struct srpc_server_rpc *rpc);
-void sfw_add_bulk_page(struct srpc_bulk *bk, struct page *pg, int i);
-int sfw_alloc_pages(struct srpc_server_rpc *rpc, int cpt, int npages, int len,
- int sink);
-int sfw_make_session(struct srpc_mksn_reqst *request,
- struct srpc_mksn_reply *reply);
-
-struct srpc_client_rpc *
-srpc_create_client_rpc(struct lnet_process_id peer, int service,
- int nbulkiov, int bulklen,
- void (*rpc_done)(struct srpc_client_rpc *),
- void (*rpc_fini)(struct srpc_client_rpc *), void *priv);
-void srpc_post_rpc(struct srpc_client_rpc *rpc);
-void srpc_abort_rpc(struct srpc_client_rpc *rpc, int why);
-void srpc_free_bulk(struct srpc_bulk *bk);
-struct srpc_bulk *srpc_alloc_bulk(int cpt, unsigned int off,
- unsigned int bulk_npg, unsigned int bulk_len,
- int sink);
-void srpc_send_rpc(struct swi_workitem *wi);
-int srpc_send_reply(struct srpc_server_rpc *rpc);
-int srpc_add_service(struct srpc_service *sv);
-int srpc_remove_service(struct srpc_service *sv);
-void srpc_shutdown_service(struct srpc_service *sv);
-void srpc_abort_service(struct srpc_service *sv);
-int srpc_finish_service(struct srpc_service *sv);
-int srpc_service_add_buffers(struct srpc_service *sv, int nbuffer);
-void srpc_service_remove_buffers(struct srpc_service *sv, int nbuffer);
-void srpc_get_counters(struct srpc_counters *cnt);
-void srpc_set_counters(const struct srpc_counters *cnt);
-
-extern struct workqueue_struct *lst_serial_wq;
-extern struct workqueue_struct **lst_test_wq;
-
-static inline int
-srpc_serv_is_framework(struct srpc_service *svc)
-{
- return svc->sv_id < SRPC_FRAMEWORK_SERVICE_MAX_ID;
-}
-
-static inline void
-swi_wi_action(struct work_struct *wi)
-{
- struct swi_workitem *swi;
-
- swi = container_of(wi, struct swi_workitem, swi_work);
-
- swi->swi_action(swi);
-}
-
-static inline void
-swi_init_workitem(struct swi_workitem *swi,
- swi_action_t action, struct workqueue_struct *wq)
-{
- swi->swi_wq = wq;
- swi->swi_action = action;
- swi->swi_state = SWI_STATE_NEWBORN;
- INIT_WORK(&swi->swi_work, swi_wi_action);
-}
-
-static inline void
-swi_schedule_workitem(struct swi_workitem *wi)
-{
- queue_work(wi->swi_wq, &wi->swi_work);
-}
-
-static inline int
-swi_cancel_workitem(struct swi_workitem *swi)
-{
- return cancel_work_sync(&swi->swi_work);
-}
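-
-/*
- * Usage sketch (hypothetical action; the real users are the RPC state
- * machines in rpc.c and framework.c):
- *
- *	static void my_action(struct swi_workitem *swi)
- *	{
- *		... advance swi->swi_state, then reschedule or finish ...
- *	}
- *
- *	swi_init_workitem(&wi, my_action, lst_serial_wq);
- *	swi_schedule_workitem(&wi);  (my_action(&wi) then runs on the wq)
- */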
-
-int sfw_startup(void);
-int srpc_startup(void);
-void sfw_shutdown(void);
-void srpc_shutdown(void);
-
-static inline void
-srpc_destroy_client_rpc(struct srpc_client_rpc *rpc)
-{
- LASSERT(rpc);
- LASSERT(!srpc_event_pending(rpc));
- LASSERT(!atomic_read(&rpc->crpc_refcount));
-
- if (!rpc->crpc_fini)
- kfree(rpc);
- else
- (*rpc->crpc_fini)(rpc);
-}
-
-static inline void
-srpc_init_client_rpc(struct srpc_client_rpc *rpc, struct lnet_process_id peer,
- int service, int nbulkiov, int bulklen,
- void (*rpc_done)(struct srpc_client_rpc *),
- void (*rpc_fini)(struct srpc_client_rpc *), void *priv)
-{
- LASSERT(nbulkiov <= LNET_MAX_IOV);
-
- memset(rpc, 0, offsetof(struct srpc_client_rpc,
- crpc_bulk.bk_iovs[nbulkiov]));
-
- INIT_LIST_HEAD(&rpc->crpc_list);
- swi_init_workitem(&rpc->crpc_wi, srpc_send_rpc,
- lst_test_wq[lnet_cpt_of_nid(peer.nid)]);
- spin_lock_init(&rpc->crpc_lock);
- atomic_set(&rpc->crpc_refcount, 1); /* 1 ref for caller */
-
- rpc->crpc_dest = peer;
- rpc->crpc_priv = priv;
- rpc->crpc_service = service;
- rpc->crpc_bulk.bk_len = bulklen;
- rpc->crpc_bulk.bk_niov = nbulkiov;
- rpc->crpc_done = rpc_done;
- rpc->crpc_fini = rpc_fini;
- LNetInvalidateMDHandle(&rpc->crpc_reqstmdh);
- LNetInvalidateMDHandle(&rpc->crpc_replymdh);
- LNetInvalidateMDHandle(&rpc->crpc_bulk.bk_mdh);
-
- /* no event is expected at this point */
- rpc->crpc_bulkev.ev_fired = 1;
- rpc->crpc_reqstev.ev_fired = 1;
- rpc->crpc_replyev.ev_fired = 1;
-
- rpc->crpc_reqstmsg.msg_magic = SRPC_MSG_MAGIC;
- rpc->crpc_reqstmsg.msg_version = SRPC_MSG_VERSION;
- rpc->crpc_reqstmsg.msg_type = srpc_service2request(service);
-}
-
-static inline const char *
-swi_state2str(int state)
-{
-#define STATE2STR(x) case x: return #x
- switch (state) {
- default:
- LBUG();
- STATE2STR(SWI_STATE_NEWBORN);
- STATE2STR(SWI_STATE_REPLY_SUBMITTED);
- STATE2STR(SWI_STATE_REPLY_SENT);
- STATE2STR(SWI_STATE_REQUEST_SUBMITTED);
- STATE2STR(SWI_STATE_REQUEST_SENT);
- STATE2STR(SWI_STATE_REPLY_RECEIVED);
- STATE2STR(SWI_STATE_BULK_STARTED);
- STATE2STR(SWI_STATE_DONE);
- }
-#undef STATE2STR
-}
-
-#define selftest_wait_events() \
- do { \
- set_current_state(TASK_UNINTERRUPTIBLE); \
- schedule_timeout(HZ / 10); \
- } while (0)
-
-#define lst_wait_until(cond, lock, fmt, ...) \
-do { \
- int __I = 2; \
- while (!(cond)) { \
- CDEBUG(is_power_of_2(++__I) ? D_WARNING : D_NET, \
- fmt, ## __VA_ARGS__); \
- spin_unlock(&(lock)); \
- \
- selftest_wait_events(); \
- \
- spin_lock(&(lock)); \
- } \
-} while (0)
-
-static inline void
-srpc_wait_service_shutdown(struct srpc_service *sv)
-{
- int i = 2;
-
- LASSERT(sv->sv_shuttingdown);
-
- while (!srpc_finish_service(sv)) {
- i++;
- CDEBUG(((i & -i) == i) ? D_WARNING : D_NET,
- "Waiting for %s service to shutdown...\n",
- sv->sv_name);
- selftest_wait_events();
- }
-}
-
-extern struct sfw_test_client_ops brw_test_client;
-void brw_init_test_client(void);
-
-extern struct srpc_service brw_test_service;
-void brw_init_test_service(void);
-
-extern struct sfw_test_client_ops ping_test_client;
-void ping_init_test_client(void);
-
-extern struct srpc_service ping_test_service;
-void ping_init_test_service(void);
-
-#endif /* __SELFTEST_SELFTEST_H__ */
diff --git a/drivers/staging/lustre/lnet/selftest/timer.c b/drivers/staging/lustre/lnet/selftest/timer.c
deleted file mode 100644
index 1b2c5fc81358..000000000000
--- a/drivers/staging/lustre/lnet/selftest/timer.c
+++ /dev/null
@@ -1,244 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/*
- * GPL HEADER START
- *
- * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 only,
- * as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * General Public License version 2 for more details (a copy is included
- * in the LICENSE file that accompanied this code).
- *
- * You should have received a copy of the GNU General Public License
- * version 2 along with this program; If not, see
- * http://www.gnu.org/licenses/gpl-2.0.html
- *
- * GPL HEADER END
- */
-/*
- * Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved.
- * Use is subject to license terms.
- *
- * Copyright (c) 2011, 2012, Intel Corporation.
- */
-/*
- * This file is part of Lustre, http://www.lustre.org/
- * Lustre is a trademark of Sun Microsystems, Inc.
- *
- * lnet/selftest/timer.c
- *
- * Author: Isaac Huang <isaac@clusterfs.com>
- */
-
-#define DEBUG_SUBSYSTEM S_LNET
-
-#include "selftest.h"
-
-/*
- * Timers are implemented as a sorted queue of expiry times. The queue
- * is slotted, with each slot holding timers which expire within the same
- * 2**STTIMER_MINPOLL (= 8) second window. The timers in each slot are
- * sorted by increasing expiry time. The number of slots is 2**7 (128),
- * to cover a time period of 1024 seconds into the future before wrapping.
- */
-#define STTIMER_MINPOLL 3 /* log2 min poll interval (8 s) */
-#define STTIMER_SLOTTIME BIT(STTIMER_MINPOLL)
-#define STTIMER_SLOTTIMEMASK (~(STTIMER_SLOTTIME - 1))
-#define STTIMER_NSLOTS BIT(7)
-#define STTIMER_SLOT(t) (&stt_data.stt_hash[(((t) >> STTIMER_MINPOLL) & \
- (STTIMER_NSLOTS - 1))])
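-
-/*
- * Worked example: a timer expiring at t = 1000 s hashes to slot
- * (1000 >> 3) & 127 = 125, shared with every expiry in the same
- * 8-second window [1000, 1008). Expiries 1024 s apart alias to the
- * same slot, which is why each slot list is kept sorted by expiry.
- */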
-
-static struct st_timer_data {
- spinlock_t stt_lock;
- unsigned long stt_prev_slot; /* start time of the slot processed
- * previously
- */
- struct list_head stt_hash[STTIMER_NSLOTS];
- int stt_shuttingdown;
- wait_queue_head_t stt_waitq;
- int stt_nthreads;
-} stt_data;
-
-void
-stt_add_timer(struct stt_timer *timer)
-{
- struct list_head *pos;
-
- spin_lock(&stt_data.stt_lock);
-
- LASSERT(stt_data.stt_nthreads > 0);
- LASSERT(!stt_data.stt_shuttingdown);
- LASSERT(timer->stt_func);
- LASSERT(list_empty(&timer->stt_list));
- LASSERT(timer->stt_expires > ktime_get_real_seconds());
-
- /* a simple insertion sort */
- list_for_each_prev(pos, STTIMER_SLOT(timer->stt_expires)) {
- struct stt_timer *old = list_entry(pos, struct stt_timer,
- stt_list);
-
- if (timer->stt_expires >= old->stt_expires)
- break;
- }
- list_add(&timer->stt_list, pos);
-
- spin_unlock(&stt_data.stt_lock);
-}
-
-/*
- * Returns 1 if it deactivated a pending timer, 0 otherwise (i.e. like
- * del_timer(): deleting an inactive timer returns 0, deleting an
- * active timer returns 1).
- *
- * CAVEAT EMPTOR:
- * When 0 is returned, it is possible that timer->stt_func _is_ running on
- * another CPU.
- */
-int
-stt_del_timer(struct stt_timer *timer)
-{
- int ret = 0;
-
- spin_lock(&stt_data.stt_lock);
-
- LASSERT(stt_data.stt_nthreads > 0);
- LASSERT(!stt_data.stt_shuttingdown);
-
- if (!list_empty(&timer->stt_list)) {
- ret = 1;
- list_del_init(&timer->stt_list);
- }
-
- spin_unlock(&stt_data.stt_lock);
- return ret;
-}
-
-/* called with stt_data.stt_lock held */
-static int
-stt_expire_list(struct list_head *slot, time64_t now)
-{
- int expired = 0;
- struct stt_timer *timer;
-
- while (!list_empty(slot)) {
- timer = list_entry(slot->next, struct stt_timer, stt_list);
-
- if (timer->stt_expires > now)
- break;
-
- list_del_init(&timer->stt_list);
- spin_unlock(&stt_data.stt_lock);
-
- expired++;
- timer->stt_func(timer->stt_data);
-
- spin_lock(&stt_data.stt_lock);
- }
-
- return expired;
-}
-
-static int
-stt_check_timers(unsigned long *last)
-{
- int expired = 0;
- time64_t now;
- unsigned long this_slot;
-
- now = ktime_get_real_seconds();
- this_slot = now & STTIMER_SLOTTIMEMASK;
-
- spin_lock(&stt_data.stt_lock);
-
- while (cfs_time_aftereq(this_slot, *last)) {
- expired += stt_expire_list(STTIMER_SLOT(this_slot), now);
- this_slot = cfs_time_sub(this_slot, STTIMER_SLOTTIME);
- }
-
- *last = now & STTIMER_SLOTTIMEMASK;
- spin_unlock(&stt_data.stt_lock);
- return expired;
-}
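-
-/*
- * Note: stt_check_timers() walks backwards from the current slot to the
- * slot processed on the previous pass, so no slot is skipped even if the
- * timer thread slept for longer than one STTIMER_SLOTTIME.
- */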
-
-static int
-stt_timer_main(void *arg)
-{
- int rc = 0;
-
- while (!stt_data.stt_shuttingdown) {
- stt_check_timers(&stt_data.stt_prev_slot);
-
- rc = wait_event_timeout(stt_data.stt_waitq,
- stt_data.stt_shuttingdown,
- STTIMER_SLOTTIME * HZ);
- }
-
- spin_lock(&stt_data.stt_lock);
- stt_data.stt_nthreads--;
- spin_unlock(&stt_data.stt_lock);
- return rc;
-}
-
-static int
-stt_start_timer_thread(void)
-{
- struct task_struct *task;
-
- LASSERT(!stt_data.stt_shuttingdown);
-
- task = kthread_run(stt_timer_main, NULL, "st_timer");
- if (IS_ERR(task))
- return PTR_ERR(task);
-
- spin_lock(&stt_data.stt_lock);
- stt_data.stt_nthreads++;
- spin_unlock(&stt_data.stt_lock);
- return 0;
-}
-
-int
-stt_startup(void)
-{
- int rc = 0;
- int i;
-
- stt_data.stt_shuttingdown = 0;
- stt_data.stt_prev_slot = ktime_get_real_seconds() & STTIMER_SLOTTIMEMASK;
-
- spin_lock_init(&stt_data.stt_lock);
- for (i = 0; i < STTIMER_NSLOTS; i++)
- INIT_LIST_HEAD(&stt_data.stt_hash[i]);
-
- stt_data.stt_nthreads = 0;
- init_waitqueue_head(&stt_data.stt_waitq);
- rc = stt_start_timer_thread();
- if (rc)
- CERROR("Can't spawn timer thread: %d\n", rc);
-
- return rc;
-}
-
-void
-stt_shutdown(void)
-{
- int i;
-
- spin_lock(&stt_data.stt_lock);
-
- for (i = 0; i < STTIMER_NSLOTS; i++)
- LASSERT(list_empty(&stt_data.stt_hash[i]));
-
- stt_data.stt_shuttingdown = 1;
-
- wake_up(&stt_data.stt_waitq);
- lst_wait_until(!stt_data.stt_nthreads, stt_data.stt_lock,
- "waiting for %d threads to terminate\n",
- stt_data.stt_nthreads);
-
- spin_unlock(&stt_data.stt_lock);
-}
diff --git a/drivers/staging/lustre/lnet/selftest/timer.h b/drivers/staging/lustre/lnet/selftest/timer.h
deleted file mode 100644
index 7f0ef9bd0cda..000000000000
--- a/drivers/staging/lustre/lnet/selftest/timer.h
+++ /dev/null
@@ -1,50 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/*
- * GPL HEADER START
- *
- * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 only,
- * as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * General Public License version 2 for more details (a copy is included
- * in the LICENSE file that accompanied this code).
- *
- * You should have received a copy of the GNU General Public License
- * version 2 along with this program; If not, see
- * http://www.gnu.org/licenses/gpl-2.0.html
- *
- * GPL HEADER END
- */
-/*
- * Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved.
- * Use is subject to license terms.
- */
-/*
- * This file is part of Lustre, http://www.lustre.org/
- * Lustre is a trademark of Sun Microsystems, Inc.
- *
- * lnet/selftest/timer.h
- *
- * Author: Isaac Huang <isaac@clusterfs.com>
- */
-#ifndef __SELFTEST_TIMER_H__
-#define __SELFTEST_TIMER_H__
-
-struct stt_timer {
- struct list_head stt_list;
- time64_t stt_expires;
- void (*stt_func)(void *);
- void *stt_data;
-};
-
-void stt_add_timer(struct stt_timer *timer);
-int stt_del_timer(struct stt_timer *timer);
-int stt_startup(void);
-void stt_shutdown(void);
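-
-/*
- * Usage sketch (hypothetical callback and cookie; see the stt_timer
- * fields embedded in selftest.h, e.g. sfw_session::sn_timer, for real
- * users):
- *
- *	struct stt_timer tmr;
- *
- *	INIT_LIST_HEAD(&tmr.stt_list);
- *	tmr.stt_expires = ktime_get_real_seconds() + 30;
- *	tmr.stt_func	= my_timeout_cb;
- *	tmr.stt_data	= my_cookie;
- *	stt_add_timer(&tmr);
- *	...
- *	if (!stt_del_timer(&tmr))
- *		... the timer already fired (or is firing) ...
- */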
-
-#endif /* __SELFTEST_TIMER_H__ */