aboutsummaryrefslogtreecommitdiffstats
path: root/drivers/staging/lustre/lustre/include/lustre_net.h
diff options
context:
space:
mode:
Diffstat (limited to 'drivers/staging/lustre/lustre/include/lustre_net.h')
-rw-r--r--drivers/staging/lustre/lustre/include/lustre_net.h898
1 files changed, 155 insertions, 743 deletions
diff --git a/drivers/staging/lustre/lustre/include/lustre_net.h b/drivers/staging/lustre/lustre/include/lustre_net.h
index e9aba99ee52a..411eb0dc7f38 100644
--- a/drivers/staging/lustre/lustre/include/lustre_net.h
+++ b/drivers/staging/lustre/lustre/include/lustre_net.h
@@ -50,6 +50,7 @@
* @{
*/
+#include <linux/uio.h>
#include "../../include/linux/libcfs/libcfs.h"
#include "../../include/linux/lnet/nidstr.h"
#include "../../include/linux/lnet/api.h"
@@ -68,13 +69,17 @@
#define PTLRPC_MD_OPTIONS 0
/**
- * Max # of bulk operations in one request.
+ * log2 max # of bulk operations in one request: 2=4MB/RPC, 5=32MB/RPC, ...
* In order for the client and server to properly negotiate the maximum
* possible transfer size, PTLRPC_BULK_OPS_COUNT must be a power-of-two
* value. The client is free to limit the actual RPC size for any bulk
* transfer via cl_max_pages_per_rpc to some non-power-of-two value.
+ * NOTE: This is limited to 16 (=64GB RPCs) by IOOBJ_MAX_BRW_BITS.
*/
-#define PTLRPC_BULK_OPS_BITS 2
+#define PTLRPC_BULK_OPS_BITS 4
+#if PTLRPC_BULK_OPS_BITS > 16
+#error "More than 65536 BRW RPCs not allowed by IOOBJ_MAX_BRW_BITS."
+#endif
#define PTLRPC_BULK_OPS_COUNT (1U << PTLRPC_BULK_OPS_BITS)
/**
* PTLRPC_BULK_OPS_MASK is for the convenience of the client only, and
@@ -437,6 +442,10 @@ struct ptlrpc_reply_state {
unsigned long rs_committed:1;/* the transaction was committed
* and the rs was dispatched
*/
+ atomic_t rs_refcount; /* number of users */
+ /** Number of locks awaiting client ACK */
+ int rs_nlocks;
+
/** Size of the state */
int rs_size;
/** opcode */
@@ -449,7 +458,6 @@ struct ptlrpc_reply_state {
struct ptlrpc_service_part *rs_svcpt;
/** Lnet metadata handle for the reply */
lnet_handle_md_t rs_md_h;
- atomic_t rs_refcount;
/** Context for the service thread */
struct ptlrpc_svc_ctx *rs_svc_ctx;
@@ -466,8 +474,6 @@ struct ptlrpc_reply_state {
*/
struct lustre_msg *rs_msg; /* reply message */
- /** Number of locks awaiting client ACK */
- int rs_nlocks;
/** Handles of locks awaiting client reply ACK */
struct lustre_handle rs_locks[RS_MAX_LOCKS];
/** Lock modes of locks in \a rs_locks */
@@ -515,717 +521,7 @@ struct lu_env;
struct ldlm_lock;
-/**
- * \defgroup nrs Network Request Scheduler
- * @{
- */
-struct ptlrpc_nrs_policy;
-struct ptlrpc_nrs_resource;
-struct ptlrpc_nrs_request;
-
-/**
- * NRS control operations.
- *
- * These are common for all policies.
- */
-enum ptlrpc_nrs_ctl {
- /**
- * Not a valid opcode.
- */
- PTLRPC_NRS_CTL_INVALID,
- /**
- * Activate the policy.
- */
- PTLRPC_NRS_CTL_START,
- /**
- * Reserved for multiple primary policies, which may be a possibility
- * in the future.
- */
- PTLRPC_NRS_CTL_STOP,
- /**
- * Policies can start using opcodes from this value and onwards for
- * their own purposes; the assigned value itself is arbitrary.
- */
- PTLRPC_NRS_CTL_1ST_POL_SPEC = 0x20,
-};
-
-/**
- * ORR policy operations
- */
-enum nrs_ctl_orr {
- NRS_CTL_ORR_RD_QUANTUM = PTLRPC_NRS_CTL_1ST_POL_SPEC,
- NRS_CTL_ORR_WR_QUANTUM,
- NRS_CTL_ORR_RD_OFF_TYPE,
- NRS_CTL_ORR_WR_OFF_TYPE,
- NRS_CTL_ORR_RD_SUPP_REQ,
- NRS_CTL_ORR_WR_SUPP_REQ,
-};
-
-/**
- * NRS policy operations.
- *
- * These determine the behaviour of a policy, and are called in response to
- * NRS core events.
- */
-struct ptlrpc_nrs_pol_ops {
- /**
- * Called during policy registration; this operation is optional.
- *
- * \param[in,out] policy The policy being initialized
- */
- int (*op_policy_init)(struct ptlrpc_nrs_policy *policy);
- /**
- * Called during policy unregistration; this operation is optional.
- *
- * \param[in,out] policy The policy being unregistered/finalized
- */
- void (*op_policy_fini)(struct ptlrpc_nrs_policy *policy);
- /**
- * Called when activating a policy via lprocfs; policies allocate and
- * initialize their resources here; this operation is optional.
- *
- * \param[in,out] policy The policy being started
- *
- * \see nrs_policy_start_locked()
- */
- int (*op_policy_start)(struct ptlrpc_nrs_policy *policy);
- /**
- * Called when deactivating a policy via lprocfs; policies deallocate
- * their resources here; this operation is optional
- *
- * \param[in,out] policy The policy being stopped
- *
- * \see nrs_policy_stop0()
- */
- void (*op_policy_stop)(struct ptlrpc_nrs_policy *policy);
- /**
- * Used for policy-specific operations; i.e. not generic ones like
- * \e PTLRPC_NRS_CTL_START and \e PTLRPC_NRS_CTL_GET_INFO; analogous
- * to an ioctl; this operation is optional.
- *
- * \param[in,out] policy The policy carrying out operation \a opc
- * \param[in] opc The command operation being carried out
- * \param[in,out] arg An generic buffer for communication between the
- * user and the control operation
- *
- * \retval -ve error
- * \retval 0 success
- *
- * \see ptlrpc_nrs_policy_control()
- */
- int (*op_policy_ctl)(struct ptlrpc_nrs_policy *policy,
- enum ptlrpc_nrs_ctl opc, void *arg);
-
- /**
- * Called when obtaining references to the resources of the resource
- * hierarchy for a request that has arrived for handling at the PTLRPC
- * service. Policies should return -ve for requests they do not wish
- * to handle. This operation is mandatory.
- *
- * \param[in,out] policy The policy we're getting resources for.
- * \param[in,out] nrq The request we are getting resources for.
- * \param[in] parent The parent resource of the resource being
- * requested; set to NULL if none.
- * \param[out] resp The resource is to be returned here; the
- * fallback policy in an NRS head should
- * \e always return a non-NULL pointer value.
- * \param[in] moving_req When set, signifies that this is an attempt
- * to obtain resources for a request being moved
- * to the high-priority NRS head by
- * ldlm_lock_reorder_req().
- * This implies two things:
- * 1. We are under obd_export::exp_rpc_lock and
- * so should not sleep.
- * 2. We should not perform non-idempotent or can
- * skip performing idempotent operations that
- * were carried out when resources were first
- * taken for the request when it was initialized
- * in ptlrpc_nrs_req_initialize().
- *
- * \retval 0, +ve The level of the returned resource in the resource
- * hierarchy; currently only 0 (for a non-leaf resource)
- * and 1 (for a leaf resource) are supported by the
- * framework.
- * \retval -ve error
- *
- * \see ptlrpc_nrs_req_initialize()
- * \see ptlrpc_nrs_hpreq_add_nolock()
- */
- int (*op_res_get)(struct ptlrpc_nrs_policy *policy,
- struct ptlrpc_nrs_request *nrq,
- const struct ptlrpc_nrs_resource *parent,
- struct ptlrpc_nrs_resource **resp,
- bool moving_req);
- /**
- * Called when releasing references taken for resources in the resource
- * hierarchy for the request; this operation is optional.
- *
- * \param[in,out] policy The policy the resource belongs to
- * \param[in] res The resource to be freed
- *
- * \see ptlrpc_nrs_req_finalize()
- * \see ptlrpc_nrs_hpreq_add_nolock()
- */
- void (*op_res_put)(struct ptlrpc_nrs_policy *policy,
- const struct ptlrpc_nrs_resource *res);
-
- /**
- * Obtains a request for handling from the policy, and optionally
- * removes the request from the policy; this operation is mandatory.
- *
- * \param[in,out] policy The policy to poll
- * \param[in] peek When set, signifies that we just want to
- * examine the request, and not handle it, so the
- * request is not removed from the policy.
- * \param[in] force When set, it will force a policy to return a
- * request if it has one queued.
- *
- * \retval NULL No request available for handling
- * \retval valid-pointer The request polled for handling
- *
- * \see ptlrpc_nrs_req_get_nolock()
- */
- struct ptlrpc_nrs_request *
- (*op_req_get)(struct ptlrpc_nrs_policy *policy, bool peek,
- bool force);
- /**
- * Called when attempting to add a request to a policy for later
- * handling; this operation is mandatory.
- *
- * \param[in,out] policy The policy on which to enqueue \a nrq
- * \param[in,out] nrq The request to enqueue
- *
- * \retval 0 success
- * \retval != 0 error
- *
- * \see ptlrpc_nrs_req_add_nolock()
- */
- int (*op_req_enqueue)(struct ptlrpc_nrs_policy *policy,
- struct ptlrpc_nrs_request *nrq);
- /**
- * Removes a request from the policy's set of pending requests. Normally
- * called after a request has been polled successfully from the policy
- * for handling; this operation is mandatory.
- *
- * \param[in,out] policy The policy the request \a nrq belongs to
- * \param[in,out] nrq The request to dequeue
- */
- void (*op_req_dequeue)(struct ptlrpc_nrs_policy *policy,
- struct ptlrpc_nrs_request *nrq);
- /**
- * Called after the request being carried out. Could be used for
- * job/resource control; this operation is optional.
- *
- * \param[in,out] policy The policy which is stopping to handle request
- * \a nrq
- * \param[in,out] nrq The request
- *
- * \pre assert_spin_locked(&svcpt->scp_req_lock)
- *
- * \see ptlrpc_nrs_req_stop_nolock()
- */
- void (*op_req_stop)(struct ptlrpc_nrs_policy *policy,
- struct ptlrpc_nrs_request *nrq);
- /**
- * Registers the policy's lprocfs interface with a PTLRPC service.
- *
- * \param[in] svc The service
- *
- * \retval 0 success
- * \retval != 0 error
- */
- int (*op_lprocfs_init)(struct ptlrpc_service *svc);
- /**
- * Unegisters the policy's lprocfs interface with a PTLRPC service.
- *
- * In cases of failed policy registration in
- * \e ptlrpc_nrs_policy_register(), this function may be called for a
- * service which has not registered the policy successfully, so
- * implementations of this method should make sure their operations are
- * safe in such cases.
- *
- * \param[in] svc The service
- */
- void (*op_lprocfs_fini)(struct ptlrpc_service *svc);
-};
-
-/**
- * Policy flags
- */
-enum nrs_policy_flags {
- /**
- * Fallback policy, use this flag only on a single supported policy per
- * service. The flag cannot be used on policies that use
- * \e PTLRPC_NRS_FL_REG_EXTERN
- */
- PTLRPC_NRS_FL_FALLBACK = (1 << 0),
- /**
- * Start policy immediately after registering.
- */
- PTLRPC_NRS_FL_REG_START = (1 << 1),
- /**
- * This is a policy registering from a module different to the one NRS
- * core ships in (currently ptlrpc).
- */
- PTLRPC_NRS_FL_REG_EXTERN = (1 << 2),
-};
-
-/**
- * NRS queue type.
- *
- * Denotes whether an NRS instance is for handling normal or high-priority
- * RPCs, or whether an operation pertains to one or both of the NRS instances
- * in a service.
- */
-enum ptlrpc_nrs_queue_type {
- PTLRPC_NRS_QUEUE_REG = (1 << 0),
- PTLRPC_NRS_QUEUE_HP = (1 << 1),
- PTLRPC_NRS_QUEUE_BOTH = (PTLRPC_NRS_QUEUE_REG | PTLRPC_NRS_QUEUE_HP)
-};
-
-/**
- * NRS head
- *
- * A PTLRPC service has at least one NRS head instance for handling normal
- * priority RPCs, and may optionally have a second NRS head instance for
- * handling high-priority RPCs. Each NRS head maintains a list of available
- * policies, of which one and only one policy is acting as the fallback policy,
- * and optionally a different policy may be acting as the primary policy. For
- * all RPCs handled by this NRS head instance, NRS core will first attempt to
- * enqueue the RPC using the primary policy (if any). The fallback policy is
- * used in the following cases:
- * - when there was no primary policy in the
- * ptlrpc_nrs_pol_state::NRS_POL_STATE_STARTED state at the time the request
- * was initialized.
- * - when the primary policy that was at the
- * ptlrpc_nrs_pol_state::PTLRPC_NRS_POL_STATE_STARTED state at the time the
- * RPC was initialized, denoted it did not wish, or for some other reason was
- * not able to handle the request, by returning a non-valid NRS resource
- * reference.
- * - when the primary policy that was at the
- * ptlrpc_nrs_pol_state::PTLRPC_NRS_POL_STATE_STARTED state at the time the
- * RPC was initialized, fails later during the request enqueueing stage.
- *
- * \see nrs_resource_get_safe()
- * \see nrs_request_enqueue()
- */
-struct ptlrpc_nrs {
- spinlock_t nrs_lock;
- /** XXX Possibly replace svcpt->scp_req_lock with another lock here. */
- /**
- * List of registered policies
- */
- struct list_head nrs_policy_list;
- /**
- * List of policies with queued requests. Policies that have any
- * outstanding requests are queued here, and this list is queried
- * in a round-robin manner from NRS core when obtaining a request
- * for handling. This ensures that requests from policies that at some
- * point transition away from the
- * ptlrpc_nrs_pol_state::NRS_POL_STATE_STARTED state are drained.
- */
- struct list_head nrs_policy_queued;
- /**
- * Service partition for this NRS head
- */
- struct ptlrpc_service_part *nrs_svcpt;
- /**
- * Primary policy, which is the preferred policy for handling RPCs
- */
- struct ptlrpc_nrs_policy *nrs_policy_primary;
- /**
- * Fallback policy, which is the backup policy for handling RPCs
- */
- struct ptlrpc_nrs_policy *nrs_policy_fallback;
- /**
- * This NRS head handles either HP or regular requests
- */
- enum ptlrpc_nrs_queue_type nrs_queue_type;
- /**
- * # queued requests from all policies in this NRS head
- */
- unsigned long nrs_req_queued;
- /**
- * # scheduled requests from all policies in this NRS head
- */
- unsigned long nrs_req_started;
- /**
- * # policies on this NRS
- */
- unsigned nrs_num_pols;
- /**
- * This NRS head is in progress of starting a policy
- */
- unsigned nrs_policy_starting:1;
- /**
- * In progress of shutting down the whole NRS head; used during
- * unregistration
- */
- unsigned nrs_stopping:1;
-};
-
-#define NRS_POL_NAME_MAX 16
-
-struct ptlrpc_nrs_pol_desc;
-
-/**
- * Service compatibility predicate; this determines whether a policy is adequate
- * for handling RPCs of a particular PTLRPC service.
- *
- * XXX:This should give the same result during policy registration and
- * unregistration, and for all partitions of a service; so the result should not
- * depend on temporal service or other properties, that may influence the
- * result.
- */
-typedef bool (*nrs_pol_desc_compat_t) (const struct ptlrpc_service *svc,
- const struct ptlrpc_nrs_pol_desc *desc);
-
-struct ptlrpc_nrs_pol_conf {
- /**
- * Human-readable policy name
- */
- char nc_name[NRS_POL_NAME_MAX];
- /**
- * NRS operations for this policy
- */
- const struct ptlrpc_nrs_pol_ops *nc_ops;
- /**
- * Service compatibility predicate
- */
- nrs_pol_desc_compat_t nc_compat;
- /**
- * Set for policies that support a single ptlrpc service, i.e. ones that
- * have \a pd_compat set to nrs_policy_compat_one(). The variable value
- * depicts the name of the single service that such policies are
- * compatible with.
- */
- const char *nc_compat_svc_name;
- /**
- * Owner module for this policy descriptor; policies registering from a
- * different module to the one the NRS framework is held within
- * (currently ptlrpc), should set this field to THIS_MODULE.
- */
- struct module *nc_owner;
- /**
- * Policy registration flags; a bitmask of \e nrs_policy_flags
- */
- unsigned nc_flags;
-};
-
-/**
- * NRS policy registering descriptor
- *
- * Is used to hold a description of a policy that can be passed to NRS core in
- * order to register the policy with NRS heads in different PTLRPC services.
- */
-struct ptlrpc_nrs_pol_desc {
- /**
- * Human-readable policy name
- */
- char pd_name[NRS_POL_NAME_MAX];
- /**
- * Link into nrs_core::nrs_policies
- */
- struct list_head pd_list;
- /**
- * NRS operations for this policy
- */
- const struct ptlrpc_nrs_pol_ops *pd_ops;
- /**
- * Service compatibility predicate
- */
- nrs_pol_desc_compat_t pd_compat;
- /**
- * Set for policies that are compatible with only one PTLRPC service.
- *
- * \see ptlrpc_nrs_pol_conf::nc_compat_svc_name
- */
- const char *pd_compat_svc_name;
- /**
- * Owner module for this policy descriptor.
- *
- * We need to hold a reference to the module whenever we might make use
- * of any of the module's contents, i.e.
- * - If one or more instances of the policy are at a state where they
- * might be handling a request, i.e.
- * ptlrpc_nrs_pol_state::NRS_POL_STATE_STARTED or
- * ptlrpc_nrs_pol_state::NRS_POL_STATE_STOPPING as we will have to
- * call into the policy's ptlrpc_nrs_pol_ops() handlers. A reference
- * is taken on the module when
- * \e ptlrpc_nrs_pol_desc::pd_refs becomes 1, and released when it
- * becomes 0, so that we hold only one reference to the module maximum
- * at any time.
- *
- * We do not need to hold a reference to the module, even though we
- * might use code and data from the module, in the following cases:
- * - During external policy registration, because this should happen in
- * the module's init() function, in which case the module is safe from
- * removal because a reference is being held on the module by the
- * kernel, and iirc kmod (and I guess module-init-tools also) will
- * serialize any racing processes properly anyway.
- * - During external policy unregistration, because this should happen
- * in a module's exit() function, and any attempts to start a policy
- * instance would need to take a reference on the module, and this is
- * not possible once we have reached the point where the exit()
- * handler is called.
- * - During service registration and unregistration, as service setup
- * and cleanup, and policy registration, unregistration and policy
- * instance starting, are serialized by \e nrs_core::nrs_mutex, so
- * as long as users adhere to the convention of registering policies
- * in init() and unregistering them in module exit() functions, there
- * should not be a race between these operations.
- * - During any policy-specific lprocfs operations, because a reference
- * is held by the kernel on a proc entry that has been entered by a
- * syscall, so as long as proc entries are removed during unregistration time,
- * then unregistration and lprocfs operations will be properly
- * serialized.
- */
- struct module *pd_owner;
- /**
- * Bitmask of \e nrs_policy_flags
- */
- unsigned pd_flags;
- /**
- * # of references on this descriptor
- */
- atomic_t pd_refs;
-};
-
-/**
- * NRS policy state
- *
- * Policies transition from one state to the other during their lifetime
- */
-enum ptlrpc_nrs_pol_state {
- /**
- * Not a valid policy state.
- */
- NRS_POL_STATE_INVALID,
- /**
- * Policies are at this state either at the start of their life, or
- * transition here when the user selects a different policy to act
- * as the primary one.
- */
- NRS_POL_STATE_STOPPED,
- /**
- * Policy is progress of stopping
- */
- NRS_POL_STATE_STOPPING,
- /**
- * Policy is in progress of starting
- */
- NRS_POL_STATE_STARTING,
- /**
- * A policy is in this state in two cases:
- * - it is the fallback policy, which is always in this state.
- * - it has been activated by the user; i.e. it is the primary policy,
- */
- NRS_POL_STATE_STARTED,
-};
-
-/**
- * NRS policy information
- *
- * Used for obtaining information for the status of a policy via lprocfs
- */
-struct ptlrpc_nrs_pol_info {
- /**
- * Policy name
- */
- char pi_name[NRS_POL_NAME_MAX];
- /**
- * Current policy state
- */
- enum ptlrpc_nrs_pol_state pi_state;
- /**
- * # RPCs enqueued for later dispatching by the policy
- */
- long pi_req_queued;
- /**
- * # RPCs started for dispatch by the policy
- */
- long pi_req_started;
- /**
- * Is this a fallback policy?
- */
- unsigned pi_fallback:1;
-};
-
-/**
- * NRS policy
- *
- * There is one instance of this for each policy in each NRS head of each
- * PTLRPC service partition.
- */
-struct ptlrpc_nrs_policy {
- /**
- * Linkage into the NRS head's list of policies,
- * ptlrpc_nrs:nrs_policy_list
- */
- struct list_head pol_list;
- /**
- * Linkage into the NRS head's list of policies with enqueued
- * requests ptlrpc_nrs:nrs_policy_queued
- */
- struct list_head pol_list_queued;
- /**
- * Current state of this policy
- */
- enum ptlrpc_nrs_pol_state pol_state;
- /**
- * Bitmask of nrs_policy_flags
- */
- unsigned pol_flags;
- /**
- * # RPCs enqueued for later dispatching by the policy
- */
- long pol_req_queued;
- /**
- * # RPCs started for dispatch by the policy
- */
- long pol_req_started;
- /**
- * Usage Reference count taken on the policy instance
- */
- long pol_ref;
- /**
- * The NRS head this policy has been created at
- */
- struct ptlrpc_nrs *pol_nrs;
- /**
- * Private policy data; varies by policy type
- */
- void *pol_private;
- /**
- * Policy descriptor for this policy instance.
- */
- struct ptlrpc_nrs_pol_desc *pol_desc;
-};
-
-/**
- * NRS resource
- *
- * Resources are embedded into two types of NRS entities:
- * - Inside NRS policies, in the policy's private data in
- * ptlrpc_nrs_policy::pol_private
- * - In objects that act as prime-level scheduling entities in different NRS
- * policies; e.g. on a policy that performs round robin or similar order
- * scheduling across client NIDs, there would be one NRS resource per unique
- * client NID. On a policy which performs round robin scheduling across
- * backend filesystem objects, there would be one resource associated with
- * each of the backend filesystem objects partaking in the scheduling
- * performed by the policy.
- *
- * NRS resources share a parent-child relationship, in which resources embedded
- * in policy instances are the parent entities, with all scheduling entities
- * a policy schedules across being the children, thus forming a simple resource
- * hierarchy. This hierarchy may be extended with one or more levels in the
- * future if the ability to have more than one primary policy is added.
- *
- * Upon request initialization, references to the then active NRS policies are
- * taken and used to later handle the dispatching of the request with one of
- * these policies.
- *
- * \see nrs_resource_get_safe()
- * \see ptlrpc_nrs_req_add()
- */
-struct ptlrpc_nrs_resource {
- /**
- * This NRS resource's parent; is NULL for resources embedded in NRS
- * policy instances; i.e. those are top-level ones.
- */
- struct ptlrpc_nrs_resource *res_parent;
- /**
- * The policy associated with this resource.
- */
- struct ptlrpc_nrs_policy *res_policy;
-};
-
-enum {
- NRS_RES_FALLBACK,
- NRS_RES_PRIMARY,
- NRS_RES_MAX
-};
-
-/* \name fifo
- *
- * FIFO policy
- *
- * This policy is a logical wrapper around previous, non-NRS functionality.
- * It dispatches RPCs in the same order as they arrive from the network. This
- * policy is currently used as the fallback policy, and the only enabled policy
- * on all NRS heads of all PTLRPC service partitions.
- * @{
- */
-
-/**
- * Private data structure for the FIFO policy
- */
-struct nrs_fifo_head {
- /**
- * Resource object for policy instance.
- */
- struct ptlrpc_nrs_resource fh_res;
- /**
- * List of queued requests.
- */
- struct list_head fh_list;
- /**
- * For debugging purposes.
- */
- __u64 fh_sequence;
-};
-
-struct nrs_fifo_req {
- struct list_head fr_list;
- __u64 fr_sequence;
-};
-
-/** @} fifo */
-
-/**
- * NRS request
- *
- * Instances of this object exist embedded within ptlrpc_request; the main
- * purpose of this object is to hold references to the request's resources
- * for the lifetime of the request, and to hold properties that policies use
- * use for determining the request's scheduling priority.
- */
-struct ptlrpc_nrs_request {
- /**
- * The request's resource hierarchy.
- */
- struct ptlrpc_nrs_resource *nr_res_ptrs[NRS_RES_MAX];
- /**
- * Index into ptlrpc_nrs_request::nr_res_ptrs of the resource of the
- * policy that was used to enqueue the request.
- *
- * \see nrs_request_enqueue()
- */
- unsigned nr_res_idx;
- unsigned nr_initialized:1;
- unsigned nr_enqueued:1;
- unsigned nr_started:1;
- unsigned nr_finalized:1;
-
- /**
- * Policy-specific fields, used for determining a request's scheduling
- * priority, and other supporting functionality.
- */
- union {
- /**
- * Fields for the FIFO policy
- */
- struct nrs_fifo_req fifo;
- } nr_u;
- /**
- * Externally-registering policies may want to use this to allocate
- * their own request properties.
- */
- void *ext;
-};
-
-/** @} nrs */
+#include "lustre_nrs.h"
/**
* Basic request prioritization operations structure.
@@ -1304,6 +600,8 @@ struct ptlrpc_cli_req {
union ptlrpc_async_args cr_async_args;
/** Opaq data for replay and commit callbacks. */
void *cr_cb_data;
+ /** Link to the imp->imp_unreplied_list */
+ struct list_head cr_unreplied_list;
/**
* Commit callback, called when request is committed and about to be
* freed.
@@ -1343,6 +641,7 @@ struct ptlrpc_cli_req {
#define rq_interpret_reply rq_cli.cr_reply_interp
#define rq_async_args rq_cli.cr_async_args
#define rq_cb_data rq_cli.cr_cb_data
+#define rq_unreplied_list rq_cli.cr_unreplied_list
#define rq_commit_cb rq_cli.cr_commit_cb
#define rq_replay_cb rq_cli.cr_replay_cb
@@ -1505,6 +804,8 @@ struct ptlrpc_request {
__u64 rq_transno;
/** xid */
__u64 rq_xid;
+ /** bulk match bits */
+ u64 rq_mbits;
/**
* List item to for replay list. Not yet committed requests get linked
* there.
@@ -1793,10 +1094,93 @@ struct ptlrpc_bulk_page {
struct page *bp_page;
};
-#define BULK_GET_SOURCE 0
-#define BULK_PUT_SINK 1
-#define BULK_GET_SINK 2
-#define BULK_PUT_SOURCE 3
+enum ptlrpc_bulk_op_type {
+ PTLRPC_BULK_OP_ACTIVE = 0x00000001,
+ PTLRPC_BULK_OP_PASSIVE = 0x00000002,
+ PTLRPC_BULK_OP_PUT = 0x00000004,
+ PTLRPC_BULK_OP_GET = 0x00000008,
+ PTLRPC_BULK_BUF_KVEC = 0x00000010,
+ PTLRPC_BULK_BUF_KIOV = 0x00000020,
+ PTLRPC_BULK_GET_SOURCE = PTLRPC_BULK_OP_PASSIVE | PTLRPC_BULK_OP_GET,
+ PTLRPC_BULK_PUT_SINK = PTLRPC_BULK_OP_PASSIVE | PTLRPC_BULK_OP_PUT,
+ PTLRPC_BULK_GET_SINK = PTLRPC_BULK_OP_ACTIVE | PTLRPC_BULK_OP_GET,
+ PTLRPC_BULK_PUT_SOURCE = PTLRPC_BULK_OP_ACTIVE | PTLRPC_BULK_OP_PUT,
+};
+
+static inline bool ptlrpc_is_bulk_op_get(enum ptlrpc_bulk_op_type type)
+{
+ return (type & PTLRPC_BULK_OP_GET) == PTLRPC_BULK_OP_GET;
+}
+
+static inline bool ptlrpc_is_bulk_get_source(enum ptlrpc_bulk_op_type type)
+{
+ return (type & PTLRPC_BULK_GET_SOURCE) == PTLRPC_BULK_GET_SOURCE;
+}
+
+static inline bool ptlrpc_is_bulk_put_sink(enum ptlrpc_bulk_op_type type)
+{
+ return (type & PTLRPC_BULK_PUT_SINK) == PTLRPC_BULK_PUT_SINK;
+}
+
+static inline bool ptlrpc_is_bulk_get_sink(enum ptlrpc_bulk_op_type type)
+{
+ return (type & PTLRPC_BULK_GET_SINK) == PTLRPC_BULK_GET_SINK;
+}
+
+static inline bool ptlrpc_is_bulk_put_source(enum ptlrpc_bulk_op_type type)
+{
+ return (type & PTLRPC_BULK_PUT_SOURCE) == PTLRPC_BULK_PUT_SOURCE;
+}
+
+static inline bool ptlrpc_is_bulk_desc_kvec(enum ptlrpc_bulk_op_type type)
+{
+ return ((type & PTLRPC_BULK_BUF_KVEC) | (type & PTLRPC_BULK_BUF_KIOV))
+ == PTLRPC_BULK_BUF_KVEC;
+}
+
+static inline bool ptlrpc_is_bulk_desc_kiov(enum ptlrpc_bulk_op_type type)
+{
+ return ((type & PTLRPC_BULK_BUF_KVEC) | (type & PTLRPC_BULK_BUF_KIOV))
+ == PTLRPC_BULK_BUF_KIOV;
+}
+
+static inline bool ptlrpc_is_bulk_op_active(enum ptlrpc_bulk_op_type type)
+{
+ return ((type & PTLRPC_BULK_OP_ACTIVE) |
+ (type & PTLRPC_BULK_OP_PASSIVE)) == PTLRPC_BULK_OP_ACTIVE;
+}
+
+static inline bool ptlrpc_is_bulk_op_passive(enum ptlrpc_bulk_op_type type)
+{
+ return ((type & PTLRPC_BULK_OP_ACTIVE) |
+ (type & PTLRPC_BULK_OP_PASSIVE)) == PTLRPC_BULK_OP_PASSIVE;
+}
+
+struct ptlrpc_bulk_frag_ops {
+ /**
+ * Add a page \a page to the bulk descriptor \a desc
+ * Data to transfer in the page starts at offset \a pageoffset and
+ * amount of data to transfer from the page is \a len
+ */
+ void (*add_kiov_frag)(struct ptlrpc_bulk_desc *desc,
+ struct page *page, int pageoffset, int len);
+
+ /*
+ * Add a \a fragment to the bulk descriptor \a desc.
+ * Data to transfer in the fragment is pointed to by \a frag
+ * The size of the fragment is \a len
+ */
+ int (*add_iov_frag)(struct ptlrpc_bulk_desc *desc, void *frag, int len);
+
+ /**
+ * Uninitialize and free bulk descriptor \a desc.
+ * Works on bulk descriptors both from server and client side.
+ */
+ void (*release_frags)(struct ptlrpc_bulk_desc *desc);
+};
+
+extern const struct ptlrpc_bulk_frag_ops ptlrpc_bulk_kiov_pin_ops;
+extern const struct ptlrpc_bulk_frag_ops ptlrpc_bulk_kiov_nopin_ops;
/**
* Definition of bulk descriptor.
@@ -1811,14 +1195,14 @@ struct ptlrpc_bulk_page {
struct ptlrpc_bulk_desc {
/** completed with failure */
unsigned long bd_failure:1;
- /** {put,get}{source,sink} */
- unsigned long bd_type:2;
/** client side */
unsigned long bd_registered:1;
/** For serialization with callback */
spinlock_t bd_lock;
/** Import generation when request for this bulk was sent */
int bd_import_generation;
+ /** {put,get}{source,sink}{kvec,kiov} */
+ enum ptlrpc_bulk_op_type bd_type;
/** LNet portal for this bulk */
__u32 bd_portal;
/** Server side - export this bulk created for */
@@ -1827,13 +1211,14 @@ struct ptlrpc_bulk_desc {
struct obd_import *bd_import;
/** Back pointer to the request */
struct ptlrpc_request *bd_req;
+ struct ptlrpc_bulk_frag_ops *bd_frag_ops;
wait_queue_head_t bd_waitq; /* server side only WQ */
int bd_iov_count; /* # entries in bd_iov */
int bd_max_iov; /* allocated size of bd_iov */
int bd_nob; /* # bytes covered */
int bd_nob_transferred; /* # bytes GOT/PUT */
- __u64 bd_last_xid;
+ u64 bd_last_mbits;
struct ptlrpc_cb_id bd_cbid; /* network callback info */
lnet_nid_t bd_sender; /* stash event::sender */
@@ -1842,14 +1227,31 @@ struct ptlrpc_bulk_desc {
/** array of associated MDs */
lnet_handle_md_t bd_mds[PTLRPC_BULK_OPS_COUNT];
- /*
- * encrypt iov, size is either 0 or bd_iov_count.
- */
- lnet_kiov_t *bd_enc_iov;
-
- lnet_kiov_t bd_iov[0];
+ union {
+ struct {
+ /*
+ * encrypt iov, size is either 0 or bd_iov_count.
+ */
+ struct bio_vec *bd_enc_vec;
+ struct bio_vec *bd_vec; /* Array of bio_vecs */
+ } bd_kiov;
+
+ struct {
+ struct kvec *bd_enc_kvec;
+ struct kvec *bd_kvec; /* Array of kvecs */
+ } bd_kvec;
+ } bd_u;
};
+#define GET_KIOV(desc) ((desc)->bd_u.bd_kiov.bd_vec)
+#define BD_GET_KIOV(desc, i) ((desc)->bd_u.bd_kiov.bd_vec[i])
+#define GET_ENC_KIOV(desc) ((desc)->bd_u.bd_kiov.bd_enc_vec)
+#define BD_GET_ENC_KIOV(desc, i) ((desc)->bd_u.bd_kiov.bd_enc_vec[i])
+#define GET_KVEC(desc) ((desc)->bd_u.bd_kvec.bd_kvec)
+#define BD_GET_KVEC(desc, i) ((desc)->bd_u.bd_kvec.bd_kvec[i])
+#define GET_ENC_KVEC(desc) ((desc)->bd_u.bd_kvec.bd_enc_kvec)
+#define BD_GET_ENC_KVEC(desc, i) ((desc)->bd_u.bd_kvec.bd_enc_kvec[i])
+
enum {
SVC_STOPPED = 1 << 0,
SVC_STOPPING = 1 << 1,
@@ -2464,21 +1866,17 @@ int ptlrpc_request_bufs_pack(struct ptlrpc_request *request,
void ptlrpc_req_finished(struct ptlrpc_request *request);
struct ptlrpc_request *ptlrpc_request_addref(struct ptlrpc_request *req);
struct ptlrpc_bulk_desc *ptlrpc_prep_bulk_imp(struct ptlrpc_request *req,
- unsigned npages, unsigned max_brw,
- unsigned type, unsigned portal);
-void __ptlrpc_free_bulk(struct ptlrpc_bulk_desc *bulk, int pin);
-static inline void ptlrpc_free_bulk_pin(struct ptlrpc_bulk_desc *bulk)
-{
- __ptlrpc_free_bulk(bulk, 1);
-}
-
-static inline void ptlrpc_free_bulk_nopin(struct ptlrpc_bulk_desc *bulk)
-{
- __ptlrpc_free_bulk(bulk, 0);
-}
-
+ unsigned int nfrags,
+ unsigned int max_brw,
+ unsigned int type,
+ unsigned int portal,
+ const struct ptlrpc_bulk_frag_ops *ops);
+
+int ptlrpc_prep_bulk_frag(struct ptlrpc_bulk_desc *desc,
+ void *frag, int len);
void __ptlrpc_prep_bulk_page(struct ptlrpc_bulk_desc *desc,
- struct page *page, int pageoffset, int len, int);
+ struct page *page, int pageoffset, int len,
+ int pin);
static inline void ptlrpc_prep_bulk_page_pin(struct ptlrpc_bulk_desc *desc,
struct page *page, int pageoffset,
int len)
@@ -2493,6 +1891,16 @@ static inline void ptlrpc_prep_bulk_page_nopin(struct ptlrpc_bulk_desc *desc,
__ptlrpc_prep_bulk_page(desc, page, pageoffset, len, 0);
}
+void ptlrpc_free_bulk(struct ptlrpc_bulk_desc *bulk);
+
+static inline void ptlrpc_release_bulk_page_pin(struct ptlrpc_bulk_desc *desc)
+{
+ int i;
+
+ for (i = 0; i < desc->bd_iov_count ; i++)
+ put_page(BD_GET_KIOV(desc, i).bv_page);
+}
+
void ptlrpc_retain_replayable_request(struct ptlrpc_request *req,
struct obd_import *imp);
__u64 ptlrpc_next_xid(void);
@@ -2652,6 +2060,7 @@ struct lustre_handle *lustre_msg_get_handle(struct lustre_msg *msg);
__u32 lustre_msg_get_type(struct lustre_msg *msg);
void lustre_msg_add_version(struct lustre_msg *msg, u32 version);
__u32 lustre_msg_get_opc(struct lustre_msg *msg);
+__u16 lustre_msg_get_tag(struct lustre_msg *msg);
__u64 lustre_msg_get_last_committed(struct lustre_msg *msg);
__u64 *lustre_msg_get_versions(struct lustre_msg *msg);
__u64 lustre_msg_get_transno(struct lustre_msg *msg);
@@ -2670,6 +2079,8 @@ void lustre_msg_set_handle(struct lustre_msg *msg,
struct lustre_handle *handle);
void lustre_msg_set_type(struct lustre_msg *msg, __u32 type);
void lustre_msg_set_opc(struct lustre_msg *msg, __u32 opc);
+void lustre_msg_set_last_xid(struct lustre_msg *msg, u64 last_xid);
+void lustre_msg_set_tag(struct lustre_msg *msg, __u16 tag);
void lustre_msg_set_versions(struct lustre_msg *msg, __u64 *versions);
void lustre_msg_set_transno(struct lustre_msg *msg, __u64 transno);
void lustre_msg_set_status(struct lustre_msg *msg, __u32 status);
@@ -2679,6 +2090,7 @@ void lustre_msg_set_timeout(struct lustre_msg *msg, __u32 timeout);
void lustre_msg_set_service_time(struct lustre_msg *msg, __u32 service_time);
void lustre_msg_set_jobid(struct lustre_msg *msg, char *jobid);
void lustre_msg_set_cksum(struct lustre_msg *msg, __u32 cksum);
+void lustre_msg_set_mbits(struct lustre_msg *msg, u64 mbits);
static inline void
lustre_shrink_reply(struct ptlrpc_request *req, int segment,