aboutsummaryrefslogtreecommitdiffstats
path: root/drivers/staging/lustre/lustre/include/lustre_import.h
blob: dcc807676c490ec44e09467795e3c3f2c3aed863 (plain) (blame)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
/*
 * GPL HEADER START
 *
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License version 2 only,
 * as published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful, but
 * WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * General Public License version 2 for more details (a copy is included
 * in the LICENSE file that accompanied this code).
 *
 * You should have received a copy of the GNU General Public License
 * version 2 along with this program; If not, see
 * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
 *
 * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
 * CA 95054 USA or visit www.sun.com if you need additional information or
 * have any questions.
 *
 * GPL HEADER END
 */
/*
 * Copyright (c) 2002, 2010, Oracle and/or its affiliates. All rights reserved.
 * Use is subject to license terms.
 *
 * Copyright (c) 2011, 2012, Intel Corporation.
 */
/*
 * This file is part of Lustre, http://www.lustre.org/
 * Lustre is a trademark of Sun Microsystems, Inc.
 */
/** \defgroup obd_import PtlRPC import definitions
 * An import is the client-side representation of a remote obd target.
 *
 * @{
 */

#ifndef __IMPORT_H
#define __IMPORT_H

/** \defgroup import import
 *
 * @{
 */

#include "lustre_handles.h"
#include "lustre/lustre_idl.h"


/**
 * Adaptive Timeout stuff
 *
 * @{
 */
#define D_ADAPTTO D_OTHER
#define AT_BINS 4		  /* "bin" means "N seconds of history" */
#define AT_FLG_NOHIST 0x1	  /* use last reported value only */

/**
 * State of one adaptive timeout: the current value, AT_BINS bins of
 * history and the worst value ever recorded together with its timestamp.
 */
struct adaptive_timeout {
	time_t		at_binstart;	 /* bin start time */
	unsigned int	at_hist[AT_BINS];    /* timeout history bins */
	unsigned int	at_flags;	 /* set by at_init(); presumably AT_FLG_* */
	unsigned int	at_current;	  /* current timeout value */
	unsigned int	at_worst_ever;       /* worst-ever timeout value */
	time_t		at_worst_time;       /* worst-ever timeout timestamp */
	/* held by at_reset() while it updates the current/worst fields */
	spinlock_t	at_lock;
};

/**
 * Bookkeeping for an array of request lists: the array itself, its size,
 * per-entry and total request counts, and the earliest request deadline.
 * NOTE(review): nothing in this header uses this structure; how requests
 * are mapped to array entries must be checked in the code that does.
 */
struct ptlrpc_at_array {
	struct list_head       *paa_reqs_array; /**< array to hold requests */
	__u32	     paa_size;       /**< the size of array */
	__u32	     paa_count;      /**< the total count of reqs */
	time_t	    paa_deadline;   /**< the earliest deadline of reqs */
	__u32	    *paa_reqs_count; /**< the count of reqs in each entry */
};

/* Number of slots in imp_at::iat_portal and imp_at::iat_service_estimate. */
#define IMP_AT_MAX_PORTALS 8
/**
 * Adaptive timeout data of an import (embedded as obd_import::imp_at):
 * one network latency timeout plus IMP_AT_MAX_PORTALS service estimates.
 */
struct imp_at {
	/*
	 * NOTE(review): presumably iat_portal[i] holds the portal number whose
	 * estimate lives in iat_service_estimate[i] (see import_at_get_index())
	 * -- confirm against the implementation.
	 */
	int		     iat_portal[IMP_AT_MAX_PORTALS];
	struct adaptive_timeout iat_net_latency;
	struct adaptive_timeout iat_service_estimate[IMP_AT_MAX_PORTALS];
};


/** @} */

/**
 * Possible import states.
 *
 * The numeric values are used directly as indices into the name table in
 * ptlrpc_import_state_name(), where index 0 (not a valid state) maps to
 * "<UNKNOWN>"; keep that table in sync when adding or renumbering states.
 */
enum lustre_imp_state {
	LUSTRE_IMP_CLOSED     = 1,
	LUSTRE_IMP_NEW	= 2,
	LUSTRE_IMP_DISCON     = 3,
	LUSTRE_IMP_CONNECTING = 4,
	LUSTRE_IMP_REPLAY     = 5,
	LUSTRE_IMP_REPLAY_LOCKS = 6,
	LUSTRE_IMP_REPLAY_WAIT  = 7,
	LUSTRE_IMP_RECOVER    = 8,
	LUSTRE_IMP_FULL       = 9,
	LUSTRE_IMP_EVICTED    = 10,	/* highest value accepted by the name lookup */
};

/**
 * Returns the text string representation of numeric import state \a state.
 *
 * The lookup table is indexed by the raw enum value, so slot 0 (which is
 * not a valid state) holds "<UNKNOWN>".  Any value greater than
 * LUSTRE_IMP_EVICTED trips the LASSERT before the table is read.
 */
static inline char *ptlrpc_import_state_name(enum lustre_imp_state state)
{
	static char *state_names[] = {
		"<UNKNOWN>",		/*  0: no such state */
		"CLOSED",		/*  1 */
		"NEW",			/*  2 */
		"DISCONN",		/*  3 */
		"CONNECTING",		/*  4 */
		"REPLAY",		/*  5 */
		"REPLAY_LOCKS",		/*  6 */
		"REPLAY_WAIT",		/*  7 */
		"RECOVER",		/*  8 */
		"FULL",			/*  9 */
		"EVICTED",		/* 10 */
	};
	char *name;

	LASSERT(state <= LUSTRE_IMP_EVICTED);
	name = state_names[state];

	return name;
}

/**
 * List of import event types.
 *
 * NOTE(review): presumably these are the values passed as the \a event
 * argument of obd_import_callback and class_notify_import_observers();
 * both take a plain int, so confirm against the callers.
 */
enum obd_import_event {
	IMP_EVENT_DISCON     = 0x808001,
	IMP_EVENT_INACTIVE   = 0x808002,
	IMP_EVENT_INVALIDATE = 0x808003,
	IMP_EVENT_ACTIVE     = 0x808004,
	IMP_EVENT_OCD	= 0x808005,
	IMP_EVENT_DEACTIVATE = 0x808006,
	IMP_EVENT_ACTIVATE   = 0x808007,
};

/**
 * Definition of import connection structure.
 *
 * Describes one connection an import can use; obd_import::imp_conn_current
 * points at the entry currently in use.
 * NOTE(review): presumably entries are linked through \a oic_item into
 * obd_import::imp_conn_list -- confirm against the list users.
 */
struct obd_import_conn {
	/** Item for linking connections together */
	struct list_head		oic_item;
	/** Pointer to actual PortalRPC connection */
	struct ptlrpc_connection *oic_conn;
	/** uuid of remote side */
	struct obd_uuid	   oic_uuid;
	/**
	 * Time (64 bit jiffies) of last connection attempt on this connection
	 */
	__u64		     oic_last_attempt;
};

/*
 * State history: obd_import::imp_state_hist holds IMP_STATE_HIST_LEN
 * entries of the type below.
 */
#define IMP_STATE_HIST_LEN 16
struct import_state_hist {
	enum lustre_imp_state ish_state;	/* recorded import state */
	/* NOTE(review): presumably the time the state was entered -- confirm */
	time_t		ish_time;
};

/**
 * Definition of PortalRPC import structure.
 * An import represents the client-side view of a remote target.
 */
struct obd_import {
	/** Local handle (== id) for this import. */
	struct portals_handle     imp_handle;
	/** Reference counter */
	atomic_t	      imp_refcount;
	struct lustre_handle      imp_dlm_handle; /* client's ldlm export */
	/** Currently active connection */
	struct ptlrpc_connection *imp_connection;
	/** PortalRPC client structure for this import */
	struct ptlrpc_client     *imp_client;
	/** List element for linking into pinger chain */
	struct list_head		imp_pinger_chain;
	/** List element for linking into chain for destruction */
	struct list_head		imp_zombie_chain;

	/**
	 * Lists of requests that are retained for replay, waiting for a reply,
	 * or waiting for recovery to complete, respectively.
	 * @{
	 */
	struct list_head		imp_replay_list;
	struct list_head		imp_sending_list;
	struct list_head		imp_delayed_list;
	/** @} */

	/**
	 * List of requests that are retained for committed open replay. Once
	 * open is committed, open replay request will be moved from the
	 * imp_replay_list into the imp_committed_list.
	 * The imp_replay_cursor is for accelerating searching during replay.
	 * @{
	 */
	struct list_head		imp_committed_list;
	struct list_head	       *imp_replay_cursor;
	/** @} */

	/** obd device for this import */
	struct obd_device	*imp_obd;

	/**
	 * some security-related fields
	 * @{
	 */
	struct ptlrpc_sec	*imp_sec;
	struct mutex		  imp_sec_mutex;
	unsigned long		imp_sec_expire;
	/** @} */

	/** Wait queue for those who need to wait for recovery completion */
	wait_queue_head_t	       imp_recovery_waitq;

	/** Number of requests currently in-flight */
	atomic_t	      imp_inflight;
	/** Number of requests currently unregistering */
	atomic_t	      imp_unregistering;
	/** Number of replay requests inflight */
	atomic_t	      imp_replay_inflight;
	/** Number of currently happening import invalidations */
	atomic_t	      imp_inval_count;
	/** Number of request timeouts */
	atomic_t	      imp_timeouts;
	/** Current import state */
	enum lustre_imp_state     imp_state;
	/** Last replay state */
	enum lustre_imp_state	  imp_replay_state;
	/** History of import states */
	struct import_state_hist  imp_state_hist[IMP_STATE_HIST_LEN];
	/**
	 * Index into imp_state_hist.
	 * NOTE(review): presumably the next slot to be written -- confirm.
	 */
	int		       imp_state_hist_idx;
	/** Current import generation. Incremented on every reconnect */
	int		       imp_generation;
	/** Incremented every time we send reconnection request */
	__u32		     imp_conn_cnt;
	/**
	 * \see ptlrpc_free_committed remembers imp_generation value here
	 * after a check to save on unnecessary replay list iterations
	 */
	int		       imp_last_generation_checked;
	/** Last transno we replayed */
	__u64		     imp_last_replay_transno;
	/** Last transno committed on remote side */
	__u64		     imp_peer_committed_transno;
	/**
	 * \see ptlrpc_free_committed remembers last_transno since its last
	 * check here and if last_transno did not change since last run of
	 * ptlrpc_free_committed and import generation is the same, we can
	 * skip looking for requests to remove from replay list as optimisation
	 */
	__u64		     imp_last_transno_checked;
	/**
	 * Remote export handle. This is how remote side knows what export
	 * we are talking to. Filled from response to connect request
	 */
	struct lustre_handle      imp_remote_handle;
	/** When to perform next ping. time in jiffies. */
	unsigned long		imp_next_ping;
	/** When we last successfully connected. time in 64bit jiffies */
	__u64		     imp_last_success_conn;

	/** List of all possible connections for import. */
	struct list_head		imp_conn_list;
	/**
	 * Current connection. \a imp_connection is imp_conn_current->oic_conn
	 */
	struct obd_import_conn   *imp_conn_current;

	/** Protects flags, level, generation, conn_cnt, *_list */
	spinlock_t		  imp_lock;

	/* flags: single-bit fields, all sharing one unsigned long */
	unsigned long	     imp_no_timeout:1, /* timeouts are disabled */
				  imp_invalid:1,    /* evicted */
				  /* administratively disabled */
				  imp_deactive:1,
				  /* try to recover the import */
				  imp_replayable:1,
				  /* don't run recovery (timeout instead) */
				  imp_dlm_fake:1,
				  /* use 1/2 timeout on MDS' OSCs */
				  imp_server_timeout:1,
				  /* VBR: imp in delayed recovery */
				  imp_delayed_recovery:1,
				  /* VBR: if gap was found then no lock replays
				   */
				  imp_no_lock_replay:1,
				  /* recovery by versions has failed */
				  imp_vbr_failed:1,
				  /* force an immediate ping */
				  imp_force_verify:1,
				  /* force a scheduled ping */
				  imp_force_next_verify:1,
				  /* pingable */
				  imp_pingable:1,
				  /* resend for replay */
				  imp_resend_replay:1,
				  /* disable normal recovery, for test only. */
				  imp_no_pinger_recover:1,
				  /* need IR MNE swab */
				  imp_need_mne_swab:1,
				  /* import must be reconnected instead of
				   * choosing a new connection */
				  imp_force_reconnect:1,
				  /* import has tried to connect with server */
				  imp_connect_tried:1;
	/* NOTE(review): the connect_* fields below are undocumented in this
	 * header; their exact meaning must be confirmed in the connect code. */
	__u32		     imp_connect_op;
	struct obd_connect_data   imp_connect_data;
	__u64		     imp_connect_flags_orig;
	int		       imp_connect_error;

	__u32		     imp_msg_magic;
	__u32		     imp_msghdr_flags;       /* adjusted based on server capability */

	struct ptlrpc_request_pool *imp_rq_pool;	  /* emergency request pool */

	struct imp_at	     imp_at;		 /* adaptive timeout data */
	time_t		    imp_last_reply_time;    /* for health check */
};

/**
 * Signature of an import observer callback (stored in
 * obd_import_observer::oio_cb).
 *
 * NOTE(review): presumably \a event is one of enum obd_import_event and
 * \a cb_data is the pointer given at registration time; the meaning of
 * \a closure and \a event_arg is not visible in this header -- confirm.
 */
typedef void (*obd_import_callback)(struct obd_import *imp, void *closure,
				    int event, void *event_arg, void *cb_data);

/**
 * Structure for import observer.
 * It is possible to register an "observer" on an import; every time
 * something happens to the import (like connect/evict/disconnect) the
 * observer gets its callback called with the event type.
 */
struct obd_import_observer {
	/* list linkage; the list head it hangs off is not visible here */
	struct list_head	   oio_chain;
	/* callback to invoke */
	obd_import_callback  oio_cb;
	/* opaque pointer; presumably handed back to oio_cb as cb_data */
	void		*oio_cb_data;
};

/*
 * Import observer API, implemented outside this header.
 *
 * NOTE(review): judging by the names only, class_observe_import()
 * registers \a cb with \a cb_data on \a imp, class_unobserve_import()
 * removes such a registration, and class_notify_import_observers() calls
 * the registered callbacks with \a event and \a event_arg -- confirm
 * against the implementation.
 */
void class_observe_import(struct obd_import *imp, obd_import_callback cb,
			  void *cb_data);
void class_unobserve_import(struct obd_import *imp, obd_import_callback cb,
			    void *cb_data);
void class_notify_import_observers(struct obd_import *imp, int event,
				   void *event_arg);

/* import.c */
/**
 * Convert a service estimate \a val into a timeout.
 *
 * The timeout is the estimate plus a quarter of it (integer shift, so the
 * quarter is rounded down) plus a fixed 5: roughly 125% + 5 sec.
 */
static inline unsigned int at_est2timeout(unsigned int val)
{
	unsigned int quarter = val >> 2;	/* 25% of the estimate */

	/* add an arbitrary minimum: 125% +5 sec */
	return val + quarter + 5;
}

/**
 * Convert a timeout \a val back into an estimate.
 *
 * \a val must be non-zero (asserted).  The timeout is scaled by 4/5 using
 * integer arithmetic, clamped to at least 5, and then reduced by 4, so the
 * result is never below 1.
 */
static inline unsigned int at_timeout2est(unsigned int val)
{
	unsigned int scaled;

	LASSERT(val);

	/* restore estimate value from timeout: e=4/5(t-5) */
	scaled = (val << 2) / 5;

	return max(scaled, 5U) - 4;
}

/**
 * Reset adaptive timeout \a at to \a val.
 *
 * With at_lock held, both the current and the worst-ever value are set to
 * \a val and the worst-ever timestamp is set to get_seconds().  The history
 * bins, the bin start time and the flags are not touched.
 */
static inline void at_reset(struct adaptive_timeout *at, int val)
{
	unsigned int timeout = val;

	spin_lock(&at->at_lock);
	at->at_current = timeout;
	at->at_worst_ever = timeout;
	at->at_worst_time = get_seconds();
	spin_unlock(&at->at_lock);
}
/**
 * Initialise adaptive timeout \a at.
 *
 * The whole structure is zeroed first, then \a flags is stored, the lock is
 * initialised and the values are set to \a val through at_reset().
 */
static inline void at_init(struct adaptive_timeout *at, int val, int flags)
{
	/* start from a clean slate: history bins and bin start become 0 */
	memset(at, 0, sizeof(*at));

	at->at_flags = flags;
	/* the lock must be usable before at_reset() takes it */
	spin_lock_init(&at->at_lock);

	at_reset(at, val);
}
extern unsigned int at_min;
/**
 * Return the current timeout of \a at, but never less than at_min.
 *
 * at_current is read without taking at_lock.
 */
static inline int at_get(struct adaptive_timeout *at)
{
	if (at->at_current > at_min)
		return at->at_current;

	/* current value is at or below the floor */
	return at_min;
}
/*
 * Implemented outside this header.
 * NOTE(review): presumably feeds a newly measured value \a val into \a at;
 * the meaning of the return value is not visible here -- confirm.
 */
int at_measured(struct adaptive_timeout *at, unsigned int val);
/*
 * Implemented outside this header.
 * NOTE(review): presumably returns the imp_at slot index used for
 * \a portal on \a imp -- confirm.
 */
int import_at_get_index(struct obd_import *imp, int portal);
/* Defined elsewhere; a value of 0 makes AT_OFF evaluate to true. */
extern unsigned int at_max;
/* True when at_max is 0 (presumably meaning adaptive timeouts are disabled). */
#define AT_OFF (at_max == 0)

/* genops.c */
/* Forward declaration: only pointers to struct obd_export are needed here. */
struct obd_export;
/*
 * NOTE(review): judging by the names, these look up the client import that
 * belongs to an export and to a connection handle respectively; behaviour
 * when no import is found is not visible here -- confirm in genops.c.
 */
extern struct obd_import *class_exp2cliimp(struct obd_export *);
extern struct obd_import *class_conn2cliimp(struct lustre_handle *);

/** @} import */

#endif /* __IMPORT_H */

/** @} obd_import */