diff options
Diffstat (limited to 'drivers/staging/lustre/include')
47 files changed, 9623 insertions, 0 deletions
diff --git a/drivers/staging/lustre/include/linux/libcfs/bitmap.h b/drivers/staging/lustre/include/linux/libcfs/bitmap.h new file mode 100644 index 000000000000..3f1c37b4bb7a --- /dev/null +++ b/drivers/staging/lustre/include/linux/libcfs/bitmap.h @@ -0,0 +1,111 @@ +/* + * GPL HEADER START + * + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 only, + * as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License version 2 for more details (a copy is included + * in the LICENSE file that accompanied this code). + * + * You should have received a copy of the GNU General Public License + * version 2 along with this program; If not, see + * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf + * + * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara, + * CA 95054 USA or visit www.sun.com if you need additional information or + * have any questions. + * + * GPL HEADER END + */ +/* + * Copyright (c) 2008, 2010, Oracle and/or its affiliates. All rights reserved. + * Use is subject to license terms. + * + * Copyright (c) 2012, Intel Corporation. + */ +/* + * This file is part of Lustre, http://www.lustre.org/ + * Lustre is a trademark of Sun Microsystems, Inc. 
+ */ +#ifndef _LIBCFS_BITMAP_H_ +#define _LIBCFS_BITMAP_H_ + + +typedef struct { + int size; /* capacity in bits, as passed to CFS_ALLOCATE_BITMAP() */ + unsigned long data[0]; +} cfs_bitmap_t; + +#define CFS_BITMAP_SIZE(nbits) \ + ((((nbits)/BITS_PER_LONG)+1)*sizeof(long)+sizeof(cfs_bitmap_t)) + +static inline +cfs_bitmap_t *CFS_ALLOCATE_BITMAP(int size) /* size in bits; NULL if OBD_ALLOC() fails */ +{ + cfs_bitmap_t *ptr; + + OBD_ALLOC(ptr, CFS_BITMAP_SIZE(size)); + if (ptr == NULL) + RETURN(ptr); + + ptr->size = size; + + RETURN (ptr); +} + +#define CFS_FREE_BITMAP(ptr) OBD_FREE(ptr, CFS_BITMAP_SIZE((ptr)->size)) + +static inline +void cfs_bitmap_set(cfs_bitmap_t *bitmap, int nbit) +{ + set_bit(nbit, bitmap->data); +} + +static inline +void cfs_bitmap_clear(cfs_bitmap_t *bitmap, int nbit) +{ + test_and_clear_bit(nbit, bitmap->data); /* previous bit value is ignored */ +} + +static inline +int cfs_bitmap_check(cfs_bitmap_t *bitmap, int nbit) +{ + return test_bit(nbit, bitmap->data); +} + +static inline +int cfs_bitmap_test_and_clear(cfs_bitmap_t *bitmap, int nbit) +{ + return test_and_clear_bit(nbit, bitmap->data); +} + +/* return 1 if the bitmap has no bits set, 0 otherwise */ +static inline +int cfs_bitmap_check_empty(cfs_bitmap_t *bitmap) +{ + return find_first_bit(bitmap->data, bitmap->size) == bitmap->size; +} + +static inline +void cfs_bitmap_copy(cfs_bitmap_t *new, cfs_bitmap_t *old) +{ + int newsize; + + LASSERT(new->size >= old->size); + newsize = new->size; /* memcpy() below also copies old->size over new->size */ + memcpy(new, old, CFS_BITMAP_SIZE(old->size)); + new->size = newsize; +} + +#define cfs_foreach_bit(bitmap, pos) \ + for ((pos) = find_first_bit((bitmap)->data, (bitmap)->size); \ + (pos) < (bitmap)->size; \ + (pos) = find_next_bit((bitmap)->data, (bitmap)->size, (pos) + 1)) + +#endif diff --git a/drivers/staging/lustre/include/linux/libcfs/curproc.h b/drivers/staging/lustre/include/linux/libcfs/curproc.h new file mode 100644 index 000000000000..90d7ce630e94 --- /dev/null +++ b/drivers/staging/lustre/include/linux/libcfs/curproc.h @@ -0,0 +1,110 @@ +/* + * GPL HEADER START + * + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
+ * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 only, + * as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License version 2 for more details (a copy is included + * in the LICENSE file that accompanied this code). + * + * You should have received a copy of the GNU General Public License + * version 2 along with this program; If not, see + * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf + * + * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara, + * CA 95054 USA or visit www.sun.com if you need additional information or + * have any questions. + * + * GPL HEADER END + */ +/* + * Copyright (c) 2008, 2010, Oracle and/or its affiliates. All rights reserved. + * Use is subject to license terms. + * + * Copyright (c) 2011, 2012, Intel Corporation. + */ +/* + * This file is part of Lustre, http://www.lustre.org/ + * Lustre is a trademark of Sun Microsystems, Inc. + * + * libcfs/include/libcfs/curproc.h + * + * Lustre curproc API declaration + * + * Author: Nikita Danilov <nikita@clusterfs.com> + */ + +#ifndef __LIBCFS_CURPROC_H__ +#define __LIBCFS_CURPROC_H__ + +/* + * Portable API to access common characteristics of "current" UNIX process. 
+ * + * Implemented in portals/include/libcfs/<os>/ + */ +int cfs_curproc_groups_nr(void); +int current_is_in_group(gid_t group); +void cfs_curproc_groups_dump(gid_t *array, int size); + +/* + * Plus, platform-specific constant + * + * CFS_CURPROC_COMM_MAX, + * + * and opaque scalar type + * + * kernel_cap_t + */ + +/* check if task is running in compat mode.*/ +int current_is_32bit(void); +#define current_pid() (current->pid) +#define current_comm() (current->comm) +int cfs_get_environ(const char *key, char *value, int *val_len); + +typedef __u32 cfs_cap_t; + +#define CFS_CAP_CHOWN 0 +#define CFS_CAP_DAC_OVERRIDE 1 +#define CFS_CAP_DAC_READ_SEARCH 2 +#define CFS_CAP_FOWNER 3 +#define CFS_CAP_FSETID 4 +#define CFS_CAP_LINUX_IMMUTABLE 9 +#define CFS_CAP_SYS_ADMIN 21 +#define CFS_CAP_SYS_BOOT 23 +#define CFS_CAP_SYS_RESOURCE 24 + +#define CFS_CAP_FS_MASK ((1 << CFS_CAP_CHOWN) | \ + (1 << CFS_CAP_DAC_OVERRIDE) | \ + (1 << CFS_CAP_DAC_READ_SEARCH) | \ + (1 << CFS_CAP_FOWNER) | \ + (1 << CFS_CAP_FSETID ) | \ + (1 << CFS_CAP_LINUX_IMMUTABLE) | \ + (1 << CFS_CAP_SYS_ADMIN) | \ + (1 << CFS_CAP_SYS_BOOT) | \ + (1 << CFS_CAP_SYS_RESOURCE)) + +void cfs_cap_raise(cfs_cap_t cap); +void cfs_cap_lower(cfs_cap_t cap); +int cfs_cap_raised(cfs_cap_t cap); +cfs_cap_t cfs_curproc_cap_pack(void); +void cfs_curproc_cap_unpack(cfs_cap_t cap); +int cfs_capable(cfs_cap_t cap); + +/* __LIBCFS_CURPROC_H__ */ +#endif +/* + * Local variables: + * c-indentation-style: "K&R" + * c-basic-offset: 8 + * tab-width: 8 + * fill-column: 80 + * scroll-step: 1 + * End: + */ diff --git a/drivers/staging/lustre/include/linux/libcfs/libcfs.h b/drivers/staging/lustre/include/linux/libcfs/libcfs.h new file mode 100644 index 000000000000..1ab1f2be9aa5 --- /dev/null +++ b/drivers/staging/lustre/include/linux/libcfs/libcfs.h @@ -0,0 +1,234 @@ +/* + * GPL HEADER START + * + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
+ * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 only, + * as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License version 2 for more details (a copy is included + * in the LICENSE file that accompanied this code). + * + * You should have received a copy of the GNU General Public License + * version 2 along with this program; If not, see + * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf + * + * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara, + * CA 95054 USA or visit www.sun.com if you need additional information or + * have any questions. + * + * GPL HEADER END + */ +/* + * Copyright (c) 2008, 2010, Oracle and/or its affiliates. All rights reserved. + * Use is subject to license terms. + * + * Copyright (c) 2011, 2012, Intel Corporation. + */ +/* + * This file is part of Lustre, http://www.lustre.org/ + * Lustre is a trademark of Sun Microsystems, Inc. + */ + +#ifndef __LIBCFS_LIBCFS_H__ +#define __LIBCFS_LIBCFS_H__ + +#if !__GNUC__ +#define __attribute__(x) +#endif + +#include <linux/libcfs/linux/libcfs.h> + +#include "curproc.h" + +#ifndef offsetof +# define offsetof(typ,memb) ((long)(long_ptr_t)((char *)&(((typ *)0)->memb))) +#endif + +#ifndef ARRAY_SIZE +#define ARRAY_SIZE(a) ((sizeof (a)) / (sizeof ((a)[0]))) +#endif + +#if !defined(swap) +#define swap(x,y) do { typeof(x) z = x; x = y; y = z; } while (0) +#endif + +#if !defined(container_of) +/* given a pointer @ptr to the field @member embedded into type (usually + * struct) @type, return pointer to the embedding instance of @type. 
*/ +#define container_of(ptr, type, member) \ + ((type *)((char *)(ptr)-(char *)(&((type *)0)->member))) +#endif + +static inline int __is_po2(unsigned long long val) +{ + return !(val & (val - 1)); /* note: also true for val == 0 */ +} + +#define IS_PO2(val) __is_po2((unsigned long long)(val)) + +#define LOWEST_BIT_SET(x) ((x) & ~((x) - 1)) + +/* + * Lustre Error Checksum: calculates checksum + * of Hex number by XORing its three lowest 4-bit nibbles. + */ +#define LERRCHKSUM(hexnum) (((hexnum) & 0xf) ^ ((hexnum) >> 4 & 0xf) ^ \ + ((hexnum) >> 8 & 0xf)) + + +/* + * Some (nomina odiosa sunt) platforms define NULL as naked 0. This confuses + * the Lustre RETURN(NULL) macro, so NULL is redefined below as a void pointer. + */ +#if defined(NULL) +#undef NULL +#endif + +#define NULL ((void *)0) + +#define LUSTRE_SRV_LNET_PID LUSTRE_LNET_PID + + +#include <linux/list.h> + +#ifndef cfs_for_each_possible_cpu +# error cfs_for_each_possible_cpu is not supported by kernel! +#endif + +/* libcfs tcpip */ +int libcfs_ipif_query(char *name, int *up, __u32 *ip, __u32 *mask); +int libcfs_ipif_enumerate(char ***names); +void libcfs_ipif_free_enumeration(char **names, int n); +int libcfs_sock_listen(socket_t **sockp, __u32 ip, int port, int backlog); +int libcfs_sock_accept(socket_t **newsockp, socket_t *sock); +void libcfs_sock_abort_accept(socket_t *sock); +int libcfs_sock_connect(socket_t **sockp, int *fatal, + __u32 local_ip, int local_port, + __u32 peer_ip, int peer_port); +int libcfs_sock_setbuf(socket_t *socket, int txbufsize, int rxbufsize); +int libcfs_sock_getbuf(socket_t *socket, int *txbufsize, int *rxbufsize); +int libcfs_sock_getaddr(socket_t *socket, int remote, __u32 *ip, int *port); +int libcfs_sock_write(socket_t *sock, void *buffer, int nob, int timeout); +int libcfs_sock_read(socket_t *sock, void *buffer, int nob, int timeout); +void libcfs_sock_release(socket_t *sock); + +/* libcfs watchdogs */ +struct lc_watchdog; + +/* Add a watchdog which fires after "time" milliseconds of delay. You have to + * touch it once to enable it. 
*/ +struct lc_watchdog *lc_watchdog_add(int time, + void (*cb)(pid_t pid, void *), + void *data); + +/* Enables a watchdog and resets its timer. */ +void lc_watchdog_touch(struct lc_watchdog *lcw, int timeout); +#define CFS_GET_TIMEOUT(svc) (max_t(int, obd_timeout, \ + AT_OFF ? 0 : at_get(&(svc)->srv_at_estimate)) * \ + (svc)->srv_watchdog_factor) + +/* Disable a watchdog; touch it to restart it. */ +void lc_watchdog_disable(struct lc_watchdog *lcw); + +/* Clean up the watchdog */ +void lc_watchdog_delete(struct lc_watchdog *lcw); + +/* Dump a debug log */ +void lc_watchdog_dumplog(pid_t pid, void *data); + + +/* need both kernel and user-land acceptor */ +#define LNET_ACCEPTOR_MIN_RESERVED_PORT 512 +#define LNET_ACCEPTOR_MAX_RESERVED_PORT 1023 + +/* + * libcfs pseudo device operations + * + * struct psdev_t and + * misc_register() and + * misc_deregister() are declared in + * libcfs/<os>/<os>-prim.h + * + * It's just a draft now. + */ + +struct cfs_psdev_file { + unsigned long off; + void *private_data; + unsigned long reserved1; + unsigned long reserved2; +}; + +struct cfs_psdev_ops { + int (*p_open)(unsigned long, void *); + int (*p_close)(unsigned long, void *); + int (*p_read)(struct cfs_psdev_file *, char *, unsigned long); + int (*p_write)(struct cfs_psdev_file *, char *, unsigned long); + int (*p_ioctl)(struct cfs_psdev_file *, unsigned long, void *); +}; + +/* + * Drop into debugger, if possible. Implementation is provided by platform. 
+ */ + +void cfs_enter_debugger(void); + +/* + * Defined by platform + */ +int unshare_fs_struct(void); +sigset_t cfs_get_blocked_sigs(void); +sigset_t cfs_block_allsigs(void); +sigset_t cfs_block_sigs(unsigned long sigs); +sigset_t cfs_block_sigsinv(unsigned long sigs); +void cfs_restore_sigs(sigset_t); +int cfs_signal_pending(void); +void cfs_clear_sigpending(void); + +/* + * Random number handling + */ + +/* returns a random 32-bit integer */ +unsigned int cfs_rand(void); +/* seed the generator */ +void cfs_srand(unsigned int, unsigned int); +void cfs_get_random_bytes(void *buf, int size); + +#include <linux/libcfs/libcfs_debug.h> +#include <linux/libcfs/libcfs_cpu.h> +#include <linux/libcfs/libcfs_private.h> +#include <linux/libcfs/libcfs_ioctl.h> +#include <linux/libcfs/libcfs_prim.h> +#include <linux/libcfs/libcfs_time.h> +#include <linux/libcfs/libcfs_string.h> +#include <linux/libcfs/libcfs_kernelcomm.h> +#include <linux/libcfs/libcfs_workitem.h> +#include <linux/libcfs/libcfs_hash.h> +#include <linux/libcfs/libcfs_heap.h> +#include <linux/libcfs/libcfs_fail.h> +#include <linux/libcfs/params_tree.h> +#include <linux/libcfs/libcfs_crypto.h> + +/* __container_of() uses "unlikely"; the original note says it is defined in libcfs_private.h, hence the placement after the includes. ERR_PTR and NULL inputs are returned unchanged. */ +static inline void *__container_of(void *ptr, unsigned long shift) +{ + if (unlikely(IS_ERR(ptr) || ptr == NULL)) + return ptr; + else + return (char *)ptr - shift; +} + +#define container_of0(ptr, type, member) \ + ((type *)__container_of((void *)(ptr), offsetof(type, member))) + +#define SET_BUT_UNUSED(a) do { } while(sizeof(a) - sizeof(a)) + +#define _LIBCFS_H + +#endif /* __LIBCFS_LIBCFS_H__ */ diff --git a/drivers/staging/lustre/include/linux/libcfs/libcfs_cpu.h b/drivers/staging/lustre/include/linux/libcfs/libcfs_cpu.h new file mode 100644 index 000000000000..6ae7415a3b99 --- /dev/null +++ b/drivers/staging/lustre/include/linux/libcfs/libcfs_cpu.h @@ -0,0 +1,214 @@ +/* + * GPL HEADER START + * + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS 
FILE HEADER. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 only, + * as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License version 2 for more details (a copy is included + * in the LICENSE file that accompanied this code). + * + * You should have received a copy of the GNU General Public License + * version 2 along with this program; if not, write to the + * Free Software Foundation, Inc., 59 Temple Place - Suite 330, + * Boston, MA 021110-1307, USA + * + * GPL HEADER END + */ +/* + * Copyright (c) 2010, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2012, Intel Corporation. + */ +/* + * This file is part of Lustre, http://www.lustre.org/ + * Lustre is a trademark of Sun Microsystems, Inc. + * + * libcfs/include/libcfs/libcfs_cpu.h + * + * CPU partition + * . CPU partition is virtual processing unit + * + * . CPU partition can present 1-N cores, or 1-N NUMA nodes, + * in other words, CPU partition is a processors pool. + * + * CPU Partition Table (CPT) + * . a set of CPU partitions + * + * . There are two modes for CPT: CFS_CPU_MODE_NUMA and CFS_CPU_MODE_SMP + * + * . User can specify total number of CPU partitions while creating a + * CPT, ID of CPU partition is always start from 0. + * + * Example: if there are 8 cores on the system, while creating a CPT + * with cpu_npartitions=4: + * core[0, 1] = partition[0], core[2, 3] = partition[1] + * core[4, 5] = partition[2], core[6, 7] = partition[3] + * + * cpu_npartitions=1: + * core[0, 1, ... 7] = partition[0] + * + * . 
User can also specify CPU partitions by string pattern + * + * Examples: cpu_partitions="0[0,1], 1[2,3]" + * cpu_partitions="N 0[0-3], 1[4-8]" + * + * The first character "N" means following numbers are numa ID + * + * . NUMA allocators, CPU affinity threads are built over CPU partitions, + * instead of HW CPUs or HW nodes. + * + * . By default, Lustre modules should refer to the global cfs_cpt_table, + * instead of accessing HW CPUs directly, so concurrency of Lustre can be + * configured by cpu_npartitions of the global cfs_cpt_table + * + * . If cpu_npartitions=1(all CPUs in one pool), lustre should work the + * same way as 2.2 or earlier versions + * + * Author: liang@whamcloud.com + */ + +#ifndef __LIBCFS_CPU_H__ +#define __LIBCFS_CPU_H__ + +#ifndef HAVE_LIBCFS_CPT + +typedef unsigned long cpumask_t; +typedef unsigned long nodemask_t; + +struct cfs_cpt_table { + /* # of CPU partitions */ + int ctb_nparts; + /* cpu mask */ + cpumask_t ctb_mask; + /* node mask */ + nodemask_t ctb_nodemask; + /* version */ + __u64 ctb_version; +}; + +#endif /* !HAVE_LIBCFS_CPT */ + +/* any CPU partition */ +#define CFS_CPT_ANY (-1) + +extern struct cfs_cpt_table *cfs_cpt_table; + +/** + * destroy a CPU partition table + */ +void cfs_cpt_table_free(struct cfs_cpt_table *cptab); +/** + * create a cfs_cpt_table with \a ncpt number of partitions + */ +struct cfs_cpt_table *cfs_cpt_table_alloc(unsigned int ncpt); +/** + * print string information of cpt-table + */ +int cfs_cpt_table_print(struct cfs_cpt_table *cptab, char *buf, int len); +/** + * return total number of CPU partitions in \a cptab + */ +int +cfs_cpt_number(struct cfs_cpt_table *cptab); +/** + * return number of HW cores or hyper-threads in a CPU partition \a cpt + */ +int cfs_cpt_weight(struct cfs_cpt_table *cptab, int cpt); +/** + * is there any online CPU in CPU partition \a cpt + */ +int cfs_cpt_online(struct cfs_cpt_table *cptab, int cpt); +/** + * return cpumask of CPU partition \a cpt + */ +cpumask_t 
*cfs_cpt_cpumask(struct cfs_cpt_table *cptab, int cpt); +/** + * return nodemask of CPU partition \a cpt + */ +nodemask_t *cfs_cpt_nodemask(struct cfs_cpt_table *cptab, int cpt); +/** + * shadow current HW processor ID to CPU-partition ID of \a cptab + */ +int cfs_cpt_current(struct cfs_cpt_table *cptab, int remap); +/** + * shadow HW processor ID \a cpu to CPU-partition ID by \a cptab + */ +int cfs_cpt_of_cpu(struct cfs_cpt_table *cptab, int cpu); +/** + * bind current thread on a CPU-partition \a cpt of \a cptab + */ +int cfs_cpt_bind(struct cfs_cpt_table *cptab, int cpt); +/** + * add \a cpu to CPU partition \a cpt of \a cptab, return 1 for success, + * otherwise 0 is returned + */ +int cfs_cpt_set_cpu(struct cfs_cpt_table *cptab, int cpt, int cpu); +/** + * remove \a cpu from CPU partition \a cpt of \a cptab + */ +void cfs_cpt_unset_cpu(struct cfs_cpt_table *cptab, int cpt, int cpu); +/** + * add all cpus in \a mask to CPU partition \a cpt + * return 1 if successfully set all CPUs, otherwise return 0 + */ +int cfs_cpt_set_cpumask(struct cfs_cpt_table *cptab, + int cpt, cpumask_t *mask); +/** + * remove all cpus in \a mask from CPU partition \a cpt + */ +void cfs_cpt_unset_cpumask(struct cfs_cpt_table *cptab, + int cpt, cpumask_t *mask); +/** + * add all cpus in NUMA node \a node to CPU partition \a cpt + * return 1 if successfully set all CPUs, otherwise return 0 + */ +int cfs_cpt_set_node(struct cfs_cpt_table *cptab, int cpt, int node); +/** + * remove all cpus in NUMA node \a node from CPU partition \a cpt + */ +void cfs_cpt_unset_node(struct cfs_cpt_table *cptab, int cpt, int node); + +/** + * add all cpus in node mask \a mask to CPU partition \a cpt + * return 1 if successfully set all CPUs, otherwise return 0 + */ +int cfs_cpt_set_nodemask(struct cfs_cpt_table *cptab, + int cpt, nodemask_t *mask); +/** + * remove all cpus in node mask \a mask from CPU partition \a cpt + */ +void cfs_cpt_unset_nodemask(struct cfs_cpt_table *cptab, + int cpt, nodemask_t *mask); 
+/** + * unset all cpus for CPU partition \a cpt + */ +void cfs_cpt_clear(struct cfs_cpt_table *cptab, int cpt); +/** + * convert partition id \a cpt to numa node id, if there are more than one + * nodes in this partition, it might return a different node id each time. + */ +int cfs_cpt_spread_node(struct cfs_cpt_table *cptab, int cpt); + +/** + * iterate over all CPU partitions in \a cptab; \a i must be a modifiable + * integer lvalue and cfs_cpt_number() is re-evaluated on every iteration + */ +#define cfs_cpt_for_each(i, cptab) \ + for ((i) = 0; (i) < cfs_cpt_number(cptab); (i)++) + +#ifndef __read_mostly +# define __read_mostly +#endif + +#ifndef ____cacheline_aligned +#define ____cacheline_aligned +#endif + +int cfs_cpu_init(void); +void cfs_cpu_fini(void); + +#endif /* __LIBCFS_CPU_H__ */ diff --git a/drivers/staging/lustre/include/linux/libcfs/libcfs_crypto.h b/drivers/staging/lustre/include/linux/libcfs/libcfs_crypto.h new file mode 100644 index 000000000000..64ca62f0cc93 --- /dev/null +++ b/drivers/staging/lustre/include/linux/libcfs/libcfs_crypto.h @@ -0,0 +1,201 @@ +/* GPL HEADER START + * + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 only, + * as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License version 2 for more details (a copy is included + * in the LICENSE file that accompanied this code). + * + * You should have received a copy of the GNU General Public License + * version 2 along with this program; If not, see http://www.gnu.org/licenses + * + * Please visit http://www.xyratex.com/contact if you need additional + * information or have any questions. 
+ * + * GPL HEADER END + */ + +/* + * Copyright 2012 Xyratex Technology Limited + */ + +#ifndef _LIBCFS_CRYPTO_H +#define _LIBCFS_CRYPTO_H + +struct cfs_crypto_hash_type { + char *cht_name; /**< hash algorithm name, equal to + * format name for crypto api */ + unsigned int cht_key; /**< init key by default (valid for + * 4 bytes context like crc32, adler) */ + unsigned int cht_size; /**< hash digest size */ +}; + +enum cfs_crypto_hash_alg { + CFS_HASH_ALG_NULL = 0, + CFS_HASH_ALG_ADLER32, + CFS_HASH_ALG_CRC32, + CFS_HASH_ALG_MD5, + CFS_HASH_ALG_SHA1, + CFS_HASH_ALG_SHA256, + CFS_HASH_ALG_SHA384, + CFS_HASH_ALG_SHA512, + CFS_HASH_ALG_CRC32C, + CFS_HASH_ALG_MAX +}; + +static struct cfs_crypto_hash_type hash_types[] = { + [CFS_HASH_ALG_NULL] = { "null", 0, 0 }, + [CFS_HASH_ALG_ADLER32] = { "adler32", 1, 4 }, + [CFS_HASH_ALG_CRC32] = { "crc32", ~0, 4 }, + [CFS_HASH_ALG_CRC32C] = { "crc32c", ~0, 4 }, + [CFS_HASH_ALG_MD5] = { "md5", 0, 16 }, + [CFS_HASH_ALG_SHA1] = { "sha1", 0, 20 }, + [CFS_HASH_ALG_SHA256] = { "sha256", 0, 32 }, + [CFS_HASH_ALG_SHA384] = { "sha384", 0, 48 }, + [CFS_HASH_ALG_SHA512] = { "sha512", 0, 64 }, +}; + +/** Return pointer to type of hash for valid hash algorithm identifier, or NULL if the identifier is out of range or has no table entry */ +static inline const struct cfs_crypto_hash_type * + cfs_crypto_hash_type(unsigned char hash_alg) +{ + struct cfs_crypto_hash_type *ht; + + if (hash_alg < CFS_HASH_ALG_MAX) { + ht = &hash_types[hash_alg]; + if (ht->cht_name) + return ht; + } + return NULL; +} + +/** Return hash name for valid hash algorithm identifier or "unknown" */ +static inline const char *cfs_crypto_hash_name(unsigned char hash_alg) +{ + const struct cfs_crypto_hash_type *ht; + + ht = cfs_crypto_hash_type(hash_alg); + if (ht) + return ht->cht_name; + else + return "unknown"; +} + +/** Return digest size for valid algorithm identifier or 0 */ +static inline int cfs_crypto_hash_digestsize(unsigned char hash_alg) +{ + const struct cfs_crypto_hash_type *ht; + + ht = cfs_crypto_hash_type(hash_alg); + if (ht) 
+ return ht->cht_size; + else + return 0; +} + +/** Return hash identifier for valid hash algorithm name or 0xFF; table slots without a name are skipped, as in cfs_crypto_hash_type() */ +static inline unsigned char cfs_crypto_hash_alg(const char *algname) +{ + unsigned char i; + + for (i = 0; i < CFS_HASH_ALG_MAX; i++) + if (hash_types[i].cht_name && !strcmp(hash_types[i].cht_name, algname)) + break; + return (i == CFS_HASH_ALG_MAX ? 0xFF : i); +} + +/** Calculate hash digest for buffer. + * @param alg id of hash algorithm + * @param buf buffer of data + * @param buf_len buffer len + * @param key initial value for algorithm, if it is NULL, + * default initial value should be used. + * @param key_len len of initial value + * @param hash [out] pointer to hash, if it is NULL, hash_len is + * set to valid digest size in bytes, retval -ENOSPC. + * @param hash_len [in,out] size of hash buffer + * @returns status of operation + * @retval -EINVAL if buf, buf_len, hash_len or alg_id is invalid + * @retval -ENODEV if this algorithm is unsupported + * @retval -ENOSPC if pointer to hash is NULL, or hash_len less than + * digest size + * @retval 0 for success + * @retval < 0 other errors from lower layers. + */ +int cfs_crypto_hash_digest(unsigned char alg, + const void *buf, unsigned int buf_len, + unsigned char *key, unsigned int key_len, + unsigned char *hash, unsigned int *hash_len); + +/* cfs crypto hash descriptor */ +struct cfs_crypto_hash_desc; + +/** Allocate and initialize descriptor for hash algorithm. + * @param alg algorithm id + * @param key initial value for algorithm, if it is NULL, + * default initial value should be used. + * @param key_len len of initial value + * @returns pointer to descriptor of hash instance + * @retval ERR_PTR(error) when errors occurred. + */ +struct cfs_crypto_hash_desc* + cfs_crypto_hash_init(unsigned char alg, + unsigned char *key, unsigned int key_len); + +/** Update digest by part of data. 
+ * @param desc hash descriptor + * @param page data page + * @param offset data offset + * @param len data len + * @returns status of operation + * @retval 0 for success. + */ +int cfs_crypto_hash_update_page(struct cfs_crypto_hash_desc *desc, + struct page *page, unsigned int offset, + unsigned int len); + +/** Update digest by part of data. + * @param desc hash descriptor + * @param buf pointer to data buffer + * @param buf_len size of data at buffer + * @returns status of operation + * @retval 0 for success. + */ +int cfs_crypto_hash_update(struct cfs_crypto_hash_desc *desc, const void *buf, + unsigned int buf_len); + +/** Finalize hash calculation, copy hash digest to buffer, destroy hash + * descriptor. + * @param desc hash descriptor + * @param hash buffer pointer to store hash digest + * @param hash_len pointer to hash buffer size, if NULL + * destroy hash descriptor + * @returns status of operation + * @retval -ENOSPC if hash is NULL, or *hash_len less than + * digest size + * @retval 0 for success + * @retval < 0 other errors from lower layers. + */ +int cfs_crypto_hash_final(struct cfs_crypto_hash_desc *desc, + unsigned char *hash, unsigned int *hash_len); +/** + * Register crypto hash algorithms + */ +int cfs_crypto_register(void); + +/** + * Unregister + */ +void cfs_crypto_unregister(void); + +/** Return hash speed in Mbytes per second for valid hash algorithm + * identifier. If the test was unsuccessful, -1 is returned. + */ +int cfs_crypto_hash_speed(unsigned char hash_alg); +#endif diff --git a/drivers/staging/lustre/include/linux/libcfs/libcfs_debug.h b/drivers/staging/lustre/include/linux/libcfs/libcfs_debug.h new file mode 100644 index 000000000000..dd8ac2f52c9f --- /dev/null +++ b/drivers/staging/lustre/include/linux/libcfs/libcfs_debug.h @@ -0,0 +1,350 @@ +/* + * GPL HEADER START + * + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
+ * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 only, + * as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License version 2 for more details (a copy is included + * in the LICENSE file that accompanied this code). + * + * You should have received a copy of the GNU General Public License + * version 2 along with this program; If not, see + * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf + * + * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara, + * CA 95054 USA or visit www.sun.com if you need additional information or + * have any questions. + * + * GPL HEADER END + */ +/* + * Copyright (c) 2008, 2010, Oracle and/or its affiliates. All rights reserved. + * Use is subject to license terms. + * + * Copyright (c) 2012, Intel Corporation. + */ +/* + * This file is part of Lustre, http://www.lustre.org/ + * Lustre is a trademark of Sun Microsystems, Inc. 
+ * + * libcfs/include/libcfs/libcfs_debug.h + * + * Debug messages and assertions + * + */ + +#ifndef __LIBCFS_DEBUG_H__ +#define __LIBCFS_DEBUG_H__ + +/* + * Debugging + */ +extern unsigned int libcfs_subsystem_debug; +extern unsigned int libcfs_stack; +extern unsigned int libcfs_debug; +extern unsigned int libcfs_printk; +extern unsigned int libcfs_console_ratelimit; +extern unsigned int libcfs_watchdog_ratelimit; +extern unsigned int libcfs_console_max_delay; +extern unsigned int libcfs_console_min_delay; +extern unsigned int libcfs_console_backoff; +extern unsigned int libcfs_debug_binary; +extern char libcfs_debug_file_path_arr[PATH_MAX]; + +int libcfs_debug_mask2str(char *str, int size, int mask, int is_subsys); +int libcfs_debug_str2mask(int *mask, const char *str, int is_subsys); + +/* Has there been an LBUG? */ +extern unsigned int libcfs_catastrophe; +extern unsigned int libcfs_panic_on_lbug; + +/** + * Format for debug message headers + */ +struct ptldebug_header { + __u32 ph_len; + __u32 ph_flags; + __u32 ph_subsys; + __u32 ph_mask; + __u16 ph_cpu_id; + __u16 ph_type; + __u32 ph_sec; + __u64 ph_usec; + __u32 ph_stack; + __u32 ph_pid; + __u32 ph_extern_pid; + __u32 ph_line_num; +} __attribute__((packed)); + + +#define PH_FLAG_FIRST_RECORD 1 + +/* Debugging subsystems (32 bits, non-overlapping) */ +/* keep these in sync with lnet/utils/debug.c and lnet/libcfs/debug.c */ +#define S_UNDEFINED 0x00000001 +#define S_MDC 0x00000002 +#define S_MDS 0x00000004 +#define S_OSC 0x00000008 +#define S_OST 0x00000010 +#define S_CLASS 0x00000020 +#define S_LOG 0x00000040 +#define S_LLITE 0x00000080 +#define S_RPC 0x00000100 +#define S_MGMT 0x00000200 +#define S_LNET 0x00000400 +#define S_LND 0x00000800 /* ALL LNDs */ +#define S_PINGER 0x00001000 +#define S_FILTER 0x00002000 +/* unused */ +#define S_ECHO 0x00008000 +#define S_LDLM 0x00010000 +#define S_LOV 0x00020000 +#define S_LQUOTA 0x00040000 +#define S_OSD 0x00080000 +/* unused */ +/* unused */ +/* unused */ 
+#define S_LMV 0x00800000 /* b_new_cmd */ +/* unused */ +#define S_SEC 0x02000000 /* upcall cache */ +#define S_GSS 0x04000000 /* b_new_cmd */ +/* unused */ +#define S_MGC 0x10000000 +#define S_MGS 0x20000000 +#define S_FID 0x40000000 /* b_new_cmd */ +#define S_FLD 0x80000000 /* b_new_cmd */ +/* keep these in sync with lnet/utils/debug.c and lnet/libcfs/debug.c */ + +/* Debugging masks (32 bits, non-overlapping) */ +/* keep these in sync with lnet/utils/debug.c and lnet/libcfs/debug.c */ +#define D_TRACE 0x00000001 /* ENTRY/EXIT markers */ +#define D_INODE 0x00000002 +#define D_SUPER 0x00000004 +#define D_EXT2 0x00000008 /* anything from ext2_debug */ +#define D_MALLOC 0x00000010 /* print malloc, free information */ +#define D_CACHE 0x00000020 /* cache-related items */ +#define D_INFO 0x00000040 /* general information */ +#define D_IOCTL 0x00000080 /* ioctl related information */ +#define D_NETERROR 0x00000100 /* network errors */ +#define D_NET 0x00000200 /* network communications */ +#define D_WARNING 0x00000400 /* CWARN(...) == CDEBUG (D_WARNING, ...) */ +#define D_BUFFS 0x00000800 +#define D_OTHER 0x00001000 +#define D_DENTRY 0x00002000 +#define D_NETTRACE 0x00004000 +#define D_PAGE 0x00008000 /* bulk page handling */ +#define D_DLMTRACE 0x00010000 +#define D_ERROR 0x00020000 /* CERROR(...) == CDEBUG (D_ERROR, ...) */ +#define D_EMERG 0x00040000 /* CEMERG(...) == CDEBUG (D_EMERG, ...) 
*/ +#define D_HA 0x00080000 /* recovery and failover */ +#define D_RPCTRACE 0x00100000 /* for distributed debugging */ +#define D_VFSTRACE 0x00200000 +#define D_READA 0x00400000 /* read-ahead */ +#define D_MMAP 0x00800000 +#define D_CONFIG 0x01000000 +#define D_CONSOLE 0x02000000 +#define D_QUOTA 0x04000000 +#define D_SEC 0x08000000 +#define D_LFSCK 0x10000000 /* For both OI scrub and LFSCK */ +/* keep these in sync with lnet/{utils,libcfs}/debug.c */ + +#define D_HSM D_TRACE + +#define D_CANTMASK (D_ERROR | D_EMERG | D_WARNING | D_CONSOLE) + +#ifndef DEBUG_SUBSYSTEM +# define DEBUG_SUBSYSTEM S_UNDEFINED +#endif + +#define CDEBUG_DEFAULT_MAX_DELAY (cfs_time_seconds(600)) /* jiffies */ +#define CDEBUG_DEFAULT_MIN_DELAY ((cfs_time_seconds(1) + 1) / 2) /* jiffies */ +#define CDEBUG_DEFAULT_BACKOFF 2 +typedef struct { + cfs_time_t cdls_next; + unsigned int cdls_delay; + int cdls_count; +} cfs_debug_limit_state_t; + +struct libcfs_debug_msg_data { + const char *msg_file; + const char *msg_fn; + int msg_subsys; + int msg_line; + int msg_mask; + cfs_debug_limit_state_t *msg_cdls; +}; + +#define LIBCFS_DEBUG_MSG_DATA_INIT(data, mask, cdls) \ +do { \ + (data)->msg_subsys = DEBUG_SUBSYSTEM; \ + (data)->msg_file = __FILE__; \ + (data)->msg_fn = __FUNCTION__; \ + (data)->msg_line = __LINE__; \ + (data)->msg_cdls = (cdls); \ + (data)->msg_mask = (mask); \ +} while (0) + +#define LIBCFS_DEBUG_MSG_DATA_DECL(dataname, mask, cdls) \ + static struct libcfs_debug_msg_data dataname = { \ + .msg_subsys = DEBUG_SUBSYSTEM, \ + .msg_file = __FILE__, \ + .msg_fn = __FUNCTION__, \ + .msg_line = __LINE__, \ + .msg_cdls = (cdls) }; \ + dataname.msg_mask = (mask); + + + +/** + * Filters out logging messages based on mask and subsystem. + */ +static inline int cfs_cdebug_show(unsigned int mask, unsigned int subsystem) +{ + return mask & D_CANTMASK || + ((libcfs_debug & mask) && (libcfs_subsystem_debug & subsystem)); +} + +#define __CDEBUG(cdls, mask, format, ...) 
\ +do { \ + static struct libcfs_debug_msg_data msgdata; \ + \ + CFS_CHECK_STACK(&msgdata, mask, cdls); \ + \ + if (cfs_cdebug_show(mask, DEBUG_SUBSYSTEM)) { \ + LIBCFS_DEBUG_MSG_DATA_INIT(&msgdata, mask, cdls); \ + libcfs_debug_msg(&msgdata, format, ## __VA_ARGS__); \ + } \ +} while (0) + +#define CDEBUG(mask, format, ...) __CDEBUG(NULL, mask, format, ## __VA_ARGS__) + +#define CDEBUG_LIMIT(mask, format, ...) \ +do { \ + static cfs_debug_limit_state_t cdls; \ + \ + __CDEBUG(&cdls, mask, format, ## __VA_ARGS__);\ +} while (0) + + + + +#define CWARN(format, ...) CDEBUG_LIMIT(D_WARNING, format, ## __VA_ARGS__) +#define CERROR(format, ...) CDEBUG_LIMIT(D_ERROR, format, ## __VA_ARGS__) +#define CNETERR(format, a...) CDEBUG_LIMIT(D_NETERROR, format, ## a) +#define CEMERG(format, ...) CDEBUG_LIMIT(D_EMERG, format, ## __VA_ARGS__) + +#define LCONSOLE(mask, format, ...) CDEBUG(D_CONSOLE | (mask), format, ## __VA_ARGS__) +#define LCONSOLE_INFO(format, ...) CDEBUG_LIMIT(D_CONSOLE, format, ## __VA_ARGS__) +#define LCONSOLE_WARN(format, ...) CDEBUG_LIMIT(D_CONSOLE | D_WARNING, format, ## __VA_ARGS__) +#define LCONSOLE_ERROR_MSG(errnum, format, ...) CDEBUG_LIMIT(D_CONSOLE | D_ERROR, \ + "%x-%x: " format, errnum, LERRCHKSUM(errnum), ## __VA_ARGS__) +#define LCONSOLE_ERROR(format, ...) LCONSOLE_ERROR_MSG(0x00, format, ## __VA_ARGS__) + +#define LCONSOLE_EMERG(format, ...) CDEBUG(D_CONSOLE | D_EMERG, format, ## __VA_ARGS__) + + +void libcfs_log_goto(struct libcfs_debug_msg_data *, const char *, long_ptr_t); +#define GOTO(label, rc) \ +do { \ + if (cfs_cdebug_show(D_TRACE, DEBUG_SUBSYSTEM)) { \ + LIBCFS_DEBUG_MSG_DATA_DECL(msgdata, D_TRACE, NULL); \ + libcfs_log_goto(&msgdata, #label, (long_ptr_t)(rc)); \ + } else { \ + (void)(rc); \ + } \ + goto label; \ +} while (0) + + +/* + * if rc == NULL, we need to code as RETURN((void *)NULL), otherwise + * there will be a warning in osx. 
+ */ +#if defined(__GNUC__) + +long libcfs_log_return(struct libcfs_debug_msg_data *, long rc); +#if BITS_PER_LONG > 32 +#define RETURN(rc) \ +do { \ + EXIT_NESTING; \ + if (cfs_cdebug_show(D_TRACE, DEBUG_SUBSYSTEM)) { \ + LIBCFS_DEBUG_MSG_DATA_DECL(msgdata, D_TRACE, NULL); \ + return (typeof(rc))libcfs_log_return(&msgdata, \ + (long)(rc)); \ + } \ + \ + return (rc); \ +} while (0) +#else /* BITS_PER_LONG == 32 */ +/* We need an on-stack variable, because we cannot case a 32-bit pointer + * directly to (long long) without generating a complier warning/error, yet + * casting directly to (long) will truncate 64-bit return values. The log + * values will print as 32-bit values, but they always have been. LU-1436 + */ +#define RETURN(rc) \ +do { \ + EXIT_NESTING; \ + if (cfs_cdebug_show(D_TRACE, DEBUG_SUBSYSTEM)) { \ + typeof(rc) __rc = (rc); \ + LIBCFS_DEBUG_MSG_DATA_DECL(msgdata, D_TRACE, NULL); \ + libcfs_log_return(&msgdata, (long_ptr_t)__rc); \ + return __rc; \ + } \ + \ + return (rc); \ +} while (0) +#endif /* BITS_PER_LONG > 32 */ + +#elif defined(_MSC_VER) +#define RETURN(rc) \ +do { \ + CDEBUG(D_TRACE, "Process leaving.\n"); \ + EXIT_NESTING; \ + return (rc); \ +} while (0) +#else +# error "Unkown compiler" +#endif /* __GNUC__ */ + +#define ENTRY \ +ENTRY_NESTING; \ +do { \ + CDEBUG(D_TRACE, "Process entered\n"); \ +} while (0) + +#define EXIT \ +do { \ + CDEBUG(D_TRACE, "Process leaving\n"); \ + EXIT_NESTING; \ +} while(0) + +#define RETURN_EXIT \ +do { \ + EXIT; \ + return; \ +} while (0) + +extern int libcfs_debug_msg(struct libcfs_debug_msg_data *msgdata, + const char *format1, ...) + __attribute__ ((format (printf, 2, 3))); + +extern int libcfs_debug_vmsg2(struct libcfs_debug_msg_data *msgdata, + const char *format1, + va_list args, const char *format2, ...) 
+ __attribute__ ((format (printf, 4, 5))); + +/* other external symbols that tracefile provides: */ +extern int cfs_trace_copyin_string(char *knl_buffer, int knl_buffer_nob, + const char *usr_buffer, int usr_buffer_nob); +extern int cfs_trace_copyout_string(char *usr_buffer, int usr_buffer_nob, + const char *knl_buffer, char *append); + +#define LIBCFS_DEBUG_FILE_PATH_DEFAULT "/tmp/lustre-log" + +#endif /* __LIBCFS_DEBUG_H__ */ diff --git a/drivers/staging/lustre/include/linux/libcfs/libcfs_fail.h b/drivers/staging/lustre/include/linux/libcfs/libcfs_fail.h new file mode 100644 index 000000000000..8393c2703ce6 --- /dev/null +++ b/drivers/staging/lustre/include/linux/libcfs/libcfs_fail.h @@ -0,0 +1,170 @@ +/* + * GPL HEADER START + * + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 only, + * as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License version 2 for more details (a copy is included + * in the LICENSE file that accompanied this code). + * + * You should have received a copy of the GNU General Public License + * version 2 along with this program; If not, see http://www.gnu.org/licenses + * + * Please contact Oracle Corporation, Inc., 500 Oracle Parkway, Redwood Shores, + * CA 94065 USA or visit www.oracle.com if you need additional information or + * have any questions. + * + * GPL HEADER END + */ +/* + * Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved. + * Use is subject to license terms. + * + * Copyright (c) 2011, 2012, Intel Corporation. 
+ */ +/* + * This file is part of Lustre, http://www.lustre.org/ + * Lustre is a trademark of Oracle Corporation, Inc. + */ + +#ifndef _LIBCFS_FAIL_H +#define _LIBCFS_FAIL_H + +extern unsigned long cfs_fail_loc; +extern unsigned int cfs_fail_val; + +extern wait_queue_head_t cfs_race_waitq; +extern int cfs_race_state; + +int __cfs_fail_check_set(__u32 id, __u32 value, int set); +int __cfs_fail_timeout_set(__u32 id, __u32 value, int ms, int set); + +enum { + CFS_FAIL_LOC_NOSET = 0, + CFS_FAIL_LOC_ORSET = 1, + CFS_FAIL_LOC_RESET = 2, + CFS_FAIL_LOC_VALUE = 3 +}; + +/* Failure injection control */ +#define CFS_FAIL_MASK_SYS 0x0000FF00 +#define CFS_FAIL_MASK_LOC (0x000000FF | CFS_FAIL_MASK_SYS) + +#define CFS_FAILED_BIT 30 +/* CFS_FAILED is 0x40000000 */ +#define CFS_FAILED (1 << CFS_FAILED_BIT) + +#define CFS_FAIL_ONCE_BIT 31 +/* CFS_FAIL_ONCE is 0x80000000. The shift is done on an unsigned constant: + * 1 << 31 overflows a signed int, and the resulting negative value would + * sign-extend when combined with the unsigned long cfs_fail_loc wherever + * long is wider than int. */ +#define CFS_FAIL_ONCE (1U << CFS_FAIL_ONCE_BIT) + +/* The following flags aren't made to be combined */ +#define CFS_FAIL_SKIP 0x20000000 /* skip N times then fail */ +#define CFS_FAIL_SOME 0x10000000 /* only fail N times */ +#define CFS_FAIL_RAND 0x08000000 /* fail 1/N of the times */ +#define CFS_FAIL_USR1 0x04000000 /* user flag */ + +/* True when cfs_fail_loc is non-zero and its CFS_FAIL_MASK_LOC bits equal + * those of id. */ +#define CFS_FAIL_PRECHECK(id) (cfs_fail_loc && \ + (cfs_fail_loc & CFS_FAIL_MASK_LOC) == \ + ((id) & CFS_FAIL_MASK_LOC)) + +/* If id passes CFS_FAIL_PRECHECK() and __cfs_fail_check_set() returns + * non-zero, log the hit (CDEBUG at D_INFO when quiet is set, LCONSOLE_INFO + * otherwise) and return that value; in every other case return 0. */ +static inline int cfs_fail_check_set(__u32 id, __u32 value, + int set, int quiet) +{ + int ret = 0; + + if (unlikely(CFS_FAIL_PRECHECK(id) && + (ret = __cfs_fail_check_set(id, value, set)))) { + if (quiet) { + CDEBUG(D_INFO, "*** cfs_fail_loc=%x, val=%u***\n", + id, value); + } else { + LCONSOLE_INFO("*** cfs_fail_loc=%x, val=%u***\n", + id, value); + } + } + + return ret; +} + +/* If id hit cfs_fail_loc, return 1, otherwise return 0 */ +#define CFS_FAIL_CHECK(id) \ + cfs_fail_check_set(id, 0, CFS_FAIL_LOC_NOSET, 0) +#define CFS_FAIL_CHECK_QUIET(id) \ + cfs_fail_check_set(id, 0, CFS_FAIL_LOC_NOSET, 1) + +/* If id hit cfs_fail_loc and cfs_fail_val == (-1 or 
value) return 1, + * otherwise return 0 */ +#define CFS_FAIL_CHECK_VALUE(id, value) \ + cfs_fail_check_set(id, value, CFS_FAIL_LOC_VALUE, 0) +#define CFS_FAIL_CHECK_VALUE_QUIET(id, value) \ + cfs_fail_check_set(id, value, CFS_FAIL_LOC_VALUE, 1) + +/* If id hit cfs_fail_loc, cfs_fail_loc |= value and return 1, + * otherwise return 0 */ +#define CFS_FAIL_CHECK_ORSET(id, value) \ + cfs_fail_check_set(id, value, CFS_FAIL_LOC_ORSET, 0) +#define CFS_FAIL_CHECK_ORSET_QUIET(id, value) \ + cfs_fail_check_set(id, value, CFS_FAIL_LOC_ORSET, 1) + +/* If id hit cfs_fail_loc, cfs_fail_loc = value and return 1, + * otherwise return 0 */ +#define CFS_FAIL_CHECK_RESET(id, value) \ + cfs_fail_check_set(id, value, CFS_FAIL_LOC_RESET, 0) +#define CFS_FAIL_CHECK_RESET_QUIET(id, value) \ + cfs_fail_check_set(id, value, CFS_FAIL_LOC_RESET, 1) + +/* Forward to __cfs_fail_timeout_set() only when id passes + * CFS_FAIL_PRECHECK(); otherwise return 0 without calling it. */ +static inline int cfs_fail_timeout_set(__u32 id, __u32 value, int ms, int set) +{ + if (unlikely(CFS_FAIL_PRECHECK(id))) + return __cfs_fail_timeout_set(id, value, ms, set); + else + return 0; +} + +/* If id hit cfs_fail_loc, sleep for seconds or milliseconds. + * secs is parenthesized so that an expression argument such as (a + b) + * is scaled to milliseconds as a whole. */ +#define CFS_FAIL_TIMEOUT(id, secs) \ + cfs_fail_timeout_set(id, 0, (secs) * 1000, CFS_FAIL_LOC_NOSET) + +#define CFS_FAIL_TIMEOUT_MS(id, ms) \ + cfs_fail_timeout_set(id, 0, ms, CFS_FAIL_LOC_NOSET) + +/* If id hit cfs_fail_loc, cfs_fail_loc |= value and + * sleep seconds or milliseconds */ +#define CFS_FAIL_TIMEOUT_ORSET(id, value, secs) \ + cfs_fail_timeout_set(id, value, (secs) * 1000, CFS_FAIL_LOC_ORSET) + +#define CFS_FAIL_TIMEOUT_MS_ORSET(id, value, ms) \ + cfs_fail_timeout_set(id, value, ms, CFS_FAIL_LOC_ORSET) + +/* The idea here is to synchronise two threads to force a race. The + * first thread that calls this with a matching fail_loc is put to + * sleep. The next thread that calls with the same fail_loc wakes up + * the first and continues. 
*/ +static inline void cfs_race(__u32 id) +{ + + if (CFS_FAIL_PRECHECK(id)) { + if (unlikely(__cfs_fail_check_set(id, 0, CFS_FAIL_LOC_NOSET))) { + int rc; + cfs_race_state = 0; + CERROR("cfs_race id %x sleeping\n", id); + cfs_wait_event_interruptible(cfs_race_waitq, + cfs_race_state != 0, rc); + CERROR("cfs_fail_race id %x awake, rc=%d\n", id, rc); + } else { + CERROR("cfs_fail_race id %x waking\n", id); + cfs_race_state = 1; + wake_up(&cfs_race_waitq); + } + } +} +#define CFS_RACE(id) cfs_race(id) + +#endif /* _LIBCFS_FAIL_H */ diff --git a/drivers/staging/lustre/include/linux/libcfs/libcfs_hash.h b/drivers/staging/lustre/include/linux/libcfs/libcfs_hash.h new file mode 100644 index 000000000000..f6361b3f0a0c --- /dev/null +++ b/drivers/staging/lustre/include/linux/libcfs/libcfs_hash.h @@ -0,0 +1,851 @@ +/* + * GPL HEADER START + * + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 only, + * as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License version 2 for more details (a copy is included + * in the LICENSE file that accompanied this code). + * + * You should have received a copy of the GNU General Public License + * version 2 along with this program; If not, see + * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf + * + * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara, + * CA 95054 USA or visit www.sun.com if you need additional information or + * have any questions. + * + * GPL HEADER END + */ +/* + * Copyright (c) 2008, 2010, Oracle and/or its affiliates. All rights reserved. + * Use is subject to license terms. 
+ * + * Copyright (c) 2012, Intel Corporation. + */ +/* + * This file is part of Lustre, http://www.lustre.org/ + * Lustre is a trademark of Sun Microsystems, Inc. + * + * libcfs/include/libcfs/libcfs_hash.h + * + * Hashing routines + * + */ + +#ifndef __LIBCFS_HASH_H__ +#define __LIBCFS_HASH_H__ +/* + * Knuth recommends primes in approximately golden ratio to the maximum + * integer representable by a machine word for multiplicative hashing. + * Chuck Lever verified the effectiveness of this technique: + * http://www.citi.umich.edu/techreports/reports/citi-tr-00-1.pdf + * + * These primes are chosen to be bit-sparse, that is operations on + * them can use shifts and additions instead of multiplications for + * machines where multiplications are slow. + */ +/* 2^31 + 2^29 - 2^25 + 2^22 - 2^19 - 2^16 + 1 */ +#define CFS_GOLDEN_RATIO_PRIME_32 0x9e370001UL +/* 2^63 + 2^61 - 2^57 + 2^54 - 2^51 - 2^18 + 1 */ +#define CFS_GOLDEN_RATIO_PRIME_64 0x9e37fffffffc0001ULL + +/* + * Ideally we would use HAVE_HASH_LONG for this, but on linux we configure + * the linux kernel and user space at the same time, so we need to differentiate + * between them explicitly. If this is not needed on other architectures, then + * we'll need to move the functions to arch-specific headers. + */ + +#include <linux/hash.h> + +#define cfs_hash_long(val, bits) hash_long(val, bits) + +/** disable debug */ +#define CFS_HASH_DEBUG_NONE 0 +/** record hash depth and output to console when it's too deep, + * computing overhead is low but consumes more memory */ +#define CFS_HASH_DEBUG_1 1 +/** expensive, check key validation */ +#define CFS_HASH_DEBUG_2 2 + +/** debug level compiled in; selects among the CFS_HASH_DEBUG_* values above */ +#define CFS_HASH_DEBUG_LEVEL CFS_HASH_DEBUG_NONE + +struct cfs_hash_ops; +struct cfs_hash_lock_ops; +struct cfs_hash_hlist_ops; + +/** lock storage that holds either an rwlock or a spinlock */ +typedef union { + rwlock_t rw; /**< rwlock */ + spinlock_t spin; /**< spinlock */ +} cfs_hash_lock_t; + +/** + * cfs_hash_bucket is a container of: + * - lock, couter ... 
+ * - array of hash-head starting from hsb_head[0], hash-head can be one of + * . cfs_hash_head_t + * . cfs_hash_head_dep_t + * . cfs_hash_dhead_t + * . cfs_hash_dhead_dep_t + * which depends on requirement of user + * - some extra bytes (caller can require it while creating hash) + */ +typedef struct cfs_hash_bucket { + cfs_hash_lock_t hsb_lock; /**< bucket lock */ + __u32 hsb_count; /**< current entries */ + __u32 hsb_version; /**< change version */ + unsigned int hsb_index; /**< index of bucket */ + int hsb_depmax; /**< max depth on bucket */ + long hsb_head[0]; /**< hash-head array */ +} cfs_hash_bucket_t; + +/** + * cfs_hash bucket descriptor, it's normally in stack of caller + */ +typedef struct cfs_hash_bd { + cfs_hash_bucket_t *bd_bucket; /**< address of bucket */ + unsigned int bd_offset; /**< offset in bucket */ +} cfs_hash_bd_t; + +#define CFS_HASH_NAME_LEN 16 /**< default name length */ +#define CFS_HASH_BIGNAME_LEN 64 /**< bigname for param tree */ + +#define CFS_HASH_BKT_BITS 3 /**< default bits of bucket */ +#define CFS_HASH_BITS_MAX 30 /**< max bits of bucket */ +#define CFS_HASH_BITS_MIN CFS_HASH_BKT_BITS + +/** + * common hash attributes. + */ +enum cfs_hash_tag { + /** + * don't need any lock, caller will protect operations with it's + * own lock. With this flag: + * . CFS_HASH_NO_BKTLOCK, CFS_HASH_RW_BKTLOCK, CFS_HASH_SPIN_BKTLOCK + * will be ignored. + * . 
Some functions will be disabled with this flag, i.e: + * cfs_hash_for_each_empty, cfs_hash_rehash + */ + CFS_HASH_NO_LOCK = 1 << 0, + /** no bucket lock, use one spinlock to protect the whole hash */ + CFS_HASH_NO_BKTLOCK = 1 << 1, + /** rwlock to protect bucket */ + CFS_HASH_RW_BKTLOCK = 1 << 2, + /** spinlock to protect bucket */ + CFS_HASH_SPIN_BKTLOCK = 1 << 3, + /** always add new item to tail */ + CFS_HASH_ADD_TAIL = 1 << 4, + /** hash-table doesn't have refcount on item */ + CFS_HASH_NO_ITEMREF = 1 << 5, + /** big name for param-tree */ + CFS_HASH_BIGNAME = 1 << 6, + /** track global count */ + CFS_HASH_COUNTER = 1 << 7, + /** rehash item by new key */ + CFS_HASH_REHASH_KEY = 1 << 8, + /** Enable dynamic hash resizing */ + CFS_HASH_REHASH = 1 << 9, + /** can shrink hash-size */ + CFS_HASH_SHRINK = 1 << 10, + /** assert hash is empty on exit */ + CFS_HASH_ASSERT_EMPTY = 1 << 11, + /** record hlist depth */ + CFS_HASH_DEPTH = 1 << 12, + /** + * rehash is always scheduled in a different thread, so current + * change on hash table is non-blocking + */ + CFS_HASH_NBLK_CHANGE = 1 << 13, + /** NB, hs_flags is typed as __u16; widen it before adding + * a flag that needs bit 16 or above */ +}; + +/** most used attributes */ +#define CFS_HASH_DEFAULT (CFS_HASH_RW_BKTLOCK | \ + CFS_HASH_COUNTER | CFS_HASH_REHASH) + +/** + * cfs_hash is a hash-table implementation for general purpose, it can support: + * . two refcount modes + * hash-table with & without refcount + * . four lock modes + * nolock, one-spinlock, rw-bucket-lock, spin-bucket-lock + * . general operations + * lookup, add(add_tail or add_head), delete + * . rehash + * grows or shrink + * . iteration + * locked iteration and unlocked iteration + * . bigname + * support long name hash + * . 
debug + * trace max searching depth + * + * Rehash: + * When the htable grows or shrinks, a separate task (cfs_hash_rehash_worker) + * is spawned to handle the rehash in the background. It's possible that other + * processes can concurrently perform additions, deletions, and lookups + * without being blocked on rehash completion, because rehash will release + * the global wrlock for each bucket. + * + * rehash and iteration can't run at the same time because it's too tricky + * to keep both of them safe and correct. + * As they are relatively rare operations: + * . if iteration is in progress while we try to launch rehash, then + * it just gives up; the iterator will launch rehash at the end. + * . if rehash is in progress while we try to iterate the hash table, + * then we just wait (it shouldn't take very long); anyway, nobody + * should expect iteration of whole hash-table to be non-blocking. + * + * During rehashing, a (key,object) pair may be in one of two buckets, + * depending on whether the worker task has yet to transfer the object + * to its new location in the table. Lookups and deletions need to search both + * locations; additions must take care to only insert into the new bucket. 
+ */ + +typedef struct cfs_hash { + /** serialize with rehash, or serialize all operations if + * the hash-table has CFS_HASH_NO_BKTLOCK */ + cfs_hash_lock_t hs_lock; + /** hash operations */ + struct cfs_hash_ops *hs_ops; + /** hash lock operations */ + struct cfs_hash_lock_ops *hs_lops; + /** hash list operations */ + struct cfs_hash_hlist_ops *hs_hops; + /** hash buckets-table */ + cfs_hash_bucket_t **hs_buckets; + /** total number of items on this hash-table */ + atomic_t hs_count; + /** hash flags, see cfs_hash_tag for detail */ + __u16 hs_flags; + /** # of extra-bytes for bucket, for user saving extended attributes */ + __u16 hs_extra_bytes; + /** wants to iterate */ + __u8 hs_iterating; + /** hash-table is dying */ + __u8 hs_exiting; + /** current hash bits */ + __u8 hs_cur_bits; + /** min hash bits */ + __u8 hs_min_bits; + /** max hash bits */ + __u8 hs_max_bits; + /** bits for rehash */ + __u8 hs_rehash_bits; + /** bits for each bucket */ + __u8 hs_bkt_bits; + /** resize min threshold */ + __u16 hs_min_theta; + /** resize max threshold */ + __u16 hs_max_theta; + /** resize count */ + __u32 hs_rehash_count; + /** # of iterators (caller of cfs_hash_for_each_*) */ + __u32 hs_iterators; + /** rehash workitem */ + cfs_workitem_t hs_rehash_wi; + /** refcount on this hash table */ + atomic_t hs_refcount; + /** rehash buckets-table */ + cfs_hash_bucket_t **hs_rehash_buckets; +#if CFS_HASH_DEBUG_LEVEL >= CFS_HASH_DEBUG_1 + /** serialize debug members */ + spinlock_t hs_dep_lock; + /** max depth */ + unsigned int hs_dep_max; + /** id of the deepest bucket */ + unsigned int hs_dep_bkt; + /** offset in the deepest bucket */ + unsigned int hs_dep_off; + /** bits when we found the max depth */ + unsigned int hs_dep_bits; + /** workitem to output max depth */ + cfs_workitem_t hs_dep_wi; +#endif + /** name of htable */ + char hs_name[0]; +} cfs_hash_t; + +typedef struct cfs_hash_lock_ops { + /** lock the hash table */ + void (*hs_lock)(cfs_hash_lock_t *lock, int 
exclusive); + /** unlock the hash table */ + void (*hs_unlock)(cfs_hash_lock_t *lock, int exclusive); + /** lock the hash bucket */ + void (*hs_bkt_lock)(cfs_hash_lock_t *lock, int exclusive); + /** unlock the hash bucket */ + void (*hs_bkt_unlock)(cfs_hash_lock_t *lock, int exclusive); +} cfs_hash_lock_ops_t; + +typedef struct cfs_hash_hlist_ops { + /** return hlist_head of hash-head of @bd */ + struct hlist_head *(*hop_hhead)(cfs_hash_t *hs, cfs_hash_bd_t *bd); + /** return hash-head size */ + int (*hop_hhead_size)(cfs_hash_t *hs); + /** add @hnode to hash-head of @bd */ + int (*hop_hnode_add)(cfs_hash_t *hs, + cfs_hash_bd_t *bd, struct hlist_node *hnode); + /** remove @hnode from hash-head of @bd */ + int (*hop_hnode_del)(cfs_hash_t *hs, + cfs_hash_bd_t *bd, struct hlist_node *hnode); +} cfs_hash_hlist_ops_t; + +typedef struct cfs_hash_ops { + /** return hashed value from @key */ + unsigned (*hs_hash)(cfs_hash_t *hs, const void *key, unsigned mask); + /** return key address of @hnode */ + void * (*hs_key)(struct hlist_node *hnode); + /** copy key from @hnode to @key */ + void (*hs_keycpy)(struct hlist_node *hnode, void *key); + /** + * compare @key with key of @hnode + * returns 1 on a match + */ + int (*hs_keycmp)(const void *key, struct hlist_node *hnode); + /** return object address of @hnode, i.e: container_of(...hnode) */ + void * (*hs_object)(struct hlist_node *hnode); + /** get refcount of item, always called with holding bucket-lock */ + void (*hs_get)(cfs_hash_t *hs, struct hlist_node *hnode); + /** release refcount of item */ + void (*hs_put)(cfs_hash_t *hs, struct hlist_node *hnode); + /** release refcount of item, always called with holding bucket-lock */ + void (*hs_put_locked)(cfs_hash_t *hs, struct hlist_node *hnode); + /** it's called before removing of @hnode */ + void (*hs_exit)(cfs_hash_t *hs, struct hlist_node *hnode); +} cfs_hash_ops_t; + +/** total number of buckets in @hs */ +#define CFS_HASH_NBKT(hs) \ + (1U << ((hs)->hs_cur_bits - 
(hs)->hs_bkt_bits)) + +/** total number of buckets in @hs while rehashing */ +#define CFS_HASH_RH_NBKT(hs) \ + (1U << ((hs)->hs_rehash_bits - (hs)->hs_bkt_bits)) + +/** number of hlist for in bucket */ +#define CFS_HASH_BKT_NHLIST(hs) (1U << (hs)->hs_bkt_bits) + +/** total number of hlist in @hs */ +#define CFS_HASH_NHLIST(hs) (1U << (hs)->hs_cur_bits) + +/** total number of hlist in @hs while rehashing */ +#define CFS_HASH_RH_NHLIST(hs) (1U << (hs)->hs_rehash_bits) + +static inline int +cfs_hash_with_no_lock(cfs_hash_t *hs) +{ + /* caller will serialize all operations for this hash-table */ + return (hs->hs_flags & CFS_HASH_NO_LOCK) != 0; +} + +static inline int +cfs_hash_with_no_bktlock(cfs_hash_t *hs) +{ + /* no bucket lock, one single lock to protect the hash-table */ + return (hs->hs_flags & CFS_HASH_NO_BKTLOCK) != 0; +} + +static inline int +cfs_hash_with_rw_bktlock(cfs_hash_t *hs) +{ + /* rwlock to protect hash bucket */ + return (hs->hs_flags & CFS_HASH_RW_BKTLOCK) != 0; +} + +static inline int +cfs_hash_with_spin_bktlock(cfs_hash_t *hs) +{ + /* spinlock to protect hash bucket */ + return (hs->hs_flags & CFS_HASH_SPIN_BKTLOCK) != 0; +} + +static inline int +cfs_hash_with_add_tail(cfs_hash_t *hs) +{ + return (hs->hs_flags & CFS_HASH_ADD_TAIL) != 0; +} + +static inline int +cfs_hash_with_no_itemref(cfs_hash_t *hs) +{ + /* hash-table doesn't keep refcount on item, + * item can't be removed from hash unless it's + * ZERO refcount */ + return (hs->hs_flags & CFS_HASH_NO_ITEMREF) != 0; +} + +static inline int +cfs_hash_with_bigname(cfs_hash_t *hs) +{ + return (hs->hs_flags & CFS_HASH_BIGNAME) != 0; +} + +static inline int +cfs_hash_with_counter(cfs_hash_t *hs) +{ + return (hs->hs_flags & CFS_HASH_COUNTER) != 0; +} + +static inline int +cfs_hash_with_rehash(cfs_hash_t *hs) +{ + return (hs->hs_flags & CFS_HASH_REHASH) != 0; +} + +static inline int +cfs_hash_with_rehash_key(cfs_hash_t *hs) +{ + return (hs->hs_flags & CFS_HASH_REHASH_KEY) != 0; +} + +static inline 
int +cfs_hash_with_shrink(cfs_hash_t *hs) +{ + return (hs->hs_flags & CFS_HASH_SHRINK) != 0; +} + +static inline int +cfs_hash_with_assert_empty(cfs_hash_t *hs) +{ + return (hs->hs_flags & CFS_HASH_ASSERT_EMPTY) != 0; +} + +static inline int +cfs_hash_with_depth(cfs_hash_t *hs) +{ + return (hs->hs_flags & CFS_HASH_DEPTH) != 0; +} + +static inline int +cfs_hash_with_nblk_change(cfs_hash_t *hs) +{ + return (hs->hs_flags & CFS_HASH_NBLK_CHANGE) != 0; +} + +static inline int +cfs_hash_is_exiting(cfs_hash_t *hs) +{ /* cfs_hash_destroy is called */ + return hs->hs_exiting; +} + +static inline int +cfs_hash_is_rehashing(cfs_hash_t *hs) +{ /* rehash is launched */ + return hs->hs_rehash_bits != 0; +} + +static inline int +cfs_hash_is_iterating(cfs_hash_t *hs) +{ /* someone is calling cfs_hash_for_each_* */ + return hs->hs_iterating || hs->hs_iterators != 0; +} + +/** + * Size in bytes of one bucket: the header up to hsb_head[0], one hash-head + * per hlist in the bucket, plus the hs_extra_bytes requested for the table. + */ +static inline int +cfs_hash_bkt_size(cfs_hash_t *hs) +{ + return offsetof(cfs_hash_bucket_t, hsb_head[0]) + + hs->hs_hops->hop_hhead_size(hs) * CFS_HASH_BKT_NHLIST(hs) + + hs->hs_extra_bytes; +} + +/** expands to the hs_<op> member of the cfs_hash_ops table of @hs */ +#define CFS_HOP(hs, op) ((hs)->hs_ops->hs_ ## op) + +static inline unsigned +cfs_hash_id(cfs_hash_t *hs, const void *key, unsigned mask) +{ + return CFS_HOP(hs, hash)(hs, key, mask); +} + +static inline void * +cfs_hash_key(cfs_hash_t *hs, struct hlist_node *hnode) +{ + return CFS_HOP(hs, key)(hnode); +} + +/** copy the key of @hnode into @key; a no-op when hs_keycpy is not set */ +static inline void +cfs_hash_keycpy(cfs_hash_t *hs, struct hlist_node *hnode, void *key) +{ + if (CFS_HOP(hs, keycpy) != NULL) + CFS_HOP(hs, keycpy)(hnode, key); +} + +/** + * Returns 1 on a match. + */ +static inline int +cfs_hash_keycmp(cfs_hash_t *hs, const void *key, struct hlist_node *hnode) +{ + return CFS_HOP(hs, keycmp)(key, hnode); +} + +static inline void * +cfs_hash_object(cfs_hash_t *hs, struct hlist_node *hnode) +{ + return CFS_HOP(hs, object)(hnode); +} + +/** invoke the hs_get operation on @hnode; hs_get returns void, so there is + * no value to return here */ +static inline void +cfs_hash_get(cfs_hash_t *hs, struct hlist_node *hnode) +{ + CFS_HOP(hs, get)(hs, hnode); +} + +static inline void 
+cfs_hash_put_locked(cfs_hash_t *hs, struct hlist_node *hnode) +{ + LASSERT(CFS_HOP(hs, put_locked) != NULL); + + /* hs_put_locked returns void: call it, do not "return" its result */ + CFS_HOP(hs, put_locked)(hs, hnode); +} + +static inline void +cfs_hash_put(cfs_hash_t *hs, struct hlist_node *hnode) +{ + LASSERT(CFS_HOP(hs, put) != NULL); + + /* hs_put returns void: call it, do not "return" its result */ + CFS_HOP(hs, put)(hs, hnode); +} + +/** invoke the hs_exit operation on @hnode; a no-op when hs_exit is not set */ +static inline void +cfs_hash_exit(cfs_hash_t *hs, struct hlist_node *hnode) +{ + if (CFS_HOP(hs, exit) != NULL) + CFS_HOP(hs, exit)(hs, hnode); +} + +static inline void cfs_hash_lock(cfs_hash_t *hs, int excl) +{ + hs->hs_lops->hs_lock(&hs->hs_lock, excl); +} + +static inline void cfs_hash_unlock(cfs_hash_t *hs, int excl) +{ + hs->hs_lops->hs_unlock(&hs->hs_lock, excl); +} + +static inline int cfs_hash_dec_and_lock(cfs_hash_t *hs, + atomic_t *condition) +{ + LASSERT(cfs_hash_with_no_bktlock(hs)); + return atomic_dec_and_lock(condition, &hs->hs_lock.spin); +} + +static inline void cfs_hash_bd_lock(cfs_hash_t *hs, + cfs_hash_bd_t *bd, int excl) +{ + hs->hs_lops->hs_bkt_lock(&bd->bd_bucket->hsb_lock, excl); +} + +static inline void cfs_hash_bd_unlock(cfs_hash_t *hs, + cfs_hash_bd_t *bd, int excl) +{ + hs->hs_lops->hs_bkt_unlock(&bd->bd_bucket->hsb_lock, excl); +} + +/** + * operations on cfs_hash bucket (bd: bucket descriptor), + * they are normally for hash-table without rehash + */ +void cfs_hash_bd_get(cfs_hash_t *hs, const void *key, cfs_hash_bd_t *bd); + +static inline void cfs_hash_bd_get_and_lock(cfs_hash_t *hs, const void *key, + cfs_hash_bd_t *bd, int excl) +{ + cfs_hash_bd_get(hs, key, bd); + cfs_hash_bd_lock(hs, bd, excl); +} + +/** combine bucket index (high bits) and in-bucket offset (low hs_bkt_bits) */ +static inline unsigned cfs_hash_bd_index_get(cfs_hash_t *hs, cfs_hash_bd_t *bd) +{ + return bd->bd_offset | (bd->bd_bucket->hsb_index << hs->hs_bkt_bits); +} + +/** inverse of cfs_hash_bd_index_get(): split @index into bucket and offset */ +static inline void cfs_hash_bd_index_set(cfs_hash_t *hs, + unsigned index, cfs_hash_bd_t *bd) +{ + bd->bd_bucket = hs->hs_buckets[index >> hs->hs_bkt_bits]; + bd->bd_offset = index & (CFS_HASH_BKT_NHLIST(hs) - 1U); +} + +static inline void * 
+cfs_hash_bd_extra_get(cfs_hash_t *hs, cfs_hash_bd_t *bd) +{ + return (void *)bd->bd_bucket + + cfs_hash_bkt_size(hs) - hs->hs_extra_bytes; +} + +static inline __u32 +cfs_hash_bd_version_get(cfs_hash_bd_t *bd) +{ + /* need hold cfs_hash_bd_lock */ + return bd->bd_bucket->hsb_version; +} + +static inline __u32 +cfs_hash_bd_count_get(cfs_hash_bd_t *bd) +{ + /* need hold cfs_hash_bd_lock */ + return bd->bd_bucket->hsb_count; +} + +static inline int +cfs_hash_bd_depmax_get(cfs_hash_bd_t *bd) +{ + return bd->bd_bucket->hsb_depmax; +} + +static inline int +cfs_hash_bd_compare(cfs_hash_bd_t *bd1, cfs_hash_bd_t *bd2) +{ + if (bd1->bd_bucket->hsb_index != bd2->bd_bucket->hsb_index) + return bd1->bd_bucket->hsb_index - bd2->bd_bucket->hsb_index; + + if (bd1->bd_offset != bd2->bd_offset) + return bd1->bd_offset - bd2->bd_offset; + + return 0; +} + +void cfs_hash_bd_add_locked(cfs_hash_t *hs, cfs_hash_bd_t *bd, + struct hlist_node *hnode); +void cfs_hash_bd_del_locked(cfs_hash_t *hs, cfs_hash_bd_t *bd, + struct hlist_node *hnode); +void cfs_hash_bd_move_locked(cfs_hash_t *hs, cfs_hash_bd_t *bd_old, + cfs_hash_bd_t *bd_new, struct hlist_node *hnode); + +static inline int cfs_hash_bd_dec_and_lock(cfs_hash_t *hs, cfs_hash_bd_t *bd, + atomic_t *condition) +{ + LASSERT(cfs_hash_with_spin_bktlock(hs)); + return atomic_dec_and_lock(condition, + &bd->bd_bucket->hsb_lock.spin); +} + +static inline struct hlist_head *cfs_hash_bd_hhead(cfs_hash_t *hs, + cfs_hash_bd_t *bd) +{ + return hs->hs_hops->hop_hhead(hs, bd); +} + +struct hlist_node *cfs_hash_bd_lookup_locked(cfs_hash_t *hs, + cfs_hash_bd_t *bd, const void *key); +struct hlist_node *cfs_hash_bd_peek_locked(cfs_hash_t *hs, + cfs_hash_bd_t *bd, const void *key); +struct hlist_node *cfs_hash_bd_findadd_locked(cfs_hash_t *hs, + cfs_hash_bd_t *bd, const void *key, + struct hlist_node *hnode, + int insist_add); +struct hlist_node *cfs_hash_bd_finddel_locked(cfs_hash_t *hs, + cfs_hash_bd_t *bd, const void *key, + struct hlist_node 
*hnode); + +/** + * operations on cfs_hash bucket (bd: bucket descriptor), + * they are safe for hash-table with rehash + */ +void cfs_hash_dual_bd_get(cfs_hash_t *hs, const void *key, cfs_hash_bd_t *bds); +void cfs_hash_dual_bd_lock(cfs_hash_t *hs, cfs_hash_bd_t *bds, int excl); +void cfs_hash_dual_bd_unlock(cfs_hash_t *hs, cfs_hash_bd_t *bds, int excl); + +static inline void cfs_hash_dual_bd_get_and_lock(cfs_hash_t *hs, const void *key, + cfs_hash_bd_t *bds, int excl) +{ + cfs_hash_dual_bd_get(hs, key, bds); + cfs_hash_dual_bd_lock(hs, bds, excl); +} + +struct hlist_node *cfs_hash_dual_bd_lookup_locked(cfs_hash_t *hs, + cfs_hash_bd_t *bds, + const void *key); +struct hlist_node *cfs_hash_dual_bd_findadd_locked(cfs_hash_t *hs, + cfs_hash_bd_t *bds, + const void *key, + struct hlist_node *hnode, + int insist_add); +struct hlist_node *cfs_hash_dual_bd_finddel_locked(cfs_hash_t *hs, + cfs_hash_bd_t *bds, + const void *key, + struct hlist_node *hnode); + +/* Hash init/cleanup functions */ +cfs_hash_t *cfs_hash_create(char *name, unsigned cur_bits, unsigned max_bits, + unsigned bkt_bits, unsigned extra_bytes, + unsigned min_theta, unsigned max_theta, + cfs_hash_ops_t *ops, unsigned flags); + +cfs_hash_t *cfs_hash_getref(cfs_hash_t *hs); +void cfs_hash_putref(cfs_hash_t *hs); + +/* Hash addition functions */ +void cfs_hash_add(cfs_hash_t *hs, const void *key, + struct hlist_node *hnode); +int cfs_hash_add_unique(cfs_hash_t *hs, const void *key, + struct hlist_node *hnode); +void *cfs_hash_findadd_unique(cfs_hash_t *hs, const void *key, + struct hlist_node *hnode); + +/* Hash deletion functions */ +void *cfs_hash_del(cfs_hash_t *hs, const void *key, struct hlist_node *hnode); +void *cfs_hash_del_key(cfs_hash_t *hs, const void *key); + +/* Hash lookup/for_each functions */ +#define CFS_HASH_LOOP_HOG 1024 + +typedef int (*cfs_hash_for_each_cb_t)(cfs_hash_t *hs, cfs_hash_bd_t *bd, + struct hlist_node *node, void *data); +void *cfs_hash_lookup(cfs_hash_t *hs, const void 
*key); +void cfs_hash_for_each(cfs_hash_t *hs, cfs_hash_for_each_cb_t, void *data); +void cfs_hash_for_each_safe(cfs_hash_t *hs, cfs_hash_for_each_cb_t, void *data); +int cfs_hash_for_each_nolock(cfs_hash_t *hs, + cfs_hash_for_each_cb_t, void *data); +int cfs_hash_for_each_empty(cfs_hash_t *hs, + cfs_hash_for_each_cb_t, void *data); +void cfs_hash_for_each_key(cfs_hash_t *hs, const void *key, + cfs_hash_for_each_cb_t, void *data); +typedef int (*cfs_hash_cond_opt_cb_t)(void *obj, void *data); +void cfs_hash_cond_del(cfs_hash_t *hs, cfs_hash_cond_opt_cb_t, void *data); + +void cfs_hash_hlist_for_each(cfs_hash_t *hs, unsigned hindex, + cfs_hash_for_each_cb_t, void *data); +int cfs_hash_is_empty(cfs_hash_t *hs); +__u64 cfs_hash_size_get(cfs_hash_t *hs); + +/* + * Rehash - Theta is calculated to be the average chained + * hash depth assuming a perfectly uniform hash function. + */ +void cfs_hash_rehash_cancel_locked(cfs_hash_t *hs); +void cfs_hash_rehash_cancel(cfs_hash_t *hs); +int cfs_hash_rehash(cfs_hash_t *hs, int do_rehash); +void cfs_hash_rehash_key(cfs_hash_t *hs, const void *old_key, + void *new_key, struct hlist_node *hnode); + +#if CFS_HASH_DEBUG_LEVEL > CFS_HASH_DEBUG_1 +/* Validate hnode references the correct key */ +static inline void +cfs_hash_key_validate(cfs_hash_t *hs, const void *key, + struct hlist_node *hnode) +{ + LASSERT(cfs_hash_keycmp(hs, key, hnode)); +} + +/* Validate hnode is in the correct bucket */ +static inline void +cfs_hash_bucket_validate(cfs_hash_t *hs, cfs_hash_bd_t *bd, + struct hlist_node *hnode) +{ + cfs_hash_bd_t bds[2]; + + cfs_hash_dual_bd_get(hs, cfs_hash_key(hs, hnode), bds); + LASSERT(bds[0].bd_bucket == bd->bd_bucket || + bds[1].bd_bucket == bd->bd_bucket); +} + +#else /* CFS_HASH_DEBUG_LEVEL > CFS_HASH_DEBUG_1 */ + +static inline void +cfs_hash_key_validate(cfs_hash_t *hs, const void *key, + struct hlist_node *hnode) {} + +static inline void +cfs_hash_bucket_validate(cfs_hash_t *hs, cfs_hash_bd_t *bd, + struct hlist_node
*hnode) {} + +#endif /* CFS_HASH_DEBUG_LEVEL */ + +#define CFS_HASH_THETA_BITS 10 +#define CFS_HASH_MIN_THETA (1U << (CFS_HASH_THETA_BITS - 1)) +#define CFS_HASH_MAX_THETA (1U << (CFS_HASH_THETA_BITS + 1)) + +/* Return integer component of theta */ +static inline int __cfs_hash_theta_int(int theta) +{ + return (theta >> CFS_HASH_THETA_BITS); +} + +/* Return a fractional value between 0 and 999 */ +static inline int __cfs_hash_theta_frac(int theta) +{ + return ((theta * 1000) >> CFS_HASH_THETA_BITS) - + (__cfs_hash_theta_int(theta) * 1000); +} + +static inline int __cfs_hash_theta(cfs_hash_t *hs) +{ + return (atomic_read(&hs->hs_count) << + CFS_HASH_THETA_BITS) >> hs->hs_cur_bits; +} + +static inline void __cfs_hash_set_theta(cfs_hash_t *hs, int min, int max) +{ + LASSERT(min < max); + hs->hs_min_theta = (__u16)min; + hs->hs_max_theta = (__u16)max; +} + +/* Generic debug formatting routines mainly for proc handler */ +struct seq_file; +int cfs_hash_debug_header(struct seq_file *m); +int cfs_hash_debug_str(cfs_hash_t *hs, struct seq_file *m); + +/* + * Generic djb2 hash algorithm for character arrays. + */ +static inline unsigned +cfs_hash_djb2_hash(const void *key, size_t size, unsigned mask) +{ + unsigned i, hash = 5381; + + LASSERT(key != NULL); + + for (i = 0; i < size; i++) + hash = hash * 33 + ((char *)key)[i]; + + return (hash & mask); +} + +/* + * Generic u32 hash algorithm. + */ +static inline unsigned +cfs_hash_u32_hash(const __u32 key, unsigned mask) +{ + return ((key * CFS_GOLDEN_RATIO_PRIME_32) & mask); +} + +/* + * Generic u64 hash algorithm. 
+ */ +static inline unsigned +cfs_hash_u64_hash(const __u64 key, unsigned mask) +{ + return ((unsigned)(key * CFS_GOLDEN_RATIO_PRIME_64) & mask); +} + +/** iterate over all buckets in @bds (array of cfs_hash_bd_t) */ +#define cfs_hash_for_each_bd(bds, n, i) \ + for (i = 0; i < n && (bds)[i].bd_bucket != NULL; i++) + +/** iterate over all buckets of @hs */ +#define cfs_hash_for_each_bucket(hs, bd, pos) \ + for (pos = 0; \ + pos < CFS_HASH_NBKT(hs) && \ + ((bd)->bd_bucket = (hs)->hs_buckets[pos]) != NULL; pos++) + +/** iterate over all hlist of bucket @bd */ +#define cfs_hash_bd_for_each_hlist(hs, bd, hlist) \ + for ((bd)->bd_offset = 0; \ + (bd)->bd_offset < CFS_HASH_BKT_NHLIST(hs) && \ + (hlist = cfs_hash_bd_hhead(hs, bd)) != NULL; \ + (bd)->bd_offset++) + +/* !__LIBCFS__HASH_H__ */ +#endif diff --git a/drivers/staging/lustre/include/linux/libcfs/libcfs_heap.h b/drivers/staging/lustre/include/linux/libcfs/libcfs_heap.h new file mode 100644 index 000000000000..bfa6d7b245ea --- /dev/null +++ b/drivers/staging/lustre/include/linux/libcfs/libcfs_heap.h @@ -0,0 +1,200 @@ +/* + * GPL HEADER START + * + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 only, + * as published by the Free Software Foundation. + + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License version 2 for more details. A copy is + * included in the COPYING file that accompanied this code. 
+ + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + * + * GPL HEADER END + */ +/* + * Copyright (c) 2011 Intel Corporation + */ +/* + * libcfs/include/libcfs/heap.h + * + * Author: Eric Barton <eeb@whamcloud.com> + * Liang Zhen <liang@whamcloud.com> + */ + +#ifndef __LIBCFS_HEAP_H__ +#define __LIBCFS_HEAP_H__ + +/** \defgroup heap Binary heap + * + * The binary heap is a scalable data structure created using a binary tree. It + * is capable of maintaining large sets of elements sorted usually by one or + * more element properties, but really based on anything that can be used as a + * binary predicate in order to determine the relevant ordering of any two nodes + * that belong to the set. There is no search operation, rather the intention is + * for the element of the lowest priority which will always be at the root of + * the tree (as this is an implementation of a min-heap) to be removed by users + * for consumption. + * + * Users of the heap should embed a \e cfs_binheap_node_t object instance on + * every object of the set that they wish the binary heap instance to handle, + * and (at a minimum) provide a cfs_binheap_ops_t::hop_compare() implementation + * which is used by the heap as the binary predicate during its internal sorting + * operations. + * + * The current implementation enforces no locking scheme, and so assumes the + * user caters for locking between calls to insert, delete and lookup + * operations. Since the only consumer for the data structure at this point + * are NRS policies, and these operate on a per-CPT basis, binary heap instances + * are tied to a specific CPT. + * @{ + */ + +/** + * Binary heap node. + * + * Objects of this type are embedded into objects of the ordered set that is to + * be maintained by a \e cfs_binheap_t instance. 
+ */ +typedef struct { + /** Index into the binary tree */ + unsigned int chn_index; +} cfs_binheap_node_t; + +#define CBH_SHIFT 9 +#define CBH_SIZE (1 << CBH_SHIFT) /* # ptrs per level */ +#define CBH_MASK (CBH_SIZE - 1) +#define CBH_NOB (CBH_SIZE * sizeof(cfs_binheap_node_t *)) + +#define CBH_POISON 0xdeadbeef + +/** + * Binary heap flags. + */ +enum { + CBH_FLAG_ATOMIC_GROW = 1, +}; + +struct cfs_binheap; + +/** + * Binary heap operations. + */ +typedef struct { + /** + * Called right before inserting a node into the binary heap. + * + * Implementing this operation is optional. + * + * \param[in] h The heap + * \param[in] e The node + * + * \retval 0 success + * \retval != 0 error + */ + int (*hop_enter)(struct cfs_binheap *h, + cfs_binheap_node_t *e); + /** + * Called right after removing a node from the binary heap. + * + * Implementing this operation is optional. + * + * \param[in] h The heap + * \param[in] e The node + */ + void (*hop_exit)(struct cfs_binheap *h, + cfs_binheap_node_t *e); + /** + * A binary predicate which is called during internal heap sorting + * operations, and used in order to determine the relevant ordering of + * two heap nodes. + * + * Implementing this operation is mandatory. + * + * \param[in] a The first heap node + * \param[in] b The second heap node + * + * \retval 0 Node a > node b + * \retval 1 Node a < node b + * + * \see cfs_binheap_bubble() + * \see cfs_binheap_sink() + */ + int (*hop_compare)(cfs_binheap_node_t *a, + cfs_binheap_node_t *b); +} cfs_binheap_ops_t; + +/** + * Binary heap object.
+ * + * Sorts elements of type \e cfs_binheap_node_t + */ +typedef struct cfs_binheap { + /** Triple indirect */ + cfs_binheap_node_t ****cbh_elements3; + /** double indirect */ + cfs_binheap_node_t ***cbh_elements2; + /** single indirect */ + cfs_binheap_node_t **cbh_elements1; + /** # elements referenced */ + unsigned int cbh_nelements; + /** high water mark */ + unsigned int cbh_hwm; + /** user flags */ + unsigned int cbh_flags; + /** operations table */ + cfs_binheap_ops_t *cbh_ops; + /** private data */ + void *cbh_private; + /** associated CPT table */ + struct cfs_cpt_table *cbh_cptab; + /** associated CPT id of this cfs_binheap_t::cbh_cptab */ + int cbh_cptid; +} cfs_binheap_t; + +void cfs_binheap_destroy(cfs_binheap_t *h); +cfs_binheap_t *cfs_binheap_create(cfs_binheap_ops_t *ops, unsigned int flags, + unsigned count, void *arg, + struct cfs_cpt_table *cptab, int cptid); +cfs_binheap_node_t *cfs_binheap_find(cfs_binheap_t *h, unsigned int idx); +int cfs_binheap_insert(cfs_binheap_t *h, cfs_binheap_node_t *e); +void cfs_binheap_remove(cfs_binheap_t *h, cfs_binheap_node_t *e); + +static inline int +cfs_binheap_size(cfs_binheap_t *h) +{ + return h->cbh_nelements; +} + +static inline int +cfs_binheap_is_empty(cfs_binheap_t *h) +{ + return h->cbh_nelements == 0; +} + +static inline cfs_binheap_node_t * +cfs_binheap_root(cfs_binheap_t *h) +{ + return cfs_binheap_find(h, 0); +} + +static inline cfs_binheap_node_t * +cfs_binheap_remove_root(cfs_binheap_t *h) +{ + cfs_binheap_node_t *e = cfs_binheap_find(h, 0); + + if (e != NULL) + cfs_binheap_remove(h, e); + return e; +} + +/** @} heap */ + +#endif /* __LIBCFS_HEAP_H__ */ diff --git a/drivers/staging/lustre/include/linux/libcfs/libcfs_ioctl.h b/drivers/staging/lustre/include/linux/libcfs/libcfs_ioctl.h new file mode 100644 index 000000000000..5be367973508 --- /dev/null +++ b/drivers/staging/lustre/include/linux/libcfs/libcfs_ioctl.h @@ -0,0 +1,222 @@ +/* + * GPL HEADER START + * + * DO NOT ALTER OR REMOVE 
COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 only, + * as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License version 2 for more details (a copy is included + * in the LICENSE file that accompanied this code). + * + * You should have received a copy of the GNU General Public License + * version 2 along with this program; If not, see + * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf + * + * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara, + * CA 95054 USA or visit www.sun.com if you need additional information or + * have any questions. + * + * GPL HEADER END + */ +/* + * Copyright (c) 2008, 2010, Oracle and/or its affiliates. All rights reserved. + * Use is subject to license terms. + */ +/* + * This file is part of Lustre, http://www.lustre.org/ + * Lustre is a trademark of Sun Microsystems, Inc. + * + * libcfs/include/libcfs/libcfs_ioctl.h + * + * Low-level ioctl data structures. Kernel ioctl functions declared here, + * and user space functions are in libcfsutil_ioctl.h. 
+ * + */ + +#ifndef __LIBCFS_IOCTL_H__ +#define __LIBCFS_IOCTL_H__ + + +#define LIBCFS_IOCTL_VERSION 0x0001000a + +struct libcfs_ioctl_data { + __u32 ioc_len; + __u32 ioc_version; + + __u64 ioc_nid; + __u64 ioc_u64[1]; + + __u32 ioc_flags; + __u32 ioc_count; + __u32 ioc_net; + __u32 ioc_u32[7]; + + __u32 ioc_inllen1; + char *ioc_inlbuf1; + __u32 ioc_inllen2; + char *ioc_inlbuf2; + + __u32 ioc_plen1; /* buffers in userspace */ + char *ioc_pbuf1; + __u32 ioc_plen2; /* buffers in userspace */ + char *ioc_pbuf2; + + char ioc_bulk[0]; +}; + + +struct libcfs_ioctl_hdr { + __u32 ioc_len; + __u32 ioc_version; +}; + +struct libcfs_debug_ioctl_data +{ + struct libcfs_ioctl_hdr hdr; + unsigned int subs; + unsigned int debug; +}; + +#define LIBCFS_IOC_INIT(data) \ +do { \ + memset(&data, 0, sizeof(data)); \ + data.ioc_version = LIBCFS_IOCTL_VERSION; \ + data.ioc_len = sizeof(data); \ +} while (0) + + +struct libcfs_ioctl_handler { + struct list_head item; + int (*handle_ioctl)(unsigned int cmd, struct libcfs_ioctl_data *data); +}; + +#define DECLARE_IOCTL_HANDLER(ident, func) \ + struct libcfs_ioctl_handler ident = { \ + /* .item = */ LIST_HEAD_INIT(ident.item), \ + /* .handle_ioctl = */ func \ + } + + +/* FIXME check conflict with lustre_lib.h */ +#define LIBCFS_IOC_DEBUG_MASK _IOWR('f', 250, long) + + +/* ioctls for manipulating snapshots 30- */ +#define IOC_LIBCFS_TYPE 'e' +#define IOC_LIBCFS_MIN_NR 30 +/* libcfs ioctls */ +#define IOC_LIBCFS_PANIC _IOWR('e', 30, IOCTL_LIBCFS_TYPE) +#define IOC_LIBCFS_CLEAR_DEBUG _IOWR('e', 31, IOCTL_LIBCFS_TYPE) +#define IOC_LIBCFS_MARK_DEBUG _IOWR('e', 32, IOCTL_LIBCFS_TYPE) +#define IOC_LIBCFS_LWT_CONTROL _IOWR('e', 33, IOCTL_LIBCFS_TYPE) +#define IOC_LIBCFS_LWT_SNAPSHOT _IOWR('e', 34, IOCTL_LIBCFS_TYPE) +#define IOC_LIBCFS_LWT_LOOKUP_STRING _IOWR('e', 35, IOCTL_LIBCFS_TYPE) +#define IOC_LIBCFS_MEMHOG _IOWR('e', 36, IOCTL_LIBCFS_TYPE) +#define IOC_LIBCFS_PING_TEST _IOWR('e', 37, IOCTL_LIBCFS_TYPE) +/* lnet ioctls */ +#define 
IOC_LIBCFS_GET_NI _IOWR('e', 50, IOCTL_LIBCFS_TYPE) +#define IOC_LIBCFS_FAIL_NID _IOWR('e', 51, IOCTL_LIBCFS_TYPE) +#define IOC_LIBCFS_ADD_ROUTE _IOWR('e', 52, IOCTL_LIBCFS_TYPE) +#define IOC_LIBCFS_DEL_ROUTE _IOWR('e', 53, IOCTL_LIBCFS_TYPE) +#define IOC_LIBCFS_GET_ROUTE _IOWR('e', 54, IOCTL_LIBCFS_TYPE) +#define IOC_LIBCFS_NOTIFY_ROUTER _IOWR('e', 55, IOCTL_LIBCFS_TYPE) +#define IOC_LIBCFS_UNCONFIGURE _IOWR('e', 56, IOCTL_LIBCFS_TYPE) +#define IOC_LIBCFS_PORTALS_COMPATIBILITY _IOWR('e', 57, IOCTL_LIBCFS_TYPE) +#define IOC_LIBCFS_LNET_DIST _IOWR('e', 58, IOCTL_LIBCFS_TYPE) +#define IOC_LIBCFS_CONFIGURE _IOWR('e', 59, IOCTL_LIBCFS_TYPE) +#define IOC_LIBCFS_TESTPROTOCOMPAT _IOWR('e', 60, IOCTL_LIBCFS_TYPE) +#define IOC_LIBCFS_PING _IOWR('e', 61, IOCTL_LIBCFS_TYPE) +#define IOC_LIBCFS_DEBUG_PEER _IOWR('e', 62, IOCTL_LIBCFS_TYPE) +#define IOC_LIBCFS_LNETST _IOWR('e', 63, IOCTL_LIBCFS_TYPE) +/* lnd ioctls */ +#define IOC_LIBCFS_REGISTER_MYNID _IOWR('e', 70, IOCTL_LIBCFS_TYPE) +#define IOC_LIBCFS_CLOSE_CONNECTION _IOWR('e', 71, IOCTL_LIBCFS_TYPE) +#define IOC_LIBCFS_PUSH_CONNECTION _IOWR('e', 72, IOCTL_LIBCFS_TYPE) +#define IOC_LIBCFS_GET_CONN _IOWR('e', 73, IOCTL_LIBCFS_TYPE) +#define IOC_LIBCFS_DEL_PEER _IOWR('e', 74, IOCTL_LIBCFS_TYPE) +#define IOC_LIBCFS_ADD_PEER _IOWR('e', 75, IOCTL_LIBCFS_TYPE) +#define IOC_LIBCFS_GET_PEER _IOWR('e', 76, IOCTL_LIBCFS_TYPE) +#define IOC_LIBCFS_GET_TXDESC _IOWR('e', 77, IOCTL_LIBCFS_TYPE) +#define IOC_LIBCFS_ADD_INTERFACE _IOWR('e', 78, IOCTL_LIBCFS_TYPE) +#define IOC_LIBCFS_DEL_INTERFACE _IOWR('e', 79, IOCTL_LIBCFS_TYPE) +#define IOC_LIBCFS_GET_INTERFACE _IOWR('e', 80, IOCTL_LIBCFS_TYPE) + +#define IOC_LIBCFS_MAX_NR 80 + +static inline int libcfs_ioctl_packlen(struct libcfs_ioctl_data *data) +{ + int len = sizeof(*data); + len += cfs_size_round(data->ioc_inllen1); + len += cfs_size_round(data->ioc_inllen2); + return len; +} + +static inline int libcfs_ioctl_is_invalid(struct libcfs_ioctl_data *data) +{ + if (data->ioc_len > 
(1<<30)) { + CERROR ("LIBCFS ioctl: ioc_len larger than 1<<30\n"); + return 1; + } + if (data->ioc_inllen1 > (1<<30)) { + CERROR ("LIBCFS ioctl: ioc_inllen1 larger than 1<<30\n"); + return 1; + } + if (data->ioc_inllen2 > (1<<30)) { + CERROR ("LIBCFS ioctl: ioc_inllen2 larger than 1<<30\n"); + return 1; + } + if (data->ioc_inlbuf1 && !data->ioc_inllen1) { + CERROR ("LIBCFS ioctl: inlbuf1 pointer but 0 length\n"); + return 1; + } + if (data->ioc_inlbuf2 && !data->ioc_inllen2) { + CERROR ("LIBCFS ioctl: inlbuf2 pointer but 0 length\n"); + return 1; + } + if (data->ioc_pbuf1 && !data->ioc_plen1) { + CERROR ("LIBCFS ioctl: pbuf1 pointer but 0 length\n"); + return 1; + } + if (data->ioc_pbuf2 && !data->ioc_plen2) { + CERROR ("LIBCFS ioctl: pbuf2 pointer but 0 length\n"); + return 1; + } + if (data->ioc_plen1 && !data->ioc_pbuf1) { + CERROR ("LIBCFS ioctl: plen1 nonzero but no pbuf1 pointer\n"); + return 1; + } + if (data->ioc_plen2 && !data->ioc_pbuf2) { + CERROR ("LIBCFS ioctl: plen2 nonzero but no pbuf2 pointer\n"); + return 1; + } + if ((__u32)libcfs_ioctl_packlen(data) != data->ioc_len ) { + CERROR ("LIBCFS ioctl: packlen != ioc_len\n"); + return 1; + } + if (data->ioc_inllen1 && + data->ioc_bulk[data->ioc_inllen1 - 1] != '\0') { + CERROR ("LIBCFS ioctl: inlbuf1 not 0 terminated\n"); + return 1; + } + if (data->ioc_inllen2 && + data->ioc_bulk[cfs_size_round(data->ioc_inllen1) + + data->ioc_inllen2 - 1] != '\0') { + CERROR ("LIBCFS ioctl: inlbuf2 not 0 terminated\n"); + return 1; + } + return 0; +} + + +extern int libcfs_register_ioctl(struct libcfs_ioctl_handler *hand); +extern int libcfs_deregister_ioctl(struct libcfs_ioctl_handler *hand); +extern int libcfs_ioctl_getdata(char *buf, char *end, void *arg); +extern int libcfs_ioctl_popdata(void *arg, void *buf, int size); + + +#endif /* __LIBCFS_IOCTL_H__ */ diff --git a/drivers/staging/lustre/include/linux/libcfs/libcfs_kernelcomm.h b/drivers/staging/lustre/include/linux/libcfs/libcfs_kernelcomm.h new file mode 
100644 index 000000000000..596a15fc8996 --- /dev/null +++ b/drivers/staging/lustre/include/linux/libcfs/libcfs_kernelcomm.h @@ -0,0 +1,117 @@ +/* + * GPL HEADER START + * + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 only, + * as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License version 2 for more details (a copy is included + * in the LICENSE file that accompanied this code). + * + * You should have received a copy of the GNU General Public License + * version 2 along with this program; If not, see + * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf + * + * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara, + * CA 95054 USA or visit www.sun.com if you need additional information or + * have any questions. + * + * GPL HEADER END + */ +/* + * Copyright (c) 2009, 2010, Oracle and/or its affiliates. All rights reserved. + * Use is subject to license terms. + */ +/* + * This file is part of Lustre, http://www.lustre.org/ + * Lustre is a trademark of Sun Microsystems, Inc. + * + * Author: Nathan Rutman <nathan.rutman@sun.com> + * + * libcfs/include/libcfs/libcfs_kernelcomm.h + * + * Kernel <-> userspace communication routines. + * The definitions below are used in the kernel and userspace. + * + */ + +#ifndef __LIBCFS_KERNELCOMM_H__ +#define __LIBCFS_KERNELCOMM_H__ + +#ifndef __LIBCFS_LIBCFS_H__ +#error Do not #include this file directly. #include <linux/libcfs/libcfs.h> instead +#endif + + +/* KUC message header. + * All current and future KUC messages should use this header. + * To avoid having to include Lustre headers from libcfs, define this here. 
+ */ +struct kuc_hdr { + __u16 kuc_magic; + __u8 kuc_transport; /* Each new Lustre feature should use a different + transport */ + __u8 kuc_flags; + __u16 kuc_msgtype; /* Message type or opcode, transport-specific */ + __u16 kuc_msglen; /* Including header */ +} __attribute__((aligned(sizeof(__u64)))); + +#define KUC_MAGIC 0x191C /*Lustre9etLinC */ +#define KUC_FL_BLOCK 0x01 /* Wait for send */ + +/* kuc_msgtype values are defined in each transport */ +enum kuc_transport_type { + KUC_TRANSPORT_GENERIC = 1, + KUC_TRANSPORT_HSM = 2, + KUC_TRANSPORT_CHANGELOG = 3, +}; + +enum kuc_generic_message_type { + KUC_MSG_SHUTDOWN = 1, +}; + +/* prototype for callback function on kuc groups */ +typedef int (*libcfs_kkuc_cb_t)(__u32 data, void *cb_arg); + +/* KUC Broadcast Groups. This determines which userspace process hears which + * messages. Multiple transports may be used within a group, or multiple + * groups may use the same transport. Broadcast + * groups need not be used if e.g. a UID is specified instead; + * use group 0 to signify unicast.
+ */ +#define KUC_GRP_HSM 0x02 +#define KUC_GRP_MAX KUC_GRP_HSM + +/* Kernel methods */ +extern int libcfs_kkuc_msg_put(struct file *fp, void *payload); +extern int libcfs_kkuc_group_put(int group, void *payload); +extern int libcfs_kkuc_group_add(struct file *fp, int uid, int group, + __u32 data); +extern int libcfs_kkuc_group_rem(int uid, int group); +extern int libcfs_kkuc_group_foreach(int group, libcfs_kkuc_cb_t cb_func, + void *cb_arg); + +#define LK_FLG_STOP 0x01 + +/* kernelcomm control structure, passed from userspace to kernel */ +typedef struct lustre_kernelcomm { + __u32 lk_wfd; + __u32 lk_rfd; + __u32 lk_uid; + __u32 lk_group; + __u32 lk_data; + __u32 lk_flags; +} __attribute__((packed)) lustre_kernelcomm; + +/* Userspace methods */ +extern int libcfs_ukuc_start(lustre_kernelcomm *l, int groups); +extern int libcfs_ukuc_stop(lustre_kernelcomm *l); +extern int libcfs_ukuc_msg_get(lustre_kernelcomm *l, char *buf, int maxsize, + int transport); + +#endif /* __LIBCFS_KERNELCOMM_H__ */ diff --git a/drivers/staging/lustre/include/linux/libcfs/libcfs_prim.h b/drivers/staging/lustre/include/linux/libcfs/libcfs_prim.h new file mode 100644 index 000000000000..9c40ed904da5 --- /dev/null +++ b/drivers/staging/lustre/include/linux/libcfs/libcfs_prim.h @@ -0,0 +1,101 @@ +/* + * GPL HEADER START + * + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 only, + * as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License version 2 for more details (a copy is included + * in the LICENSE file that accompanied this code). 
+ * + * You should have received a copy of the GNU General Public License + * version 2 along with this program; If not, see + * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf + * + * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara, + * CA 95054 USA or visit www.sun.com if you need additional information or + * have any questions. + * + * GPL HEADER END + */ +/* + * Copyright (c) 2008, 2010, Oracle and/or its affiliates. All rights reserved. + * Use is subject to license terms. + */ +/* + * This file is part of Lustre, http://www.lustre.org/ + * Lustre is a trademark of Sun Microsystems, Inc. + * + * libcfs/include/libcfs/libcfs_prim.h + * + * General primitives. + * + */ + +#ifndef __LIBCFS_PRIM_H__ +#define __LIBCFS_PRIM_H__ + +#ifndef EXPORT_SYMBOL +# define EXPORT_SYMBOL(s) +#endif + +/* + * Schedule + */ +void cfs_pause(cfs_duration_t ticks); + +/* + * Timer + */ +typedef void (cfs_timer_func_t)(ulong_ptr_t); +void schedule_timeout_and_set_state(cfs_task_state_t, int64_t); + +void init_waitqueue_entry_current(wait_queue_t *link); +int64_t waitq_timedwait(wait_queue_t *, cfs_task_state_t, int64_t); +void waitq_wait(wait_queue_t *, cfs_task_state_t); +void add_wait_queue_exclusive_head(wait_queue_head_t *, wait_queue_t *); + +void cfs_init_timer(timer_list_t *t); +void cfs_timer_init(timer_list_t *t, cfs_timer_func_t *func, void *arg); +void cfs_timer_done(timer_list_t *t); +void cfs_timer_arm(timer_list_t *t, cfs_time_t deadline); +void cfs_timer_disarm(timer_list_t *t); +int cfs_timer_is_armed(timer_list_t *t); +cfs_time_t cfs_timer_deadline(timer_list_t *t); + +/* + * Memory + */ +#ifndef memory_pressure_get +#define memory_pressure_get() (0) +#endif +#ifndef memory_pressure_set +#define memory_pressure_set() do {} while (0) +#endif +#ifndef memory_pressure_clr +#define memory_pressure_clr() do {} while (0) +#endif + +static inline int cfs_memory_pressure_get_and_set(void) +{ + int old = memory_pressure_get(); + + if (!old) 
+ memory_pressure_set(); + return old; +} + +static inline void cfs_memory_pressure_restore(int old) +{ + if (old) + memory_pressure_set(); + else + memory_pressure_clr(); + return; +} +#endif diff --git a/drivers/staging/lustre/include/linux/libcfs/libcfs_private.h b/drivers/staging/lustre/include/linux/libcfs/libcfs_private.h new file mode 100644 index 000000000000..056caa467126 --- /dev/null +++ b/drivers/staging/lustre/include/linux/libcfs/libcfs_private.h @@ -0,0 +1,577 @@ +/* + * GPL HEADER START + * + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 only, + * as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License version 2 for more details (a copy is included + * in the LICENSE file that accompanied this code). + * + * You should have received a copy of the GNU General Public License + * version 2 along with this program; If not, see + * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf + * + * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara, + * CA 95054 USA or visit www.sun.com if you need additional information or + * have any questions. + * + * GPL HEADER END + */ +/* + * Copyright (c) 2008, 2010, Oracle and/or its affiliates. All rights reserved. + * Use is subject to license terms. + * + * Copyright (c) 2011, 2012, Intel Corporation. + */ +/* + * This file is part of Lustre, http://www.lustre.org/ + * Lustre is a trademark of Sun Microsystems, Inc. + * + * libcfs/include/libcfs/libcfs_private.h + * + * Various defines for libcfs. 
+ * + */ + +#ifndef __LIBCFS_PRIVATE_H__ +#define __LIBCFS_PRIVATE_H__ + +/* XXX this layering violation is for nidstrings */ +#include <linux/lnet/types.h> + +#ifndef DEBUG_SUBSYSTEM +# define DEBUG_SUBSYSTEM S_UNDEFINED +#endif + + + +/* + * When this is on, LASSERT macro includes check for assignment used instead + * of equality check, but doesn't have unlikely(). Turn this on from time to + * time to make test-builds. This shouldn't be on for production release. + */ +#define LASSERT_CHECKED (0) + + +#define LASSERTF(cond, fmt, ...) \ +do { \ + if (unlikely(!(cond))) { \ + LIBCFS_DEBUG_MSG_DATA_DECL(__msg_data, D_EMERG, NULL); \ + libcfs_debug_msg(&__msg_data, \ + "ASSERTION( %s ) failed: " fmt, #cond, \ + ## __VA_ARGS__); \ + lbug_with_loc(&__msg_data); \ + } \ +} while (0) + +#define LASSERT(cond) LASSERTF(cond, "\n") + +#ifdef CONFIG_LUSTRE_DEBUG_EXPENSIVE_CHECK +/** + * This is for more expensive checks that one doesn't want to be enabled all + * the time. LINVRNT() has to be explicitly enabled by + * CONFIG_LUSTRE_DEBUG_EXPENSIVE_CHECK option. 
+ */ +# define LINVRNT(exp) LASSERT(exp) +#else +# define LINVRNT(exp) ((void)sizeof!!(exp)) +#endif + +#define KLASSERT(e) LASSERT(e) + +void lbug_with_loc(struct libcfs_debug_msg_data *) __attribute__((noreturn)); + +#define LBUG() \ +do { \ + LIBCFS_DEBUG_MSG_DATA_DECL(msgdata, D_EMERG, NULL); \ + lbug_with_loc(&msgdata); \ +} while(0) + +extern atomic_t libcfs_kmemory; +/* + * Memory + */ + +# define libcfs_kmem_inc(ptr, size) \ +do { \ + atomic_add(size, &libcfs_kmemory); \ +} while (0) + +# define libcfs_kmem_dec(ptr, size) \ +do { \ + atomic_sub(size, &libcfs_kmemory); \ +} while (0) + +# define libcfs_kmem_read() \ + atomic_read(&libcfs_kmemory) + + +#ifndef LIBCFS_VMALLOC_SIZE +#define LIBCFS_VMALLOC_SIZE (2 << PAGE_CACHE_SHIFT) /* 2 pages */ +#endif + +#define LIBCFS_ALLOC_PRE(size, mask) \ +do { \ + LASSERT(!in_interrupt() || \ + ((size) <= LIBCFS_VMALLOC_SIZE && \ + ((mask) & GFP_ATOMIC)) != 0); \ +} while (0) + +#define LIBCFS_ALLOC_POST(ptr, size) \ +do { \ + if (unlikely((ptr) == NULL)) { \ + CERROR("LNET: out of memory at %s:%d (tried to alloc '" \ + #ptr "' = %d)\n", __FILE__, __LINE__, (int)(size)); \ + CERROR("LNET: %d total bytes allocated by lnet\n", \ + libcfs_kmem_read()); \ + } else { \ + memset((ptr), 0, (size)); \ + libcfs_kmem_inc((ptr), (size)); \ + CDEBUG(D_MALLOC, "alloc '" #ptr "': %d at %p (tot %d).\n", \ + (int)(size), (ptr), libcfs_kmem_read()); \ + } \ +} while (0) + +/** + * allocate memory with GFP flags @mask + */ +#define LIBCFS_ALLOC_GFP(ptr, size, mask) \ +do { \ + LIBCFS_ALLOC_PRE((size), (mask)); \ + (ptr) = (size) <= LIBCFS_VMALLOC_SIZE ? 
\ + kmalloc((size), (mask)) : vmalloc(size); \ + LIBCFS_ALLOC_POST((ptr), (size)); \ +} while (0) + +/** + * default allocator + */ +#define LIBCFS_ALLOC(ptr, size) \ + LIBCFS_ALLOC_GFP(ptr, size, __GFP_IO) + +/** + * non-sleeping allocator + */ +#define LIBCFS_ALLOC_ATOMIC(ptr, size) \ + LIBCFS_ALLOC_GFP(ptr, size, GFP_ATOMIC) + +/** + * allocate memory for specified CPU partition + * \a cptab != NULL, \a cpt is CPU partition id of \a cptab + * \a cptab == NULL, \a cpt is HW NUMA node id + */ +#define LIBCFS_CPT_ALLOC_GFP(ptr, cptab, cpt, size, mask) \ +do { \ + LIBCFS_ALLOC_PRE((size), (mask)); \ + (ptr) = (size) <= LIBCFS_VMALLOC_SIZE ? \ + kmalloc_node((size), (mask), cfs_cpt_spread_node(cptab, cpt)) :\ + vmalloc_node(size, cfs_cpt_spread_node(cptab, cpt)); \ + LIBCFS_ALLOC_POST((ptr), (size)); \ +} while (0) + +/** default numa allocator */ +#define LIBCFS_CPT_ALLOC(ptr, cptab, cpt, size) \ + LIBCFS_CPT_ALLOC_GFP(ptr, cptab, cpt, size, __GFP_IO) + +#define LIBCFS_FREE(ptr, size) \ +do { \ + int s = (size); \ + if (unlikely((ptr) == NULL)) { \ + CERROR("LIBCFS: free NULL '" #ptr "' (%d bytes) at " \ + "%s:%d\n", s, __FILE__, __LINE__); \ + break; \ + } \ + libcfs_kmem_dec((ptr), s); \ + CDEBUG(D_MALLOC, "kfreed '" #ptr "': %d at %p (tot %d).\n", \ + s, (ptr), libcfs_kmem_read()); \ + if (unlikely(s > LIBCFS_VMALLOC_SIZE)) \ + vfree(ptr); \ + else \ + kfree(ptr); \ +} while (0) + +/******************************************************************************/ + +/* htonl hack - either this, or compile with -O2. 
Stupid byteorder/generic.h */ +#if defined(__GNUC__) && (__GNUC__ >= 2) && !defined(__OPTIMIZE__) +#define ___htonl(x) __cpu_to_be32(x) +#define ___htons(x) __cpu_to_be16(x) +#define ___ntohl(x) __be32_to_cpu(x) +#define ___ntohs(x) __be16_to_cpu(x) +#define htonl(x) ___htonl(x) +#define ntohl(x) ___ntohl(x) +#define htons(x) ___htons(x) +#define ntohs(x) ___ntohs(x) +#endif + +void libcfs_debug_dumpstack(task_t *tsk); +void libcfs_run_upcall(char **argv); +void libcfs_run_lbug_upcall(struct libcfs_debug_msg_data *); +void libcfs_debug_dumplog(void); +int libcfs_debug_init(unsigned long bufsize); +int libcfs_debug_cleanup(void); +int libcfs_debug_clear_buffer(void); +int libcfs_debug_mark_buffer(const char *text); + +void libcfs_debug_set_level(unsigned int debug_level); + + +/* + * allocate per-cpu-partition data, returned value is an array of pointers, + * variable can be indexed by CPU ID. + * cptab != NULL: size of array is number of CPU partitions + * cptab == NULL: size of array is number of HW cores + */ +void *cfs_percpt_alloc(struct cfs_cpt_table *cptab, unsigned int size); +/* + * destroy per-cpu-partition variable + */ +void cfs_percpt_free(void *vars); +int cfs_percpt_number(void *vars); +void *cfs_percpt_current(void *vars); +void *cfs_percpt_index(void *vars, int idx); + +#define cfs_percpt_for_each(var, i, vars) \ + for (i = 0; i < cfs_percpt_number(vars) && \ + ((var) = (vars)[i]) != NULL; i++) + +/* + * allocate a variable array, returned value is an array of pointers. + * Caller can specify length of array by count.
+ */ +void *cfs_array_alloc(int count, unsigned int size); +void cfs_array_free(void *vars); + +#define LASSERT_ATOMIC_ENABLED (1) + +#if LASSERT_ATOMIC_ENABLED + +/* + * Atomic-counter assertions. The bound arguments (@v, @v1, @v2) are + * parenthesised in the asserted condition so that expression arguments such + * as (x | y) keep their intended precedence. + */ + +/** assert value of @a is equal to @v */ +#define LASSERT_ATOMIC_EQ(a, v) \ +do { \ + LASSERTF(atomic_read(a) == (v), \ + "value: %d\n", atomic_read((a))); \ +} while (0) + +/** assert value of @a is not equal to @v */ +#define LASSERT_ATOMIC_NE(a, v) \ +do { \ + LASSERTF(atomic_read(a) != (v), \ + "value: %d\n", atomic_read((a))); \ +} while (0) + +/** assert value of @a is less than @v */ +#define LASSERT_ATOMIC_LT(a, v) \ +do { \ + LASSERTF(atomic_read(a) < (v), \ + "value: %d\n", atomic_read((a))); \ +} while (0) + +/** assert value of @a is less than or equal to @v */ +#define LASSERT_ATOMIC_LE(a, v) \ +do { \ + LASSERTF(atomic_read(a) <= (v), \ + "value: %d\n", atomic_read((a))); \ +} while (0) + +/** assert value of @a is greater than @v */ +#define LASSERT_ATOMIC_GT(a, v) \ +do { \ + LASSERTF(atomic_read(a) > (v), \ + "value: %d\n", atomic_read((a))); \ +} while (0) + +/** assert value of @a is greater than or equal to @v */ +#define LASSERT_ATOMIC_GE(a, v) \ +do { \ + LASSERTF(atomic_read(a) >= (v), \ + "value: %d\n", atomic_read((a))); \ +} while (0) + +/** assert value of @a is greater than @v1 and less than @v2 */ +#define LASSERT_ATOMIC_GT_LT(a, v1, v2) \ +do { \ + int __v = atomic_read(a); \ + LASSERTF(__v > (v1) && __v < (v2), "value: %d\n", __v); \ +} while (0) + +/** assert value of @a is greater than @v1 and less than or equal to @v2 */ +#define LASSERT_ATOMIC_GT_LE(a, v1, v2) \ +do { \ + int __v = atomic_read(a); \ + LASSERTF(__v > (v1) && __v <= (v2), "value: %d\n", __v); \ +} while (0) + +/** assert value of @a is greater than or equal to @v1 and less than @v2 */ +#define LASSERT_ATOMIC_GE_LT(a, v1, v2) \ +do { \ + int __v = atomic_read(a); \ + LASSERTF(__v >= (v1) && __v < (v2), "value: %d\n", __v); \ +} while (0) + +/** assert value of @a is greater than or equal to @v1 and less than or equal to @v2 */ +#define LASSERT_ATOMIC_GE_LE(a, v1, v2) \ +do { \ + 
int __v = atomic_read(a); \ + LASSERTF(__v >= (v1) && __v <= (v2), "value: %d\n", __v); \ +} while (0) + +#else /* !LASSERT_ATOMIC_ENABLED */ + +#define LASSERT_ATOMIC_EQ(a, v) do {} while (0) +#define LASSERT_ATOMIC_NE(a, v) do {} while (0) +#define LASSERT_ATOMIC_LT(a, v) do {} while (0) +#define LASSERT_ATOMIC_LE(a, v) do {} while (0) +#define LASSERT_ATOMIC_GT(a, v) do {} while (0) +#define LASSERT_ATOMIC_GE(a, v) do {} while (0) +#define LASSERT_ATOMIC_GT_LT(a, v1, v2) do {} while (0) +#define LASSERT_ATOMIC_GT_LE(a, v1, v2) do {} while (0) +#define LASSERT_ATOMIC_GE_LT(a, v1, v2) do {} while (0) +#define LASSERT_ATOMIC_GE_LE(a, v1, v2) do {} while (0) + +#endif /* LASSERT_ATOMIC_ENABLED */ + +#define LASSERT_ATOMIC_ZERO(a) LASSERT_ATOMIC_EQ(a, 0) +#define LASSERT_ATOMIC_POS(a) LASSERT_ATOMIC_GT(a, 0) + +/* + * Allocate / free an object of size sizeof(*(ptr)) through LIBCFS_ALLOC / + * LIBCFS_FREE. The expansions carry no trailing ';' (the caller supplies + * it), so the macros remain usable as the sole body of an if/else. + */ +#define CFS_ALLOC_PTR(ptr) LIBCFS_ALLOC(ptr, sizeof (*(ptr))) +#define CFS_FREE_PTR(ptr) LIBCFS_FREE(ptr, sizeof (*(ptr))) + +/* + * percpu partition lock + * + * There are some use-cases like this in Lustre: + * . each CPU partition has its own private data which is frequently changed, + * and mostly by the local CPU partition. + * . all CPU partitions share some global data, these data are rarely changed. + * + * LNet is typical example. + * CPU partition lock is designed for this kind of use-cases: + * . each CPU partition has its own private lock + * . change on private data just needs to take the private lock + * . read on shared data just needs to take _any_ of private locks + * . change on shared data needs to take _all_ private locks, + * which is slow and should be really rare.
+ */ +/* special lock index; presumably requests exclusive access to all private locks -- TODO confirm against cfs_percpt_lock() */ +enum { + CFS_PERCPT_LOCK_EX = -1, /* negative */ +}; + +/* one lock object per CPU partition table; see the description above */ +struct cfs_percpt_lock { + /* cpu-partition-table for this lock */ + struct cfs_cpt_table *pcl_cptab; + /* exclusively locked */ + unsigned int pcl_locked; + /* private lock table */ + spinlock_t **pcl_locks; +}; + +/* return number of private locks, i.e. cfs_cpt_number() of pcl_cptab */ +static inline int +cfs_percpt_lock_num(struct cfs_percpt_lock *pcl) +{ + return cfs_cpt_number(pcl->pcl_cptab); +} + + +/* + * create a cpu-partition lock based on CPU partition table \a cptab + * NOTE(review): this prototype has no \a psize padding parameter, although + * this comment used to describe one -- confirm against the implementation. + */ +struct cfs_percpt_lock *cfs_percpt_lock_alloc(struct cfs_cpt_table *cptab); +/* destroy a cpu-partition lock */ +void cfs_percpt_lock_free(struct cfs_percpt_lock *pcl); + +/* lock private lock \a index of \a pcl */ +void cfs_percpt_lock(struct cfs_percpt_lock *pcl, int index); +/* unlock private lock \a index of \a pcl */ +void cfs_percpt_unlock(struct cfs_percpt_lock *pcl, int index); +/* create percpt (atomic) refcount based on @cptab */ +atomic_t **cfs_percpt_atomic_alloc(struct cfs_cpt_table *cptab, int val); +/* destroy percpt refcount */ +void cfs_percpt_atomic_free(atomic_t **refs); +/* return sum of all percpu refs */ +int cfs_percpt_atomic_summary(atomic_t **refs); + + +/** Compile-time assertion. + + * Check an invariant described by a constant expression at compile time by + * forcing a compiler error if it does not hold. \a cond must be a constant + * expression as defined by the ISO C Standard: + * + * 6.8.4.2 The switch statement + * .... + * [#3] The expression of each case label shall be an integer + * constant expression and no two of the case constant + * expressions in the same switch statement shall have the same + * value after conversion... 
+ * + * When \a cond evaluates to 0 the label "case (cond)" duplicates "case 0", + * which the rule quoted above forbids, so compilation fails. + */ +#define CLASSERT(cond) do {switch(42) {case (cond): case 0: break;}} while (0) + +/* support decl needed both by kernel and liblustre */ +int libcfs_isknown_lnd(int type); +char *libcfs_lnd2modname(int type); +char *libcfs_lnd2str(int type); +int libcfs_str2lnd(const char *str); +char *libcfs_net2str(__u32 net); +char *libcfs_nid2str(lnet_nid_t nid); +__u32 libcfs_str2net(const char *str); +lnet_nid_t libcfs_str2nid(const char *str); +int libcfs_str2anynid(lnet_nid_t *nid, const char *str); +char *libcfs_id2str(lnet_process_id_t id); +void cfs_free_nidlist(struct list_head *list); +int cfs_parse_nidlist(char *str, int len, struct list_head *list); +int cfs_match_nid(lnet_nid_t nid, struct list_head *list); + +/** \addtogroup lnet_addr + * @{ */ +/* how an LNET NID encodes net:address: network in the upper 32 bits, + * address in the lower 32 bits */ +/** extract the address part of an lnet_nid_t */ +#define LNET_NIDADDR(nid) ((__u32)((nid) & 0xffffffff)) +/** extract the network part of an lnet_nid_t */ +#define LNET_NIDNET(nid) ((__u32)(((nid) >> 32)) & 0xffffffff) +/** make an lnet_nid_t from a network part and an address part */ +#define LNET_MKNID(net,addr) ((((__u64)(net))<<32)|((__u64)(addr))) +/* how net encodes type:number: type in bits 16-31, number in bits 0-15 */ +#define LNET_NETNUM(net) ((net) & 0xffff) +#define LNET_NETTYP(net) (((net) >> 16) & 0xffff) +#define LNET_MKNET(typ,num) ((((__u32)(typ))<<16)|((__u32)(num))) +/** @} lnet_addr */ + +/* max value for numeric network address */ +#define MAX_NUMERIC_VALUE 0xffffffff + +/* implication: false only when \a a is true and \a b is false */ +#define ergo(a, b) (!(a) || (b)) +/* logical equivalence: true when \a a and \a b have the same truth value */ +#define equi(a, b) (!!(a) == !!(b)) + +/* default current-time source, used unless CFS_CURRENT_TIME is already defined */ +#ifndef CFS_CURRENT_TIME +# define CFS_CURRENT_TIME time(0) +#endif + +/* -------------------------------------------------------------------- + * Light-weight trace + * Support for temporary event tracing with minimal Heisenberg effect. 
+ * All stuff about lwt are put in arch/kp30.h + * -------------------------------------------------------------------- */ + +struct libcfs_device_userstate +{ + int ldu_memhog_pages; + struct page *ldu_memhog_root_page; +}; + +/* what used to be in portals_lib.h */ +/* NOTE: MIN/MAX evaluate their arguments more than once */ +#ifndef MIN +# define MIN(a,b) (((a)<(b)) ? (a): (b)) +#endif +#ifndef MAX +# define MAX(a,b) (((a)>(b)) ? (a): (b)) +#endif + +/* yield \a ptr, or "" when \a ptr is NULL; fully parenthesized so it can be + * embedded in a larger expression */ +#define MKSTR(ptr) ((ptr) ? (ptr) : "") + +/* round \a val up to the next multiple of 4 */ +static inline int cfs_size_round4 (int val) +{ + return (val + 3) & (~0x3); +} + +/* round \a val up to the next multiple of 8 */ +#ifndef HAVE_CFS_SIZE_ROUND +static inline int cfs_size_round (int val) +{ + return (val + 7) & (~0x7); +} +#define HAVE_CFS_SIZE_ROUND +#endif + +/* round \a val up to the next multiple of 16 */ +static inline int cfs_size_round16(int val) +{ + return (val + 0xf) & (~0xf); +} + +/* round \a val up to the next multiple of 32 */ +static inline int cfs_size_round32(int val) +{ + return (val + 0x1f) & (~0x1f); +} + +/* 0 stays 0; otherwise round \a val + 1 up to the next multiple of 8 */ +static inline int cfs_size_round0(int val) +{ + if (!val) + return 0; + return (val + 1 + 7) & (~0x7); +} + +/* length of \a fset including its terminating NUL, rounded up to 8 */ +static inline size_t cfs_round_strlen(const char *fset) +{ + return (size_t)cfs_size_round((int)strlen(fset) + 1); +} + +/* roundup \a val to power2 */ +static inline unsigned int cfs_power2_roundup(unsigned int val) +{ + if (val != LOWEST_BIT_SET(val)) { /* not a power of 2 already */ + /* strip low bits until only the highest set bit remains */ + do { + val &= ~LOWEST_BIT_SET(val); + } while (val != LOWEST_BIT_SET(val)); + /* ...and round up */ + val <<= 1; + } + return val; +} + +/* + * Buffer packing helpers. \a ptr must be a modifiable pointer lvalue; it is + * advanced by the 8-byte-rounded length after each copy. All arguments are + * parenthesized in the expansions so that expressions may be passed. + */ +/* copy \a len bytes from \a var (if non-NULL) into the buffer at \a ptr */ +#define LOGL(var,len,ptr) \ +do { \ + if (var) \ + memcpy((char *)(ptr), (const char *)(var), (len)); \ + (ptr) += cfs_size_round(len); \ +} while (0) + +/* copy \a len bytes from the buffer at \a ptr out to \a var (if non-NULL) */ +#define LOGU(var,len,ptr) \ +do { \ + if (var) \ + memcpy((char *)(var), (const char *)(ptr), (len)); \ + (ptr) += cfs_size_round(len); \ +} while (0) + +/* like LOGL but appends a NUL byte; does nothing when \a len is 0 */ +#define LOGL0(var,len,ptr) \ +do { \ + if (!(len)) \ + break; \ + memcpy((char *)(ptr), (const char *)(var), (len)); \ + *((char *)(ptr) + (len)) = 0; \ + (ptr) += cfs_size_round((len) + 1); \ +} while (0) + +/** + * Lustre Network Driver types. + */ +enum { + /* Only add to these values (i.e. 
don't ever change or redefine them): + * network addresses depend on them... */ + QSWLND = 1, + SOCKLND = 2, + GMLND = 3, /* obsolete, keep it so that libcfs_nid2str works */ + PTLLND = 4, + O2IBLND = 5, + CIBLND = 6, + OPENIBLND = 7, + IIBLND = 8, + LOLND = 9, + RALND = 10, + VIBLND = 11, + MXLND = 12, + GNILND = 13, +}; + +#endif diff --git a/drivers/staging/lustre/include/linux/libcfs/libcfs_string.h b/drivers/staging/lustre/include/linux/libcfs/libcfs_string.h new file mode 100644 index 000000000000..a6bac9c36339 --- /dev/null +++ b/drivers/staging/lustre/include/linux/libcfs/libcfs_string.h @@ -0,0 +1,137 @@ +/* + * GPL HEADER START + * + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 only, + * as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License version 2 for more details (a copy is included + * in the LICENSE file that accompanied this code). + * + * You should have received a copy of the GNU General Public License + * version 2 along with this program; If not, see + * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf + * + * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara, + * CA 95054 USA or visit www.sun.com if you need additional information or + * have any questions. + * + * GPL HEADER END + */ +/* + * Copyright (c) 2008, 2010, Oracle and/or its affiliates. All rights reserved. + * Use is subject to license terms. + * + * Copyright (c) 2012, Intel Corporation. + */ +/* + * This file is part of Lustre, http://www.lustre.org/ + * Lustre is a trademark of Sun Microsystems, Inc. 
+ * + * libcfs/include/libcfs/libcfs_string.h + * + * Generic string manipulation functions. + * + * Author: Nathan Rutman <nathan.rutman@sun.com> + */ + +#ifndef __LIBCFS_STRING_H__ +#define __LIBCFS_STRING_H__ + +/* libcfs_string.c */ +/* string comparison ignoring case */ +int cfs_strncasecmp(const char *s1, const char *s2, size_t n); +/* Convert a text string to a bitmask */ +int cfs_str2mask(const char *str, const char *(*bit2str)(int bit), + int *oldmask, int minmask, int allmask); + +/* Allocate space for and copy an existing string. + * Must free with kfree(). + */ +char *cfs_strdup(const char *str, u_int32_t flags); + +/* safe vsnprintf */ +int cfs_vsnprintf(char *buf, size_t size, const char *fmt, va_list args); + +/* safe snprintf */ +int cfs_snprintf(char *buf, size_t size, const char *fmt, ...); + +/* trim leading and trailing space characters */ +char *cfs_firststr(char *str, size_t size); + +/** + * Structure to represent NULL-less strings: a pointer plus an explicit + * length (presumably ls_str is not NUL-terminated -- TODO confirm against + * cfs_gettok()). + */ +struct cfs_lstr { + char *ls_str; + int ls_len; +}; + +/* + * Structure to represent \<range_expr\> token of the syntax. + */ +struct cfs_range_expr { + /* + * Link to cfs_expr_list::el_exprs. 
+ */ + struct list_head re_link; + __u32 re_lo; + __u32 re_hi; + __u32 re_stride; +}; + +/* list of range expressions: el_exprs heads the cfs_range_expr::re_link chain */ +struct cfs_expr_list { + struct list_head el_link; + struct list_head el_exprs; +}; + +/* return 1 if \a c is a space, tab, newline or carriage return, else 0 */ +static inline int +cfs_iswhite(char c) +{ + switch (c) { + case ' ': + case '\t': + case '\n': + case '\r': + return 1; + default: + break; + } + return 0; +} + +char *cfs_trimwhite(char *str); +int cfs_gettok(struct cfs_lstr *next, char delim, struct cfs_lstr *res); +int cfs_str2num_check(char *str, int nob, unsigned *num, + unsigned min, unsigned max); +int cfs_range_expr_parse(struct cfs_lstr *src, unsigned min, unsigned max, + int single_tok, struct cfs_range_expr **expr); +int cfs_expr_list_match(__u32 value, struct cfs_expr_list *expr_list); +int cfs_expr_list_values(struct cfs_expr_list *expr_list, + int max, __u32 **values); +/* free an array of \a num values; pairs with the LIBCFS_ALLOC() noted below */ +static inline void +cfs_expr_list_values_free(__u32 *values, int num) +{ + /* This array is allocated by LIBCFS_ALLOC(), so it shouldn't be freed + * by OBD_FREE() if it's called by module other than libcfs & LNet, + * otherwise we will see fake memory leak */ + LIBCFS_FREE(values, num * sizeof(values[0])); +} + +void cfs_expr_list_free(struct cfs_expr_list *expr_list); +void cfs_expr_list_print(struct cfs_expr_list *expr_list); +int cfs_expr_list_parse(char *str, int len, unsigned min, unsigned max, + struct cfs_expr_list **elpp); +void cfs_expr_list_free_list(struct list_head *list); +int cfs_ip_addr_parse(char *str, int len, struct list_head *list); +int cfs_ip_addr_match(__u32 addr, struct list_head *list); +void cfs_ip_addr_free(struct list_head *list); + +/* map strtoul() onto simple_strtoul() */ +#define strtoul(str, endp, base) simple_strtoul(str, endp, base) + +#endif diff --git a/drivers/staging/lustre/include/linux/libcfs/libcfs_time.h b/drivers/staging/lustre/include/linux/libcfs/libcfs_time.h new file mode 100644 index 000000000000..4bdd77163d5e --- /dev/null +++ b/drivers/staging/lustre/include/linux/libcfs/libcfs_time.h @@ -0,0 +1,132 @@ +/* + * GPL HEADER START + * + * DO NOT ALTER OR 
REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 only, + * as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License version 2 for more details (a copy is included + * in the LICENSE file that accompanied this code). + * + * You should have received a copy of the GNU General Public License + * version 2 along with this program; If not, see + * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf + * + * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara, + * CA 95054 USA or visit www.sun.com if you need additional information or + * have any questions. + * + * GPL HEADER END + */ +/* + * Copyright (c) 2008, 2010, Oracle and/or its affiliates. All rights reserved. + * Use is subject to license terms. + */ +/* + * This file is part of Lustre, http://www.lustre.org/ + * Lustre is a trademark of Sun Microsystems, Inc. + * + * libcfs/include/libcfs/libcfs_time.h + * + * Time functions. + * + */ + +#ifndef __LIBCFS_TIME_H__ +#define __LIBCFS_TIME_H__ +/* + * generic time manipulation functions. 
+ */ + +static inline cfs_time_t cfs_time_add(cfs_time_t t, cfs_duration_t d) +{ + return (cfs_time_t)(t + d); +} + +static inline cfs_duration_t cfs_time_sub(cfs_time_t t1, cfs_time_t t2) +{ + return (cfs_time_t)(t1 - t2); +} + +static inline int cfs_time_after(cfs_time_t t1, cfs_time_t t2) +{ + return cfs_time_before(t2, t1); +} + +static inline int cfs_time_aftereq(cfs_time_t t1, cfs_time_t t2) +{ + return cfs_time_beforeq(t2, t1); +} + + +static inline cfs_time_t cfs_time_shift(int seconds) +{ + return cfs_time_add(cfs_time_current(), cfs_time_seconds(seconds)); +} + +static inline long cfs_timeval_sub(struct timeval *large, struct timeval *small, + struct timeval *result) +{ + long r = (long) ( + (large->tv_sec - small->tv_sec) * ONE_MILLION + + (large->tv_usec - small->tv_usec)); + if (result != NULL) { + result->tv_usec = r % ONE_MILLION; + result->tv_sec = r / ONE_MILLION; + } + return r; +} + +static inline void cfs_slow_warning(cfs_time_t now, int seconds, char *msg) +{ + if (cfs_time_after(cfs_time_current(), + cfs_time_add(now, cfs_time_seconds(15)))) + CERROR("slow %s "CFS_TIME_T" sec\n", msg, + cfs_duration_sec(cfs_time_sub(cfs_time_current(),now))); +} + +#define CFS_RATELIMIT(seconds) \ +({ \ + /* \ + * XXX nikita: non-portable initializer \ + */ \ + static time_t __next_message = 0; \ + int result; \ + \ + if (cfs_time_after(cfs_time_current(), __next_message)) \ + result = 1; \ + else { \ + __next_message = cfs_time_shift(seconds); \ + result = 0; \ + } \ + result; \ +}) + +/* + * helper function similar to do_gettimeofday() of Linux kernel + */ +static inline void cfs_fs_timeval(struct timeval *tv) +{ + cfs_fs_time_t time; + + cfs_fs_time_current(&time); + cfs_fs_time_usec(&time, tv); +} + +/* + * return valid time-out based on user supplied one. Currently we only check + * that time-out is not shorted than allowed. 
+ */ +static inline cfs_duration_t cfs_timeout_cap(cfs_duration_t timeout) +{ + /* raise anything below CFS_TICK to CFS_TICK; larger values pass through */ + if (timeout < CFS_TICK) + timeout = CFS_TICK; + return timeout; +} + +#endif diff --git a/drivers/staging/lustre/include/linux/libcfs/libcfs_workitem.h b/drivers/staging/lustre/include/linux/libcfs/libcfs_workitem.h new file mode 100644 index 000000000000..5cc64f327a87 --- /dev/null +++ b/drivers/staging/lustre/include/linux/libcfs/libcfs_workitem.h @@ -0,0 +1,110 @@ +/* + * GPL HEADER START + * + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 only, + * as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License version 2 for more details (a copy is included + * in the LICENSE file that accompanied this code). + * + * You should have received a copy of the GNU General Public License + * version 2 along with this program; If not, see + * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf + * + * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara, + * CA 95054 USA or visit www.sun.com if you need additional information or + * have any questions. + * + * GPL HEADER END + */ +/* + * Copyright (c) 2008, 2010, Oracle and/or its affiliates. All rights reserved. + * Use is subject to license terms. + * + * Copyright (c) 2012, Intel Corporation. + */ +/* + * This file is part of Lustre, http://www.lustre.org/ + * Lustre is a trademark of Sun Microsystems, Inc. 
+ * + * libcfs/include/libcfs/libcfs_workitem.h + * + * Author: Isaac Huang <he.h.huang@oracle.com> + * Liang Zhen <zhen.liang@sun.com> + * + * A workitem is deferred work with these semantics: + * - a workitem always runs in thread context. + * - a workitem can be concurrent with other workitems but is strictly + * serialized with respect to itself. + * - no CPU affinity, a workitem does not necessarily run on the same CPU + * that schedules it. However, this might change in the future. + * - if a workitem is scheduled again before it has a chance to run, it + * runs only once. + * - if a workitem is scheduled while it runs, it runs again after it + * completes; this ensures that events occurring while other events are + * being processed receive due attention. This behavior also allows a + * workitem to reschedule itself. + * + * Usage notes: + * - a workitem can sleep but it should be aware of how that sleep might + * affect others. + * - a workitem runs inside a kernel thread so there's no user space to access. + * - do not use a workitem if the scheduling latency can't be tolerated. + * + * When wi_action returns non-zero, it means the workitem has either been + * freed or reused and the workitem scheduler won't touch it any more. 
+ */ + +#ifndef __LIBCFS_WORKITEM_H__ +#define __LIBCFS_WORKITEM_H__ + +/* workitem scheduler; only declared here, so opaque to users of this header */ +struct cfs_wi_sched; + +void cfs_wi_sched_destroy(struct cfs_wi_sched *); +int cfs_wi_sched_create(char *name, struct cfs_cpt_table *cptab, int cpt, + int nthrs, struct cfs_wi_sched **); + +struct cfs_workitem; + +/* workitem callback type: receives the workitem and returns an int status */ +typedef int (*cfs_wi_action_t) (struct cfs_workitem *); +typedef struct cfs_workitem { + /** chain on runq or rerunq */ + struct list_head wi_list; + /** working function */ + cfs_wi_action_t wi_action; + /** arg for working function */ + void *wi_data; + /** in running */ + unsigned short wi_running:1; + /** scheduled */ + unsigned short wi_scheduled:1; +} cfs_workitem_t; + +/* + * Initialize \a wi: empty list linkage, not running, not scheduled, with + * callback \a action and callback argument \a data. + */ +static inline void +cfs_wi_init(cfs_workitem_t *wi, void *data, cfs_wi_action_t action) +{ + INIT_LIST_HEAD(&wi->wi_list); + + wi->wi_running = 0; + wi->wi_scheduled = 0; + wi->wi_data = data; + wi->wi_action = action; +} + +void cfs_wi_schedule(struct cfs_wi_sched *sched, cfs_workitem_t *wi); +int cfs_wi_deschedule(struct cfs_wi_sched *sched, cfs_workitem_t *wi); +void cfs_wi_exit(struct cfs_wi_sched *sched, cfs_workitem_t *wi); + +int cfs_wi_startup(void); +void cfs_wi_shutdown(void); + +/** # workitem scheduler loops before reschedule */ +#define CFS_WI_RESCHED 128 + +#endif /* __LIBCFS_WORKITEM_H__ */ diff --git a/drivers/staging/lustre/include/linux/libcfs/linux/kp30.h b/drivers/staging/lustre/include/linux/libcfs/linux/kp30.h new file mode 100644 index 000000000000..4b7ae1c5bd3b --- /dev/null +++ b/drivers/staging/lustre/include/linux/libcfs/linux/kp30.h @@ -0,0 +1,286 @@ +/* + * GPL HEADER START + * + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 only, + * as published by the Free Software Foundation. 
+ * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License version 2 for more details (a copy is included + * in the LICENSE file that accompanied this code). + * + * You should have received a copy of the GNU General Public License + * version 2 along with this program; If not, see + * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf + * + * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara, + * CA 95054 USA or visit www.sun.com if you need additional information or + * have any questions. + * + * GPL HEADER END + */ +/* + * Copyright (c) 2008, 2010, Oracle and/or its affiliates. All rights reserved. + * Use is subject to license terms. + * + * Copyright (c) 2011, 2012, Intel Corporation. + */ +/* + * This file is part of Lustre, http://www.lustre.org/ + * Lustre is a trademark of Sun Microsystems, Inc. 
+ */ + +#ifndef __LIBCFS_LINUX_KP30_H__ +#define __LIBCFS_LINUX_KP30_H__ + + +#include <linux/kernel.h> +#include <linux/mm.h> +#include <linux/string.h> +#include <linux/stat.h> +#include <linux/init.h> +#include <linux/errno.h> +#include <linux/unistd.h> +#include <linux/kmod.h> +#include <linux/notifier.h> +#include <linux/fs.h> +#include <linux/miscdevice.h> +#include <linux/vmalloc.h> +#include <linux/time.h> +#include <linux/slab.h> +#include <linux/interrupt.h> +#include <linux/highmem.h> +#include <linux/module.h> +#include <linux/version.h> +#include <asm/atomic.h> +#include <asm/uaccess.h> +#include <linux/rwsem.h> +#include <linux/proc_fs.h> +#include <linux/file.h> +#include <linux/smp.h> +#include <linux/ctype.h> +#include <linux/compiler.h> +#ifdef HAVE_MM_INLINE +# include <linux/mm_inline.h> +#endif +#include <linux/kallsyms.h> +#include <linux/moduleparam.h> +#include <linux/scatterlist.h> + +#include <linux/libcfs/linux/portals_compat25.h> + +/* initialize work item \a wq with callback \a cb; \a cbdata is not used by the expansion */ +#define prepare_work(wq,cb,cbdata) \ +do { \ + INIT_WORK((wq), (void *)(cb)); \ +} while (0) + +/* recover the \a type containing member \a field from the pointer \a data */ +#define cfs_get_work_data(type,field,data) container_of(data,type,field) + +/* compatibility aliases. NOTE(review): strtok is mapped to strpbrk, whose semantics differ from the C library strtok -- confirm callers rely on this */ +#define our_recalc_sigpending(current) recalc_sigpending() +#define strtok(a,b) strpbrk(a, b) +#define work_struct_t struct work_struct + +/* NOTE(review): both branches of this conditional are empty */ +#ifdef CONFIG_SMP +#else +#endif + + +#define SEM_COUNT(sem) ((sem)->count) + + +/* ------------------------------------------------------------------- */ + +/* both expand to nothing */ +#define PORTAL_SYMBOL_REGISTER(x) +#define PORTAL_SYMBOL_UNREGISTER(x) + + + + +/******************************************************************************/ +/* Module parameter support: expands to module_param() plus + * MODULE_PARM_DESC(); the \a t argument is not used by the expansion */ +#define CFS_MODULE_PARM(name, t, type, perm, desc) \ + module_param(name, type, perm);\ + MODULE_PARM_DESC(name, desc) + +#define CFS_SYSFS_MODULE_PARM 1 /* module parameters accessible via sysfs */ + +/******************************************************************************/ + +#if (__GNUC__) +/* Use the special GNU C __attribute__ hack to have the 
compiler check the + * printf style argument string against the actual argument count and + * types. + */ +#ifdef printf +# warning printf has been defined as a macro... +# undef printf +#endif + +#endif /* __GNUC__ */ +/* route stdio-style calls through CDEBUG(D_OTHER); fprintf's first argument is dropped and time()'s argument is ignored */ +# define fprintf(a, format, b...) CDEBUG(D_OTHER, format , ## b) +# define printf(format, b...) CDEBUG(D_OTHER, format , ## b) +# define time(a) CURRENT_TIME + +# define cfs_num_present_cpus() num_present_cpus() + +/******************************************************************************/ +/* Light-weight trace + * Support for temporary event tracing with minimal Heisenberg effect. */ +#define LWT_SUPPORT 0 + +#define LWT_MEMORY (16<<20) + +#ifndef KLWT_SUPPORT +# if !defined(BITS_PER_LONG) +# error "BITS_PER_LONG not defined" +# endif + +/* kernel hasn't defined this? + * One trace record; lwte_pad is present only when BITS_PER_LONG > 32. */ +typedef struct { + long long lwte_when; + char *lwte_where; + void *lwte_task; + long lwte_p1; + long lwte_p2; + long lwte_p3; + long lwte_p4; +# if BITS_PER_LONG > 32 + long lwte_pad; +# endif +} lwt_event_t; +#endif /* !KLWT_SUPPORT */ + +#if LWT_SUPPORT +# if !KLWT_SUPPORT +/* one page of trace records, linked to other pages through lwtp_list */ +typedef struct _lwt_page { + struct list_head lwtp_list; + struct page *lwtp_page; + lwt_event_t *lwtp_events; +} lwt_page_t; + +/* a page pointer plus an index into that page's lwtp_events array */ +typedef struct { + int lwtc_current_index; + lwt_page_t *lwtc_current_page; +} lwt_cpu_t; + +extern int lwt_enabled; +extern lwt_cpu_t lwt_cpus[]; + +/* Note that we _don't_ define LWT_EVENT at all if LWT_SUPPORT isn't set. + * This stuff is meant for finding specific problems; it never stays in + * production code... 
*/ +/* LWTWHERE(f,l) builds the string literal "f:l" by stringifying \a l */ +#define LWTSTR(n) #n +#define LWTWHERE(f,l) f ":" LWTSTR(l) +#define LWT_EVENTS_PER_PAGE (PAGE_CACHE_SIZE / sizeof (lwt_event_t)) + +/* + * Record one trace event when lwt_enabled is set. With local interrupts + * disabled: take the next slot of the executing CPU's current page, move to + * the next page in the list (resetting the index) once the page is full, + * then fill the slot with the cycle counter, the source location, the + * current task and the four arguments cast to long. + */ +#define LWT_EVENT(p1, p2, p3, p4) \ +do { \ + unsigned long flags; \ + lwt_cpu_t *cpu; \ + lwt_page_t *p; \ + lwt_event_t *e; \ + \ + if (lwt_enabled) { \ + local_irq_save (flags); \ + \ + cpu = &lwt_cpus[smp_processor_id()]; \ + p = cpu->lwtc_current_page; \ + e = &p->lwtp_events[cpu->lwtc_current_index++]; \ + \ + if (cpu->lwtc_current_index >= LWT_EVENTS_PER_PAGE) { \ + cpu->lwtc_current_page = \ + list_entry (p->lwtp_list.next, \ + lwt_page_t, lwtp_list); \ + cpu->lwtc_current_index = 0; \ + } \ + \ + e->lwte_when = get_cycles(); \ + e->lwte_where = LWTWHERE(__FILE__,__LINE__); \ + e->lwte_task = current; \ + e->lwte_p1 = (long)(p1); \ + e->lwte_p2 = (long)(p2); \ + e->lwte_p3 = (long)(p3); \ + e->lwte_p4 = (long)(p4); \ + \ + local_irq_restore (flags); \ + } \ +} while (0) + +#endif /* !KLWT_SUPPORT */ + +extern int lwt_init (void); +extern void lwt_fini (void); +extern int lwt_lookup_string (int *size, char *knlptr, + char *usrptr, int usrsize); +extern int lwt_control (int enable, int clear); +extern int lwt_snapshot (cfs_cycles_t *now, int *ncpu, int *total_size, + void *user_ptr, int user_size); +#endif /* LWT_SUPPORT */ + +/* ------------------------------------------------------------------ */ + +#define IOCTL_LIBCFS_TYPE long + +#ifdef __CYGWIN__ +# ifndef BITS_PER_LONG +# define BITS_PER_LONG 64 +# endif +#endif +/* poison patterns. NOTE(review): the 64-bit constant is narrowed by the cast to int (and by the casts to long where long is 32 bits) */ +# define LI_POISON ((int)0x5a5a5a5a5a5a5a5a) +# define LL_POISON ((long)0x5a5a5a5a5a5a5a5a) +# define LP_POISON ((void *)(long)0x5a5a5a5a5a5a5a5a) + +/* this is a bit chunky */ + +/* NOTE(review): _LWORDSIZE is defined here and #undef'd below without being used in between */ +#define _LWORDSIZE BITS_PER_LONG + +# define LPU64 "%llu" +# define LPD64 "%lld" +# define LPX64 "%#llx" +# define LPX64i "%llx" +# define LPO64 "%#llo" +# define LPF64 "L" + +/* + * long_ptr_t & ulong_ptr_t, same to "long" for gcc + */ +# define LPLU "%lu" +# define LPLD "%ld" +# define LPLX "%#lx" + +/* + * pid_t + */ +# 
define LPPID "%d" + + +#undef _LWORDSIZE + +/* compat macros: get_cpu()/put_cpu() fallbacks, used only when get_cpu is + * not already defined; with CONFIG_PREEMPT they disable/enable preemption */ + + +#ifndef get_cpu +# ifdef CONFIG_PREEMPT +# define get_cpu() ({ preempt_disable(); smp_processor_id(); }) +# define put_cpu() preempt_enable() +# else +# define get_cpu() smp_processor_id() +# define put_cpu() +# endif +#else +#endif /* get_cpu & put_cpu */ + +/* both expand to nothing */ +#define INIT_CTL_NAME(a) +#define INIT_STRATEGY(a) + +#endif diff --git a/drivers/staging/lustre/include/linux/libcfs/linux/libcfs.h b/drivers/staging/lustre/include/linux/libcfs/linux/libcfs.h new file mode 100644 index 000000000000..292a3ba1fb96 --- /dev/null +++ b/drivers/staging/lustre/include/linux/libcfs/linux/libcfs.h @@ -0,0 +1,125 @@ +/* + * GPL HEADER START + * + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 only, + * as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License version 2 for more details (a copy is included + * in the LICENSE file that accompanied this code). + * + * You should have received a copy of the GNU General Public License + * version 2 along with this program; If not, see + * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf + * + * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara, + * CA 95054 USA or visit www.sun.com if you need additional information or + * have any questions. + * + * GPL HEADER END + */ +/* + * Copyright (c) 2008, 2010, Oracle and/or its affiliates. All rights reserved. + * Use is subject to license terms. + * + * Copyright (c) 2012, Intel Corporation. + */ +/* + * This file is part of Lustre, http://www.lustre.org/ + * Lustre is a trademark of Sun Microsystems, Inc. 
+ */ + +#ifndef __LIBCFS_LINUX_LIBCFS_H__ +#define __LIBCFS_LINUX_LIBCFS_H__ + +#ifndef __LIBCFS_LIBCFS_H__ +#error Do not #include this file directly. #include <linux/libcfs/libcfs.h> instead +#endif + + + +#include <stdarg.h> +#include <linux/libcfs/linux/linux-cpu.h> +#include <linux/libcfs/linux/linux-time.h> +#include <linux/libcfs/linux/linux-mem.h> +#include <linux/libcfs/linux/linux-prim.h> +#include <linux/libcfs/linux/linux-lock.h> +#include <linux/libcfs/linux/linux-fs.h> +#include <linux/libcfs/linux/linux-tcpip.h> +#include <linux/libcfs/linux/linux-bitops.h> +#include <linux/libcfs/linux/linux-types.h> +#include <linux/libcfs/linux/kp30.h> + +#include <asm/types.h> +#include <linux/types.h> +#include <asm/timex.h> +#include <linux/sched.h> /* THREAD_SIZE */ +#include <linux/rbtree.h> + +#define LUSTRE_TRACE_SIZE (THREAD_SIZE >> 5) + +/* + * CDEBUG_STACK() is THREAD_SIZE minus the offset of the current frame + * address (the DWARF CFA on ia64) within its THREAD_SIZE-aligned region -- + * presumably the number of stack bytes in use. It is stubbed to 0 on x86_64. + */ +#if !defined(__x86_64__) +# ifdef __ia64__ +# define CDEBUG_STACK() (THREAD_SIZE - \ + ((unsigned long)__builtin_dwarf_cfa() & \ + (THREAD_SIZE - 1))) +# else +# define CDEBUG_STACK() (THREAD_SIZE - \ + ((unsigned long)__builtin_frame_address(0) & \ + (THREAD_SIZE - 1))) +# endif /* __ia64__ */ + +/* + * When CDEBUG_STACK() exceeds the recorded maximum libcfs_stack: re-init + * \a msgdata as a D_WARNING message, record the new maximum, log it, set the + * message mask and cdls to \a mask and \a cdls, and dump the stack. + */ +#define __CHECK_STACK(msgdata, mask, cdls) \ +do { \ + if (unlikely(CDEBUG_STACK() > libcfs_stack)) { \ + LIBCFS_DEBUG_MSG_DATA_INIT(msgdata, D_WARNING, NULL); \ + libcfs_stack = CDEBUG_STACK(); \ + libcfs_debug_msg(msgdata, \ + "maximum lustre stack %lu\n", \ + CDEBUG_STACK()); \ + (msgdata)->msg_mask = mask; \ + (msgdata)->msg_cdls = cdls; \ + dump_stack(); \ + /*panic("LBUG");*/ \ + } \ +} while (0) +#define CFS_CHECK_STACK(msgdata, mask, cdls) __CHECK_STACK(msgdata, mask, cdls) +#else /* __x86_64__ */ +#define CFS_CHECK_STACK(msgdata, mask, cdls) do {} while(0) +#define CDEBUG_STACK() (0L) +#endif /* __x86_64__ */ + +/* initial pid */ +#define LUSTRE_LNET_PID 12345 + +/* entry/exit nesting hooks: the statements are no-ops and the level is 0 */ +#define ENTRY_NESTING_SUPPORT (1) +#define ENTRY_NESTING do {;} while (0) +#define EXIT_NESTING do {;} while (0) +#define __current_nesting_level() (0) + +/** 
+ * Platform specific declarations for cfs_curproc API (libcfs/curproc.h) + * + * Implementation is in linux-curproc.c + * + * CFS_CURPROC_COMM_MAX is the size of the comm member of struct task_struct. + */ +#define CFS_CURPROC_COMM_MAX (sizeof ((struct task_struct *)0)->comm) + +#include <linux/capability.h> + +/* long integer with size equal to pointer */ +typedef unsigned long ulong_ptr_t; +typedef long long_ptr_t; + +/* define WITH_WATCHDOG unless it is already defined */ +#ifndef WITH_WATCHDOG +#define WITH_WATCHDOG +#endif + + + + +#endif /* __LIBCFS_LINUX_LIBCFS_H__ */ diff --git a/drivers/staging/lustre/include/linux/libcfs/linux/linux-bitops.h b/drivers/staging/lustre/include/linux/libcfs/linux/linux-bitops.h new file mode 100644 index 000000000000..43936e349dd4 --- /dev/null +++ b/drivers/staging/lustre/include/linux/libcfs/linux/linux-bitops.h @@ -0,0 +1,38 @@ +/* + * GPL HEADER START + * + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 only, + * as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License version 2 for more details (a copy is included + * in the LICENSE file that accompanied this code). + * + * You should have received a copy of the GNU General Public License + * version 2 along with this program; If not, see + * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf + * + * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara, + * CA 95054 USA or visit www.sun.com if you need additional information or + * have any questions. + * + * GPL HEADER END + */ +/* + * Copyright (c) 2010, Oracle and/or its affiliates. All rights reserved. + * Use is subject to license terms. + * + * Copyright (c) 2012, Intel Corporation. 
+ */ +/* + * This file is part of Lustre, http://www.lustre.org/ + * Lustre is a trademark of Sun Microsystems, Inc. + * + * libcfs/include/libcfs/linux/linux-bitops.h + */ +#include <linux/bitops.h> diff --git a/drivers/staging/lustre/include/linux/libcfs/linux/linux-cpu.h b/drivers/staging/lustre/include/linux/libcfs/linux/linux-cpu.h new file mode 100644 index 000000000000..224371c92f7c --- /dev/null +++ b/drivers/staging/lustre/include/linux/libcfs/linux/linux-cpu.h @@ -0,0 +1,175 @@ +/* + * GPL HEADER START + * + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 only, + * as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License version 2 for more details (a copy is included + * in the LICENSE file that accompanied this code). + * + * You should have received a copy of the GNU General Public License + * version 2 along with this program; if not, write to the + * Free Software Foundation, Inc., 59 Temple Place - Suite 330, + * Boston, MA 021110-1307, USA + * + * GPL HEADER END + */ +/* + * Copyright (c) 2010, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2012, Intel Corporation. + */ +/* + * This file is part of Lustre, http://www.lustre.org/ + * Lustre is a trademark of Sun Microsystems, Inc. + * + * libcfs/include/libcfs/linux/linux-cpu.h + * + * Basic library routines. + * + * Author: liang@whamcloud.com + */ + +#ifndef __LIBCFS_LINUX_CPU_H__ +#define __LIBCFS_LINUX_CPU_H__ + +#ifndef __LIBCFS_LIBCFS_H__ +#error Do not #include this file directly. 
#include <linux/libcfs/libcfs.h> instead +#endif + + +#include <linux/cpu.h> +#include <linux/cpuset.h> +#include <linux/topology.h> +#include <linux/version.h> + + +#ifdef CONFIG_SMP + +/* defined only on CONFIG_SMP builds, alongside the CPT structures below */ +#define HAVE_LIBCFS_CPT + +/** virtual processing unit */ +struct cfs_cpu_partition { + /* CPUs mask for this partition */ + cpumask_t *cpt_cpumask; + /* nodes mask for this partition */ + nodemask_t *cpt_nodemask; + /* spread rotor for NUMA allocator */ + unsigned cpt_spread_rotor; +}; + +/** descriptor for CPU partitions */ +struct cfs_cpt_table { + /* version, reserved for hotplug */ + unsigned ctb_version; + /* spread rotor for NUMA allocator */ + unsigned ctb_spread_rotor; + /* # of CPU partitions */ + unsigned ctb_nparts; + /* partitions tables */ + struct cfs_cpu_partition *ctb_parts; + /* shadow HW CPU to CPU partition ID */ + int *ctb_cpu2cpt; + /* all cpus in this partition table */ + cpumask_t *ctb_cpumask; + /* all nodes in this partition table */ + nodemask_t *ctb_nodemask; +}; + +/* CPU topology helpers that take a caller-supplied cpumask or return a count */ +void cfs_cpu_core_siblings(int cpu, cpumask_t *mask); +void cfs_cpu_ht_siblings(int cpu, cpumask_t *mask); +void cfs_node_to_cpumask(int node, cpumask_t *mask); +int cfs_cpu_core_nsiblings(int cpu); +int cfs_cpu_ht_nsiblings(int cpu); + +/** + * comment out definitions for compatible layer + * #define CFS_CPU_NR NR_CPUS + * + * typedef cpumask_t cfs_cpumask_t; + * + * #define cfs_cpu_current() smp_processor_id() + * #define cfs_cpu_online(i) cpu_online(i) + * #define cfs_cpu_online_num() num_online_cpus() + * #define cfs_cpu_online_for_each(i) for_each_online_cpu(i) + * #define cfs_cpu_possible_num() num_possible_cpus() + * #define cfs_cpu_possible_for_each(i) for_each_possible_cpu(i) + * + * #ifdef CONFIG_CPUMASK_SIZE + * #define cfs_cpu_mask_size() cpumask_size() + * #else + * #define cfs_cpu_mask_size() sizeof(cfs_cpumask_t) + * #endif + * + * #define cfs_cpu_mask_set(i, mask) cpu_set(i, mask) + * #define cfs_cpu_mask_unset(i, mask) cpu_clear(i, mask) + * #define cfs_cpu_mask_isset(i, 
mask) cpu_isset(i, mask) + * #define cfs_cpu_mask_clear(mask) cpus_clear(mask) + * #define cfs_cpu_mask_empty(mask) cpus_empty(mask) + * #define cfs_cpu_mask_weight(mask) cpus_weight(mask) + * #define cfs_cpu_mask_first(mask) first_cpu(mask) + * #define cfs_cpu_mask_any_online(mask) (any_online_cpu(mask) != NR_CPUS) + * #define cfs_cpu_mask_for_each(i, mask) for_each_cpu_mask(i, mask) + * #define cfs_cpu_mask_bind(t, mask) set_cpus_allowed(t, mask) + * + * #ifdef HAVE_CPUMASK_COPY + * #define cfs_cpu_mask_copy(dst, src) cpumask_copy(dst, src) + * #else + * #define cfs_cpu_mask_copy(dst, src) memcpy(dst, src, sizeof(*src)) + * #endif + * + * static inline void + * cfs_cpu_mask_of_online(cfs_cpumask_t *mask) + * { + * cfs_cpu_mask_copy(mask, &cpu_online_map); + * } + * + * #ifdef CONFIG_NUMA + * + * #define CFS_NODE_NR MAX_NUMNODES + * + * typedef nodemask_t cfs_node_mask_t; + * + * #define cfs_node_of_cpu(cpu) cpu_to_node(cpu) + * #define cfs_node_online(i) node_online(i) + * #define cfs_node_online_num() num_online_nodes() + * #define cfs_node_online_for_each(i) for_each_online_node(i) + * #define cfs_node_possible_num() num_possible_nodes() + * #define cfs_node_possible_for_each(i) for_each_node(i) + * + * static inline void cfs_node_to_cpumask(int node, cfs_cpumask_t *mask) + * { + * #if defined(HAVE_NODE_TO_CPUMASK) + * *mask = node_to_cpumask(node); + * #elif defined(HAVE_CPUMASK_OF_NODE) + * cfs_cpu_mask_copy(mask, cpumask_of_node(node)); + * #else + * # error "Needs node_to_cpumask or cpumask_of_node" + * #endif + * } + * + * #define cfs_node_mask_set(i, mask) node_set(i, mask) + * #define cfs_node_mask_unset(i, mask) node_clear(i, mask) + * #define cfs_node_mask_isset(i, mask) node_isset(i, mask) + * #define cfs_node_mask_clear(mask) nodes_reset(mask) + * #define cfs_node_mask_empty(mask) nodes_empty(mask) + * #define cfs_node_mask_weight(mask) nodes_weight(mask) + * #define cfs_node_mask_for_each(i, mask) for_each_node_mask(i, mask) + * #define 
cfs_node_mask_copy(dst, src) memcpy(dst, src, sizeof(*src)) + * + * static inline void + * cfs_node_mask_of_online(cfs_node_mask_t *mask) + * { + * cfs_node_mask_copy(mask, &node_online_map); + * } + * + * #endif + */ + +#endif /* CONFIG_SMP */ +#endif /* __LIBCFS_LINUX_CPU_H__ */ diff --git a/drivers/staging/lustre/include/linux/libcfs/linux/linux-crypto.h b/drivers/staging/lustre/include/linux/libcfs/linux/linux-crypto.h new file mode 100644 index 000000000000..97c771cf691f --- /dev/null +++ b/drivers/staging/lustre/include/linux/libcfs/linux/linux-crypto.h @@ -0,0 +1,49 @@ + /* + * GPL HEADER START + * + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 only, + * as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License version 2 for more details (a copy is included + * in the LICENSE file that accompanied this code). + * + * You should have received a copy of the GNU General Public License + * version 2 along with this program; If not, see http://www.gnu.org/licenses + * + * Please visit http://www.xyratex.com/contact if you need additional + * information or have any questions. + * + * GPL HEADER END + */ + +/* + * Copyright 2012 Xyratex Technology Limited + */ + +/** + * Linux crypto hash specific functions. + */ + +/** + * Functions for start/stop shash CRC32 algorithm. + */ +int cfs_crypto_crc32_register(void); +void cfs_crypto_crc32_unregister(void); + +/** + * Functions for start/stop shash adler32 algorithm. 
+ */ +int cfs_crypto_adler32_register(void); +void cfs_crypto_adler32_unregister(void); + +/** + * Functions for start/stop shash crc32 pclmulqdq + */ +int cfs_crypto_crc32_pclmul_register(void); +void cfs_crypto_crc32_pclmul_unregister(void); diff --git a/drivers/staging/lustre/include/linux/libcfs/linux/linux-fs.h b/drivers/staging/lustre/include/linux/libcfs/linux/linux-fs.h new file mode 100644 index 000000000000..eebf138f21e5 --- /dev/null +++ b/drivers/staging/lustre/include/linux/libcfs/linux/linux-fs.h @@ -0,0 +1,92 @@ +/* + * GPL HEADER START + * + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 only, + * as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License version 2 for more details (a copy is included + * in the LICENSE file that accompanied this code). + * + * You should have received a copy of the GNU General Public License + * version 2 along with this program; If not, see + * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf + * + * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara, + * CA 95054 USA or visit www.sun.com if you need additional information or + * have any questions. + * + * GPL HEADER END + */ +/* + * Copyright (c) 2008, 2010, Oracle and/or its affiliates. All rights reserved. + * Use is subject to license terms. + * + * Copyright (c) 2012, Intel Corporation. + */ +/* + * This file is part of Lustre, http://www.lustre.org/ + * Lustre is a trademark of Sun Microsystems, Inc. + * + * libcfs/include/libcfs/linux/linux-fs.h + * + * Basic library routines. 
+ */ + +#ifndef __LIBCFS_LINUX_CFS_FS_H__ +#define __LIBCFS_LINUX_CFS_FS_H__ + +#ifndef __LIBCFS_LIBCFS_H__ +#error Do not #include this file directly. #include <linux/libcfs/libcfs.h> instead +#endif + + +#include <linux/fs.h> +#include <linux/stat.h> +#include <linux/mount.h> +#include <linux/backing-dev.h> +#include <linux/posix_acl_xattr.h> + +#define filp_size(f) \ + (i_size_read((f)->f_dentry->d_inode)) +#define filp_poff(f) \ + (&(f)->f_pos) + +# define do_fsync(fp, flag) \ + ((fp)->f_op->fsync(fp, 0, LLONG_MAX, flag)) + +#define filp_read(fp, buf, size, pos) \ + ((fp)->f_op->read((fp), (buf), (size), pos)) + +#define filp_write(fp, buf, size, pos) \ + ((fp)->f_op->write((fp), (buf), (size), pos)) + +#define filp_fsync(fp) \ + do_fsync(fp, 1) + +#define flock_type(fl) ((fl)->fl_type) +#define flock_set_type(fl, type) do { (fl)->fl_type = (type); } while (0) +#define flock_pid(fl) ((fl)->fl_pid) +#define flock_set_pid(fl, pid) do { (fl)->fl_pid = (pid); } while (0) +#define flock_start(fl) ((fl)->fl_start) +#define flock_set_start(fl, st) do { (fl)->fl_start = (st); } while (0) +#define flock_end(fl) ((fl)->fl_end) +#define flock_set_end(fl, end) do { (fl)->fl_end = (end); } while (0) + +#ifndef IFSHIFT +#define IFSHIFT 12 +#endif + +#ifndef IFTODT +#define IFTODT(type) (((type) & S_IFMT) >> IFSHIFT) +#endif +#ifndef DTTOIF +#define DTTOIF(dirtype) ((dirtype) << IFSHIFT) +#endif + +#endif diff --git a/drivers/staging/lustre/include/linux/libcfs/linux/linux-lock.h b/drivers/staging/lustre/include/linux/libcfs/linux/linux-lock.h new file mode 100644 index 000000000000..6fbcbf3ab0d3 --- /dev/null +++ b/drivers/staging/lustre/include/linux/libcfs/linux/linux-lock.h @@ -0,0 +1,204 @@ +/* + * GPL HEADER START + * + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
+ * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 only, + * as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License version 2 for more details (a copy is included + * in the LICENSE file that accompanied this code). + * + * You should have received a copy of the GNU General Public License + * version 2 along with this program; If not, see + * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf + * + * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara, + * CA 95054 USA or visit www.sun.com if you need additional information or + * have any questions. + * + * GPL HEADER END + */ +/* + * Copyright (c) 2008, 2010, Oracle and/or its affiliates. All rights reserved. + * Use is subject to license terms. + * + * Copyright (c) 2012, Intel Corporation. + */ +/* + * This file is part of Lustre, http://www.lustre.org/ + * Lustre is a trademark of Sun Microsystems, Inc. + * + * libcfs/include/libcfs/linux/linux-lock.h + * + * Basic library routines. + */ + +#ifndef __LIBCFS_LINUX_CFS_LOCK_H__ +#define __LIBCFS_LINUX_CFS_LOCK_H__ + +#ifndef __LIBCFS_LIBCFS_H__ +#error Do not #include this file directly. #include <linux/libcfs/libcfs.h> instead +#endif + + +#include <linux/mutex.h> + +/* + * IMPORTANT !!!!!!!! + * + * Lock declarations are not guaranteed to be initialized, + * although some of them are initialized in Linux. All locks + * declared by CFS_DECL_* should be initialized explicitly.
+ */ + +/* + * spin_lock "implementation" (use Linux kernel's primitives) + * + * - spin_lock_init(x) + * - spin_lock(x) + * - spin_lock_bh(x) + * - spin_lock_bh_init(x) + * - spin_unlock(x) + * - spin_unlock_bh(x) + * - spin_trylock(x) + * - spin_is_locked(x) + * + * - spin_lock_irq(x) + * - spin_lock_irqsave(x, f) + * - spin_unlock_irqrestore(x, f) + * - read_lock_irqsave(lock, f) + * - write_lock_irqsave(lock, f) + * - write_unlock_irqrestore(lock, f) + */ + +/* + * spinlock "implementation" + */ + + + + +/* + * rw_semaphore "implementation" (use Linux kernel's primitives) + * + * - sema_init(x) + * - init_rwsem(x) + * - down_read(x) + * - up_read(x) + * - down_write(x) + * - up_write(x) + */ + + +#define fini_rwsem(s) do {} while (0) + + +/* + * rwlock_t "implementation" (use Linux kernel's primitives) + * + * - rwlock_init(x) + * - read_lock(x) + * - read_unlock(x) + * - write_lock(x) + * - write_unlock(x) + * - write_lock_bh(x) + * - write_unlock_bh(x) + * + * - RW_LOCK_UNLOCKED + */ + + +#ifndef DEFINE_RWLOCK +#define DEFINE_RWLOCK(lock) rwlock_t lock = __RW_LOCK_UNLOCKED(lock) +#endif + +/* + * completion "implementation" (use Linux kernel's primitives) + * + * - DECLARE_COMPLETION(work) + * - INIT_COMPLETION(c) + * - COMPLETION_INITIALIZER(work) + * - init_completion(c) + * - complete(c) + * - wait_for_completion(c) + * - wait_for_completion_interruptible(c) + * - fini_completion(c) + */ +#define fini_completion(c) do { } while (0) + +/* + * semaphore "implementation" (use Linux kernel's primitives) + * - DEFINE_SEMAPHORE(name) + * - sema_init(sem, val) + * - up(sem) + * - down(sem) + * - down_interruptible(sem) + * - down_trylock(sem) + */ + +/* + * mutex "implementation" (use Linux kernel's primitives) + * + * - DEFINE_MUTEX(name) + * - mutex_init(x) + * - mutex_lock(x) + * - mutex_unlock(x) + * - mutex_trylock(x) + * - mutex_is_locked(x) + * - mutex_destroy(x) + */ + +#ifndef lockdep_set_class + 
+/************************************************************************** + * + * Lockdep "implementation". Also see liblustre.h + * + **************************************************************************/ + +struct lock_class_key { + ; +}; + +#define lockdep_set_class(lock, key) \ + do { (void)sizeof(lock); (void)sizeof(key); } while (0) +/* This has to be a macro, so that `subclass' can be undefined in kernels + * that do not support lockdep. */ + + +static inline void lockdep_off(void) +{ +} + +static inline void lockdep_on(void) +{ +} +#else + +#endif /* lockdep_set_class */ + +#ifndef CONFIG_DEBUG_LOCK_ALLOC +#ifndef mutex_lock_nested +#define mutex_lock_nested(mutex, subclass) mutex_lock(mutex) +#endif + +#ifndef spin_lock_nested +#define spin_lock_nested(lock, subclass) spin_lock(lock) +#endif + +#ifndef down_read_nested +#define down_read_nested(lock, subclass) down_read(lock) +#endif + +#ifndef down_write_nested +#define down_write_nested(lock, subclass) down_write(lock) +#endif +#endif /* CONFIG_DEBUG_LOCK_ALLOC */ + + +#endif /* __LIBCFS_LINUX_CFS_LOCK_H__ */ diff --git a/drivers/staging/lustre/include/linux/libcfs/linux/linux-mem.h b/drivers/staging/lustre/include/linux/libcfs/linux/linux-mem.h new file mode 100644 index 000000000000..042a2bc432be --- /dev/null +++ b/drivers/staging/lustre/include/linux/libcfs/linux/linux-mem.h @@ -0,0 +1,120 @@ +/* + * GPL HEADER START + * + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 only, + * as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the GNU + * General Public License version 2 for more details (a copy is included + * in the LICENSE file that accompanied this code). + * + * You should have received a copy of the GNU General Public License + * version 2 along with this program; If not, see + * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf + * + * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara, + * CA 95054 USA or visit www.sun.com if you need additional information or + * have any questions. + * + * GPL HEADER END + */ +/* + * Copyright (c) 2008, 2010, Oracle and/or its affiliates. All rights reserved. + * Use is subject to license terms. + * + * Copyright (c) 2011, 2012, Intel Corporation. + */ +/* + * This file is part of Lustre, http://www.lustre.org/ + * Lustre is a trademark of Sun Microsystems, Inc. + * + * libcfs/include/libcfs/linux/linux-mem.h + * + * Basic library routines. + */ + +#ifndef __LIBCFS_LINUX_CFS_MEM_H__ +#define __LIBCFS_LINUX_CFS_MEM_H__ + +#ifndef __LIBCFS_LIBCFS_H__ +#error Do not #include this file directly. #include <linux/libcfs/libcfs.h> instead +#endif + + +#include <linux/mm.h> +#include <linux/vmalloc.h> +#include <linux/pagemap.h> +#include <linux/slab.h> +#include <linux/memcontrol.h> +#include <linux/mm_inline.h> + +#define CFS_PAGE_MASK (~((__u64)PAGE_CACHE_SIZE-1)) +#define page_index(p) ((p)->index) + +#define memory_pressure_get() (current->flags & PF_MEMALLOC) +#define memory_pressure_set() do { current->flags |= PF_MEMALLOC; } while (0) +#define memory_pressure_clr() do { current->flags &= ~PF_MEMALLOC; } while (0) + +#if BITS_PER_LONG == 32 +/* limit to lowmem on 32-bit systems */ +#define NUM_CACHEPAGES \ + min(num_physpages, 1UL << (30 - PAGE_CACHE_SHIFT) * 3 / 4) +#else +#define NUM_CACHEPAGES num_physpages +#endif + +/* + * In Linux there is no way to determine whether current execution context is + * blockable. 
+ */ +#define ALLOC_ATOMIC_TRY GFP_ATOMIC + +/* + * Save the current address limit in __oldfs and switch to get_ds(); + * MMSPACE_CLOSE restores the saved limit. DECL_MMSPACE declares __oldfs. + */ +#define DECL_MMSPACE mm_segment_t __oldfs +#define MMSPACE_OPEN \ + do { __oldfs = get_fs(); set_fs(get_ds());} while(0) +#define MMSPACE_CLOSE set_fs(__oldfs) + +/* + * Shrinker + */ + +/* parameter list of a shrinker callback; nr_to_scan and gfp_mask are unused */ +# define SHRINKER_ARGS(sc, nr_to_scan, gfp_mask) \ + struct shrinker *shrinker, \ + struct shrink_control *sc +/* read field var of the shrink_control sc */ +# define shrink_param(sc, var) ((sc)->var) + +typedef int (*shrinker_t)(SHRINKER_ARGS(sc, nr_to_scan, gfp_mask)); + +/* + * Allocate a shrinker, set its callback and seek cost, and register it. + * + * The allocation is zeroed so that every field this function does not set + * explicitly starts out as 0 instead of uninitialized heap contents. + * + * Returns the registered shrinker, or NULL if the allocation failed. + * The result must be released with remove_shrinker(). + */ +static inline +struct shrinker *set_shrinker(int seek, shrinker_t func) +{ + struct shrinker *s; + + s = kzalloc(sizeof(*s), GFP_KERNEL); + if (s == NULL) + return NULL; + + s->shrink = func; + s->seeks = seek; + + register_shrinker(s); + + return s; +} + +/* + * Unregister and free a shrinker created by set_shrinker(). + * A NULL shrinker is ignored. + */ +static inline +void remove_shrinker(struct shrinker *shrinker) +{ + if (shrinker == NULL) + return; + + unregister_shrinker(shrinker); + kfree(shrinker); +} + +#endif /* __LIBCFS_LINUX_CFS_MEM_H__ */ diff --git a/drivers/staging/lustre/include/linux/libcfs/linux/linux-prim.h b/drivers/staging/lustre/include/linux/libcfs/linux/linux-prim.h new file mode 100644 index 000000000000..a4963a8dfdd8 --- /dev/null +++ b/drivers/staging/lustre/include/linux/libcfs/linux/linux-prim.h @@ -0,0 +1,241 @@ +/* + * GPL HEADER START + * + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 only, + * as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License version 2 for more details (a copy is included + * in the LICENSE file that accompanied this code).
+ * + * You should have received a copy of the GNU General Public License + * version 2 along with this program; If not, see + * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf + * + * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara, + * CA 95054 USA or visit www.sun.com if you need additional information or + * have any questions. + * + * GPL HEADER END + */ +/* + * Copyright (c) 2008, 2010, Oracle and/or its affiliates. All rights reserved. + * Use is subject to license terms. + * + * Copyright (c) 2012, Intel Corporation. + */ +/* + * This file is part of Lustre, http://www.lustre.org/ + * Lustre is a trademark of Sun Microsystems, Inc. + * + * libcfs/include/libcfs/linux/linux-prim.h + * + * Basic library routines. + */ + +#ifndef __LIBCFS_LINUX_CFS_PRIM_H__ +#define __LIBCFS_LINUX_CFS_PRIM_H__ + +#ifndef __LIBCFS_LIBCFS_H__ +#error Do not #include this file directly. #include <linux/libcfs/libcfs.h> instead +#endif + + +#include <linux/module.h> +#include <linux/init.h> +#include <linux/kernel.h> +#include <linux/version.h> +#include <linux/proc_fs.h> +#include <linux/mm.h> +#include <linux/timer.h> +#include <linux/signal.h> +#include <linux/sched.h> +#include <linux/kthread.h> +#include <linux/random.h> + +#include <linux/miscdevice.h> +#include <linux/libcfs/linux/portals_compat25.h> +#include <asm/div64.h> + +#include <linux/libcfs/linux/linux-time.h> + + +/* + * CPU + */ +#ifdef for_each_possible_cpu +#define cfs_for_each_possible_cpu(cpu) for_each_possible_cpu(cpu) +#elif defined(for_each_cpu) +#define cfs_for_each_possible_cpu(cpu) for_each_cpu(cpu) +#endif + +#ifdef NR_CPUS +#else +#define NR_CPUS 1 +#endif + +/* + * cache + */ + +/* + * IRQs + */ + + +/* + * Pseudo device register + */ +typedef struct miscdevice psdev_t; + +/* + * Sysctl register + */ +typedef struct ctl_table ctl_table_t; +typedef struct ctl_table_header ctl_table_header_t; + +#define cfs_register_sysctl_table(t, a) register_sysctl_table(t) + 
+#define DECLARE_PROC_HANDLER(name) \ +static int \ +LL_PROC_PROTO(name) \ +{ \ + DECLARE_LL_PROC_PPOS_DECL; \ + \ + return proc_call_handler(table->data, write, \ + ppos, buffer, lenp, \ + __##name); \ +} + +/* + * Symbol register + */ +#define cfs_symbol_register(s, p) do {} while(0) +#define cfs_symbol_unregister(s) do {} while(0) +#define cfs_symbol_get(s) symbol_get(s) +#define cfs_symbol_put(s) symbol_put(s) + +typedef struct module module_t; + +/* + * Proc file system APIs + */ +typedef struct proc_dir_entry proc_dir_entry_t; + +/* + * Wait Queue + */ + + +typedef long cfs_task_state_t; + +#define CFS_DECL_WAITQ(wq) DECLARE_WAIT_QUEUE_HEAD(wq) + +/* + * Task struct + */ +typedef struct task_struct task_t; +/* + * PUSH_JOURNAL saves current->journal_info in the local declared by + * DECL_JOURNAL_DATA and clears it; POP_JOURNAL restores the saved value. + */ +#define DECL_JOURNAL_DATA void *journal_info +#define PUSH_JOURNAL do { \ + journal_info = current->journal_info; \ + current->journal_info = NULL; \ + } while(0) +#define POP_JOURNAL do { \ + current->journal_info = journal_info; \ + } while(0) + +/* Module interfaces */ +#define cfs_module(name, version, init, fini) \ + module_init(init); \ + module_exit(fini) + +/* + * Signal + */ + +/* + * Timer + */ +typedef struct timer_list timer_list_t; + + +#ifndef wait_event_timeout /* Only for RHEL3 2.4.21 kernel */ +/* + * Fallback used only when the kernel provides no wait_event_timeout(). + * Sleeps uninterruptibly on wq until condition becomes true or the deadline + * (timeout jiffies from entry) has passed; on timeout, ret is set to the + * number of jiffies by which the deadline was exceeded. The deadline test + * uses time_after() so that it stays correct when jiffies wraps around. + */ +#define __wait_event_timeout(wq, condition, timeout, ret) \ +do { \ + if (!(condition)) { \ + wait_queue_t __wait; \ + unsigned long expire; \ + \ + init_waitqueue_entry(&__wait, current); \ + expire = (timeout) + jiffies; \ + add_wait_queue(&(wq), &__wait); \ + for (;;) { \ + set_current_state(TASK_UNINTERRUPTIBLE); \ + if (condition) \ + break; \ + if (time_after(jiffies, expire)) { \ + (ret) = jiffies - expire; \ + break; \ + } \ + schedule_timeout(timeout); \ + } \ + current->state = TASK_RUNNING; \ + remove_wait_queue(&(wq), &__wait); \ + } \ +} while (0) +/* + retval == 0; condition met; we're good. + retval > 0; timed out.
+*/ +#define cfs_waitq_wait_event_timeout(wq, condition, timeout, ret) \ +do { \ + (ret) = 0; \ + if (!(condition)) \ + __wait_event_timeout(wq, condition, timeout, ret); \ +} while (0) +#else +/* + * NOTE(review): this branch stores the kernel's wait_event_timeout() result + * in ret unchanged; that result appears to use the opposite convention from + * the fallback above (0 on timeout) - confirm that callers expect this. + */ +#define cfs_waitq_wait_event_timeout(wq, condition, timeout, ret) \ + ret = wait_event_timeout(wq, condition, timeout) +#endif + +#define cfs_waitq_wait_event_interruptible_timeout(wq, c, timeout, ret) \ + ret = wait_event_interruptible_timeout(wq, c, timeout) + +/* + * atomic + */ + + +#define cfs_atomic_add_unless(atom, a, u) atomic_add_unless(atom, a, u) +#define cfs_atomic_cmpxchg(atom, old, nv) atomic_cmpxchg(atom, old, nv) + +/* + * membar + */ + + +/* + * interrupt + */ + + +/* + * might_sleep + */ + +/* + * group_info + */ +typedef struct group_info group_info_t; + + +/* + * Random bytes + */ +#endif diff --git a/drivers/staging/lustre/include/linux/libcfs/linux/linux-tcpip.h b/drivers/staging/lustre/include/linux/libcfs/linux/linux-tcpip.h new file mode 100644 index 000000000000..687f33f4e8a7 --- /dev/null +++ b/drivers/staging/lustre/include/linux/libcfs/linux/linux-tcpip.h @@ -0,0 +1,87 @@ +/* + * GPL HEADER START + * + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 only, + * as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License version 2 for more details (a copy is included + * in the LICENSE file that accompanied this code).
+ * + * You should have received a copy of the GNU General Public License + * version 2 along with this program; If not, see + * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf + * + * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara, + * CA 95054 USA or visit www.sun.com if you need additional information or + * have any questions. + * + * GPL HEADER END + */ +/* + * Copyright (c) 2008, 2010, Oracle and/or its affiliates. All rights reserved. + * Use is subject to license terms. + * + * Copyright (c) 2012, Intel Corporation. + */ +/* + * This file is part of Lustre, http://www.lustre.org/ + * Lustre is a trademark of Sun Microsystems, Inc. + * + * libcfs/include/libcfs/linux/linux-tcpip.h + * + * Basic library routines. + */ + +#ifndef __LIBCFS_LINUX_CFS_TCP_H__ +#define __LIBCFS_LINUX_CFS_TCP_H__ + +#ifndef __LIBCFS_LIBCFS_H__ +#error Do not #include this file directly. #include <linux/libcfs/libcfs.h> instead +#endif + + +#include <net/sock.h> + +#ifndef HIPQUAD +// XXX Should just kill all users +#if defined(__LITTLE_ENDIAN) +#define HIPQUAD(addr) \ + ((unsigned char *)&addr)[3], \ + ((unsigned char *)&addr)[2], \ + ((unsigned char *)&addr)[1], \ + ((unsigned char *)&addr)[0] +#elif defined(__BIG_ENDIAN) +#define HIPQUAD NIPQUAD +#else +#error "Please fix asm/byteorder.h" +#endif /* __LITTLE_ENDIAN */ +#endif + +typedef struct socket socket_t; + +#define SOCK_SNDBUF(so) ((so)->sk->sk_sndbuf) +#define SOCK_TEST_NOSPACE(so) test_bit(SOCK_NOSPACE, &(so)->flags) + +static inline int +cfs_sock_error(struct socket *sock) +{ + return sock->sk->sk_err; +} + +static inline int +cfs_sock_wmem_queued(struct socket *sock) +{ + return sock->sk->sk_wmem_queued; +} + +#define cfs_sk_sleep(sk) sk_sleep(sk) + +#define DEFAULT_NET (&init_net) + +#endif diff --git a/drivers/staging/lustre/include/linux/libcfs/linux/linux-time.h b/drivers/staging/lustre/include/linux/libcfs/linux/linux-time.h new file mode 100644 index 000000000000..4a48b914b42a 
--- /dev/null +++ b/drivers/staging/lustre/include/linux/libcfs/linux/linux-time.h @@ -0,0 +1,275 @@ +/* + * GPL HEADER START + * + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 only, + * as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License version 2 for more details (a copy is included + * in the LICENSE file that accompanied this code). + * + * You should have received a copy of the GNU General Public License + * version 2 along with this program; If not, see + * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf + * + * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara, + * CA 95054 USA or visit www.sun.com if you need additional information or + * have any questions. + * + * GPL HEADER END + */ +/* + * Copyright (c) 2008, 2010, Oracle and/or its affiliates. All rights reserved. + * Use is subject to license terms. + */ +/* + * This file is part of Lustre, http://www.lustre.org/ + * Lustre is a trademark of Sun Microsystems, Inc. + * + * libcfs/include/libcfs/linux/linux-time.h + * + * Implementation of portable time API for Linux (kernel and user-level). + * + * Author: Nikita Danilov <nikita@clusterfs.com> + */ + +#ifndef __LIBCFS_LINUX_LINUX_TIME_H__ +#define __LIBCFS_LINUX_LINUX_TIME_H__ + +#ifndef __LIBCFS_LIBCFS_H__ +#error Do not #include this file directly. #include <linux/libcfs/libcfs.h> instead +#endif + + +/* Portable time API */ + +/* + * Platform provides three opaque data-types: + * + * cfs_time_t represents point in time. This is internal kernel + * time rather than "wall clock". This time bears no + * relation to gettimeofday(). 
+ * + * cfs_duration_t represents time interval with resolution of internal + * platform clock + * + * cfs_fs_time_t represents instance in world-visible time. This is + * used in file-system time-stamps + * + * cfs_time_t cfs_time_current(void); + * cfs_time_t cfs_time_add (cfs_time_t, cfs_duration_t); + * cfs_duration_t cfs_time_sub (cfs_time_t, cfs_time_t); + * int cfs_impl_time_before (cfs_time_t, cfs_time_t); + * int cfs_impl_time_before_eq(cfs_time_t, cfs_time_t); + * + * cfs_duration_t cfs_duration_build(int64_t); + * + * time_t cfs_duration_sec (cfs_duration_t); + * void cfs_duration_usec(cfs_duration_t, struct timeval *); + * void cfs_duration_nsec(cfs_duration_t, struct timespec *); + * + * void cfs_fs_time_current(cfs_fs_time_t *); + * time_t cfs_fs_time_sec (cfs_fs_time_t *); + * void cfs_fs_time_usec (cfs_fs_time_t *, struct timeval *); + * void cfs_fs_time_nsec (cfs_fs_time_t *, struct timespec *); + * int cfs_fs_time_before (cfs_fs_time_t *, cfs_fs_time_t *); + * int cfs_fs_time_beforeq(cfs_fs_time_t *, cfs_fs_time_t *); + * + * CFS_TIME_FORMAT + * CFS_DURATION_FORMAT + * + */ + +#define ONE_BILLION ((u_int64_t)1000000000) +#define ONE_MILLION 1000000 + + +#include <linux/module.h> +#include <linux/kernel.h> +#include <linux/version.h> +#include <linux/time.h> +#include <asm/div64.h> + +#include <linux/libcfs/linux/portals_compat25.h> + +/* + * post 2.5 kernels. 
+ */ + +#include <linux/jiffies.h> + +typedef struct timespec cfs_fs_time_t; + +/* Convert t to a timeval, truncating nanoseconds to microseconds. */ +static inline void cfs_fs_time_usec(cfs_fs_time_t *t, struct timeval *v) +{ + v->tv_sec = t->tv_sec; + v->tv_usec = t->tv_nsec / 1000; +} + +/* Copy t into s. */ +static inline void cfs_fs_time_nsec(cfs_fs_time_t *t, struct timespec *s) +{ + *s = *t; +} + +/* + * internal helper function used by cfs_fs_time_before*() + */ +static inline unsigned long long __cfs_fs_time_flat(cfs_fs_time_t *t) +{ + return (unsigned long long)t->tv_sec * ONE_BILLION + t->tv_nsec; +} + + +/* + * Generic kernel stuff + */ + +typedef unsigned long cfs_time_t; /* jiffies */ +typedef long cfs_duration_t; +typedef cycles_t cfs_cycles_t; + +static inline int cfs_time_before(cfs_time_t t1, cfs_time_t t2) +{ + return time_before(t1, t2); +} + +static inline int cfs_time_beforeq(cfs_time_t t1, cfs_time_t t2) +{ + return time_before_eq(t1, t2); +} + +static inline cfs_time_t cfs_time_current(void) +{ + return jiffies; +} + +static inline time_t cfs_time_current_sec(void) +{ + return get_seconds(); +} + +static inline void cfs_fs_time_current(cfs_fs_time_t *t) +{ + *t = CURRENT_TIME; +} + +static inline time_t cfs_fs_time_sec(cfs_fs_time_t *t) +{ + return t->tv_sec; +} + +static inline int cfs_fs_time_before(cfs_fs_time_t *t1, cfs_fs_time_t *t2) +{ + return __cfs_fs_time_flat(t1) < __cfs_fs_time_flat(t2); +} + +static inline int cfs_fs_time_beforeq(cfs_fs_time_t *t1, cfs_fs_time_t *t2) +{ + return __cfs_fs_time_flat(t1) <= __cfs_fs_time_flat(t2); +} + +#if 0 +static inline cfs_duration_t cfs_duration_build(int64_t nano) +{ +#if (BITS_PER_LONG == 32) + /* We cannot use do_div(t, ONE_BILLION), do_div can only process + * 64 bits n and 32 bits base */ + int64_t t = nano * HZ; + do_div(t, 1000); + do_div(t, 1000000); + return (cfs_duration_t)t; +#else + return (nano * HZ / ONE_BILLION); +#endif +} +#endif + +/* Convert a number of seconds to a duration in jiffies. */ +static inline cfs_duration_t cfs_time_seconds(int seconds) +{ + return ((cfs_duration_t)seconds) * HZ; +} + +/* Convert a duration in jiffies to whole seconds. */ +static inline time_t
cfs_duration_sec(cfs_duration_t d) +{ + return d / HZ; +} + +/* + * Split duration d (jiffies) into seconds and microseconds in *s. + * + * In the 32-bit, large-HZ branch the remainder is widened to 64 bits + * before it is multiplied by ONE_MILLION, so the product is computed in + * 64-bit arithmetic instead of overflowing a 32-bit long first. + */ +static inline void cfs_duration_usec(cfs_duration_t d, struct timeval *s) +{ +#if (BITS_PER_LONG == 32) && (HZ > 4096) + __u64 t; + + s->tv_sec = d / HZ; + t = (__u64)(d - (cfs_duration_t)s->tv_sec * HZ) * ONE_MILLION; + do_div(t, HZ); + s->tv_usec = t; +#else + s->tv_sec = d / HZ; + s->tv_usec = ((d - (cfs_duration_t)s->tv_sec * HZ) * + ONE_MILLION) / HZ; +#endif +} + +/* + * Split duration d (jiffies) into seconds and nanoseconds in *s. + * ONE_BILLION is a 64-bit constant, so the multiplication is done in + * 64 bits; 32-bit builds use do_div() for the 64-bit division. + */ +static inline void cfs_duration_nsec(cfs_duration_t d, struct timespec *s) +{ +#if (BITS_PER_LONG == 32) + __u64 t; + + s->tv_sec = d / HZ; + t = (d - s->tv_sec * HZ) * ONE_BILLION; + do_div(t, HZ); + s->tv_nsec = t; +#else + s->tv_sec = d / HZ; + s->tv_nsec = ((d - s->tv_sec * HZ) * ONE_BILLION) / HZ; +#endif +} + +#define cfs_time_current_64 get_jiffies_64 + +static inline __u64 cfs_time_add_64(__u64 t, __u64 d) +{ + return t + d; +} + +/* Return the current 64-bit jiffies value shifted by the given seconds. */ +static inline __u64 cfs_time_shift_64(int seconds) +{ + return cfs_time_add_64(cfs_time_current_64(), + cfs_time_seconds(seconds)); +} + +/* + * Return non-zero if t1 is strictly before t2. The difference is taken in + * unsigned arithmetic and only then interpreted as signed, so the + * subtraction itself cannot overflow a signed type. + */ +static inline int cfs_time_before_64(__u64 t1, __u64 t2) +{ + return (__s64)(t2 - t1) > 0; +} + +/* Return non-zero if t1 is before or equal to t2; see cfs_time_before_64(). */ +static inline int cfs_time_beforeq_64(__u64 t1, __u64 t2) +{ + return (__s64)(t2 - t1) >= 0; +} + + +/* + * One jiffy + */ +#define CFS_TICK (1) + +#define CFS_TIME_T "%lu" +#define CFS_DURATION_T "%ld" + + +#endif /* __LIBCFS_LINUX_LINUX_TIME_H__ */ +/* + * Local variables: + * c-indentation-style: "K&R" + * c-basic-offset: 8 + * tab-width: 8 + * fill-column: 80 + * scroll-step: 1 + * End: + */ diff --git a/drivers/staging/lustre/include/linux/libcfs/linux/linux-types.h b/drivers/staging/lustre/include/linux/libcfs/linux/linux-types.h new file mode 100644 index 000000000000..142394925567 --- /dev/null +++ b/drivers/staging/lustre/include/linux/libcfs/linux/linux-types.h @@ -0,0 +1,36 @@ +/* + * GPL HEADER START + * + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 only, + * as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License version 2 for more details (a copy is included + * in the LICENSE file that accompanied this code). + * + * You should have received a copy of the GNU General Public License + * version 2 along with this program; If not, see + * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf + * + * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara, + * CA 95054 USA or visit www.sun.com if you need additional information or + * have any questions. + * + * GPL HEADER END + */ +/* + * Copyright (c) 2010, Oracle and/or its affiliates. All rights reserved. + * Use is subject to license terms. + */ +/* + * This file is part of Lustre, http://www.lustre.org/ + * Lustre is a trademark of Sun Microsystems, Inc. + * + * libcfs/include/libcfs/linux/linux-types.h + */ +#include <linux/types.h> diff --git a/drivers/staging/lustre/include/linux/libcfs/linux/portals_compat25.h b/drivers/staging/lustre/include/linux/libcfs/linux/portals_compat25.h new file mode 100644 index 000000000000..132a4bec3575 --- /dev/null +++ b/drivers/staging/lustre/include/linux/libcfs/linux/portals_compat25.h @@ -0,0 +1,114 @@ +/* + * GPL HEADER START + * + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 only, + * as published by the Free Software Foundation.
+ * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License version 2 for more details (a copy is included + * in the LICENSE file that accompanied this code). + * + * You should have received a copy of the GNU General Public License + * version 2 along with this program; If not, see + * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf + * + * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara, + * CA 95054 USA or visit www.sun.com if you need additional information or + * have any questions. + * + * GPL HEADER END + */ +/* + * Copyright (c) 2008, 2010, Oracle and/or its affiliates. All rights reserved. + * Use is subject to license terms. + * + * Copyright (c) 2012, Intel Corporation. + */ +/* + * This file is part of Lustre, http://www.lustre.org/ + * Lustre is a trademark of Sun Microsystems, Inc. 
+ */ + +#ifndef __LIBCFS_LINUX_PORTALS_COMPAT_H__ +#define __LIBCFS_LINUX_PORTALS_COMPAT_H__ + +// XXX BUG 1511 -- remove this stanza and all callers when bug 1511 is resolved +#if defined(SPINLOCK_DEBUG) && SPINLOCK_DEBUG +# define SIGNAL_MASK_ASSERT() \ + LASSERT(current->sighand->siglock.magic == SPINLOCK_MAGIC) +#else +# define SIGNAL_MASK_ASSERT() +#endif +// XXX BUG 1511 -- remove this stanza and all callers when bug 1511 is resolved + +#define SIGNAL_MASK_LOCK(task, flags) \ + spin_lock_irqsave(&task->sighand->siglock, flags) +#define SIGNAL_MASK_UNLOCK(task, flags) \ + spin_unlock_irqrestore(&task->sighand->siglock, flags) +#define USERMODEHELPER(path, argv, envp) \ + call_usermodehelper(path, argv, envp, 1) +#define clear_tsk_thread_flag(current, TIF_SIGPENDING) clear_tsk_thread_flag(current, \ + TIF_SIGPENDING) +# define smp_num_cpus num_online_cpus() + +#define cfs_wait_event_interruptible(wq, condition, ret) \ + ret = wait_event_interruptible(wq, condition) +#define cfs_wait_event_interruptible_exclusive(wq, condition, ret) \ + ret = wait_event_interruptible_exclusive(wq, condition) + +#define THREAD_NAME(comm, len, fmt, a...) 
\ + snprintf(comm, len, fmt, ## a) + +/* 2.6 alloc_page users can use page->lru */ +#define PAGE_LIST_ENTRY lru +#define PAGE_LIST(page) ((page)->lru) + +#ifndef __user +#define __user +#endif + +#ifndef __fls +#define __cfs_fls fls +#else +#define __cfs_fls __fls +#endif + +#define ll_proc_dointvec(table, write, filp, buffer, lenp, ppos) \ + proc_dointvec(table, write, buffer, lenp, ppos); + +#define ll_proc_dolongvec(table, write, filp, buffer, lenp, ppos) \ + proc_doulongvec_minmax(table, write, buffer, lenp, ppos); +#define ll_proc_dostring(table, write, filp, buffer, lenp, ppos) \ + proc_dostring(table, write, buffer, lenp, ppos); +#define LL_PROC_PROTO(name) \ + name(ctl_table_t *table, int write, \ + void __user *buffer, size_t *lenp, loff_t *ppos) +#define DECLARE_LL_PROC_PPOS_DECL + +/* helper for sysctl handlers */ +int proc_call_handler(void *data, int write, + loff_t *ppos, void *buffer, size_t *lenp, + int (*handler)(void *data, int write, + loff_t pos, void *buffer, int len)); +/* + * CPU + */ +#ifdef for_each_possible_cpu +#define cfs_for_each_possible_cpu(cpu) for_each_possible_cpu(cpu) +#elif defined(for_each_cpu) +#define cfs_for_each_possible_cpu(cpu) for_each_cpu(cpu) +#endif + +#ifdef NR_CPUS +#else +#define NR_CPUS 1 +#endif + +#define cfs_register_sysctl_table(t, a) register_sysctl_table(t) + +#endif /* _PORTALS_COMPAT_H */ diff --git a/drivers/staging/lustre/include/linux/libcfs/lucache.h b/drivers/staging/lustre/include/linux/libcfs/lucache.h new file mode 100644 index 000000000000..7ae36fc88d77 --- /dev/null +++ b/drivers/staging/lustre/include/linux/libcfs/lucache.h @@ -0,0 +1,162 @@ +/* + * GPL HEADER START + * + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 only, + * as published by the Free Software Foundation. 
+ * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License version 2 for more details (a copy is included + * in the LICENSE file that accompanied this code). + * + * You should have received a copy of the GNU General Public License + * version 2 along with this program; If not, see + * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf + * + * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara, + * CA 95054 USA or visit www.sun.com if you need additional information or + * have any questions. + * + * GPL HEADER END + */ +/* + * Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved. + * Use is subject to license terms. + * + * Copyright (c) 2012, Intel Corporation. + */ +/* + * This file is part of Lustre, http://www.lustre.org/ + * Lustre is a trademark of Sun Microsystems, Inc. 
+ */ + +#ifndef _LUCACHE_H +#define _LUCACHE_H + +#include <linux/libcfs/libcfs.h> + +/** \defgroup ucache ucache + * + * @{ + */ + +#define UC_CACHE_NEW 0x01 +#define UC_CACHE_ACQUIRING 0x02 +#define UC_CACHE_INVALID 0x04 +#define UC_CACHE_EXPIRED 0x08 + +#define UC_CACHE_IS_NEW(i) ((i)->ue_flags & UC_CACHE_NEW) +#define UC_CACHE_IS_INVALID(i) ((i)->ue_flags & UC_CACHE_INVALID) +#define UC_CACHE_IS_ACQUIRING(i) ((i)->ue_flags & UC_CACHE_ACQUIRING) +#define UC_CACHE_IS_EXPIRED(i) ((i)->ue_flags & UC_CACHE_EXPIRED) +#define UC_CACHE_IS_VALID(i) ((i)->ue_flags == 0) + +#define UC_CACHE_SET_NEW(i) (i)->ue_flags |= UC_CACHE_NEW +#define UC_CACHE_SET_INVALID(i) (i)->ue_flags |= UC_CACHE_INVALID +#define UC_CACHE_SET_ACQUIRING(i) (i)->ue_flags |= UC_CACHE_ACQUIRING +#define UC_CACHE_SET_EXPIRED(i) (i)->ue_flags |= UC_CACHE_EXPIRED +#define UC_CACHE_SET_VALID(i) (i)->ue_flags = 0 + +#define UC_CACHE_CLEAR_NEW(i) (i)->ue_flags &= ~UC_CACHE_NEW +#define UC_CACHE_CLEAR_ACQUIRING(i) (i)->ue_flags &= ~UC_CACHE_ACQUIRING +#define UC_CACHE_CLEAR_INVALID(i) (i)->ue_flags &= ~UC_CACHE_INVALID +#define UC_CACHE_CLEAR_EXPIRED(i) (i)->ue_flags &= ~UC_CACHE_EXPIRED + +struct upcall_cache_entry; + +struct md_perm { + lnet_nid_t mp_nid; + __u32 mp_perm; +}; + +struct md_identity { + struct upcall_cache_entry *mi_uc_entry; + uid_t mi_uid; + gid_t mi_gid; + group_info_t *mi_ginfo; + int mi_nperms; + struct md_perm *mi_perms; +}; + +struct upcall_cache_entry { + struct list_head ue_hash; + __u64 ue_key; + atomic_t ue_refcount; + int ue_flags; + wait_queue_head_t ue_waitq; + cfs_time_t ue_acquire_expire; + cfs_time_t ue_expire; + union { + struct md_identity identity; + } u; +}; + +#define UC_CACHE_HASH_SIZE (128) +#define UC_CACHE_HASH_INDEX(id) ((id) & (UC_CACHE_HASH_SIZE - 1)) +#define UC_CACHE_UPCALL_MAXPATH (1024UL) + +struct upcall_cache; + +struct upcall_cache_ops { + void (*init_entry)(struct upcall_cache_entry *, void *args); + void (*free_entry)(struct upcall_cache *, + struct 
upcall_cache_entry *); + int (*upcall_compare)(struct upcall_cache *, + struct upcall_cache_entry *, + __u64 key, void *args); + int (*downcall_compare)(struct upcall_cache *, + struct upcall_cache_entry *, + __u64 key, void *args); + int (*do_upcall)(struct upcall_cache *, + struct upcall_cache_entry *); + int (*parse_downcall)(struct upcall_cache *, + struct upcall_cache_entry *, void *); +}; + +struct upcall_cache { + struct list_head uc_hashtable[UC_CACHE_HASH_SIZE]; + spinlock_t uc_lock; + rwlock_t uc_upcall_rwlock; + + char uc_name[40]; /* for upcall */ + char uc_upcall[UC_CACHE_UPCALL_MAXPATH]; + int uc_acquire_expire; /* seconds */ + int uc_entry_expire; /* seconds */ + struct upcall_cache_ops *uc_ops; +}; + +struct upcall_cache_entry *upcall_cache_get_entry(struct upcall_cache *cache, + __u64 key, void *args); +void upcall_cache_put_entry(struct upcall_cache *cache, + struct upcall_cache_entry *entry); +int upcall_cache_downcall(struct upcall_cache *cache, __u32 err, __u64 key, + void *args); +void upcall_cache_flush_idle(struct upcall_cache *cache); +void upcall_cache_flush_all(struct upcall_cache *cache); +void upcall_cache_flush_one(struct upcall_cache *cache, __u64 key, void *args); +struct upcall_cache *upcall_cache_init(const char *name, const char *upcall, + struct upcall_cache_ops *ops); +void upcall_cache_cleanup(struct upcall_cache *cache); + +#if 0 +struct upcall_cache_entry *upcall_cache_get_entry(struct upcall_cache *hash, + __u64 key, __u32 primary, + __u32 ngroups, __u32 *groups); +void upcall_cache_put_entry(struct upcall_cache *hash, + struct upcall_cache_entry *entry); +int upcall_cache_downcall(struct upcall_cache *hash, __u32 err, __u64 key, + __u32 primary, __u32 ngroups, __u32 *groups); +void upcall_cache_flush_idle(struct upcall_cache *cache); +void upcall_cache_flush_all(struct upcall_cache *cache); +struct upcall_cache *upcall_cache_init(const char *name); +void upcall_cache_cleanup(struct upcall_cache *hash); + +#endif + +/** @} 
ucache */ + +#endif /* _LUCACHE_H */ diff --git a/drivers/staging/lustre/include/linux/libcfs/params_tree.h b/drivers/staging/lustre/include/linux/libcfs/params_tree.h new file mode 100644 index 000000000000..3f18a4467037 --- /dev/null +++ b/drivers/staging/lustre/include/linux/libcfs/params_tree.h @@ -0,0 +1,166 @@ +/* + * GPL HEADER START + * + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 only, + * as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License version 2 for more details (a copy is included + * in the LICENSE file that accompanied this code). + * + * You should have received a copy of the GNU General Public License + * version 2 along with this program; If not, see + * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf + * + * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara, + * CA 95054 USA or visit www.sun.com if you need additional information or + * have any questions. + * + * GPL HEADER END + */ +/* + * Copyright (c) 2009, 2010, Oracle and/or its affiliates. All rights reserved. + * Use is subject to license terms. + * + * Copyright (c) 2012, Intel Corporation. + */ +/* + * This file is part of Lustre, http://www.lustre.org/ + * Lustre is a trademark of Sun Microsystems, Inc. + * + * API and structure definitions for params_tree. 
+ * + * Author: LiuYing <emoly.liu@oracle.com> + */ +#ifndef __PARAMS_TREE_H__ +#define __PARAMS_TREE_H__ + +#include <linux/libcfs/libcfs.h> + +#undef LPROCFS +#if defined(CONFIG_PROC_FS) +# define LPROCFS +#endif + +#ifdef LPROCFS +typedef struct file cfs_param_file_t; +typedef struct inode cfs_inode_t; +typedef struct proc_inode cfs_proc_inode_t; +typedef struct seq_file cfs_seq_file_t; +typedef struct seq_operations cfs_seq_ops_t; +typedef struct file_operations cfs_param_file_ops_t; +typedef module_t *cfs_param_module_t; +typedef struct proc_dir_entry cfs_param_dentry_t; +typedef struct poll_table_struct cfs_poll_table_t; +#define CFS_PARAM_MODULE THIS_MODULE +#define cfs_file_private(file) (file->private_data) +#define cfs_dentry_data(dentry) (dentry->data) +#define cfs_proc_inode_pde(proc_inode) (proc_inode->pde) +#define cfs_proc_inode(proc_inode) (proc_inode->vfs_inode) +#define cfs_seq_read_common seq_read +#define cfs_seq_lseek_common seq_lseek +#define cfs_seq_private(seq) (seq->private) +#define cfs_seq_printf(seq, format, ...) 
seq_printf(seq, format, \ + ## __VA_ARGS__) +#define cfs_seq_release(inode, file) seq_release(inode, file) +#define cfs_seq_puts(seq, s) seq_puts(seq, s) +#define cfs_seq_putc(seq, s) seq_putc(seq, s) +#define cfs_seq_read(file, buf, count, ppos, rc) (rc = seq_read(file, buf, \ + count, ppos)) +#define cfs_seq_open(file, ops, rc) (rc = seq_open(file, ops)) + +#else /* !LPROCFS */ + +typedef struct cfs_params_file { + void *param_private; + loff_t param_pos; + unsigned int param_flags; +} cfs_param_file_t; + +typedef struct cfs_param_inode { + void *param_private; +} cfs_inode_t; + +typedef struct cfs_param_dentry { + void *param_data; +} cfs_param_dentry_t; + +typedef struct cfs_proc_inode { + cfs_param_dentry_t *param_pde; + cfs_inode_t param_inode; +} cfs_proc_inode_t; + +struct cfs_seq_operations; +typedef struct cfs_seq_file { + char *buf; + size_t size; + size_t from; + size_t count; + loff_t index; + loff_t version; + struct mutex lock; + struct cfs_seq_operations *op; + void *private; +} cfs_seq_file_t; + +typedef struct cfs_seq_operations { + void *(*start) (cfs_seq_file_t *m, loff_t *pos); + void (*stop) (cfs_seq_file_t *m, void *v); + void *(*next) (cfs_seq_file_t *m, void *v, loff_t *pos); + int (*show) (cfs_seq_file_t *m, void *v); +} cfs_seq_ops_t; + +typedef void *cfs_param_module_t; +typedef void *cfs_poll_table_t; + +typedef struct cfs_param_file_ops { + cfs_param_module_t owner; + int (*open) (cfs_inode_t *, struct file *); + loff_t (*llseek)(struct file *, loff_t, int); + int (*release) (cfs_inode_t *, cfs_param_file_t *); + unsigned int (*poll) (struct file *, cfs_poll_table_t *); + ssize_t (*write) (struct file *, const char *, size_t, loff_t *); + ssize_t (*read)(struct file *, char *, size_t, loff_t *); +} cfs_param_file_ops_t; +typedef cfs_param_file_ops_t *cfs_lproc_filep_t; + +static inline cfs_proc_inode_t *FAKE_PROC_I(const cfs_inode_t *inode) +{ + return container_of(inode, cfs_proc_inode_t, param_inode); +} + +#define CFS_PARAM_MODULE 
NULL +#define cfs_file_private(file) (file->param_private) +#define cfs_dentry_data(dentry) (dentry->param_data) +#define cfs_proc_inode(proc_inode) (proc_inode->param_inode) +#define cfs_proc_inode_pde(proc_inode) (proc_inode->param_pde) +#define cfs_seq_read_common NULL +#define cfs_seq_lseek_common NULL +#define cfs_seq_private(seq) (seq->private) +#define cfs_seq_read(file, buf, count, ppos, rc) do {} while(0) +#define cfs_seq_open(file, ops, rc) \ +do { \ + cfs_seq_file_t *p = cfs_file_private(file); \ + if (!p) { \ + LIBCFS_ALLOC(p, sizeof(*p)); \ + if (!p) { \ + rc = -ENOMEM; \ + break; \ + } \ + cfs_file_private(file) = p; \ + } \ + memset(p, 0, sizeof(*p)); \ + p->op = ops; \ + rc = 0; \ +} while(0) + +#endif /* LPROCFS */ + +/* XXX: params_tree APIs */ + +#endif /* __PARAMS_TREE_H__ */ diff --git a/drivers/staging/lustre/include/linux/lnet/api-support.h b/drivers/staging/lustre/include/linux/lnet/api-support.h new file mode 100644 index 000000000000..a8d91dbe6060 --- /dev/null +++ b/drivers/staging/lustre/include/linux/lnet/api-support.h @@ -0,0 +1,44 @@ +/* + * GPL HEADER START + * + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 only, + * as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License version 2 for more details (a copy is included + * in the LICENSE file that accompanied this code). 
+ * + * You should have received a copy of the GNU General Public License + * version 2 along with this program; If not, see + * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf + * + * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara, + * CA 95054 USA or visit www.sun.com if you need additional information or + * have any questions. + * + * GPL HEADER END + */ +/* + * Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved. + * Use is subject to license terms. + */ +/* + * This file is part of Lustre, http://www.lustre.org/ + * Lustre is a trademark of Sun Microsystems, Inc. + */ + +#ifndef __LNET_API_SUPPORT_H__ +#define __LNET_API_SUPPORT_H__ + +#include <linux/lnet/linux/api-support.h> + +#include <linux/libcfs/libcfs.h> +#include <linux/lnet/types.h> +#include <linux/lnet/lnet.h> + +#endif diff --git a/drivers/staging/lustre/include/linux/lnet/api.h b/drivers/staging/lustre/include/linux/lnet/api.h new file mode 100644 index 000000000000..e8642e33860d --- /dev/null +++ b/drivers/staging/lustre/include/linux/lnet/api.h @@ -0,0 +1,220 @@ +/* + * GPL HEADER START + * + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 only, + * as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License version 2 for more details (a copy is included + * in the LICENSE file that accompanied this code). 
+ * + * You should have received a copy of the GNU General Public License + * version 2 along with this program; If not, see + * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf + * + * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara, + * CA 95054 USA or visit www.sun.com if you need additional information or + * have any questions. + * + * GPL HEADER END + */ +/* + * Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved. + * Use is subject to license terms. + */ +/* + * This file is part of Lustre, http://www.lustre.org/ + * Lustre is a trademark of Sun Microsystems, Inc. + */ + +#ifndef __LNET_API_H__ +#define __LNET_API_H__ + +/** \defgroup lnet LNet + * + * The Lustre Networking subsystem. + * + * LNet is an asynchronous message-passing API, which provides an unreliable + * connectionless service that can't guarantee any order. It supports OFA IB, + * TCP/IP, and Cray Portals, and routes between heterogeneous networks. + * + * LNet can run both in OS kernel space and in userspace as a library. + * @{ + */ + +#include <linux/lnet/types.h> + +/** \defgroup lnet_init_fini Initialization and cleanup + * The LNet must be properly initialized before any LNet calls can be made. + * @{ */ +int LNetInit(void); +void LNetFini(void); + +int LNetNIInit(lnet_pid_t requested_pid); +int LNetNIFini(void); +/** @} lnet_init_fini */ + +/** \defgroup lnet_addr LNet addressing and basic types + * + * Addressing scheme and basic data types of LNet. + * + * The LNet API is memory-oriented, so LNet must be able to address not only + * end-points but also memory region within a process address space. + * An ::lnet_nid_t addresses an end-point. An ::lnet_pid_t identifies a process + * in a node. A portal represents an opening in the address space of a + * process. Match bits is criteria to identify a region of memory inside a + * portal, and offset specifies an offset within the memory region. 
+ * + * LNet creates a table of portals for each process during initialization. + * This table has MAX_PORTALS entries and its size can't be dynamically + * changed. A portal stays empty until the owning process starts to add + * memory regions to it. A portal is sometimes called an index because + * it's an entry in the portals table of a process. + * + * \see LNetMEAttach + * @{ */ +int LNetGetId(unsigned int index, lnet_process_id_t *id); +int LNetDist(lnet_nid_t nid, lnet_nid_t *srcnid, __u32 *order); +void LNetSnprintHandle(char *str, int str_len, lnet_handle_any_t handle); + +/** @} lnet_addr */ + + +/** \defgroup lnet_me Match entries + * + * A match entry (abbreviated as ME) describes a set of criteria to accept + * incoming requests. + * + * A portal is essentially a match list plus a set of attributes. A match + * list is a chain of MEs. Each ME includes a pointer to a memory descriptor + * and a set of match criteria. The match criteria can be used to reject + * incoming requests based on process ID or the match bits provided in the + * request. MEs can be dynamically inserted into a match list by LNetMEAttach() + * and LNetMEInsert(), and removed from its list by LNetMEUnlink(). 
+ * @{ */ +int LNetMEAttach(unsigned int portal, + lnet_process_id_t match_id_in, + __u64 match_bits_in, + __u64 ignore_bits_in, + lnet_unlink_t unlink_in, + lnet_ins_pos_t pos_in, + lnet_handle_me_t *handle_out); + +int LNetMEInsert(lnet_handle_me_t current_in, + lnet_process_id_t match_id_in, + __u64 match_bits_in, + __u64 ignore_bits_in, + lnet_unlink_t unlink_in, + lnet_ins_pos_t position_in, + lnet_handle_me_t *handle_out); + +int LNetMEUnlink(lnet_handle_me_t current_in); +/** @} lnet_me */ + +/** \defgroup lnet_md Memory descriptors + * + * A memory descriptor contains information about a region of a user's + * memory (either in kernel or user space) and optionally points to an + * event queue where information about the operations performed on the + * memory descriptor are recorded. Memory descriptor is abbreviated as + * MD and can be used interchangeably with the memory region it describes. + * + * The LNet API provides two operations to create MDs: LNetMDAttach() + * and LNetMDBind(); one operation to unlink and release the resources + * associated with a MD: LNetMDUnlink(). + * @{ */ +int LNetMDAttach(lnet_handle_me_t current_in, + lnet_md_t md_in, + lnet_unlink_t unlink_in, + lnet_handle_md_t *handle_out); + +int LNetMDBind(lnet_md_t md_in, + lnet_unlink_t unlink_in, + lnet_handle_md_t *handle_out); + +int LNetMDUnlink(lnet_handle_md_t md_in); +/** @} lnet_md */ + +/** \defgroup lnet_eq Events and event queues + * + * Event queues (abbreviated as EQ) are used to log operations performed on + * local MDs. In particular, they signal the completion of a data transmission + * into or out of a MD. They can also be used to hold acknowledgments for + * completed PUT operations and indicate when a MD has been unlinked. Multiple + * MDs can share a single EQ. An EQ may have an optional event handler + * associated with it. If an event handler exists, it will be run for each + * event that is deposited into the EQ. 
+ * + * In addition to the lnet_handle_eq_t, the LNet API defines two types + * associated with events: The ::lnet_event_kind_t defines the kinds of events + * that can be stored in an EQ. The lnet_event_t defines a structure that + * holds the information about an event. + * + * There are five functions for dealing with EQs: LNetEQAlloc() is used to + * create an EQ and allocate the resources needed, while LNetEQFree() + * releases these resources and frees the EQ. LNetEQGet() retrieves the next + * event from an EQ, and LNetEQWait() can be used to block a process until + * an EQ has at least one event. LNetEQPoll() can be used to test or wait + * on multiple EQs. + * @{ */ +int LNetEQAlloc(unsigned int count_in, + lnet_eq_handler_t handler, + lnet_handle_eq_t *handle_out); + +int LNetEQFree(lnet_handle_eq_t eventq_in); + +int LNetEQGet(lnet_handle_eq_t eventq_in, + lnet_event_t *event_out); + + +int LNetEQWait(lnet_handle_eq_t eventq_in, + lnet_event_t *event_out); + +int LNetEQPoll(lnet_handle_eq_t *eventqs_in, + int neq_in, + int timeout_ms, + lnet_event_t *event_out, + int *which_eq_out); +/** @} lnet_eq */ + +/** \defgroup lnet_data Data movement operations + * + * The LNet API provides two data movement operations: LNetPut() + * and LNetGet(). + * @{ */ +int LNetPut(lnet_nid_t self, + lnet_handle_md_t md_in, + lnet_ack_req_t ack_req_in, + lnet_process_id_t target_in, + unsigned int portal_in, + __u64 match_bits_in, + unsigned int offset_in, + __u64 hdr_data_in); + +int LNetGet(lnet_nid_t self, + lnet_handle_md_t md_in, + lnet_process_id_t target_in, + unsigned int portal_in, + __u64 match_bits_in, + unsigned int offset_in); +/** @} lnet_data */ + + +/** \defgroup lnet_misc Miscellaneous operations. + * Miscellaneous operations.
+ * @{ */ + +int LNetSetLazyPortal(int portal); +int LNetClearLazyPortal(int portal); +int LNetCtl(unsigned int cmd, void *arg); +int LNetSetAsync(lnet_process_id_t id, int nasync); + +/** @} lnet_misc */ + +/** @} lnet */ +#endif diff --git a/drivers/staging/lustre/include/linux/lnet/lib-lnet.h b/drivers/staging/lustre/include/linux/lnet/lib-lnet.h new file mode 100644 index 000000000000..59bff0bea816 --- /dev/null +++ b/drivers/staging/lustre/include/linux/lnet/lib-lnet.h @@ -0,0 +1,874 @@ +/* + * GPL HEADER START + * + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 only, + * as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License version 2 for more details (a copy is included + * in the LICENSE file that accompanied this code). + * + * You should have received a copy of the GNU General Public License + * version 2 along with this program; If not, see + * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf + * + * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara, + * CA 95054 USA or visit www.sun.com if you need additional information or + * have any questions. + * + * GPL HEADER END + */ +/* + * Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved. + * Use is subject to license terms. + * + * Copyright (c) 2012, Intel Corporation. + */ +/* + * This file is part of Lustre, http://www.lustre.org/ + * Lustre is a trademark of Sun Microsystems, Inc. 
+ * + * lnet/include/lnet/lib-lnet.h + * + * Top level include for library side routines + */ + +#ifndef __LNET_LIB_LNET_H__ +#define __LNET_LIB_LNET_H__ + +#include <linux/lnet/linux/lib-lnet.h> + +#include <linux/libcfs/libcfs.h> +#include <linux/lnet/types.h> +#include <linux/lnet/lnet.h> +#include <linux/lnet/lib-types.h> + +extern lnet_t the_lnet; /* THE network */ + +#if defined(LNET_USE_LIB_FREELIST) +/* 1 CPT, simplify implementation... */ +# define LNET_CPT_MAX_BITS 0 + +#else /* KERNEL and no freelist */ + +# if (BITS_PER_LONG == 32) +/* 2 CPTs, allowing more CPTs might make us under memory pressure */ +# define LNET_CPT_MAX_BITS 1 + +# else /* 64-bit system */ +/* + * 256 CPTs for thousands of CPUs, allowing more CPTs might make us + * under risk of consuming all lh_cookie. + */ +# define LNET_CPT_MAX_BITS 8 +# endif /* BITS_PER_LONG == 32 */ +#endif + +/* max allowed CPT number */ +#define LNET_CPT_MAX (1 << LNET_CPT_MAX_BITS) + +#define LNET_CPT_NUMBER (the_lnet.ln_cpt_number) +#define LNET_CPT_BITS (the_lnet.ln_cpt_bits) +#define LNET_CPT_MASK ((1ULL << LNET_CPT_BITS) - 1) + +/** exclusive lock */ +#define LNET_LOCK_EX CFS_PERCPT_LOCK_EX + +static inline int lnet_is_wire_handle_none (lnet_handle_wire_t *wh) +{ + return (wh->wh_interface_cookie == LNET_WIRE_HANDLE_COOKIE_NONE && + wh->wh_object_cookie == LNET_WIRE_HANDLE_COOKIE_NONE); +} + +static inline int lnet_md_exhausted (lnet_libmd_t *md) +{ + return (md->md_threshold == 0 || + ((md->md_options & LNET_MD_MAX_SIZE) != 0 && + md->md_offset + md->md_max_size > md->md_length)); +} + +static inline int lnet_md_unlinkable (lnet_libmd_t *md) +{ + /* Should unlink md when its refcount is 0 and either: + * - md has been flagged for deletion (by auto unlink or LNetM[DE]Unlink, + * in the latter case md may not be exhausted). + * - auto unlink is on and md is exhausted. 
+ */ + if (md->md_refcount != 0) + return 0; + + if ((md->md_flags & LNET_MD_FLAG_ZOMBIE) != 0) + return 1; + + return ((md->md_flags & LNET_MD_FLAG_AUTO_UNLINK) != 0 && + lnet_md_exhausted(md)); +} + +#define lnet_cpt_table() (the_lnet.ln_cpt_table) +#define lnet_cpt_current() cfs_cpt_current(the_lnet.ln_cpt_table, 1) + +static inline int +lnet_cpt_of_cookie(__u64 cookie) +{ + unsigned int cpt = (cookie >> LNET_COOKIE_TYPE_BITS) & LNET_CPT_MASK; + + /* LNET_CPT_NUMBER doesn't have to be power2, which means we can + * get illegal cpt from it's invalid cookie */ + return cpt < LNET_CPT_NUMBER ? cpt : cpt % LNET_CPT_NUMBER; +} + +static inline void +lnet_res_lock(int cpt) +{ + cfs_percpt_lock(the_lnet.ln_res_lock, cpt); +} + +static inline void +lnet_res_unlock(int cpt) +{ + cfs_percpt_unlock(the_lnet.ln_res_lock, cpt); +} + +static inline int +lnet_res_lock_current(void) +{ + int cpt = lnet_cpt_current(); + + lnet_res_lock(cpt); + return cpt; +} + +static inline void +lnet_net_lock(int cpt) +{ + cfs_percpt_lock(the_lnet.ln_net_lock, cpt); +} + +static inline void +lnet_net_unlock(int cpt) +{ + cfs_percpt_unlock(the_lnet.ln_net_lock, cpt); +} + +static inline int +lnet_net_lock_current(void) +{ + int cpt = lnet_cpt_current(); + + lnet_net_lock(cpt); + return cpt; +} + +#define LNET_LOCK() lnet_net_lock(LNET_LOCK_EX) +#define LNET_UNLOCK() lnet_net_unlock(LNET_LOCK_EX) + + +#define lnet_ptl_lock(ptl) spin_lock(&(ptl)->ptl_lock) +#define lnet_ptl_unlock(ptl) spin_unlock(&(ptl)->ptl_lock) +#define lnet_eq_wait_lock() spin_lock(&the_lnet.ln_eq_wait_lock) +#define lnet_eq_wait_unlock() spin_unlock(&the_lnet.ln_eq_wait_lock) +#define lnet_ni_lock(ni) spin_lock(&(ni)->ni_lock) +#define lnet_ni_unlock(ni) spin_unlock(&(ni)->ni_lock) +#define LNET_MUTEX_LOCK(m) mutex_lock(m) +#define LNET_MUTEX_UNLOCK(m) mutex_unlock(m) + + +#define MAX_PORTALS 64 + +/* these are only used by code with LNET_USE_LIB_FREELIST, but we still + * exported them to !LNET_USE_LIB_FREELIST for easy 
implemetation */ +#define LNET_FL_MAX_MES 2048 +#define LNET_FL_MAX_MDS 2048 +#define LNET_FL_MAX_EQS 512 +#define LNET_FL_MAX_MSGS 2048 /* Outstanding messages */ + +#ifdef LNET_USE_LIB_FREELIST + +int lnet_freelist_init(lnet_freelist_t *fl, int n, int size); +void lnet_freelist_fini(lnet_freelist_t *fl); + +static inline void * +lnet_freelist_alloc (lnet_freelist_t *fl) +{ + /* ALWAYS called with liblock held */ + lnet_freeobj_t *o; + + if (list_empty (&fl->fl_list)) + return (NULL); + + o = list_entry (fl->fl_list.next, lnet_freeobj_t, fo_list); + list_del (&o->fo_list); + return ((void *)&o->fo_contents); +} + +static inline void +lnet_freelist_free (lnet_freelist_t *fl, void *obj) +{ + /* ALWAYS called with liblock held */ + lnet_freeobj_t *o = list_entry (obj, lnet_freeobj_t, fo_contents); + + list_add (&o->fo_list, &fl->fl_list); +} + + +static inline lnet_eq_t * +lnet_eq_alloc (void) +{ + /* NEVER called with resource lock held */ + struct lnet_res_container *rec = &the_lnet.ln_eq_container; + lnet_eq_t *eq; + + LASSERT(LNET_CPT_NUMBER == 1); + + lnet_res_lock(0); + eq = (lnet_eq_t *)lnet_freelist_alloc(&rec->rec_freelist); + lnet_res_unlock(0); + + return eq; +} + +static inline void +lnet_eq_free_locked(lnet_eq_t *eq) +{ + /* ALWAYS called with resource lock held */ + struct lnet_res_container *rec = &the_lnet.ln_eq_container; + + LASSERT(LNET_CPT_NUMBER == 1); + lnet_freelist_free(&rec->rec_freelist, eq); +} + +static inline void +lnet_eq_free(lnet_eq_t *eq) +{ + lnet_res_lock(0); + lnet_eq_free_locked(eq); + lnet_res_unlock(0); +} + +static inline lnet_libmd_t * +lnet_md_alloc (lnet_md_t *umd) +{ + /* NEVER called with resource lock held */ + struct lnet_res_container *rec = the_lnet.ln_md_containers[0]; + lnet_libmd_t *md; + + LASSERT(LNET_CPT_NUMBER == 1); + + lnet_res_lock(0); + md = (lnet_libmd_t *)lnet_freelist_alloc(&rec->rec_freelist); + lnet_res_unlock(0); + + if (md != NULL) + INIT_LIST_HEAD(&md->md_list); + + return md; +} + +static inline 
void +lnet_md_free_locked(lnet_libmd_t *md) +{ + /* ALWAYS called with resource lock held */ + struct lnet_res_container *rec = the_lnet.ln_md_containers[0]; + + LASSERT(LNET_CPT_NUMBER == 1); + lnet_freelist_free(&rec->rec_freelist, md); +} + +static inline void +lnet_md_free(lnet_libmd_t *md) +{ + lnet_res_lock(0); + lnet_md_free_locked(md); + lnet_res_unlock(0); +} + +static inline lnet_me_t * +lnet_me_alloc(void) +{ + /* NEVER called with resource lock held */ + struct lnet_res_container *rec = the_lnet.ln_me_containers[0]; + lnet_me_t *me; + + LASSERT(LNET_CPT_NUMBER == 1); + + lnet_res_lock(0); + me = (lnet_me_t *)lnet_freelist_alloc(&rec->rec_freelist); + lnet_res_unlock(0); + + return me; +} + +static inline void +lnet_me_free_locked(lnet_me_t *me) +{ + /* ALWAYS called with resource lock held */ + struct lnet_res_container *rec = the_lnet.ln_me_containers[0]; + + LASSERT(LNET_CPT_NUMBER == 1); + lnet_freelist_free(&rec->rec_freelist, me); +} + +static inline void +lnet_me_free(lnet_me_t *me) +{ + lnet_res_lock(0); + lnet_me_free_locked(me); + lnet_res_unlock(0); +} + +static inline lnet_msg_t * +lnet_msg_alloc (void) +{ + /* NEVER called with network lock held */ + struct lnet_msg_container *msc = the_lnet.ln_msg_containers[0]; + lnet_msg_t *msg; + + LASSERT(LNET_CPT_NUMBER == 1); + + lnet_net_lock(0); + msg = (lnet_msg_t *)lnet_freelist_alloc(&msc->msc_freelist); + lnet_net_unlock(0); + + if (msg != NULL) { + /* NULL pointers, clear flags etc */ + memset(msg, 0, sizeof(*msg)); + } + return msg; +} + +static inline void +lnet_msg_free_locked(lnet_msg_t *msg) +{ + /* ALWAYS called with network lock held */ + struct lnet_msg_container *msc = the_lnet.ln_msg_containers[0]; + + LASSERT(LNET_CPT_NUMBER == 1); + LASSERT(!msg->msg_onactivelist); + lnet_freelist_free(&msc->msc_freelist, msg); +} + +static inline void +lnet_msg_free (lnet_msg_t *msg) +{ + lnet_net_lock(0); + lnet_msg_free_locked(msg); + lnet_net_unlock(0); +} + +#else /* !LNET_USE_LIB_FREELIST */ + 
+static inline lnet_eq_t * +lnet_eq_alloc (void) +{ + /* NEVER called with liblock held */ + lnet_eq_t *eq; + + LIBCFS_ALLOC(eq, sizeof(*eq)); + return (eq); +} + +static inline void +lnet_eq_free(lnet_eq_t *eq) +{ + /* ALWAYS called with resource lock held */ + LIBCFS_FREE(eq, sizeof(*eq)); +} + +static inline lnet_libmd_t * +lnet_md_alloc (lnet_md_t *umd) +{ + /* NEVER called with liblock held */ + lnet_libmd_t *md; + unsigned int size; + unsigned int niov; + + if ((umd->options & LNET_MD_KIOV) != 0) { + niov = umd->length; + size = offsetof(lnet_libmd_t, md_iov.kiov[niov]); + } else { + niov = ((umd->options & LNET_MD_IOVEC) != 0) ? + umd->length : 1; + size = offsetof(lnet_libmd_t, md_iov.iov[niov]); + } + + LIBCFS_ALLOC(md, size); + + if (md != NULL) { + /* Set here in case of early free */ + md->md_options = umd->options; + md->md_niov = niov; + INIT_LIST_HEAD(&md->md_list); + } + + return (md); +} + +static inline void +lnet_md_free(lnet_libmd_t *md) +{ + /* ALWAYS called with resource lock held */ + unsigned int size; + + if ((md->md_options & LNET_MD_KIOV) != 0) + size = offsetof(lnet_libmd_t, md_iov.kiov[md->md_niov]); + else + size = offsetof(lnet_libmd_t, md_iov.iov[md->md_niov]); + + LIBCFS_FREE(md, size); +} + +static inline lnet_me_t * +lnet_me_alloc (void) +{ + /* NEVER called with liblock held */ + lnet_me_t *me; + + LIBCFS_ALLOC(me, sizeof(*me)); + return (me); +} + +static inline void +lnet_me_free(lnet_me_t *me) +{ + /* ALWAYS called with resource lock held */ + LIBCFS_FREE(me, sizeof(*me)); +} + +static inline lnet_msg_t * +lnet_msg_alloc(void) +{ + /* NEVER called with liblock held */ + lnet_msg_t *msg; + + LIBCFS_ALLOC(msg, sizeof(*msg)); + + /* no need to zero, LIBCFS_ALLOC does for us */ + return (msg); +} + +static inline void +lnet_msg_free(lnet_msg_t *msg) +{ + /* ALWAYS called with network lock held */ + LASSERT(!msg->msg_onactivelist); + LIBCFS_FREE(msg, sizeof(*msg)); +} + +#define lnet_eq_free_locked(eq) lnet_eq_free(eq) +#define 
lnet_md_free_locked(md) lnet_md_free(md) +#define lnet_me_free_locked(me) lnet_me_free(me) +#define lnet_msg_free_locked(msg) lnet_msg_free(msg) + +#endif /* LNET_USE_LIB_FREELIST */ + +lnet_libhandle_t *lnet_res_lh_lookup(struct lnet_res_container *rec, + __u64 cookie); +void lnet_res_lh_initialize(struct lnet_res_container *rec, + lnet_libhandle_t *lh); +static inline void +lnet_res_lh_invalidate(lnet_libhandle_t *lh) +{ + /* ALWAYS called with resource lock held */ + /* NB: cookie is still useful, don't reset it */ + list_del(&lh->lh_hash_chain); +} + +static inline void +lnet_eq2handle (lnet_handle_eq_t *handle, lnet_eq_t *eq) +{ + if (eq == NULL) { + LNetInvalidateHandle(handle); + return; + } + + handle->cookie = eq->eq_lh.lh_cookie; +} + +static inline lnet_eq_t * +lnet_handle2eq(lnet_handle_eq_t *handle) +{ + /* ALWAYS called with resource lock held */ + lnet_libhandle_t *lh; + + lh = lnet_res_lh_lookup(&the_lnet.ln_eq_container, handle->cookie); + if (lh == NULL) + return NULL; + + return lh_entry(lh, lnet_eq_t, eq_lh); +} + +static inline void +lnet_md2handle (lnet_handle_md_t *handle, lnet_libmd_t *md) +{ + handle->cookie = md->md_lh.lh_cookie; +} + +static inline lnet_libmd_t * +lnet_handle2md(lnet_handle_md_t *handle) +{ + /* ALWAYS called with resource lock held */ + lnet_libhandle_t *lh; + int cpt; + + cpt = lnet_cpt_of_cookie(handle->cookie); + lh = lnet_res_lh_lookup(the_lnet.ln_md_containers[cpt], + handle->cookie); + if (lh == NULL) + return NULL; + + return lh_entry(lh, lnet_libmd_t, md_lh); +} + +static inline lnet_libmd_t * +lnet_wire_handle2md(lnet_handle_wire_t *wh) +{ + /* ALWAYS called with resource lock held */ + lnet_libhandle_t *lh; + int cpt; + + if (wh->wh_interface_cookie != the_lnet.ln_interface_cookie) + return NULL; + + cpt = lnet_cpt_of_cookie(wh->wh_object_cookie); + lh = lnet_res_lh_lookup(the_lnet.ln_md_containers[cpt], + wh->wh_object_cookie); + if (lh == NULL) + return NULL; + + return lh_entry(lh, lnet_libmd_t, md_lh); +} + 
+static inline void +lnet_me2handle (lnet_handle_me_t *handle, lnet_me_t *me) +{ + handle->cookie = me->me_lh.lh_cookie; +} + +static inline lnet_me_t * +lnet_handle2me(lnet_handle_me_t *handle) +{ + /* ALWAYS called with resource lock held */ + lnet_libhandle_t *lh; + int cpt; + + cpt = lnet_cpt_of_cookie(handle->cookie); + lh = lnet_res_lh_lookup(the_lnet.ln_me_containers[cpt], + handle->cookie); + if (lh == NULL) + return NULL; + + return lh_entry(lh, lnet_me_t, me_lh); +} + +static inline void +lnet_peer_addref_locked(lnet_peer_t *lp) +{ + LASSERT (lp->lp_refcount > 0); + lp->lp_refcount++; +} + +extern void lnet_destroy_peer_locked(lnet_peer_t *lp); + +static inline void +lnet_peer_decref_locked(lnet_peer_t *lp) +{ + LASSERT (lp->lp_refcount > 0); + lp->lp_refcount--; + if (lp->lp_refcount == 0) + lnet_destroy_peer_locked(lp); +} + +static inline int +lnet_isrouter(lnet_peer_t *lp) +{ + return lp->lp_rtr_refcount != 0; +} + +static inline void +lnet_ni_addref_locked(lnet_ni_t *ni, int cpt) +{ + LASSERT(cpt >= 0 && cpt < LNET_CPT_NUMBER); + LASSERT(*ni->ni_refs[cpt] >= 0); + + (*ni->ni_refs[cpt])++; +} + +static inline void +lnet_ni_addref(lnet_ni_t *ni) +{ + lnet_net_lock(0); + lnet_ni_addref_locked(ni, 0); + lnet_net_unlock(0); +} + +static inline void +lnet_ni_decref_locked(lnet_ni_t *ni, int cpt) +{ + LASSERT(cpt >= 0 && cpt < LNET_CPT_NUMBER); + LASSERT(*ni->ni_refs[cpt] > 0); + + (*ni->ni_refs[cpt])--; +} + +static inline void +lnet_ni_decref(lnet_ni_t *ni) +{ + lnet_net_lock(0); + lnet_ni_decref_locked(ni, 0); + lnet_net_unlock(0); +} + +void lnet_ni_free(lnet_ni_t *ni); + +static inline int +lnet_nid2peerhash(lnet_nid_t nid) +{ + return cfs_hash_long(nid, LNET_PEER_HASH_BITS); +} + +static inline struct list_head * +lnet_net2rnethash(__u32 net) +{ + return &the_lnet.ln_remote_nets_hash[(LNET_NETNUM(net) + + LNET_NETTYP(net)) & + ((1U << the_lnet.ln_remote_nets_hbits) - 1)]; +} + +extern lnd_t the_lolnd; + + +extern int lnet_cpt_of_nid_locked(lnet_nid_t 
nid); +extern int lnet_cpt_of_nid(lnet_nid_t nid); +extern lnet_ni_t *lnet_nid2ni_locked(lnet_nid_t nid, int cpt); +extern lnet_ni_t *lnet_net2ni_locked(__u32 net, int cpt); +extern lnet_ni_t *lnet_net2ni(__u32 net); + +int lnet_notify(lnet_ni_t *ni, lnet_nid_t peer, int alive, cfs_time_t when); +void lnet_notify_locked(lnet_peer_t *lp, int notifylnd, int alive, cfs_time_t when); +int lnet_add_route(__u32 net, unsigned int hops, lnet_nid_t gateway_nid); +int lnet_check_routes(void); +int lnet_del_route(__u32 net, lnet_nid_t gw_nid); +void lnet_destroy_routes(void); +int lnet_get_route(int idx, __u32 *net, __u32 *hops, + lnet_nid_t *gateway, __u32 *alive); +void lnet_proc_init(void); +void lnet_proc_fini(void); +int lnet_rtrpools_alloc(int im_a_router); +void lnet_rtrpools_free(void); +lnet_remotenet_t *lnet_find_net_locked (__u32 net); + +int lnet_islocalnid(lnet_nid_t nid); +int lnet_islocalnet(__u32 net); + +void lnet_msg_attach_md(lnet_msg_t *msg, lnet_libmd_t *md, + unsigned int offset, unsigned int mlen); +void lnet_msg_detach_md(lnet_msg_t *msg, int status); +void lnet_build_unlink_event(lnet_libmd_t *md, lnet_event_t *ev); +void lnet_build_msg_event(lnet_msg_t *msg, lnet_event_kind_t ev_type); +void lnet_msg_commit(lnet_msg_t *msg, int cpt); +void lnet_msg_decommit(lnet_msg_t *msg, int cpt, int status); + +void lnet_eq_enqueue_event(lnet_eq_t *eq, lnet_event_t *ev); +void lnet_prep_send(lnet_msg_t *msg, int type, lnet_process_id_t target, + unsigned int offset, unsigned int len); +int lnet_send(lnet_nid_t nid, lnet_msg_t *msg, lnet_nid_t rtr_nid); +void lnet_return_tx_credits_locked(lnet_msg_t *msg); +void lnet_return_rx_credits_locked(lnet_msg_t *msg); + +/* portals functions */ +/* portals attributes */ +static inline int +lnet_ptl_is_lazy(lnet_portal_t *ptl) +{ + return !!(ptl->ptl_options & LNET_PTL_LAZY); +} + +static inline int +lnet_ptl_is_unique(lnet_portal_t *ptl) +{ + return !!(ptl->ptl_options & LNET_PTL_MATCH_UNIQUE); +} + +static inline int 
+lnet_ptl_is_wildcard(lnet_portal_t *ptl) +{ + return !!(ptl->ptl_options & LNET_PTL_MATCH_WILDCARD); +} + +static inline void +lnet_ptl_setopt(lnet_portal_t *ptl, int opt) +{ + ptl->ptl_options |= opt; +} + +static inline void +lnet_ptl_unsetopt(lnet_portal_t *ptl, int opt) +{ + ptl->ptl_options &= ~opt; +} + +/* match-table functions */ +struct list_head *lnet_mt_match_head(struct lnet_match_table *mtable, + lnet_process_id_t id, __u64 mbits); +struct lnet_match_table *lnet_mt_of_attach(unsigned int index, + lnet_process_id_t id, __u64 mbits, + __u64 ignore_bits, + lnet_ins_pos_t pos); +int lnet_mt_match_md(struct lnet_match_table *mtable, + struct lnet_match_info *info, struct lnet_msg *msg); + +/* portals match/attach functions */ +void lnet_ptl_attach_md(lnet_me_t *me, lnet_libmd_t *md, + struct list_head *matches, struct list_head *drops); +void lnet_ptl_detach_md(lnet_me_t *me, lnet_libmd_t *md); +int lnet_ptl_match_md(struct lnet_match_info *info, struct lnet_msg *msg); + +/* initialized and finalize portals */ +int lnet_portals_create(void); +void lnet_portals_destroy(void); + +/* message functions */ +int lnet_parse (lnet_ni_t *ni, lnet_hdr_t *hdr, + lnet_nid_t fromnid, void *private, int rdma_req); +void lnet_recv(lnet_ni_t *ni, void *private, lnet_msg_t *msg, int delayed, + unsigned int offset, unsigned int mlen, unsigned int rlen); +lnet_msg_t *lnet_create_reply_msg (lnet_ni_t *ni, lnet_msg_t *get_msg); +void lnet_set_reply_msg_len(lnet_ni_t *ni, lnet_msg_t *msg, unsigned int len); +void lnet_finalize(lnet_ni_t *ni, lnet_msg_t *msg, int rc); +void lnet_drop_delayed_msg_list(struct list_head *head, char *reason); +void lnet_recv_delayed_msg_list(struct list_head *head); + +int lnet_msg_container_setup(struct lnet_msg_container *container, int cpt); +void lnet_msg_container_cleanup(struct lnet_msg_container *container); +void lnet_msg_containers_destroy(void); +int lnet_msg_containers_create(void); + +char *lnet_msgtyp2str (int type); +void 
lnet_print_hdr (lnet_hdr_t * hdr); +int lnet_fail_nid(lnet_nid_t nid, unsigned int threshold); + +void lnet_counters_get(lnet_counters_t *counters); +void lnet_counters_reset(void); + +unsigned int lnet_iov_nob (unsigned int niov, struct iovec *iov); +int lnet_extract_iov (int dst_niov, struct iovec *dst, + int src_niov, struct iovec *src, + unsigned int offset, unsigned int len); + +unsigned int lnet_kiov_nob (unsigned int niov, lnet_kiov_t *iov); +int lnet_extract_kiov (int dst_niov, lnet_kiov_t *dst, + int src_niov, lnet_kiov_t *src, + unsigned int offset, unsigned int len); + +void lnet_copy_iov2iov (unsigned int ndiov, struct iovec *diov, + unsigned int doffset, + unsigned int nsiov, struct iovec *siov, + unsigned int soffset, unsigned int nob); +void lnet_copy_kiov2iov (unsigned int niov, struct iovec *iov, + unsigned int iovoffset, + unsigned int nkiov, lnet_kiov_t *kiov, + unsigned int kiovoffset, unsigned int nob); +void lnet_copy_iov2kiov (unsigned int nkiov, lnet_kiov_t *kiov, + unsigned int kiovoffset, + unsigned int niov, struct iovec *iov, + unsigned int iovoffset, unsigned int nob); +void lnet_copy_kiov2kiov (unsigned int ndkiov, lnet_kiov_t *dkiov, + unsigned int doffset, + unsigned int nskiov, lnet_kiov_t *skiov, + unsigned int soffset, unsigned int nob); + +static inline void +lnet_copy_iov2flat(int dlen, void *dest, unsigned int doffset, + unsigned int nsiov, struct iovec *siov, unsigned int soffset, + unsigned int nob) +{ + struct iovec diov = {/*.iov_base = */ dest, /*.iov_len = */ dlen}; + + lnet_copy_iov2iov(1, &diov, doffset, + nsiov, siov, soffset, nob); +} + +static inline void +lnet_copy_kiov2flat(int dlen, void *dest, unsigned int doffset, + unsigned int nsiov, lnet_kiov_t *skiov, unsigned int soffset, + unsigned int nob) +{ + struct iovec diov = {/* .iov_base = */ dest, /* .iov_len = */ dlen}; + + lnet_copy_kiov2iov(1, &diov, doffset, + nsiov, skiov, soffset, nob); +} + +static inline void +lnet_copy_flat2iov(unsigned int ndiov, struct 
iovec *diov, unsigned int doffset, + int slen, void *src, unsigned int soffset, unsigned int nob) +{ + struct iovec siov = {/*.iov_base = */ src, /*.iov_len = */slen}; + lnet_copy_iov2iov(ndiov, diov, doffset, + 1, &siov, soffset, nob); +} + +static inline void +lnet_copy_flat2kiov(unsigned int ndiov, lnet_kiov_t *dkiov, unsigned int doffset, + int slen, void *src, unsigned int soffset, unsigned int nob) +{ + struct iovec siov = {/* .iov_base = */ src, /* .iov_len = */ slen}; + lnet_copy_iov2kiov(ndiov, dkiov, doffset, + 1, &siov, soffset, nob); +} + +void lnet_me_unlink(lnet_me_t *me); + +void lnet_md_unlink(lnet_libmd_t *md); +void lnet_md_deconstruct(lnet_libmd_t *lmd, lnet_md_t *umd); + +void lnet_register_lnd(lnd_t *lnd); +void lnet_unregister_lnd(lnd_t *lnd); +int lnet_set_ip_niaddr (lnet_ni_t *ni); + +int lnet_connect(socket_t **sockp, lnet_nid_t peer_nid, + __u32 local_ip, __u32 peer_ip, int peer_port); +void lnet_connect_console_error(int rc, lnet_nid_t peer_nid, + __u32 peer_ip, int port); +int lnet_count_acceptor_nis(void); +int lnet_acceptor_timeout(void); +int lnet_acceptor_port(void); + +int lnet_count_acceptor_nis(void); +int lnet_acceptor_port(void); + +int lnet_acceptor_start(void); +void lnet_acceptor_stop(void); + +void lnet_get_tunables(void); +int lnet_peers_start_down(void); +int lnet_peer_buffer_credits(lnet_ni_t *ni); + +int lnet_router_checker_start(void); +void lnet_router_checker_stop(void); +void lnet_swap_pinginfo(lnet_ping_info_t *info); + +int lnet_ping_target_init(void); +void lnet_ping_target_fini(void); +int lnet_ping(lnet_process_id_t id, int timeout_ms, + lnet_process_id_t *ids, int n_ids); + +int lnet_parse_ip2nets (char **networksp, char *ip2nets); +int lnet_parse_routes (char *route_str, int *im_a_router); +int lnet_parse_networks (struct list_head *nilist, char *networks); + +int lnet_nid2peer_locked(lnet_peer_t **lpp, lnet_nid_t nid, int cpt); +lnet_peer_t *lnet_find_peer_locked(struct lnet_peer_table *ptable, + lnet_nid_t 
nid); +void lnet_peer_tables_cleanup(void); +void lnet_peer_tables_destroy(void); +int lnet_peer_tables_create(void); +void lnet_debug_peer(lnet_nid_t nid); + + +#endif diff --git a/drivers/staging/lustre/include/linux/lnet/lib-types.h b/drivers/staging/lustre/include/linux/lnet/lib-types.h new file mode 100644 index 000000000000..86428d4b993e --- /dev/null +++ b/drivers/staging/lustre/include/linux/lnet/lib-types.h @@ -0,0 +1,765 @@ +/* + * GPL HEADER START + * + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 only, + * as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License version 2 for more details (a copy is included + * in the LICENSE file that accompanied this code). + * + * You should have received a copy of the GNU General Public License + * version 2 along with this program; If not, see + * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf + * + * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara, + * CA 95054 USA or visit www.sun.com if you need additional information or + * have any questions. + * + * GPL HEADER END + */ +/* + * Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved. + * Use is subject to license terms. + * + * Copyright (c) 2012, Intel Corporation. + */ +/* + * This file is part of Lustre, http://www.lustre.org/ + * Lustre is a trademark of Sun Microsystems, Inc. 
+ * + * lnet/include/lnet/lib-types.h + * + * Types used by the library side routines that do not need to be + * exposed to the user application + */ + +#ifndef __LNET_LIB_TYPES_H__ +#define __LNET_LIB_TYPES_H__ + +#include <linux/lnet/linux/lib-types.h> + +#include <linux/libcfs/libcfs.h> +#include <linux/list.h> +#include <linux/lnet/types.h> + +#define WIRE_ATTR __attribute__((packed)) + +/* Packed version of lnet_process_id_t to transfer via network */ +typedef struct { + lnet_nid_t nid; + lnet_pid_t pid; /* node id / process id */ +} WIRE_ATTR lnet_process_id_packed_t; + +/* The wire handle's interface cookie only matches one network interface in + * one epoch (i.e. new cookie when the interface restarts or the node + * reboots). The object cookie only matches one object on that interface + * during that object's lifetime (i.e. no cookie re-use). */ +typedef struct { + __u64 wh_interface_cookie; + __u64 wh_object_cookie; +} WIRE_ATTR lnet_handle_wire_t; + +typedef enum { + LNET_MSG_ACK = 0, + LNET_MSG_PUT, + LNET_MSG_GET, + LNET_MSG_REPLY, + LNET_MSG_HELLO, +} lnet_msg_type_t; + +/* The variant fields of the portals message header are aligned on an 8 + * byte boundary in the message header. Note that all types used in these + * wire structs MUST be fixed size and the smaller types are placed at the + * end. 
*/ +typedef struct lnet_ack { + lnet_handle_wire_t dst_wmd; + __u64 match_bits; + __u32 mlength; +} WIRE_ATTR lnet_ack_t; + +typedef struct lnet_put { + lnet_handle_wire_t ack_wmd; + __u64 match_bits; + __u64 hdr_data; + __u32 ptl_index; + __u32 offset; +} WIRE_ATTR lnet_put_t; + +typedef struct lnet_get { + lnet_handle_wire_t return_wmd; + __u64 match_bits; + __u32 ptl_index; + __u32 src_offset; + __u32 sink_length; +} WIRE_ATTR lnet_get_t; + +typedef struct lnet_reply { + lnet_handle_wire_t dst_wmd; +} WIRE_ATTR lnet_reply_t; + +typedef struct lnet_hello { + __u64 incarnation; + __u32 type; +} WIRE_ATTR lnet_hello_t; + +typedef struct { + lnet_nid_t dest_nid; + lnet_nid_t src_nid; + lnet_pid_t dest_pid; + lnet_pid_t src_pid; + __u32 type; /* lnet_msg_type_t */ + __u32 payload_length; /* payload data to follow */ + /*<------__u64 aligned------->*/ + union { + lnet_ack_t ack; + lnet_put_t put; + lnet_get_t get; + lnet_reply_t reply; + lnet_hello_t hello; + } msg; +} WIRE_ATTR lnet_hdr_t; + +/* A HELLO message contains a magic number and protocol version + * code in the header's dest_nid, the peer's NID in the src_nid, and + * LNET_MSG_HELLO in the type field. All other common fields are zero + * (including payload_size; i.e. no payload). + * This is for use by byte-stream LNDs (e.g. TCP/IP) to check the peer is + * running the same protocol and to find out its NID. These LNDs should + * exchange HELLO messages when a connection is first established. Individual + * LNDs can put whatever else they fancy in lnet_hdr_t::msg. 
+ */ +typedef struct { + __u32 magic; /* LNET_PROTO_TCP_MAGIC */ + __u16 version_major; /* increment on incompatible change */ + __u16 version_minor; /* increment on compatible change */ +} WIRE_ATTR lnet_magicversion_t; + +/* PROTO MAGIC for LNDs */ +#define LNET_PROTO_IB_MAGIC 0x0be91b91 +#define LNET_PROTO_RA_MAGIC 0x0be91b92 +#define LNET_PROTO_QSW_MAGIC 0x0be91b93 +#define LNET_PROTO_GNI_MAGIC 0xb00fbabe /* ask Kim */ +#define LNET_PROTO_TCP_MAGIC 0xeebc0ded +#define LNET_PROTO_PTL_MAGIC 0x50746C4E /* 'PtlN' unique magic */ +#define LNET_PROTO_MX_MAGIC 0x4d583130 /* 'MX10'! */ +#define LNET_PROTO_ACCEPTOR_MAGIC 0xacce7100 +#define LNET_PROTO_PING_MAGIC 0x70696E67 /* 'ping' */ + +/* Placeholder for a future "unified" protocol across all LNDs */ +/* Current LNDs that receive a request with this magic will respond with a + * "stub" reply using their current protocol */ +#define LNET_PROTO_MAGIC 0x45726963 /* ! */ + + +#define LNET_PROTO_TCP_VERSION_MAJOR 1 +#define LNET_PROTO_TCP_VERSION_MINOR 0 + +/* Acceptor connection request */ +typedef struct { + __u32 acr_magic; /* PTL_ACCEPTOR_PROTO_MAGIC */ + __u32 acr_version; /* protocol version */ + __u64 acr_nid; /* target NID */ +} WIRE_ATTR lnet_acceptor_connreq_t; + +#define LNET_PROTO_ACCEPTOR_VERSION 1 + +/* forward refs */ +struct lnet_libmd; + +typedef struct lnet_msg { + struct list_head msg_activelist; + struct list_head msg_list; /* Q for credits/MD */ + + lnet_process_id_t msg_target; + /* where is it from, it's only for building event */ + lnet_nid_t msg_from; + __u32 msg_type; + + /* commited for sending */ + unsigned int msg_tx_committed:1; + /* CPT # this message committed for sending */ + unsigned int msg_tx_cpt:15; + /* commited for receiving */ + unsigned int msg_rx_committed:1; + /* CPT # this message committed for receiving */ + unsigned int msg_rx_cpt:15; + /* queued for tx credit */ + unsigned int msg_tx_delayed:1; + /* queued for RX buffer */ + unsigned int msg_rx_delayed:1; + /* ready for 
pending on RX delay list */ + unsigned int msg_rx_ready_delay:1; + + unsigned int msg_vmflush:1; /* VM trying to free memory */ + unsigned int msg_target_is_router:1; /* sending to a router */ + unsigned int msg_routing:1; /* being forwarded */ + unsigned int msg_ack:1; /* ack on finalize (PUT) */ + unsigned int msg_sending:1; /* outgoing message */ + unsigned int msg_receiving:1; /* being received */ + unsigned int msg_txcredit:1; /* taken an NI send credit */ + unsigned int msg_peertxcredit:1; /* taken a peer send credit */ + unsigned int msg_rtrcredit:1; /* taken a globel router credit */ + unsigned int msg_peerrtrcredit:1; /* taken a peer router credit */ + unsigned int msg_onactivelist:1; /* on the activelist */ + + struct lnet_peer *msg_txpeer; /* peer I'm sending to */ + struct lnet_peer *msg_rxpeer; /* peer I received from */ + + void *msg_private; + struct lnet_libmd *msg_md; + + unsigned int msg_len; + unsigned int msg_wanted; + unsigned int msg_offset; + unsigned int msg_niov; + struct iovec *msg_iov; + lnet_kiov_t *msg_kiov; + + lnet_event_t msg_ev; + lnet_hdr_t msg_hdr; +} lnet_msg_t; + + +typedef struct lnet_libhandle { + struct list_head lh_hash_chain; + __u64 lh_cookie; +} lnet_libhandle_t; + +#define lh_entry(ptr, type, member) \ + ((type *)((char *)(ptr)-(char *)(&((type *)0)->member))) + +typedef struct lnet_eq { + struct list_head eq_list; + lnet_libhandle_t eq_lh; + lnet_seq_t eq_enq_seq; + lnet_seq_t eq_deq_seq; + unsigned int eq_size; + lnet_eq_handler_t eq_callback; + lnet_event_t *eq_events; + int **eq_refs; /* percpt refcount for EQ */ +} lnet_eq_t; + +typedef struct lnet_me { + struct list_head me_list; + lnet_libhandle_t me_lh; + lnet_process_id_t me_match_id; + unsigned int me_portal; + unsigned int me_pos; /* hash offset in mt_hash */ + __u64 me_match_bits; + __u64 me_ignore_bits; + lnet_unlink_t me_unlink; + struct lnet_libmd *me_md; +} lnet_me_t; + +typedef struct lnet_libmd { + struct list_head md_list; + lnet_libhandle_t md_lh; + 
lnet_me_t *md_me; + char *md_start; + unsigned int md_offset; + unsigned int md_length; + unsigned int md_max_size; + int md_threshold; + int md_refcount; + unsigned int md_options; + unsigned int md_flags; + void *md_user_ptr; + lnet_eq_t *md_eq; + unsigned int md_niov; /* # frags */ + union { + struct iovec iov[LNET_MAX_IOV]; + lnet_kiov_t kiov[LNET_MAX_IOV]; + } md_iov; +} lnet_libmd_t; + +#define LNET_MD_FLAG_ZOMBIE (1 << 0) +#define LNET_MD_FLAG_AUTO_UNLINK (1 << 1) + +#ifdef LNET_USE_LIB_FREELIST +typedef struct +{ + void *fl_objs; /* single contiguous array of objects */ + int fl_nobjs; /* the number of them */ + int fl_objsize; /* the size (including overhead) of each of them */ + struct list_head fl_list; /* where they are enqueued */ +} lnet_freelist_t; + +typedef struct +{ + struct list_head fo_list; /* enqueue on fl_list */ + void *fo_contents; /* aligned contents */ +} lnet_freeobj_t; +#endif + +typedef struct { + /* info about peers we are trying to fail */ + struct list_head tp_list; /* ln_test_peers */ + lnet_nid_t tp_nid; /* matching nid */ + unsigned int tp_threshold; /* # failures to simulate */ +} lnet_test_peer_t; + +#define LNET_COOKIE_TYPE_MD 1 +#define LNET_COOKIE_TYPE_ME 2 +#define LNET_COOKIE_TYPE_EQ 3 +#define LNET_COOKIE_TYPE_BITS 2 +#define LNET_COOKIE_MASK ((1ULL << LNET_COOKIE_TYPE_BITS) - 1ULL) + +struct lnet_ni; /* forward ref */ + +typedef struct lnet_lnd +{ + /* fields managed by portals */ + struct list_head lnd_list; /* stash in the LND table */ + int lnd_refcount; /* # active instances */ + + /* fields initialised by the LND */ + unsigned int lnd_type; + + int (*lnd_startup) (struct lnet_ni *ni); + void (*lnd_shutdown) (struct lnet_ni *ni); + int (*lnd_ctl)(struct lnet_ni *ni, unsigned int cmd, void *arg); + + /* In data movement APIs below, payload buffers are described as a set + * of 'niov' fragments which are... 
+ * EITHER + * in virtual memory (struct iovec *iov != NULL) + * OR + * in pages (kernel only: plt_kiov_t *kiov != NULL). + * The LND may NOT overwrite these fragment descriptors. + * An 'offset' and may specify a byte offset within the set of + * fragments to start from + */ + + /* Start sending a preformatted message. 'private' is NULL for PUT and + * GET messages; otherwise this is a response to an incoming message + * and 'private' is the 'private' passed to lnet_parse(). Return + * non-zero for immediate failure, otherwise complete later with + * lnet_finalize() */ + int (*lnd_send)(struct lnet_ni *ni, void *private, lnet_msg_t *msg); + + /* Start receiving 'mlen' bytes of payload data, skipping the following + * 'rlen' - 'mlen' bytes. 'private' is the 'private' passed to + * lnet_parse(). Return non-zero for immedaite failure, otherwise + * complete later with lnet_finalize(). This also gives back a receive + * credit if the LND does flow control. */ + int (*lnd_recv)(struct lnet_ni *ni, void *private, lnet_msg_t *msg, + int delayed, unsigned int niov, + struct iovec *iov, lnet_kiov_t *kiov, + unsigned int offset, unsigned int mlen, unsigned int rlen); + + /* lnet_parse() has had to delay processing of this message + * (e.g. waiting for a forwarding buffer or send credits). Give the + * LND a chance to free urgently needed resources. If called, return 0 + * for success and do NOT give back a receive credit; that has to wait + * until lnd_recv() gets called. On failure return < 0 and + * release resources; lnd_recv() will not be called. 
*/ + int (*lnd_eager_recv)(struct lnet_ni *ni, void *private, lnet_msg_t *msg, + void **new_privatep); + + /* notification of peer health */ + void (*lnd_notify)(struct lnet_ni *ni, lnet_nid_t peer, int alive); + + /* query of peer aliveness */ + void (*lnd_query)(struct lnet_ni *ni, lnet_nid_t peer, cfs_time_t *when); + + /* accept a new connection */ + int (*lnd_accept)(struct lnet_ni *ni, socket_t *sock); + +} lnd_t; + +#define LNET_NI_STATUS_UP 0x15aac0de +#define LNET_NI_STATUS_DOWN 0xdeadface +#define LNET_NI_STATUS_INVALID 0x00000000 +typedef struct { + lnet_nid_t ns_nid; + __u32 ns_status; + __u32 ns_unused; +} WIRE_ATTR lnet_ni_status_t; + +struct lnet_tx_queue { + int tq_credits; /* # tx credits free */ + int tq_credits_min; /* lowest it's been */ + int tq_credits_max; /* total # tx credits */ + struct list_head tq_delayed; /* delayed TXs */ +}; + +#define LNET_MAX_INTERFACES 16 + +typedef struct lnet_ni { + spinlock_t ni_lock; + struct list_head ni_list; /* chain on ln_nis */ + struct list_head ni_cptlist; /* chain on ln_nis_cpt */ + int ni_maxtxcredits; /* # tx credits */ + /* # per-peer send credits */ + int ni_peertxcredits; + /* # per-peer router buffer credits */ + int ni_peerrtrcredits; + /* seconds to consider peer dead */ + int ni_peertimeout; + int ni_ncpts; /* number of CPTs */ + __u32 *ni_cpts; /* bond NI on some CPTs */ + lnet_nid_t ni_nid; /* interface's NID */ + void *ni_data; /* instance-specific data */ + lnd_t *ni_lnd; /* procedural interface */ + struct lnet_tx_queue **ni_tx_queues; /* percpt TX queues */ + int **ni_refs; /* percpt reference count */ + long ni_last_alive; /* when I was last alive */ + lnet_ni_status_t *ni_status; /* my health status */ + /* equivalent interfaces to use */ + char *ni_interfaces[LNET_MAX_INTERFACES]; +} lnet_ni_t; + +#define LNET_PROTO_PING_MATCHBITS 0x8000000000000000LL + +/* NB: value of these features equal to LNET_PROTO_PING_VERSION_x + * of old LNet, so there shouldn't be any compatibility issue */ 
+#define LNET_PING_FEAT_INVAL (0) /* no feature */ +#define LNET_PING_FEAT_BASE (1 << 0) /* just a ping */ +#define LNET_PING_FEAT_NI_STATUS (1 << 1) /* return NI status */ + +#define LNET_PING_FEAT_MASK (LNET_PING_FEAT_BASE | \ + LNET_PING_FEAT_NI_STATUS) + +typedef struct { + __u32 pi_magic; + __u32 pi_features; + lnet_pid_t pi_pid; + __u32 pi_nnis; + lnet_ni_status_t pi_ni[0]; +} WIRE_ATTR lnet_ping_info_t; + +/* router checker data, per router */ +#define LNET_MAX_RTR_NIS 16 +#define LNET_PINGINFO_SIZE offsetof(lnet_ping_info_t, pi_ni[LNET_MAX_RTR_NIS]) +typedef struct { + /* chain on the_lnet.ln_zombie_rcd or ln_deathrow_rcd */ + struct list_head rcd_list; + lnet_handle_md_t rcd_mdh; /* ping buffer MD */ + struct lnet_peer *rcd_gateway; /* reference to gateway */ + lnet_ping_info_t *rcd_pinginfo; /* ping buffer */ +} lnet_rc_data_t; + +typedef struct lnet_peer { + struct list_head lp_hashlist; /* chain on peer hash */ + struct list_head lp_txq; /* messages blocking for tx credits */ + struct list_head lp_rtrq; /* messages blocking for router credits */ + struct list_head lp_rtr_list; /* chain on router list */ + int lp_txcredits; /* # tx credits available */ + int lp_mintxcredits; /* low water mark */ + int lp_rtrcredits; /* # router credits */ + int lp_minrtrcredits; /* low water mark */ + unsigned int lp_alive:1; /* alive/dead? */ + unsigned int lp_notify:1; /* notification outstanding? */ + unsigned int lp_notifylnd:1; /* outstanding notification for LND? 
*/ + unsigned int lp_notifying:1; /* some thread is handling notification */ + unsigned int lp_ping_notsent; /* SEND event outstanding from ping */ + int lp_alive_count; /* # times router went dead<->alive */ + long lp_txqnob; /* bytes queued for sending */ + cfs_time_t lp_timestamp; /* time of last aliveness news */ + cfs_time_t lp_ping_timestamp; /* time of last ping attempt */ + cfs_time_t lp_ping_deadline; /* != 0 if ping reply expected */ + cfs_time_t lp_last_alive; /* when I was last alive */ + cfs_time_t lp_last_query; /* when lp_ni was queried last time */ + lnet_ni_t *lp_ni; /* interface peer is on */ + lnet_nid_t lp_nid; /* peer's NID */ + int lp_refcount; /* # refs */ + int lp_cpt; /* CPT this peer attached on */ + /* # refs from lnet_route_t::lr_gateway */ + int lp_rtr_refcount; + /* returned RC ping features */ + unsigned int lp_ping_feats; + struct list_head lp_routes; /* routers on this peer */ + lnet_rc_data_t *lp_rcd; /* router checker state */ +} lnet_peer_t; + + +/* peer hash size */ +#define LNET_PEER_HASH_BITS 9 +#define LNET_PEER_HASH_SIZE (1 << LNET_PEER_HASH_BITS) + +/* peer hash table */ +struct lnet_peer_table { + int pt_version; /* /proc validity stamp */ + int pt_number; /* # peers extant */ + struct list_head pt_deathrow; /* zombie peers */ + struct list_head *pt_hash; /* NID->peer hash */ +}; + +/* peer aliveness is enabled only on routers for peers in a network where the + * lnet_ni_t::ni_peertimeout has been set to a positive value */ +#define lnet_peer_aliveness_enabled(lp) (the_lnet.ln_routing != 0 && \ + (lp)->lp_ni->ni_peertimeout > 0) + +typedef struct { + struct list_head lr_list; /* chain on net */ + struct list_head lr_gwlist; /* chain on gateway */ + lnet_peer_t *lr_gateway; /* router node */ + __u32 lr_net; /* remote network number */ + int lr_seq; /* sequence for round-robin */ + unsigned int lr_downis; /* number of down NIs */ + unsigned int lr_hops; /* how far I am */ +} lnet_route_t; + +#define 
LNET_REMOTE_NETS_HASH_DEFAULT (1U << 7) +#define LNET_REMOTE_NETS_HASH_MAX (1U << 16) +#define LNET_REMOTE_NETS_HASH_SIZE (1 << the_lnet.ln_remote_nets_hbits) + +typedef struct { + struct list_head lrn_list; /* chain on ln_remote_nets_hash */ + struct list_head lrn_routes; /* routes to me */ + __u32 lrn_net; /* my net number */ +} lnet_remotenet_t; + +typedef struct { + struct list_head rbp_bufs; /* my free buffer pool */ + struct list_head rbp_msgs; /* messages blocking for a buffer */ + int rbp_npages; /* # pages in each buffer */ + int rbp_nbuffers; /* # buffers */ + int rbp_credits; /* # free buffers / blocked messages */ + int rbp_mincredits; /* low water mark */ +} lnet_rtrbufpool_t; + +typedef struct { + struct list_head rb_list; /* chain on rbp_bufs */ + lnet_rtrbufpool_t *rb_pool; /* owning pool */ + lnet_kiov_t rb_kiov[0]; /* the buffer space */ +} lnet_rtrbuf_t; + +typedef struct { + __u32 msgs_alloc; + __u32 msgs_max; + __u32 errors; + __u32 send_count; + __u32 recv_count; + __u32 route_count; + __u32 drop_count; + __u64 send_length; + __u64 recv_length; + __u64 route_length; + __u64 drop_length; +} WIRE_ATTR lnet_counters_t; + +#define LNET_PEER_HASHSIZE 503 /* prime! 
*/ + +#define LNET_NRBPOOLS 3 /* # different router buffer pools */ + +enum { + /* Didn't match anything */ + LNET_MATCHMD_NONE = (1 << 0), + /* Matched OK */ + LNET_MATCHMD_OK = (1 << 1), + /* Must be discarded */ + LNET_MATCHMD_DROP = (1 << 2), + /* match and buffer is exhausted */ + LNET_MATCHMD_EXHAUSTED = (1 << 3), + /* match or drop */ + LNET_MATCHMD_FINISH = (LNET_MATCHMD_OK | LNET_MATCHMD_DROP), +}; + +/* Options for lnet_portal_t::ptl_options */ +#define LNET_PTL_LAZY (1 << 0) +#define LNET_PTL_MATCH_UNIQUE (1 << 1) /* unique match, for RDMA */ +#define LNET_PTL_MATCH_WILDCARD (1 << 2) /* wildcard match, request portal */ + +/* parameter for matching operations (GET, PUT) */ +struct lnet_match_info { + __u64 mi_mbits; + lnet_process_id_t mi_id; + unsigned int mi_opc; + unsigned int mi_portal; + unsigned int mi_rlength; + unsigned int mi_roffset; +}; + +/* ME hash of RDMA portal */ +#define LNET_MT_HASH_BITS 8 +#define LNET_MT_HASH_SIZE (1 << LNET_MT_HASH_BITS) +#define LNET_MT_HASH_MASK (LNET_MT_HASH_SIZE - 1) +/* we allocate (LNET_MT_HASH_SIZE + 1) entries for lnet_match_table::mt_mhash, + * the last entry is reserved for MEs with ignore-bits */ +#define LNET_MT_HASH_IGNORE LNET_MT_HASH_SIZE +/* __u64 has 2^6 bits, so need 2^(LNET_MT_HASH_BITS - LNET_MT_BITS_U64) which + * is 4 __u64s as bit-map, and add an extra __u64 (only use one bit) for the + * ME-list with ignore-bits, which is mtable::mt_mhash[LNET_MT_HASH_IGNORE] */ +#define LNET_MT_BITS_U64 6 /* 2^6 bits */ +#define LNET_MT_EXHAUSTED_BITS (LNET_MT_HASH_BITS - LNET_MT_BITS_U64) +#define LNET_MT_EXHAUSTED_BMAP ((1 << LNET_MT_EXHAUSTED_BITS) + 1) + +/* portal match table */ +struct lnet_match_table { + /* reserved for upcoming patches, CPU partition ID */ + unsigned int mt_cpt; + unsigned int mt_portal; /* portal index */ + /* match table is set as "enabled" if there's non-exhausted MD + * attached on mt_mhash, it's only valid for wildcard portal */ + unsigned int mt_enabled; + /* bitmap to flag 
whether MEs on mt_mhash are exhausted or not */ + __u64 mt_exhausted[LNET_MT_EXHAUSTED_BMAP]; + struct list_head *mt_mhash; /* matching hash */ +}; + +/* these are only useful for wildcard portal */ +/* Turn off message rotor for wildcard portals */ +#define LNET_PTL_ROTOR_OFF 0 +/* round-robin dispatch all PUT messages for wildcard portals */ +#define LNET_PTL_ROTOR_ON 1 +/* round-robin dispatch routed PUT message for wildcard portals */ +#define LNET_PTL_ROTOR_RR_RT 2 +/* dispatch routed PUT message by hashing source NID for wildcard portals */ +#define LNET_PTL_ROTOR_HASH_RT 3 + +typedef struct lnet_portal { + spinlock_t ptl_lock; + unsigned int ptl_index; /* portal ID, reserved */ + /* flags on this portal: lazy, unique... */ + unsigned int ptl_options; + /* list of messages which are stealing buffer */ + struct list_head ptl_msg_stealing; + /* messages blocking for MD */ + struct list_head ptl_msg_delayed; + /* Match table for each CPT */ + struct lnet_match_table **ptl_mtables; + /* spread rotor of incoming "PUT" */ + int ptl_rotor; + /* # active entries for this portal */ + int ptl_mt_nmaps; + /* array of active entries' cpu-partition-id */ + int ptl_mt_maps[0]; +} lnet_portal_t; + +#define LNET_LH_HASH_BITS 12 +#define LNET_LH_HASH_SIZE (1ULL << LNET_LH_HASH_BITS) +#define LNET_LH_HASH_MASK (LNET_LH_HASH_SIZE - 1) + +/* resource container (ME, MD, EQ) */ +struct lnet_res_container { + unsigned int rec_type; /* container type */ + __u64 rec_lh_cookie; /* cookie generator */ + struct list_head rec_active; /* active resource list */ + struct list_head *rec_lh_hash; /* handle hash */ +#ifdef LNET_USE_LIB_FREELIST + lnet_freelist_t rec_freelist; /* freelist for resources */ +#endif +}; + +/* message container */ +struct lnet_msg_container { + int msc_init; /* initialized or not */ + /* max # threads finalizing */ + int msc_nfinalizers; + /* msgs waiting to complete finalizing */ + struct list_head msc_finalizing; + struct list_head msc_active; /* active message 
list */ + /* threads doing finalization */ + void **msc_finalizers; +#ifdef LNET_USE_LIB_FREELIST + lnet_freelist_t msc_freelist; /* freelist for messages */ +#endif +}; + +/* Router Checker states */ +#define LNET_RC_STATE_SHUTDOWN 0 /* not started */ +#define LNET_RC_STATE_RUNNING 1 /* started up OK */ +#define LNET_RC_STATE_STOPPING 2 /* telling thread to stop */ + +typedef struct +{ + /* CPU partition table of LNet */ + struct cfs_cpt_table *ln_cpt_table; + /* number of CPTs in ln_cpt_table */ + unsigned int ln_cpt_number; + unsigned int ln_cpt_bits; + + /* protect LNet resources (ME/MD/EQ) */ + struct cfs_percpt_lock *ln_res_lock; + /* # portals */ + int ln_nportals; + /* the vector of portals */ + lnet_portal_t **ln_portals; + /* percpt ME containers */ + struct lnet_res_container **ln_me_containers; + /* percpt MD container */ + struct lnet_res_container **ln_md_containers; + + /* Event Queue container */ + struct lnet_res_container ln_eq_container; + wait_queue_head_t ln_eq_waitq; + spinlock_t ln_eq_wait_lock; + unsigned int ln_remote_nets_hbits; + + /* protect NI, peer table, credits, routers, rtrbuf... 
*/ + struct cfs_percpt_lock *ln_net_lock; + /* percpt message containers for active/finalizing/freed message */ + struct lnet_msg_container **ln_msg_containers; + lnet_counters_t **ln_counters; + struct lnet_peer_table **ln_peer_tables; + /* failure simulation */ + struct list_head ln_test_peers; + + struct list_head ln_nis; /* LND instances */ + /* NIs bond on specific CPT(s) */ + struct list_head ln_nis_cpt; + /* dying LND instances */ + struct list_head ln_nis_zombie; + lnet_ni_t *ln_loni; /* the loopback NI */ + /* NI to wait for events in */ + lnet_ni_t *ln_eq_waitni; + + /* remote networks with routes to them */ + struct list_head *ln_remote_nets_hash; + /* validity stamp */ + __u64 ln_remote_nets_version; + /* list of all known routers */ + struct list_head ln_routers; + /* validity stamp */ + __u64 ln_routers_version; + /* percpt router buffer pools */ + lnet_rtrbufpool_t **ln_rtrpools; + + lnet_handle_md_t ln_ping_target_md; + lnet_handle_eq_t ln_ping_target_eq; + lnet_ping_info_t *ln_ping_info; + + /* router checker startup/shutdown state */ + int ln_rc_state; + /* router checker's event queue */ + lnet_handle_eq_t ln_rc_eqh; + /* rcd still pending on net */ + struct list_head ln_rcd_deathrow; + /* rcd ready for free */ + struct list_head ln_rcd_zombie; + /* serialise startup/shutdown */ + struct semaphore ln_rc_signal; + + struct mutex ln_api_mutex; + struct mutex ln_lnd_mutex; + int ln_init; /* LNetInit() called? */ + /* Have I called LNetNIInit myself? */ + int ln_niinit_self; + /* LNetNIInit/LNetNIFini counter */ + int ln_refcount; + /* shutdown in progress */ + int ln_shutdown; + + int ln_routing; /* am I a router? 
*/ + lnet_pid_t ln_pid; /* requested pid */ + /* uniquely identifies this ni in this epoch */ + __u64 ln_interface_cookie; + /* registered LNDs */ + struct list_head ln_lnds; + + /* space for network names */ + char *ln_network_tokens; + int ln_network_tokens_nob; + /* test protocol compatibility flags */ + int ln_testprotocompat; + +} lnet_t; + +#endif diff --git a/drivers/staging/lustre/include/linux/lnet/linux/api-support.h b/drivers/staging/lustre/include/linux/lnet/linux/api-support.h new file mode 100644 index 000000000000..ca78a0a4e908 --- /dev/null +++ b/drivers/staging/lustre/include/linux/lnet/linux/api-support.h @@ -0,0 +1,43 @@ +/* + * GPL HEADER START + * + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 only, + * as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License version 2 for more details (a copy is included + * in the LICENSE file that accompanied this code). + * + * You should have received a copy of the GNU General Public License + * version 2 along with this program; If not, see + * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf + * + * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara, + * CA 95054 USA or visit www.sun.com if you need additional information or + * have any questions. + * + * GPL HEADER END + */ +/* + * Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved. + * Use is subject to license terms. + */ +/* + * This file is part of Lustre, http://www.lustre.org/ + * Lustre is a trademark of Sun Microsystems, Inc. 
+ */ + +#ifndef __LINUX_API_SUPPORT_H__ +#define __LINUX_API_SUPPORT_H__ + +#ifndef __LNET_API_SUPPORT_H__ +#error Do not #include this file directly. #include <lnet /api-support.h> instead +#endif + + +#endif diff --git a/drivers/staging/lustre/include/linux/lnet/linux/lib-lnet.h b/drivers/staging/lustre/include/linux/lnet/linux/lib-lnet.h new file mode 100644 index 000000000000..d2c0a70f1f7e --- /dev/null +++ b/drivers/staging/lustre/include/linux/lnet/linux/lib-lnet.h @@ -0,0 +1,72 @@ +/* + * GPL HEADER START + * + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 only, + * as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License version 2 for more details (a copy is included + * in the LICENSE file that accompanied this code). + * + * You should have received a copy of the GNU General Public License + * version 2 along with this program; If not, see + * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf + * + * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara, + * CA 95054 USA or visit www.sun.com if you need additional information or + * have any questions. + * + * GPL HEADER END + */ +/* + * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. + * Use is subject to license terms. + */ +/* + * This file is part of Lustre, http://www.lustre.org/ + * Lustre is a trademark of Sun Microsystems, Inc. + */ + +#ifndef __LNET_LINUX_LIB_LNET_H__ +#define __LNET_LINUX_LIB_LNET_H__ + +#ifndef __LNET_LIB_LNET_H__ +#error Do not #include this file directly. 
#include <linux/lnet/lib-lnet.h> instead +#endif + +# include <asm/page.h> +# include <linux/string.h> +# include <asm/io.h> +# include <linux/libcfs/libcfs.h> + +static inline __u64 +lnet_page2phys (struct page *p) +{ + /* compiler optimizer will elide unused branches */ + + switch (sizeof(typeof(page_to_phys(p)))) { + case 4: + /* page_to_phys returns a 32 bit physical address. This must + * be a 32 bit machine with <= 4G memory and we must ensure we + * don't sign extend when converting to 64 bits. */ + return (unsigned long)page_to_phys(p); + + case 8: + /* page_to_phys returns a 64 bit physical address :) */ + return page_to_phys(p); + + default: + LBUG(); + return 0; + } +} + + +#define LNET_ROUTER + +#endif /* __LNET_LINUX_LIB_LNET_H__ */ diff --git a/drivers/staging/lustre/include/linux/lnet/linux/lib-types.h b/drivers/staging/lustre/include/linux/lnet/linux/lib-types.h new file mode 100644 index 000000000000..669e8c038534 --- /dev/null +++ b/drivers/staging/lustre/include/linux/lnet/linux/lib-types.h @@ -0,0 +1,45 @@ +/* + * GPL HEADER START + * + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 only, + * as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License version 2 for more details (a copy is included + * in the LICENSE file that accompanied this code). 
+ * + * You should have received a copy of the GNU General Public License + * version 2 along with this program; If not, see + * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf + * + * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara, + * CA 95054 USA or visit www.sun.com if you need additional information or + * have any questions. + * + * GPL HEADER END + */ +/* + * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. + * Use is subject to license terms. + */ +/* + * This file is part of Lustre, http://www.lustre.org/ + * Lustre is a trademark of Sun Microsystems, Inc. + */ + +#ifndef __LNET_LINUX_LIB_TYPES_H__ +#define __LNET_LINUX_LIB_TYPES_H__ + +#ifndef __LNET_LIB_TYPES_H__ +#error Do not #include this file directly. #include <linux/lnet/lib-types.h> instead +#endif + +# include <linux/uio.h> +# include <linux/types.h> + +#endif diff --git a/drivers/staging/lustre/include/linux/lnet/linux/lnet.h b/drivers/staging/lustre/include/linux/lnet/linux/lnet.h new file mode 100644 index 000000000000..1e888f1efc45 --- /dev/null +++ b/drivers/staging/lustre/include/linux/lnet/linux/lnet.h @@ -0,0 +1,56 @@ +/* + * GPL HEADER START + * + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 only, + * as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License version 2 for more details (a copy is included + * in the LICENSE file that accompanied this code). 
+ * + * You should have received a copy of the GNU General Public License + * version 2 along with this program; If not, see + * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf + * + * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara, + * CA 95054 USA or visit www.sun.com if you need additional information or + * have any questions. + * + * GPL HEADER END + */ +/* + * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. + * Use is subject to license terms. + * + * Copyright (c) 2011, Intel Corporation. + */ +/* + * This file is part of Lustre, http://www.lustre.org/ + * Lustre is a trademark of Sun Microsystems, Inc. + */ + +#ifndef __LNET_LINUX_LNET_H__ +#define __LNET_LINUX_LNET_H__ + +#ifndef __LNET_H__ +#error Do not #include this file directly. #include <linux/lnet/lnet.h> instead +#endif + +/* + * lnet.h + * + * User application interface file + */ + +#include <linux/uio.h> +#include <linux/types.h> + +#define cfs_tcp_sendpage(sk, page, offset, size, flags) \ + tcp_sendpage(sk, page, offset, size, flags) + +#endif diff --git a/drivers/staging/lustre/include/linux/lnet/lnet-sysctl.h b/drivers/staging/lustre/include/linux/lnet/lnet-sysctl.h new file mode 100644 index 000000000000..1bde44ebb911 --- /dev/null +++ b/drivers/staging/lustre/include/linux/lnet/lnet-sysctl.h @@ -0,0 +1,51 @@ +/* + * GPL HEADER START + * + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 only, + * as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License version 2 for more details (a copy is included + * in the LICENSE file that accompanied this code). 
+ * + * You should have received a copy of the GNU General Public License + * version 2 along with this program; If not, see + * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf + * + * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara, + * CA 95054 USA or visit www.sun.com if you need additional information or + * have any questions. + * + * GPL HEADER END + */ +/* + * Copyright (c) 2008, 2010, Oracle and/or its affiliates. All rights reserved. + * Use is subject to license terms. + */ +/* + * This file is part of Lustre, http://www.lustre.org/ + * Lustre is a trademark of Sun Microsystems, Inc. + */ + +#ifndef __LNET_SYSCTL_H__ +#define __LNET_SYSCTL_H__ + +#if defined(CONFIG_SYSCTL) + + +#define CTL_KRANAL 201 +#define CTL_O2IBLND 205 +#define CTL_PTLLND 206 +#define CTL_QSWNAL 207 +#define CTL_SOCKLND 208 +#define CTL_GNILND 210 + + +#endif + +#endif diff --git a/drivers/staging/lustre/include/linux/lnet/lnet.h b/drivers/staging/lustre/include/linux/lnet/lnet.h new file mode 100644 index 000000000000..c532b15d7643 --- /dev/null +++ b/drivers/staging/lustre/include/linux/lnet/lnet.h @@ -0,0 +1,51 @@ +/* + * GPL HEADER START + * + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 only, + * as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License version 2 for more details (a copy is included + * in the LICENSE file that accompanied this code). 
+ * + * You should have received a copy of the GNU General Public License + * version 2 along with this program; If not, see + * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf + * + * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara, + * CA 95054 USA or visit www.sun.com if you need additional information or + * have any questions. + * + * GPL HEADER END + */ +/* + * Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved. + * Use is subject to license terms. + */ +/* + * This file is part of Lustre, http://www.lustre.org/ + * Lustre is a trademark of Sun Microsystems, Inc. + */ + +#ifndef __LNET_H__ +#define __LNET_H__ + +/* + * lnet.h + * + * User application interface file + */ +#include <linux/lnet/linux/lnet.h> + +#include <linux/lnet/types.h> +#include <linux/lnet/api.h> + +#define LNET_NIDSTR_COUNT 1024 /* # of nidstrings */ +#define LNET_NIDSTR_SIZE 32 /* size of each one (see below for usage) */ + +#endif diff --git a/drivers/staging/lustre/include/linux/lnet/lnetctl.h b/drivers/staging/lustre/include/linux/lnet/lnetctl.h new file mode 100644 index 000000000000..b22daa234255 --- /dev/null +++ b/drivers/staging/lustre/include/linux/lnet/lnetctl.h @@ -0,0 +1,80 @@ +/* + * This file is part of Portals, http://www.sf.net/projects/lustre/ + * + * Portals is free software; you can redistribute it and/or + * modify it under the terms of version 2 of the GNU General Public + * License as published by the Free Software Foundation. + * + * Portals is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with Portals; if not, write to the Free Software + * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. 
+ * + * header for libptlctl.a + */ +#ifndef _PTLCTL_H_ +#define _PTLCTL_H_ + +#include <linux/libcfs/libcfs.h> +#include <linux/lnet/types.h> + +#define LNET_DEV_ID 0 +#define LNET_DEV_PATH "/dev/lnet" +#define LNET_DEV_MAJOR 10 +#define LNET_DEV_MINOR 240 +#define OBD_DEV_ID 1 +#define OBD_DEV_NAME "obd" +#define OBD_DEV_PATH "/dev/" OBD_DEV_NAME +#define OBD_DEV_MAJOR 10 +#define OBD_DEV_MINOR 241 +#define SMFS_DEV_ID 2 +#define SMFS_DEV_PATH "/dev/snapdev" +#define SMFS_DEV_MAJOR 10 +#define SMFS_DEV_MINOR 242 + +int ptl_initialize(int argc, char **argv); +int jt_ptl_network(int argc, char **argv); +int jt_ptl_list_nids(int argc, char **argv); +int jt_ptl_which_nid(int argc, char **argv); +int jt_ptl_print_interfaces(int argc, char **argv); +int jt_ptl_add_interface(int argc, char **argv); +int jt_ptl_del_interface(int argc, char **argv); +int jt_ptl_print_peers (int argc, char **argv); +int jt_ptl_add_peer (int argc, char **argv); +int jt_ptl_del_peer (int argc, char **argv); +int jt_ptl_print_connections (int argc, char **argv); +int jt_ptl_disconnect(int argc, char **argv); +int jt_ptl_push_connection(int argc, char **argv); +int jt_ptl_print_active_txs(int argc, char **argv); +int jt_ptl_ping(int argc, char **argv); +int jt_ptl_mynid(int argc, char **argv); +int jt_ptl_add_uuid(int argc, char **argv); +int jt_ptl_add_uuid_old(int argc, char **argv); /* backwards compatibility */ +int jt_ptl_close_uuid(int argc, char **argv); +int jt_ptl_del_uuid(int argc, char **argv); +int jt_ptl_add_route (int argc, char **argv); +int jt_ptl_del_route (int argc, char **argv); +int jt_ptl_notify_router (int argc, char **argv); +int jt_ptl_print_routes (int argc, char **argv); +int jt_ptl_fail_nid (int argc, char **argv); +int jt_ptl_lwt(int argc, char **argv); +int jt_ptl_testprotocompat(int argc, char **argv); +int jt_ptl_memhog(int argc, char **argv); + +int dbg_initialize(int argc, char **argv); +int jt_dbg_filter(int argc, char **argv); +int jt_dbg_show(int argc, char 
**argv); +int jt_dbg_list(int argc, char **argv); +int jt_dbg_debug_kernel(int argc, char **argv); +int jt_dbg_debug_daemon(int argc, char **argv); +int jt_dbg_debug_file(int argc, char **argv); +int jt_dbg_clear_debug_buf(int argc, char **argv); +int jt_dbg_mark_debug_buf(int argc, char **argv); +int jt_dbg_modules(int argc, char **argv); +int jt_dbg_panic(int argc, char **argv); + +#endif diff --git a/drivers/staging/lustre/include/linux/lnet/lnetst.h b/drivers/staging/lustre/include/linux/lnet/lnetst.h new file mode 100644 index 000000000000..d90f94e94601 --- /dev/null +++ b/drivers/staging/lustre/include/linux/lnet/lnetst.h @@ -0,0 +1,491 @@ +/* + * GPL HEADER START + * + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 only, + * as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License version 2 for more details (a copy is included + * in the LICENSE file that accompanied this code). + * + * You should have received a copy of the GNU General Public License + * version 2 along with this program; If not, see + * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf + * + * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara, + * CA 95054 USA or visit www.sun.com if you need additional information or + * have any questions. + * + * GPL HEADER END + */ +/* + * Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved. + * Use is subject to license terms. + * + * Copyright (c) 2011, Intel Corporation. + */ +/* + * This file is part of Lustre, http://www.lustre.org/ + * Lustre is a trademark of Sun Microsystems, Inc. 
+ * + * lnet/include/lnet/lnetst.h + * + * Author: Liang Zhen <liangzhen@clusterfs.com> + */ + +#ifndef __LNET_ST_H__ +#define __LNET_ST_H__ + +#include <linux/libcfs/libcfs.h> +#include <linux/lnet/lnet.h> +#include <linux/lnet/lib-types.h> + +#define LST_FEAT_NONE (0) +#define LST_FEAT_BULK_LEN (1 << 0) /* enable variable page size */ + +#define LST_FEATS_EMPTY (LST_FEAT_NONE) +#define LST_FEATS_MASK (LST_FEAT_NONE | LST_FEAT_BULK_LEN) + +#define LST_NAME_SIZE 32 /* max name buffer length */ + +#define LSTIO_DEBUG 0xC00 /* debug */ +#define LSTIO_SESSION_NEW 0xC01 /* create session */ +#define LSTIO_SESSION_END 0xC02 /* end session */ +#define LSTIO_SESSION_INFO 0xC03 /* query session */ +#define LSTIO_GROUP_ADD 0xC10 /* add group */ +#define LSTIO_GROUP_LIST 0xC11 /* list all groups in session */ +#define LSTIO_GROUP_INFO 0xC12 /* query default information of specified group */ +#define LSTIO_GROUP_DEL 0xC13 /* delete group */ +#define LSTIO_NODES_ADD 0xC14 /* add nodes to specified group */ +#define LSTIO_GROUP_UPDATE 0xC15 /* update group */ +#define LSTIO_BATCH_ADD 0xC20 /* add batch */ +#define LSTIO_BATCH_START 0xC21 /* start batch */ +#define LSTIO_BATCH_STOP 0xC22 /* stop batch */ +#define LSTIO_BATCH_DEL 0xC23 /* delete batch */ +#define LSTIO_BATCH_LIST 0xC24 /* show all batches in the session */ +#define LSTIO_BATCH_INFO 0xC25 /* show detail of specified batch */ +#define LSTIO_TEST_ADD 0xC26 /* add test (to batch) */ +#define LSTIO_BATCH_QUERY 0xC27 /* query batch status */ +#define LSTIO_STAT_QUERY 0xC30 /* get stats */ + +typedef struct { + lnet_nid_t ses_nid; /* nid of console node */ + __u64 ses_stamp; /* time stamp */ +} lst_sid_t; /*** session id */ + +extern lst_sid_t LST_INVALID_SID; + +typedef struct { + __u64 bat_id; /* unique id in session */ +} lst_bid_t; /*** batch id (group of tests) */ + +/* Status of test node */ +#define LST_NODE_ACTIVE 0x1 /* node in this session */ +#define LST_NODE_BUSY 0x2 /* node is taken by other session */ 
+#define LST_NODE_DOWN 0x4 /* node is down */ +#define LST_NODE_UNKNOWN 0x8 /* node not in session */ + +typedef struct { + lnet_process_id_t nde_id; /* id of node */ + int nde_state; /* state of node */ +} lstcon_node_ent_t; /*** node entry, for list_group command */ + +typedef struct { + int nle_nnode; /* # of nodes */ + int nle_nactive; /* # of active nodes */ + int nle_nbusy; /* # of busy nodes */ + int nle_ndown; /* # of down nodes */ + int nle_nunknown; /* # of unknown nodes */ +} lstcon_ndlist_ent_t; /*** node_list entry, for list_batch command */ + +typedef struct { + int tse_type; /* test type */ + int tse_loop; /* loop count */ + int tse_concur; /* concurrency of test */ +} lstcon_test_ent_t; /*** test summary entry, for list_batch command */ + +typedef struct { + int bae_state; /* batch status */ + int bae_timeout; /* batch timeout */ + int bae_ntest; /* # of tests in the batch */ +} lstcon_batch_ent_t; /*** batch summary entry, for list_batch command */ + +typedef struct { + lstcon_ndlist_ent_t tbe_cli_nle; /* client (group) node_list entry */ + lstcon_ndlist_ent_t tbe_srv_nle; /* server (group) node_list entry */ + union { + lstcon_test_ent_t tbe_test; /* test entry */ + lstcon_batch_ent_t tbe_batch; /* batch entry */ + } u; +} lstcon_test_batch_ent_t; /*** test/batch verbose information entry, + *** for list_batch command */ + +typedef struct { + struct list_head rpe_link; /* link chain */ + lnet_process_id_t rpe_peer; /* peer's id */ + struct timeval rpe_stamp; /* time stamp of RPC */ + int rpe_state; /* peer's state */ + int rpe_rpc_errno; /* RPC errno */ + + lst_sid_t rpe_sid; /* peer's session id */ + int rpe_fwk_errno; /* framework errno */ + int rpe_priv[4]; /* private data */ + char rpe_payload[0]; /* private reply payload */ +} lstcon_rpc_ent_t; + +typedef struct { + int trs_rpc_stat[4]; /* RPCs stat (0: total, 1: success, 2: failure, 3: reserved) */ + int trs_rpc_errno; /* RPC errno */ + int trs_fwk_stat[8]; /* framework stat */ + int 
trs_fwk_errno; /* errno of the first remote error */ + void *trs_fwk_private; /* private framework stat */ +} lstcon_trans_stat_t; + +static inline int +lstcon_rpc_stat_total(lstcon_trans_stat_t *stat, int inc) +{ + return inc ? ++stat->trs_rpc_stat[0] : stat->trs_rpc_stat[0]; +} + +static inline int +lstcon_rpc_stat_success(lstcon_trans_stat_t *stat, int inc) +{ + return inc ? ++stat->trs_rpc_stat[1] : stat->trs_rpc_stat[1]; +} + +static inline int +lstcon_rpc_stat_failure(lstcon_trans_stat_t *stat, int inc) +{ + return inc ? ++stat->trs_rpc_stat[2] : stat->trs_rpc_stat[2]; +} + +static inline int +lstcon_sesop_stat_success(lstcon_trans_stat_t *stat, int inc) +{ + return inc ? ++stat->trs_fwk_stat[0] : stat->trs_fwk_stat[0]; +} + +static inline int +lstcon_sesop_stat_failure(lstcon_trans_stat_t *stat, int inc) +{ + return inc ? ++stat->trs_fwk_stat[1] : stat->trs_fwk_stat[1]; +} + +static inline int +lstcon_sesqry_stat_active(lstcon_trans_stat_t *stat, int inc) +{ + return inc ? ++stat->trs_fwk_stat[0] : stat->trs_fwk_stat[0]; +} + +static inline int +lstcon_sesqry_stat_busy(lstcon_trans_stat_t *stat, int inc) +{ + return inc ? ++stat->trs_fwk_stat[1] : stat->trs_fwk_stat[1]; +} + +static inline int +lstcon_sesqry_stat_unknown(lstcon_trans_stat_t *stat, int inc) +{ + return inc ? ++stat->trs_fwk_stat[2] : stat->trs_fwk_stat[2]; +} + +static inline int +lstcon_tsbop_stat_success(lstcon_trans_stat_t *stat, int inc) +{ + return inc ? ++stat->trs_fwk_stat[0] : stat->trs_fwk_stat[0]; +} + +static inline int +lstcon_tsbop_stat_failure(lstcon_trans_stat_t *stat, int inc) +{ + return inc ? ++stat->trs_fwk_stat[1] : stat->trs_fwk_stat[1]; +} + +static inline int +lstcon_tsbqry_stat_idle(lstcon_trans_stat_t *stat, int inc) +{ + return inc ? ++stat->trs_fwk_stat[0] : stat->trs_fwk_stat[0]; +} + +static inline int +lstcon_tsbqry_stat_run(lstcon_trans_stat_t *stat, int inc) +{ + return inc ? 
++stat->trs_fwk_stat[1] : stat->trs_fwk_stat[1]; +} + +static inline int +lstcon_tsbqry_stat_failure(lstcon_trans_stat_t *stat, int inc) +{ + return inc ? ++stat->trs_fwk_stat[2] : stat->trs_fwk_stat[2]; +} + +static inline int +lstcon_statqry_stat_success(lstcon_trans_stat_t *stat, int inc) +{ + return inc ? ++stat->trs_fwk_stat[0] : stat->trs_fwk_stat[0]; +} + +static inline int +lstcon_statqry_stat_failure(lstcon_trans_stat_t *stat, int inc) +{ + return inc ? ++stat->trs_fwk_stat[1] : stat->trs_fwk_stat[1]; +} + +/* create a session */ +typedef struct { + int lstio_ses_key; /* IN: local key */ + int lstio_ses_timeout; /* IN: session timeout */ + int lstio_ses_force; /* IN: force create ? */ + /** IN: session features */ + unsigned lstio_ses_feats; + lst_sid_t *lstio_ses_idp; /* OUT: session id */ + int lstio_ses_nmlen; /* IN: name length */ + char *lstio_ses_namep; /* IN: session name */ +} lstio_session_new_args_t; + +/* query current session */ +typedef struct { + lst_sid_t *lstio_ses_idp; /* OUT: session id */ + int *lstio_ses_keyp; /* OUT: local key */ + /** OUT: session features */ + unsigned *lstio_ses_featp; + lstcon_ndlist_ent_t *lstio_ses_ndinfo; /* OUT: */ + int lstio_ses_nmlen; /* IN: name length */ + char *lstio_ses_namep; /* OUT: session name */ +} lstio_session_info_args_t; + +/* delete a session */ +typedef struct { + int lstio_ses_key; /* IN: session key */ +} lstio_session_end_args_t; + +#define LST_OPC_SESSION 1 +#define LST_OPC_GROUP 2 +#define LST_OPC_NODES 3 +#define LST_OPC_BATCHCLI 4 +#define LST_OPC_BATCHSRV 5 + +typedef struct { + int lstio_dbg_key; /* IN: session key */ + int lstio_dbg_type; /* IN: debug session|batch|group|nodes list */ + int lstio_dbg_flags; /* IN: reserved debug flags */ + int lstio_dbg_timeout; /* IN: timeout of debug */ + + int lstio_dbg_nmlen; /* IN: len of name */ + char *lstio_dbg_namep; /* IN: name of group|batch */ + int lstio_dbg_count; /* IN: # of test nodes to debug */ + lnet_process_id_t *lstio_dbg_idsp; /* 
IN: id of test nodes */ + struct list_head *lstio_dbg_resultp; /* OUT: list head of result buffer */ +} lstio_debug_args_t; + +typedef struct { + int lstio_grp_key; /* IN: session key */ + int lstio_grp_nmlen; /* IN: name length */ + char *lstio_grp_namep; /* IN: group name */ +} lstio_group_add_args_t; + +typedef struct { + int lstio_grp_key; /* IN: session key */ + int lstio_grp_nmlen; /* IN: name length */ + char *lstio_grp_namep; /* IN: group name */ +} lstio_group_del_args_t; + +#define LST_GROUP_CLEAN 1 /* remove inactive nodes in the group */ +#define LST_GROUP_REFRESH 2 /* refresh inactive nodes in the group */ +#define LST_GROUP_RMND 3 /* delete nodes from the group */ + +typedef struct { + int lstio_grp_key; /* IN: session key */ + int lstio_grp_opc; /* IN: OPC */ + int lstio_grp_args; /* IN: arguments */ + int lstio_grp_nmlen; /* IN: name length */ + char *lstio_grp_namep; /* IN: group name */ + int lstio_grp_count; /* IN: # of nodes id */ + lnet_process_id_t *lstio_grp_idsp; /* IN: array of nodes */ + struct list_head *lstio_grp_resultp; /* OUT: list head of result buffer */ +} lstio_group_update_args_t; + +typedef struct { + int lstio_grp_key; /* IN: session key */ + int lstio_grp_nmlen; /* IN: name length */ + char *lstio_grp_namep; /* IN: group name */ + int lstio_grp_count; /* IN: # of nodes */ + /** OUT: session features */ + unsigned *lstio_grp_featp; + lnet_process_id_t *lstio_grp_idsp; /* IN: nodes */ + struct list_head *lstio_grp_resultp; /* OUT: list head of result buffer */ +} lstio_group_nodes_args_t; + +typedef struct { + int lstio_grp_key; /* IN: session key */ + int lstio_grp_idx; /* IN: group idx */ + int lstio_grp_nmlen; /* IN: name len */ + char *lstio_grp_namep; /* OUT: name */ +} lstio_group_list_args_t; + +typedef struct { + int lstio_grp_key; /* IN: session key */ + int lstio_grp_nmlen; /* IN: name len */ + char *lstio_grp_namep; /* IN: name */ + lstcon_ndlist_ent_t *lstio_grp_entp; /* OUT: description of group */ + + int 
*lstio_grp_idxp; /* IN/OUT: node index */ + int *lstio_grp_ndentp; /* IN/OUT: # of nodent */ + lstcon_node_ent_t *lstio_grp_dentsp; /* OUT: nodent array */ +} lstio_group_info_args_t; + +#define LST_DEFAULT_BATCH "batch" /* default batch name */ + +typedef struct { + int lstio_bat_key; /* IN: session key */ + int lstio_bat_nmlen; /* IN: name length */ + char *lstio_bat_namep; /* IN: batch name */ +} lstio_batch_add_args_t; + +typedef struct { + int lstio_bat_key; /* IN: session key */ + int lstio_bat_nmlen; /* IN: name length */ + char *lstio_bat_namep; /* IN: batch name */ +} lstio_batch_del_args_t; + +typedef struct { + int lstio_bat_key; /* IN: session key */ + int lstio_bat_timeout; /* IN: timeout for the batch */ + int lstio_bat_nmlen; /* IN: name length */ + char *lstio_bat_namep; /* IN: batch name */ + struct list_head *lstio_bat_resultp; /* OUT: list head of result buffer */ +} lstio_batch_run_args_t; + +typedef struct { + int lstio_bat_key; /* IN: session key */ + int lstio_bat_force; /* IN: abort unfinished test RPC */ + int lstio_bat_nmlen; /* IN: name length */ + char *lstio_bat_namep; /* IN: batch name */ + struct list_head *lstio_bat_resultp; /* OUT: list head of result buffer */ +} lstio_batch_stop_args_t; + +typedef struct { + int lstio_bat_key; /* IN: session key */ + int lstio_bat_testidx; /* IN: test index */ + int lstio_bat_client; /* IN: is test client? 
*/ + int lstio_bat_timeout; /* IN: timeout for waiting */ + int lstio_bat_nmlen; /* IN: name length */ + char *lstio_bat_namep; /* IN: batch name */ + struct list_head *lstio_bat_resultp; /* OUT: list head of result buffer */ +} lstio_batch_query_args_t; + +typedef struct { + int lstio_bat_key; /* IN: session key */ + int lstio_bat_idx; /* IN: index */ + int lstio_bat_nmlen; /* IN: name length */ + char *lstio_bat_namep; /* IN: batch name */ +} lstio_batch_list_args_t; + +typedef struct { + int lstio_bat_key; /* IN: session key */ + int lstio_bat_nmlen; /* IN: name length */ + char *lstio_bat_namep; /* IN: name */ + int lstio_bat_server; /* IN: query server or not */ + int lstio_bat_testidx; /* IN: test index */ + lstcon_test_batch_ent_t *lstio_bat_entp; /* OUT: batch ent */ + + int *lstio_bat_idxp; /* IN/OUT: index of node */ + int *lstio_bat_ndentp; /* IN/OUT: # of nodent */ + lstcon_node_ent_t *lstio_bat_dentsp; /* array of nodent */ +} lstio_batch_info_args_t; + +/* add stat in session */ +typedef struct { + int lstio_sta_key; /* IN: session key */ + int lstio_sta_timeout; /* IN: timeout for stat requst */ + int lstio_sta_nmlen; /* IN: group name length */ + char *lstio_sta_namep; /* IN: group name */ + int lstio_sta_count; /* IN: # of pid */ + lnet_process_id_t *lstio_sta_idsp; /* IN: pid */ + struct list_head *lstio_sta_resultp; /* OUT: list head of result buffer */ +} lstio_stat_args_t; + +typedef enum { + LST_TEST_BULK = 1, + LST_TEST_PING = 2 +} lst_test_type_t; + +/* create a test in a batch */ +#define LST_MAX_CONCUR 1024 /* Max concurrency of test */ + +typedef struct { + int lstio_tes_key; /* IN: session key */ + int lstio_tes_bat_nmlen; /* IN: batch name len */ + char *lstio_tes_bat_name; /* IN: batch name */ + int lstio_tes_type; /* IN: test type */ + int lstio_tes_oneside; /* IN: one sided test */ + int lstio_tes_loop; /* IN: loop count */ + int lstio_tes_concur; /* IN: concurrency */ + + int lstio_tes_dist; /* IN: node distribution in destination 
groups */ + int lstio_tes_span; /* IN: node span in destination groups */ + int lstio_tes_sgrp_nmlen; /* IN: source group name length */ + char *lstio_tes_sgrp_name; /* IN: group name */ + int lstio_tes_dgrp_nmlen; /* IN: destination group name length */ + char *lstio_tes_dgrp_name; /* IN: group name */ + + int lstio_tes_param_len; /* IN: param buffer len */ + void *lstio_tes_param; /* IN: parameter for specified test: + lstio_bulk_param_t, + lstio_ping_param_t, + ... more */ + int *lstio_tes_retp; /* OUT: private returned value */ + struct list_head *lstio_tes_resultp; /* OUT: list head of result buffer */ +} lstio_test_args_t; + +typedef enum { + LST_BRW_READ = 1, + LST_BRW_WRITE = 2 +} lst_brw_type_t; + +typedef enum { + LST_BRW_CHECK_NONE = 1, + LST_BRW_CHECK_SIMPLE = 2, + LST_BRW_CHECK_FULL = 3 +} lst_brw_flags_t; + +typedef struct { + int blk_opc; /* bulk operation code */ + int blk_size; /* size (bytes) */ + int blk_time; /* time of running the test*/ + int blk_flags; /* reserved flags */ +} lst_test_bulk_param_t; + +typedef struct { + int png_size; /* size of ping message */ + int png_time; /* time */ + int png_loop; /* loop */ + int png_flags; /* reserved flags */ +} lst_test_ping_param_t; + +/* more tests */ +typedef struct { + __u32 errors; + __u32 rpcs_sent; + __u32 rpcs_rcvd; + __u32 rpcs_dropped; + __u32 rpcs_expired; + __u64 bulk_get; + __u64 bulk_put; +} WIRE_ATTR srpc_counters_t; + +typedef struct { + /** milliseconds since current session started */ + __u32 running_ms; + __u32 active_batches; + __u32 zombie_sessions; + __u32 brw_errors; + __u32 ping_errors; +} WIRE_ATTR sfw_counters_t; + +#endif diff --git a/drivers/staging/lustre/include/linux/lnet/ptllnd.h b/drivers/staging/lustre/include/linux/lnet/ptllnd.h new file mode 100644 index 000000000000..fc1ce8ed1f8b --- /dev/null +++ b/drivers/staging/lustre/include/linux/lnet/ptllnd.h @@ -0,0 +1,94 @@ +/* + * GPL HEADER START + * + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
+ * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 only, + * as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License version 2 for more details (a copy is included + * in the LICENSE file that accompanied this code). + * + * You should have received a copy of the GNU General Public License + * version 2 along with this program; If not, see + * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf + * + * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara, + * CA 95054 USA or visit www.sun.com if you need additional information or + * have any questions. + * + * GPL HEADER END + */ +/* + * Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved. + * Use is subject to license terms. + */ +/* + * This file is part of Lustre, http://www.lustre.org/ + * Lustre is a trademark of Sun Microsystems, Inc. + * + * lnet/include/lnet/ptllnd.h + * + * Author: PJ Kirner <pjkirner@clusterfs.com> + */ + +/* + * The PTLLND was designed to support Portals with + * Lustre and non-lustre UNLINK semantics. + * However for now the two targets are Cray Portals + * on the XT3 and Lustre Portals (for testing) both + * have Lustre UNLINK semantics, so this is defined + * by default. 
+ */ +#define LUSTRE_PORTALS_UNLINK_SEMANTICS + + +#ifdef _USING_LUSTRE_PORTALS_ + +/* NIDs are 64-bits on Lustre Portals */ +#define FMT_NID LPU64 +#define FMT_PID "%d" + +/* When using Lustre Portals Lustre completion semantics are implicit */ +#define PTL_MD_LUSTRE_COMPLETION_SEMANTICS 0 + +#else /* _USING_CRAY_PORTALS_ */ + +/* NIDs are integers on Cray Portals */ +#define FMT_NID "%u" +#define FMT_PID "%d" + +/* When using Cray Portals this is defined in the Cray Portals Header*/ +/*#define PTL_MD_LUSTRE_COMPLETION_SEMANTICS */ + +/* Can compare handles directly on Cray Portals */ +#define PtlHandleIsEqual(a,b) ((a) == (b)) + +/* Different error types on Cray Portals */ +#define ptl_err_t ptl_ni_fail_t + +/* + * The Cray Portals has no maximum number of IOVs. The + * maximum is limited only by memory and size of the + * int parameters (2^31-1). + * Lustre only really requires the underlying + * implementation to support at least LNET_MAX_IOV, + * so for Cray portals we can safely just use that + * value here. + * + */ +#define PTL_MD_MAX_IOV LNET_MAX_IOV + +#endif + +#define FMT_PTLID "ptlid:"FMT_PID"-"FMT_NID + +/* Align incoming small request messages to an 8 byte boundary if this is + * supported to avoid alignment issues on some architectures */ +#ifndef PTL_MD_LOCAL_ALIGN8 +# define PTL_MD_LOCAL_ALIGN8 0 +#endif diff --git a/drivers/staging/lustre/include/linux/lnet/ptllnd_wire.h b/drivers/staging/lustre/include/linux/lnet/ptllnd_wire.h new file mode 100644 index 000000000000..7d12b3a23a96 --- /dev/null +++ b/drivers/staging/lustre/include/linux/lnet/ptllnd_wire.h @@ -0,0 +1,124 @@ +/* + * GPL HEADER START + * + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 only, + * as published by the Free Software Foundation.
+ * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License version 2 for more details (a copy is included + * in the LICENSE file that accompanied this code). + * + * You should have received a copy of the GNU General Public License + * version 2 along with this program; If not, see + * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf + * + * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara, + * CA 95054 USA or visit www.sun.com if you need additional information or + * have any questions. + * + * GPL HEADER END + */ +/* + * Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved. + * Use is subject to license terms. + */ +/* + * This file is part of Lustre, http://www.lustre.org/ + * Lustre is a trademark of Sun Microsystems, Inc. + * + * lnet/include/lnet/ptllnd_wire.h + * + * Author: PJ Kirner <pjkirner@clusterfs.com> + */ + +/* Minimum buffer size that any peer will post to receive ptllnd messages */ +#define PTLLND_MIN_BUFFER_SIZE 256 + +/************************************************************************ + * Tunable defaults that {u,k}lnds/ptllnd should have in common. + */ + +#define PTLLND_PORTAL 9 /* The same portal PTLPRC used when talking to cray portals */ +#define PTLLND_PID 9 /* The Portals PID */ +#define PTLLND_PEERCREDITS 8 /* concurrent sends to 1 peer */ + +/* Default buffer size for kernel ptllnds (guaranteed eager) */ +#define PTLLND_MAX_KLND_MSG_SIZE 512 + +/* Default buffer size for catamount ptllnds (not guaranteed eager) - large + * enough to avoid RDMA for anything sent while control is not in liblustre */ +#define PTLLND_MAX_ULND_MSG_SIZE 512 + + +/************************************************************************ + * Portals LND Wire message format. + * These are sent in sender's byte order (i.e. 
receiver flips). + */ + +#define PTL_RESERVED_MATCHBITS 0x100 /* below this value is reserved + * above is for bulk data transfer */ +#define LNET_MSG_MATCHBITS 0 /* the value for the message channel */ + +typedef struct +{ + lnet_hdr_t kptlim_hdr; /* portals header */ + char kptlim_payload[0]; /* piggy-backed payload */ +} WIRE_ATTR kptl_immediate_msg_t; + +typedef struct +{ + lnet_hdr_t kptlrm_hdr; /* portals header */ + __u64 kptlrm_matchbits; /* matchbits */ +} WIRE_ATTR kptl_rdma_msg_t; + +typedef struct +{ + __u64 kptlhm_matchbits; /* matchbits */ + __u32 kptlhm_max_msg_size; /* max message size */ +} WIRE_ATTR kptl_hello_msg_t; + +typedef struct +{ + /* First 2 fields fixed FOR ALL TIME */ + __u32 ptlm_magic; /* I'm a Portals LND message */ + __u16 ptlm_version; /* this is my version number */ + __u8 ptlm_type; /* the message type */ + __u8 ptlm_credits; /* returned credits */ + __u32 ptlm_nob; /* # bytes in whole message */ + __u32 ptlm_cksum; /* checksum (0 == no checksum) */ + __u64 ptlm_srcnid; /* sender's NID */ + __u64 ptlm_srcstamp; /* sender's incarnation */ + __u64 ptlm_dstnid; /* destination's NID */ + __u64 ptlm_dststamp; /* destination's incarnation */ + __u32 ptlm_srcpid; /* sender's PID */ + __u32 ptlm_dstpid; /* destination's PID */ + + union { + kptl_immediate_msg_t immediate; + kptl_rdma_msg_t rdma; + kptl_hello_msg_t hello; + } WIRE_ATTR ptlm_u; + +} kptl_msg_t; + +/* kptl_msg_t::ptlm_credits is only a __u8 */ +#define PTLLND_MSG_MAX_CREDITS ((typeof(((kptl_msg_t*) 0)->ptlm_credits)) -1) + +#define PTLLND_MSG_MAGIC LNET_PROTO_PTL_MAGIC +#define PTLLND_MSG_VERSION 0x04 + +#define PTLLND_RDMA_OK 0x00 +#define PTLLND_RDMA_FAIL 0x01 + +#define PTLLND_MSG_TYPE_INVALID 0x00 +#define PTLLND_MSG_TYPE_PUT 0x01 +#define PTLLND_MSG_TYPE_GET 0x02 +#define PTLLND_MSG_TYPE_IMMEDIATE 0x03 /* No bulk data xfer*/ +#define PTLLND_MSG_TYPE_NOOP 0x04 +#define PTLLND_MSG_TYPE_HELLO 0x05 +#define PTLLND_MSG_TYPE_NAK 0x06 diff --git 
a/drivers/staging/lustre/include/linux/lnet/socklnd.h b/drivers/staging/lustre/include/linux/lnet/socklnd.h new file mode 100644 index 000000000000..bacc74933a39 --- /dev/null +++ b/drivers/staging/lustre/include/linux/lnet/socklnd.h @@ -0,0 +1,103 @@ +/* + * GPL HEADER START + * + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 only, + * as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License version 2 for more details (a copy is included + * in the LICENSE file that accompanied this code). + * + * You should have received a copy of the GNU General Public License + * version 2 along with this program; If not, see + * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf + * + * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara, + * CA 95054 USA or visit www.sun.com if you need additional information or + * have any questions. + * + * GPL HEADER END + */ +/* + * Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved. + * Use is subject to license terms. + */ +/* + * This file is part of Lustre, http://www.lustre.org/ + * Lustre is a trademark of Sun Microsystems, Inc. 
+ * + * lnet/include/lnet/socklnd.h + * + * #defines shared between socknal implementation and utilities + */ +#ifndef __LNET_LNET_SOCKLND_H__ +#define __LNET_LNET_SOCKLND_H__ + +#include <linux/lnet/types.h> +#include <linux/lnet/lib-types.h> + +#define SOCKLND_CONN_NONE (-1) +#define SOCKLND_CONN_ANY 0 +#define SOCKLND_CONN_CONTROL 1 +#define SOCKLND_CONN_BULK_IN 2 +#define SOCKLND_CONN_BULK_OUT 3 +#define SOCKLND_CONN_NTYPES 4 + +#define SOCKLND_CONN_ACK SOCKLND_CONN_BULK_IN + +typedef struct { + __u32 kshm_magic; /* magic number of socklnd message */ + __u32 kshm_version; /* version of socklnd message */ + lnet_nid_t kshm_src_nid; /* sender's nid */ + lnet_nid_t kshm_dst_nid; /* destination nid */ + lnet_pid_t kshm_src_pid; /* sender's pid */ + lnet_pid_t kshm_dst_pid; /* destination pid */ + __u64 kshm_src_incarnation; /* sender's incarnation */ + __u64 kshm_dst_incarnation; /* destination's incarnation */ + __u32 kshm_ctype; /* connection type */ + __u32 kshm_nips; /* # IP addrs */ + __u32 kshm_ips[0]; /* IP addrs */ +} WIRE_ATTR ksock_hello_msg_t; + +typedef struct { + lnet_hdr_t ksnm_hdr; /* lnet hdr */ + + /* + * ksnm_payload is removed because of winnt compiler's limitation: + * zero-sized array can only be placed at the tail of [nested] + * structure definitions. 
lnet payload will be stored just after + * the body of structure ksock_lnet_msg_t + */ +} WIRE_ATTR ksock_lnet_msg_t; + +typedef struct { + __u32 ksm_type; /* type of socklnd message */ + __u32 ksm_csum; /* checksum if != 0 */ + __u64 ksm_zc_cookies[2]; /* Zero-Copy request/ACK cookie */ + union { + ksock_lnet_msg_t lnetmsg; /* lnet message, it's empty if it's NOOP */ + } WIRE_ATTR ksm_u; +} WIRE_ATTR ksock_msg_t; + +static inline void +socklnd_init_msg(ksock_msg_t *msg, int type) +{ + msg->ksm_csum = 0; + msg->ksm_type = type; + msg->ksm_zc_cookies[0] = msg->ksm_zc_cookies[1] = 0; +} + +#define KSOCK_MSG_NOOP 0xc0 /* ksm_u empty */ +#define KSOCK_MSG_LNET 0xc1 /* lnet msg */ + +/* We need to know this number to parse hello msg from ksocklnd in + * other LND (usocklnd, for example) */ +#define KSOCK_PROTO_V2 2 +#define KSOCK_PROTO_V3 3 + +#endif diff --git a/drivers/staging/lustre/include/linux/lnet/types.h b/drivers/staging/lustre/include/linux/lnet/types.h new file mode 100644 index 000000000000..4f63b7acb9d7 --- /dev/null +++ b/drivers/staging/lustre/include/linux/lnet/types.h @@ -0,0 +1,503 @@ +/* + * GPL HEADER START + * + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 only, + * as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License version 2 for more details (a copy is included + * in the LICENSE file that accompanied this code). 
+ * + * You should have received a copy of the GNU General Public License + * version 2 along with this program; If not, see + * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf + * + * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara, + * CA 95054 USA or visit www.sun.com if you need additional information or + * have any questions. + * + * GPL HEADER END + */ +/* + * Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved. + * Use is subject to license terms. + * + * Copyright (c) 2012, Intel Corporation. + */ +/* + * This file is part of Lustre, http://www.lustre.org/ + * Lustre is a trademark of Sun Microsystems, Inc. + */ + +#ifndef __LNET_TYPES_H__ +#define __LNET_TYPES_H__ + +/** \addtogroup lnet + * @{ */ + +#include <linux/libcfs/libcfs.h> + +/** \addtogroup lnet_addr + * @{ */ + +/** Portal reserved for LNet's own use. + * \see lustre/include/lustre/lustre_idl.h for Lustre portal assignments. + */ +#define LNET_RESERVED_PORTAL 0 + +/** + * Address of an end-point in an LNet network. + * + * A node can have multiple end-points and hence multiple addresses. + * An LNet network can be a simple network (e.g. tcp0) or a network of + * LNet networks connected by LNet routers. Therefore an end-point address + * has two parts: network ID, and address within a network. + * + * \see LNET_NIDNET, LNET_NIDADDR, and LNET_MKNID. + */ +typedef __u64 lnet_nid_t; +/** + * ID of a process in a node. Shortened as PID to distinguish from + * lnet_process_id_t, the global process ID. 
+ */ +typedef __u32 lnet_pid_t; + +/** wildcard NID that matches any end-point address */ +#define LNET_NID_ANY ((lnet_nid_t) -1) +/** wildcard PID that matches any lnet_pid_t */ +#define LNET_PID_ANY ((lnet_pid_t) -1) + +#define LNET_PID_RESERVED 0xf0000000 /* reserved bits in PID */ +#define LNET_PID_USERFLAG 0x80000000 /* set in userspace peers */ + +#define LNET_TIME_FOREVER (-1) + +/** + * Objects maintained by the LNet are accessed through handles. Handle types + * have names of the form lnet_handle_xx_t, where xx is one of the two letter + * object type codes ('eq' for event queue, 'md' for memory descriptor, and + * 'me' for match entry). + * Each type of object is given a unique handle type to enhance type checking. + * The type lnet_handle_any_t can be used when a generic handle is needed. + * Every handle value can be converted into a value of type lnet_handle_any_t + * without loss of information. + */ +typedef struct { + __u64 cookie; +} lnet_handle_any_t; + +typedef lnet_handle_any_t lnet_handle_eq_t; +typedef lnet_handle_any_t lnet_handle_md_t; +typedef lnet_handle_any_t lnet_handle_me_t; + +#define LNET_WIRE_HANDLE_COOKIE_NONE (-1) + +/** + * Invalidate handle \a h. + */ +static inline void LNetInvalidateHandle(lnet_handle_any_t *h) +{ + h->cookie = LNET_WIRE_HANDLE_COOKIE_NONE; +} + +/** + * Compare handles \a h1 and \a h2. + * + * \return 1 if handles are equal, 0 if otherwise. + */ +static inline int LNetHandleIsEqual (lnet_handle_any_t h1, lnet_handle_any_t h2) +{ + return (h1.cookie == h2.cookie); +} + +/** + * Check whether handle \a h is invalid. + * + * \return 1 if handle is invalid, 0 if valid. + */ +static inline int LNetHandleIsInvalid(lnet_handle_any_t h) +{ + return (LNET_WIRE_HANDLE_COOKIE_NONE == h.cookie); +} + +/** + * Global process ID. 
+ */ +typedef struct { + /** node id */ + lnet_nid_t nid; + /** process id */ + lnet_pid_t pid; +} lnet_process_id_t; +/** @} lnet_addr */ + +/** \addtogroup lnet_me + * @{ */ + +/** + * Specifies whether the match entry or memory descriptor should be unlinked + * automatically (LNET_UNLINK) or not (LNET_RETAIN). + */ +typedef enum { + LNET_RETAIN = 0, + LNET_UNLINK +} lnet_unlink_t; + +/** + * Values of the type lnet_ins_pos_t are used to control where a new match + * entry is inserted. The value LNET_INS_BEFORE is used to insert the new + * entry before the current entry or before the head of the list. The value + * LNET_INS_AFTER is used to insert the new entry after the current entry + * or after the last item in the list. + */ +typedef enum { + /** insert ME before current position or head of the list */ + LNET_INS_BEFORE, + /** insert ME after current position or tail of the list */ + LNET_INS_AFTER, + /** attach ME at tail of local CPU partition ME list */ + LNET_INS_LOCAL +} lnet_ins_pos_t; + +/** @} lnet_me */ + +/** \addtogroup lnet_md + * @{ */ + +/** + * Defines the visible parts of a memory descriptor. Values of this type + * are used to initialize memory descriptors. + */ +typedef struct { + /** + * Specify the memory region associated with the memory descriptor. + * If the options field has: + * - LNET_MD_KIOV bit set: The start field points to the starting + * address of an array of lnet_kiov_t and the length field specifies + * the number of entries in the array. The length can't be bigger + * than LNET_MAX_IOV. The lnet_kiov_t is used to describe page-based + * fragments that are not necessarily mapped in virtual memory. + * - LNET_MD_IOVEC bit set: The start field points to the starting + * address of an array of struct iovec and the length field specifies + * the number of entries in the array. The length can't be bigger + * than LNET_MAX_IOV. The struct iovec is used to describe fragments + * that have virtual addresses.
+ * - Otherwise: The memory region is contiguous. The start field + * specifies the starting address for the memory region and the + * length field specifies its length. + * + * When the memory region is fragmented, all fragments but the first + * one must start on page boundary, and all but the last must end on + * page boundary. + */ + void *start; + unsigned int length; + /** + * Specifies the maximum number of operations that can be performed + * on the memory descriptor. An operation is any action that could + * possibly generate an event. In the usual case, the threshold value + * is decremented for each operation on the MD. When the threshold + * drops to zero, the MD becomes inactive and does not respond to + * operations. A threshold value of LNET_MD_THRESH_INF indicates that + * there is no bound on the number of operations that may be applied + * to a MD. + */ + int threshold; + /** + * Specifies the largest incoming request that the memory descriptor + * should respond to. When the unused portion of a MD (length - + * local offset) falls below this value, the MD becomes inactive and + * does not respond to further operations. This value is only used + * if the LNET_MD_MAX_SIZE option is set. + */ + int max_size; + /** + * Specifies the behavior of the memory descriptor. A bitwise OR + * of the following values can be used: + * - LNET_MD_OP_PUT: The LNet PUT operation is allowed on this MD. + * - LNET_MD_OP_GET: The LNet GET operation is allowed on this MD. + * - LNET_MD_MANAGE_REMOTE: The offset used in accessing the memory + * region is provided by the incoming request. By default, the + * offset is maintained locally. When maintained locally, the + * offset is incremented by the length of the request so that + * the next operation (PUT or GET) will access the next part of + * the memory region. Note that only one offset variable exists + * per memory descriptor. 
If both PUT and GET operations are + * performed on a memory descriptor, the offset is updated each time. + * - LNET_MD_TRUNCATE: The length provided in the incoming request can + * be reduced to match the memory available in the region (determined + * by subtracting the offset from the length of the memory region). + * By default, if the length in the incoming operation is greater + * than the amount of memory available, the operation is rejected. + * - LNET_MD_ACK_DISABLE: An acknowledgment should not be sent for + * incoming PUT operations, even if requested. By default, + * acknowledgments are sent for PUT operations that request an + * acknowledgment. Acknowledgments are never sent for GET operations. + * The data sent in the REPLY serves as an implicit acknowledgment. + * - LNET_MD_KIOV: The start and length fields specify an array of + * lnet_kiov_t. + * - LNET_MD_IOVEC: The start and length fields specify an array of + * struct iovec. + * - LNET_MD_MAX_SIZE: The max_size field is valid. + * + * Note: + * - LNET_MD_KIOV or LNET_MD_IOVEC allows for a scatter/gather + * capability for memory descriptors. They can't be both set. + * - When LNET_MD_MAX_SIZE is set, the total length of the memory + * region (i.e. sum of all fragment lengths) must not be less than + * \a max_size. + */ + unsigned int options; + /** + * A user-specified value that is associated with the memory + * descriptor. The value does not need to be a pointer, but must fit + * in the space used by a pointer. This value is recorded in events + * associated with operations on this MD. + */ + void *user_ptr; + /** + * A handle for the event queue used to log the operations performed on + * the memory region. If this argument is a NULL handle (i.e. nullified + * by LNetInvalidateHandle()), operations performed on this memory + * descriptor are not logged. + */ + lnet_handle_eq_t eq_handle; +} lnet_md_t; + +/* Max Transfer Unit (minimum supported everywhere). + * CAVEAT EMPTOR, with multinet (i.e. 
routers forwarding between networks)
+ * these limits are system wide and not interface-local. */
+#define LNET_MTU_BITS	20
+#define LNET_MTU	(1 << LNET_MTU_BITS)
+
+/** limit on the number of fragments in discontiguous MDs */
+#define LNET_MAX_IOV	256
+
+/* Max payload size */
+# define LNET_MAX_PAYLOAD	CONFIG_LNET_MAX_PAYLOAD
+# if (LNET_MAX_PAYLOAD < LNET_MTU)
+#  error "LNET_MAX_PAYLOAD too small - error in configure --with-max-payload-mb"
+# else
+#  if (LNET_MAX_PAYLOAD > (PAGE_SIZE * LNET_MAX_IOV))
+/* PAGE_SIZE is a constant: check with cpp! */
+#   error "LNET_MAX_PAYLOAD too large - error in configure --with-max-payload-mb"
+#  endif
+# endif
+
+/**
+ * Options for the MD structure. See lnet_md_t::options.
+ */
+#define LNET_MD_OP_PUT        (1 << 0)
+/** See lnet_md_t::options. */
+#define LNET_MD_OP_GET        (1 << 1)
+/** See lnet_md_t::options. */
+#define LNET_MD_MANAGE_REMOTE (1 << 2)
+/* unused (1 << 3) */
+/** See lnet_md_t::options. */
+#define LNET_MD_TRUNCATE      (1 << 4)
+/** See lnet_md_t::options. */
+#define LNET_MD_ACK_DISABLE   (1 << 5)
+/** See lnet_md_t::options. */
+#define LNET_MD_IOVEC         (1 << 6)
+/** See lnet_md_t::options. */
+#define LNET_MD_MAX_SIZE      (1 << 7)
+/** See lnet_md_t::options. */
+#define LNET_MD_KIOV          (1 << 8)
+
+/* For compatibility with Cray Portals */
+#define LNET_MD_PHYS          0
+
+/** Infinite threshold on MD operations. See lnet_md_t::threshold */
+#define LNET_MD_THRESH_INF    (-1)
+
+/* NB lustre portals uses struct iovec internally! */
+typedef struct iovec lnet_md_iovec_t;
+
+/**
+ * A page-based fragment of a MD.
+ */
+typedef struct {
+	/** Pointer to the page where the fragment resides */
+	struct page	*kiov_page;
+	/** Length in bytes of the fragment */
+	unsigned int	 kiov_len;
+	/**
+	 * Starting offset of the fragment within the page. Note that the
+	 * end of the fragment must not pass the end of the page; i.e.,
+	 * kiov_len + kiov_offset <= PAGE_CACHE_SIZE.
+	 */
+	unsigned int	 kiov_offset;
+} lnet_kiov_t;
+/** @} lnet_md */
+
+/** \addtogroup lnet_eq
+ * @{ */
+
+/**
+ * Six types of events can be logged in an event queue.
+ */
+typedef enum {
+	/** An incoming GET operation has completed on the MD. */
+	LNET_EVENT_GET		= 1,
+	/**
+	 * An incoming PUT operation has completed on the MD. The
+	 * underlying layers will not alter the memory (on behalf of this
+	 * operation) once this event has been logged.
+	 */
+	LNET_EVENT_PUT,
+	/**
+	 * A REPLY operation has completed. This event is logged after the
+	 * data (if any) from the REPLY has been written into the MD.
+	 */
+	LNET_EVENT_REPLY,
+	/** An acknowledgment has been received. */
+	LNET_EVENT_ACK,
+	/**
+	 * An outgoing send (PUT or GET) operation has completed. This event
+	 * is logged after the entire buffer has been sent and it is safe for
+	 * the caller to reuse the buffer.
+	 *
+	 * Note:
+	 * - The LNET_EVENT_SEND doesn't guarantee message delivery. It can
+	 *   happen even when the message has not yet been put out on wire.
+	 * - It's unsafe to assume that in an outgoing GET operation
+	 *   the LNET_EVENT_SEND event would happen before the
+	 *   LNET_EVENT_REPLY event. The same holds for LNET_EVENT_SEND and
+	 *   LNET_EVENT_ACK events in an outgoing PUT operation.
+	 */
+	LNET_EVENT_SEND,
+	/**
+	 * A MD has been unlinked. Note that LNetMDUnlink() does not
+	 * necessarily trigger an LNET_EVENT_UNLINK event.
+	 * \see LNetMDUnlink
+	 */
+	LNET_EVENT_UNLINK,
+} lnet_event_kind_t;
+
+#define LNET_SEQ_BASETYPE	long
+typedef unsigned LNET_SEQ_BASETYPE lnet_seq_t;
+#define LNET_SEQ_GT(a,b)	(((signed LNET_SEQ_BASETYPE)((a) - (b))) > 0)
+
+/* XXX
+ * Cygwin needs the pack pragma around lnet_event_t; it is not yet clear
+ * whether other platforms need it too (still being checked).
+ */
+#ifdef __CYGWIN__
+#pragma pack(push, 4)
+#endif /* __CYGWIN__: packing is restored by pack(pop) after lnet_event_t */
+
+/**
+ * Information about an event on a MD.
+ */
+typedef struct {
+	/** The identifier (nid, pid) of the target. */
+	lnet_process_id_t	target;
+	/** The identifier (nid, pid) of the initiator. */
+	lnet_process_id_t	initiator;
+	/**
+	 * The NID of the immediate sender. If the request has been forwarded
+	 * by routers, this is the NID of the last hop; otherwise it's the
+	 * same as the initiator.
+	 */
+	lnet_nid_t		sender;
+	/** Indicates the type of the event. */
+	lnet_event_kind_t	type;
+	/** The portal table index specified in the request */
+	unsigned int		pt_index;
+	/** A copy of the match bits specified in the request. */
+	__u64			match_bits;
+	/** The length (in bytes) specified in the request. */
+	unsigned int		rlength;
+	/**
+	 * The length (in bytes) of the data that was manipulated by the
+	 * operation. For truncated operations, the manipulated length will be
+	 * the number of bytes specified by the MD (possibly with an offset,
+	 * see lnet_md_t). For all other operations, the manipulated length
+	 * will be the length of the requested operation, i.e. rlength.
+	 */
+	unsigned int		mlength;
+	/**
+	 * The handle to the MD associated with the event. The handle may be
+	 * invalid if the MD has been unlinked.
+	 */
+	lnet_handle_md_t	md_handle;
+	/**
+	 * A snapshot of the state of the MD immediately after the event has
+	 * been processed. In particular, the threshold field in md will
+	 * reflect the value of the threshold after the operation occurred.
+	 */
+	lnet_md_t		md;
+	/**
+	 * 64 bits of out-of-band user data. Only valid for LNET_EVENT_PUT.
+	 * \see LNetPut
+	 */
+	__u64			hdr_data;
+	/**
+	 * Indicates the completion status of the operation. It's 0 for
+	 * successful operations, otherwise it's an error code.
+	 */
+	int			status;
+	/**
+	 * Indicates whether the MD has been unlinked. Note that:
+	 * - An event with unlinked set is the last event on the MD.
+	 * - This field is also set for an explicit LNET_EVENT_UNLINK event.
+	 * \see LNetMDUnlink
+	 */
+	int			unlinked;
+	/**
+	 * The displacement (in bytes) into the memory region that the
+	 * operation used. The offset can be determined by the operation for
+	 * a remote managed MD or by the local MD.
+	 * \see lnet_md_t::options
+	 */
+	unsigned int		offset;
+	/**
+	 * The sequence number for this event. Sequence numbers are unique
+	 * to each event.
+	 */
+	volatile lnet_seq_t	sequence;
+} lnet_event_t;
+#ifdef __CYGWIN__
+#pragma pack(pop)
+#endif /* __CYGWIN__ */
+
+/**
+ * Event queue handler function type.
+ *
+ * The EQ handler runs for each event that is deposited into the EQ. The
+ * handler is supplied with a pointer to the event that triggered the
+ * handler invocation.
+ *
+ * The handler must not block, must be reentrant, and must not call any LNet
+ * API functions. It should return as quickly as possible.
+ */
+typedef void (*lnet_eq_handler_t)(lnet_event_t *event);
+#define LNET_EQ_HANDLER_NONE NULL
+/** @} lnet_eq */
+
+/** \addtogroup lnet_data
+ * @{ */
+
+/**
+ * Specify whether an acknowledgment should be sent by target when the PUT
+ * operation completes (i.e., when the data has been written to a MD of the
+ * target process).
+ *
+ * \see lnet_md_t::options for the discussion on LNET_MD_ACK_DISABLE by which
+ * acknowledgments can be disabled for a MD.
+ */
+typedef enum {
+	/** Request an acknowledgment */
+	LNET_ACK_REQ,
+	/** Request that no acknowledgment should be generated. */
+	LNET_NOACK_REQ
+} lnet_ack_req_t;
+/** @} lnet_data */
+
+/** @} lnet */
+#endif
|