diff options
Diffstat (limited to 'drivers/staging/lustre/include/uapi/linux')
18 files changed, 7282 insertions, 0 deletions
diff --git a/drivers/staging/lustre/include/uapi/linux/lnet/libcfs_debug.h b/drivers/staging/lustre/include/uapi/linux/lnet/libcfs_debug.h new file mode 100644 index 000000000000..c4d9472b374f --- /dev/null +++ b/drivers/staging/lustre/include/uapi/linux/lnet/libcfs_debug.h @@ -0,0 +1,149 @@ +/* + * GPL HEADER START + * + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 only, + * as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License version 2 for more details (a copy is included + * in the LICENSE file that accompanied this code). + * + * You should have received a copy of the GNU General Public License + * version 2 along with this program; If not, see + * http://www.gnu.org/licenses/gpl-2.0.html + * + * GPL HEADER END + */ +/* + * Copyright (c) 2008, 2010, Oracle and/or its affiliates. All rights reserved. + * Use is subject to license terms. + * + * Copyright (c) 2012, 2014, Intel Corporation. + */ +/* + * This file is part of Lustre, http://www.lustre.org/ + * Lustre is a trademark of Sun Microsystems, Inc. + * + * libcfs/include/libcfs/libcfs_debug.h + * + * Debug messages and assertions + * + */ + +#ifndef __UAPI_LIBCFS_DEBUG_H__ +#define __UAPI_LIBCFS_DEBUG_H__ + +/** + * Format for debug message headers + */ +struct ptldebug_header { + __u32 ph_len; + __u32 ph_flags; + __u32 ph_subsys; + __u32 ph_mask; + __u16 ph_cpu_id; + __u16 ph_type; + /* time_t overflow in 2106 */ + __u32 ph_sec; + __u64 ph_usec; + __u32 ph_stack; + __u32 ph_pid; + __u32 ph_extern_pid; + __u32 ph_line_num; +} __attribute__((packed)); + +#define PH_FLAG_FIRST_RECORD 1 + +/* Debugging subsystems (32 bits, non-overlapping) */ +#define S_UNDEFINED 0x00000001 +#define S_MDC 0x00000002 +#define S_MDS 0x00000004 +#define S_OSC 0x00000008 +#define S_OST 0x00000010 +#define S_CLASS 0x00000020 +#define S_LOG 0x00000040 +#define S_LLITE 0x00000080 +#define S_RPC 0x00000100 +#define S_MGMT 0x00000200 +#define S_LNET 0x00000400 +#define S_LND 0x00000800 /* ALL LNDs */ +#define S_PINGER 0x00001000 +#define S_FILTER 0x00002000 +#define S_LIBCFS 0x00004000 +#define S_ECHO 0x00008000 +#define S_LDLM 0x00010000 +#define S_LOV 0x00020000 +#define S_LQUOTA 0x00040000 +#define S_OSD 0x00080000 +#define S_LFSCK 0x00100000 +#define S_SNAPSHOT 0x00200000 +/* unused */ +#define S_LMV 0x00800000 /* b_new_cmd */ +/* unused */ +#define S_SEC 0x02000000 /* upcall cache */ +#define S_GSS 0x04000000 /* b_new_cmd */ +/* unused */ +#define S_MGC 0x10000000 +#define S_MGS 0x20000000 +#define S_FID 0x40000000 /* b_new_cmd */ +#define S_FLD 0x80000000 /* b_new_cmd */ + +#define LIBCFS_DEBUG_SUBSYS_NAMES { \ + "undefined", "mdc", "mds", "osc", "ost", "class", "log", \ + "llite", "rpc", "mgmt", "lnet", "lnd", "pinger", "filter", \ + "libcfs", "echo", "ldlm", "lov", "lquota", "osd", "lfsck", \ + "snapshot", "", "lmv", "", "sec", "gss", "", "mgc", "mgs", \ + "fid", "fld", NULL } + +/* Debugging masks (32 bits, non-overlapping) */ +#define D_TRACE 0x00000001 /* ENTRY/EXIT markers */ +#define D_INODE 0x00000002 +#define D_SUPER 0x00000004 +#define D_EXT2 0x00000008 /* anything from ext2_debug */ +#define D_MALLOC 0x00000010 /* print malloc, free information */ +#define D_CACHE 0x00000020 /* cache-related items */ +#define D_INFO 0x00000040 /* general information */ +#define D_IOCTL 0x00000080 /* ioctl related information */ +#define D_NETERROR 0x00000100 /* network errors */ +#define D_NET 0x00000200 /* network communications */ +#define D_WARNING 0x00000400 /* CWARN(...) == CDEBUG (D_WARNING, ...) */ +#define D_BUFFS 0x00000800 +#define D_OTHER 0x00001000 +#define D_DENTRY 0x00002000 +#define D_NETTRACE 0x00004000 +#define D_PAGE 0x00008000 /* bulk page handling */ +#define D_DLMTRACE 0x00010000 +#define D_ERROR 0x00020000 /* CERROR(...) == CDEBUG (D_ERROR, ...) */ +#define D_EMERG 0x00040000 /* CEMERG(...) == CDEBUG (D_EMERG, ...) */ +#define D_HA 0x00080000 /* recovery and failover */ +#define D_RPCTRACE 0x00100000 /* for distributed debugging */ +#define D_VFSTRACE 0x00200000 +#define D_READA 0x00400000 /* read-ahead */ +#define D_MMAP 0x00800000 +#define D_CONFIG 0x01000000 +#define D_CONSOLE 0x02000000 +#define D_QUOTA 0x04000000 +#define D_SEC 0x08000000 +#define D_LFSCK 0x10000000 /* For both OI scrub and LFSCK */ +#define D_HSM 0x20000000 +#define D_SNAPSHOT 0x40000000 /* snapshot */ +#define D_LAYOUT 0x80000000 + +#define LIBCFS_DEBUG_MASKS_NAMES { \ + "trace", "inode", "super", "ext2", "malloc", "cache", "info", \ + "ioctl", "neterror", "net", "warning", "buffs", "other", \ + "dentry", "nettrace", "page", "dlmtrace", "error", "emerg", \ + "ha", "rpctrace", "vfstrace", "reada", "mmap", "config", \ + "console", "quota", "sec", "lfsck", "hsm", "snapshot", "layout",\ + NULL } + +#define D_CANTMASK (D_ERROR | D_EMERG | D_WARNING | D_CONSOLE) + +#define LIBCFS_DEBUG_FILE_PATH_DEFAULT "/tmp/lustre-log" + +#endif /* __UAPI_LIBCFS_DEBUG_H__ */ diff --git a/drivers/staging/lustre/include/uapi/linux/lnet/libcfs_ioctl.h b/drivers/staging/lustre/include/uapi/linux/lnet/libcfs_ioctl.h new file mode 100644 index 000000000000..cce6b58e3682 --- /dev/null +++ b/drivers/staging/lustre/include/uapi/linux/lnet/libcfs_ioctl.h @@ -0,0 +1,141 @@ +/* + * GPL HEADER START + * + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 only, + * as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License version 2 for more details (a copy is included + * in the LICENSE file that accompanied this code). + * + * You should have received a copy of the GNU General Public License + * version 2 along with this program; If not, see + * http://www.gnu.org/licenses/gpl-2.0.html + * + * GPL HEADER END + */ +/* + * Copyright (c) 2008, 2010, Oracle and/or its affiliates. All rights reserved. + * Use is subject to license terms. + */ +/* + * This file is part of Lustre, http://www.lustre.org/ + * Lustre is a trademark of Sun Microsystems, Inc. + * + * libcfs/include/libcfs/libcfs_ioctl.h + * + * Low-level ioctl data structures. Kernel ioctl functions declared here, + * and user space functions are in libcfs/util/ioctl.h. + * + */ + +#ifndef __LIBCFS_IOCTL_H__ +#define __LIBCFS_IOCTL_H__ + +#include <linux/types.h> +#include <linux/ioctl.h> + +#define LIBCFS_IOCTL_VERSION 0x0001000a +#define LIBCFS_IOCTL_VERSION2 0x0001000b + +struct libcfs_ioctl_hdr { + __u32 ioc_len; + __u32 ioc_version; +}; + +/** max size to copy from userspace */ +#define LIBCFS_IOC_DATA_MAX (128 * 1024) + +struct libcfs_ioctl_data { + struct libcfs_ioctl_hdr ioc_hdr; + + __u64 ioc_nid; + __u64 ioc_u64[1]; + + __u32 ioc_flags; + __u32 ioc_count; + __u32 ioc_net; + __u32 ioc_u32[7]; + + __u32 ioc_inllen1; + char *ioc_inlbuf1; + __u32 ioc_inllen2; + char *ioc_inlbuf2; + + __u32 ioc_plen1; /* buffers in userspace */ + void __user *ioc_pbuf1; + __u32 ioc_plen2; /* buffers in userspace */ + void __user *ioc_pbuf2; + + char ioc_bulk[0]; +}; + +struct libcfs_debug_ioctl_data { + struct libcfs_ioctl_hdr hdr; + unsigned int subs; + unsigned int debug; +}; + +/* 'f' ioctls are defined in lustre_ioctl.h and lustre_user.h except for: */ +#define LIBCFS_IOC_DEBUG_MASK _IOWR('f', 250, long) +#define IOCTL_LIBCFS_TYPE long + +#define IOC_LIBCFS_TYPE ('e') +#define IOC_LIBCFS_MIN_NR 30 +/* libcfs ioctls */ +/* IOC_LIBCFS_PANIC obsolete in 2.8.0, was _IOWR('e', 30, IOCTL_LIBCFS_TYPE) */ +#define IOC_LIBCFS_CLEAR_DEBUG _IOWR('e', 31, IOCTL_LIBCFS_TYPE) +#define IOC_LIBCFS_MARK_DEBUG _IOWR('e', 32, IOCTL_LIBCFS_TYPE) +/* IOC_LIBCFS_MEMHOG obsolete in 2.8.0, was _IOWR('e', 36, IOCTL_LIBCFS_TYPE) */ +/* lnet ioctls */ +#define IOC_LIBCFS_GET_NI _IOWR('e', 50, IOCTL_LIBCFS_TYPE) +#define IOC_LIBCFS_FAIL_NID _IOWR('e', 51, IOCTL_LIBCFS_TYPE) +#define IOC_LIBCFS_NOTIFY_ROUTER _IOWR('e', 55, IOCTL_LIBCFS_TYPE) +#define IOC_LIBCFS_UNCONFIGURE _IOWR('e', 56, IOCTL_LIBCFS_TYPE) +/* IOC_LIBCFS_PORTALS_COMPATIBILITY _IOWR('e', 57, IOCTL_LIBCFS_TYPE) */ +#define IOC_LIBCFS_LNET_DIST _IOWR('e', 58, IOCTL_LIBCFS_TYPE) +#define IOC_LIBCFS_CONFIGURE _IOWR('e', 59, IOCTL_LIBCFS_TYPE) +#define IOC_LIBCFS_TESTPROTOCOMPAT _IOWR('e', 60, IOCTL_LIBCFS_TYPE) +#define IOC_LIBCFS_PING _IOWR('e', 61, IOCTL_LIBCFS_TYPE) +/* IOC_LIBCFS_DEBUG_PEER _IOWR('e', 62, IOCTL_LIBCFS_TYPE) */ +#define IOC_LIBCFS_LNETST _IOWR('e', 63, IOCTL_LIBCFS_TYPE) +#define IOC_LIBCFS_LNET_FAULT _IOWR('e', 64, IOCTL_LIBCFS_TYPE) +/* lnd ioctls */ +#define IOC_LIBCFS_REGISTER_MYNID _IOWR('e', 70, IOCTL_LIBCFS_TYPE) +#define IOC_LIBCFS_CLOSE_CONNECTION _IOWR('e', 71, IOCTL_LIBCFS_TYPE) +#define IOC_LIBCFS_PUSH_CONNECTION _IOWR('e', 72, IOCTL_LIBCFS_TYPE) +#define IOC_LIBCFS_GET_CONN _IOWR('e', 73, IOCTL_LIBCFS_TYPE) +#define IOC_LIBCFS_DEL_PEER _IOWR('e', 74, IOCTL_LIBCFS_TYPE) +#define IOC_LIBCFS_ADD_PEER _IOWR('e', 75, IOCTL_LIBCFS_TYPE) +#define IOC_LIBCFS_GET_PEER _IOWR('e', 76, IOCTL_LIBCFS_TYPE) +/* ioctl 77 is free for use */ +#define IOC_LIBCFS_ADD_INTERFACE _IOWR('e', 78, IOCTL_LIBCFS_TYPE) +#define IOC_LIBCFS_DEL_INTERFACE _IOWR('e', 79, IOCTL_LIBCFS_TYPE) +#define IOC_LIBCFS_GET_INTERFACE _IOWR('e', 80, IOCTL_LIBCFS_TYPE) + +/* + * DLC Specific IOCTL numbers. + * In order to maintain backward compatibility with any possible external + * tools which might be accessing the IOCTL numbers, a new group of IOCTL + * number have been allocated. + */ +#define IOCTL_CONFIG_SIZE struct lnet_ioctl_config_data +#define IOC_LIBCFS_ADD_ROUTE _IOWR(IOC_LIBCFS_TYPE, 81, IOCTL_CONFIG_SIZE) +#define IOC_LIBCFS_DEL_ROUTE _IOWR(IOC_LIBCFS_TYPE, 82, IOCTL_CONFIG_SIZE) +#define IOC_LIBCFS_GET_ROUTE _IOWR(IOC_LIBCFS_TYPE, 83, IOCTL_CONFIG_SIZE) +#define IOC_LIBCFS_ADD_NET _IOWR(IOC_LIBCFS_TYPE, 84, IOCTL_CONFIG_SIZE) +#define IOC_LIBCFS_DEL_NET _IOWR(IOC_LIBCFS_TYPE, 85, IOCTL_CONFIG_SIZE) +#define IOC_LIBCFS_GET_NET _IOWR(IOC_LIBCFS_TYPE, 86, IOCTL_CONFIG_SIZE) +#define IOC_LIBCFS_CONFIG_RTR _IOWR(IOC_LIBCFS_TYPE, 87, IOCTL_CONFIG_SIZE) +#define IOC_LIBCFS_ADD_BUF _IOWR(IOC_LIBCFS_TYPE, 88, IOCTL_CONFIG_SIZE) +#define IOC_LIBCFS_GET_BUF _IOWR(IOC_LIBCFS_TYPE, 89, IOCTL_CONFIG_SIZE) +#define IOC_LIBCFS_GET_PEER_INFO _IOWR(IOC_LIBCFS_TYPE, 90, IOCTL_CONFIG_SIZE) +#define IOC_LIBCFS_GET_LNET_STATS _IOWR(IOC_LIBCFS_TYPE, 91, IOCTL_CONFIG_SIZE) +#define IOC_LIBCFS_MAX_NR 91 + +#endif /* __LIBCFS_IOCTL_H__ */ diff --git a/drivers/staging/lustre/include/uapi/linux/lnet/lnet-dlc.h b/drivers/staging/lustre/include/uapi/linux/lnet/lnet-dlc.h new file mode 100644 index 000000000000..e45d828bfd1b --- /dev/null +++ b/drivers/staging/lustre/include/uapi/linux/lnet/lnet-dlc.h @@ -0,0 +1,149 @@ +/* + * LGPL HEADER START + * + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library. + * + * LGPL HEADER END + * + */ +/* + * Copyright (c) 2014, Intel Corporation. + */ +/* + * Author: Amir Shehata <amir.shehata@intel.com> + */ + +#ifndef LNET_DLC_H +#define LNET_DLC_H + +#include <uapi/linux/lnet/libcfs_ioctl.h> +#include <uapi/linux/lnet/lnet-types.h> + +#define MAX_NUM_SHOW_ENTRIES 32 +#define LNET_MAX_STR_LEN 128 +#define LNET_MAX_SHOW_NUM_CPT 128 +#define LNET_UNDEFINED_HOPS ((__u32)(-1)) + +struct lnet_ioctl_config_lnd_cmn_tunables { + __u32 lct_version; + __u32 lct_peer_timeout; + __u32 lct_peer_tx_credits; + __u32 lct_peer_rtr_credits; + __u32 lct_max_tx_credits; +}; + +struct lnet_ioctl_config_o2iblnd_tunables { + __u32 lnd_version; + __u32 lnd_peercredits_hiw; + __u32 lnd_map_on_demand; + __u32 lnd_concurrent_sends; + __u32 lnd_fmr_pool_size; + __u32 lnd_fmr_flush_trigger; + __u32 lnd_fmr_cache; + __u32 pad; +}; + +struct lnet_ioctl_config_lnd_tunables { + struct lnet_ioctl_config_lnd_cmn_tunables lt_cmn; + union { + struct lnet_ioctl_config_o2iblnd_tunables lt_o2ib; + } lt_tun_u; +}; + +struct lnet_ioctl_net_config { + char ni_interfaces[LNET_MAX_INTERFACES][LNET_MAX_STR_LEN]; + __u32 ni_status; + __u32 ni_cpts[LNET_MAX_SHOW_NUM_CPT]; + char cfg_bulk[0]; +}; + +#define LNET_TINY_BUF_IDX 0 +#define LNET_SMALL_BUF_IDX 1 +#define LNET_LARGE_BUF_IDX 2 + +/* # different router buffer pools */ +#define LNET_NRBPOOLS (LNET_LARGE_BUF_IDX + 1) + +struct lnet_ioctl_pool_cfg { + struct { + __u32 pl_npages; + __u32 pl_nbuffers; + __u32 pl_credits; + __u32 pl_mincredits; + } pl_pools[LNET_NRBPOOLS]; + __u32 pl_routing; +}; + +struct lnet_ioctl_config_data { + struct libcfs_ioctl_hdr cfg_hdr; + + __u32 cfg_net; + __u32 cfg_count; + __u64 cfg_nid; + __u32 cfg_ncpts; + + union { + struct { + __u32 rtr_hop; + __u32 rtr_priority; + __u32 rtr_flags; + } cfg_route; + struct { + char net_intf[LNET_MAX_STR_LEN]; + __s32 net_peer_timeout; + __s32 net_peer_tx_credits; + __s32 net_peer_rtr_credits; + __s32 net_max_tx_credits; + __u32 net_cksum_algo; + __u32 net_interface_count; + } cfg_net; + struct { + __u32 buf_enable; + __s32 buf_tiny; + __s32 buf_small; + __s32 buf_large; + } cfg_buffers; + } cfg_config_u; + + char cfg_bulk[0]; +}; + +struct lnet_ioctl_peer { + struct libcfs_ioctl_hdr pr_hdr; + __u32 pr_count; + __u32 pr_pad; + __u64 pr_nid; + + union { + struct { + char cr_aliveness[LNET_MAX_STR_LEN]; + __u32 cr_refcount; + __u32 cr_ni_peer_tx_credits; + __u32 cr_peer_tx_credits; + __u32 cr_peer_rtr_credits; + __u32 cr_peer_min_rtr_credits; + __u32 cr_peer_tx_qnob; + __u32 cr_ncpt; + } pr_peer_credits; + } pr_lnd_u; +}; + +struct lnet_ioctl_lnet_stats { + struct libcfs_ioctl_hdr st_hdr; + struct lnet_counters st_cntrs; +}; + +#endif /* LNET_DLC_H */ diff --git a/drivers/staging/lustre/include/uapi/linux/lnet/lnet-types.h b/drivers/staging/lustre/include/uapi/linux/lnet/lnet-types.h new file mode 100644 index 000000000000..1be9b7aa7326 --- /dev/null +++ b/drivers/staging/lustre/include/uapi/linux/lnet/lnet-types.h @@ -0,0 +1,669 @@ +/* + * GPL HEADER START + * + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 only, + * as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License version 2 for more details (a copy is included + * in the LICENSE file that accompanied this code). + * + * You should have received a copy of the GNU General Public License + * version 2 along with this program; If not, see + * http://www.gnu.org/licenses/gpl-2.0.html + * + * GPL HEADER END + */ +/* + * Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved. + * Use is subject to license terms. + * + * Copyright (c) 2012 - 2015, Intel Corporation. + */ +/* + * This file is part of Lustre, http://www.lustre.org/ + * Lustre is a trademark of Seagate, Inc. + */ + +#ifndef __LNET_TYPES_H__ +#define __LNET_TYPES_H__ + +#include <linux/types.h> +#include <linux/bvec.h> + +/** \addtogroup lnet + * @{ + */ + +#define LNET_VERSION "0.6.0" + +/** \addtogroup lnet_addr + * @{ + */ + +/** Portal reserved for LNet's own use. + * \see lustre/include/lustre/lustre_idl.h for Lustre portal assignments. + */ +#define LNET_RESERVED_PORTAL 0 + +/** + * Address of an end-point in an LNet network. + * + * A node can have multiple end-points and hence multiple addresses. + * An LNet network can be a simple network (e.g. tcp0) or a network of + * LNet networks connected by LNet routers. Therefore an end-point address + * has two parts: network ID, and address within a network. + * + * \see LNET_NIDNET, LNET_NIDADDR, and LNET_MKNID. + */ +typedef __u64 lnet_nid_t; +/** + * ID of a process in a node. Shortened as PID to distinguish from + * lnet_process_id, the global process ID. + */ +typedef __u32 lnet_pid_t; + +/** wildcard NID that matches any end-point address */ +#define LNET_NID_ANY ((lnet_nid_t)(-1)) +/** wildcard PID that matches any lnet_pid_t */ +#define LNET_PID_ANY ((lnet_pid_t)(-1)) + +#define LNET_PID_RESERVED 0xf0000000 /* reserved bits in PID */ +#define LNET_PID_USERFLAG 0x80000000 /* set in userspace peers */ +#define LNET_PID_LUSTRE 12345 + +#define LNET_TIME_FOREVER (-1) + +/* how an LNET NID encodes net:address */ +/** extract the address part of an lnet_nid_t */ + +static inline __u32 LNET_NIDADDR(lnet_nid_t nid) +{ + return nid & 0xffffffff; +} + +static inline __u32 LNET_NIDNET(lnet_nid_t nid) +{ + return (nid >> 32) & 0xffffffff; +} + +static inline lnet_nid_t LNET_MKNID(__u32 net, __u32 addr) +{ + return (((__u64)net) << 32) | addr; +} + +static inline __u32 LNET_NETNUM(__u32 net) +{ + return net & 0xffff; +} + +static inline __u32 LNET_NETTYP(__u32 net) +{ + return (net >> 16) & 0xffff; +} + +static inline __u32 LNET_MKNET(__u32 type, __u32 num) +{ + return (type << 16) | num; +} + +#define WIRE_ATTR __packed + +/* Packed version of lnet_process_id to transfer via network */ +struct lnet_process_id_packed { + /* node id / process id */ + lnet_nid_t nid; + lnet_pid_t pid; +} WIRE_ATTR; + +/* + * The wire handle's interface cookie only matches one network interface in + * one epoch (i.e. new cookie when the interface restarts or the node + * reboots). The object cookie only matches one object on that interface + * during that object's lifetime (i.e. no cookie re-use). + */ +struct lnet_handle_wire { + __u64 wh_interface_cookie; + __u64 wh_object_cookie; +} WIRE_ATTR; + +enum lnet_msg_type { + LNET_MSG_ACK = 0, + LNET_MSG_PUT, + LNET_MSG_GET, + LNET_MSG_REPLY, + LNET_MSG_HELLO, +}; + +/* + * The variant fields of the portals message header are aligned on an 8 + * byte boundary in the message header. Note that all types used in these + * wire structs MUST be fixed size and the smaller types are placed at the + * end. + */ +struct lnet_ack { + struct lnet_handle_wire dst_wmd; + __u64 match_bits; + __u32 mlength; +} WIRE_ATTR; + +struct lnet_put { + struct lnet_handle_wire ack_wmd; + __u64 match_bits; + __u64 hdr_data; + __u32 ptl_index; + __u32 offset; +} WIRE_ATTR; + +struct lnet_get { + struct lnet_handle_wire return_wmd; + __u64 match_bits; + __u32 ptl_index; + __u32 src_offset; + __u32 sink_length; +} WIRE_ATTR; + +struct lnet_reply { + struct lnet_handle_wire dst_wmd; +} WIRE_ATTR; + +struct lnet_hello { + __u64 incarnation; + __u32 type; +} WIRE_ATTR; + +struct lnet_hdr { + lnet_nid_t dest_nid; + lnet_nid_t src_nid; + lnet_pid_t dest_pid; + lnet_pid_t src_pid; + __u32 type; /* enum lnet_msg_type */ + __u32 payload_length; /* payload data to follow */ + /*<------__u64 aligned------->*/ + union { + struct lnet_ack ack; + struct lnet_put put; + struct lnet_get get; + struct lnet_reply reply; + struct lnet_hello hello; + } msg; +} WIRE_ATTR; + +/* + * A HELLO message contains a magic number and protocol version + * code in the header's dest_nid, the peer's NID in the src_nid, and + * LNET_MSG_HELLO in the type field. All other common fields are zero + * (including payload_size; i.e. no payload). + * This is for use by byte-stream LNDs (e.g. TCP/IP) to check the peer is + * running the same protocol and to find out its NID. These LNDs should + * exchange HELLO messages when a connection is first established. Individual + * LNDs can put whatever else they fancy in struct lnet_hdr::msg. + */ +struct lnet_magicversion { + __u32 magic; /* LNET_PROTO_TCP_MAGIC */ + __u16 version_major; /* increment on incompatible change */ + __u16 version_minor; /* increment on compatible change */ +} WIRE_ATTR; + +/* PROTO MAGIC for LNDs */ +#define LNET_PROTO_IB_MAGIC 0x0be91b91 +#define LNET_PROTO_GNI_MAGIC 0xb00fbabe /* ask Kim */ +#define LNET_PROTO_TCP_MAGIC 0xeebc0ded +#define LNET_PROTO_ACCEPTOR_MAGIC 0xacce7100 +#define LNET_PROTO_PING_MAGIC 0x70696E67 /* 'ping' */ + +/* Placeholder for a future "unified" protocol across all LNDs */ +/* + * Current LNDs that receive a request with this magic will respond with a + * "stub" reply using their current protocol + */ +#define LNET_PROTO_MAGIC 0x45726963 /* ! */ + +#define LNET_PROTO_TCP_VERSION_MAJOR 1 +#define LNET_PROTO_TCP_VERSION_MINOR 0 + +/* Acceptor connection request */ +struct lnet_acceptor_connreq { + __u32 acr_magic; /* PTL_ACCEPTOR_PROTO_MAGIC */ + __u32 acr_version; /* protocol version */ + __u64 acr_nid; /* target NID */ +} WIRE_ATTR; + +#define LNET_PROTO_ACCEPTOR_VERSION 1 + +struct lnet_ni_status { + lnet_nid_t ns_nid; + __u32 ns_status; + __u32 ns_unused; +} WIRE_ATTR; + +struct lnet_ping_info { + __u32 pi_magic; + __u32 pi_features; + lnet_pid_t pi_pid; + __u32 pi_nnis; + struct lnet_ni_status pi_ni[0]; +} WIRE_ATTR; + +struct lnet_counters { + __u32 msgs_alloc; + __u32 msgs_max; + __u32 errors; + __u32 send_count; + __u32 recv_count; + __u32 route_count; + __u32 drop_count; + __u64 send_length; + __u64 recv_length; + __u64 route_length; + __u64 drop_length; +} WIRE_ATTR; + +#define LNET_NI_STATUS_UP 0x15aac0de +#define LNET_NI_STATUS_DOWN 0xdeadface +#define LNET_NI_STATUS_INVALID 0x00000000 + +#define LNET_MAX_INTERFACES 16 + +/** + * Objects maintained by the LNet are accessed through handles. Handle types + * have names of the form lnet_handle_xx, where xx is one of the two letter + * object type codes ('eq' for event queue, 'md' for memory descriptor, and + * 'me' for match entry). Each type of object is given a unique handle type + * to enhance type checking. + */ +#define LNET_WIRE_HANDLE_COOKIE_NONE (-1) + +struct lnet_handle_eq { + u64 cookie; +}; + +/** + * Invalidate eq handle @h. + */ +static inline void LNetInvalidateEQHandle(struct lnet_handle_eq *h) +{ + h->cookie = LNET_WIRE_HANDLE_COOKIE_NONE; +} + +/** + * Check whether eq handle @h is invalid. + * + * @return 1 if handle is invalid, 0 if valid. + */ +static inline int LNetEQHandleIsInvalid(struct lnet_handle_eq h) +{ + return (LNET_WIRE_HANDLE_COOKIE_NONE == h.cookie); +} + +struct lnet_handle_md { + u64 cookie; +}; + +/** + * Invalidate md handle @h. + */ +static inline void LNetInvalidateMDHandle(struct lnet_handle_md *h) +{ + h->cookie = LNET_WIRE_HANDLE_COOKIE_NONE; +} + +/** + * Check whether eq handle @h is invalid. + * + * @return 1 if handle is invalid, 0 if valid. + */ +static inline int LNetMDHandleIsInvalid(struct lnet_handle_md h) +{ + return (LNET_WIRE_HANDLE_COOKIE_NONE == h.cookie); +} + +struct lnet_handle_me { + u64 cookie; +}; + +/** + * Global process ID. + */ +struct lnet_process_id { + /** node id */ + lnet_nid_t nid; + /** process id */ + lnet_pid_t pid; +}; +/** @} lnet_addr */ + +/** \addtogroup lnet_me + * @{ + */ + +/** + * Specifies whether the match entry or memory descriptor should be unlinked + * automatically (LNET_UNLINK) or not (LNET_RETAIN). + */ +enum lnet_unlink { + LNET_RETAIN = 0, + LNET_UNLINK +}; + +/** + * Values of the type lnet_ins_pos are used to control where a new match + * entry is inserted. The value LNET_INS_BEFORE is used to insert the new + * entry before the current entry or before the head of the list. The value + * LNET_INS_AFTER is used to insert the new entry after the current entry + * or after the last item in the list. + */ +enum lnet_ins_pos { + /** insert ME before current position or head of the list */ + LNET_INS_BEFORE, + /** insert ME after current position or tail of the list */ + LNET_INS_AFTER, + /** attach ME at tail of local CPU partition ME list */ + LNET_INS_LOCAL +}; + +/** @} lnet_me */ + +/** \addtogroup lnet_md + * @{ + */ + +/** + * Defines the visible parts of a memory descriptor. Values of this type + * are used to initialize memory descriptors. + */ +struct lnet_md { + /** + * Specify the memory region associated with the memory descriptor. + * If the options field has: + * - LNET_MD_KIOV bit set: The start field points to the starting + * address of an array of struct bio_vec and the length field specifies + * the number of entries in the array. The length can't be bigger + * than LNET_MAX_IOV. The struct bio_vec is used to describe page-based + * fragments that are not necessarily mapped in virtual memory. + * - LNET_MD_IOVEC bit set: The start field points to the starting + * address of an array of struct iovec and the length field specifies + * the number of entries in the array. The length can't be bigger + * than LNET_MAX_IOV. The struct iovec is used to describe fragments + * that have virtual addresses. + * - Otherwise: The memory region is contiguous. The start field + * specifies the starting address for the memory region and the + * length field specifies its length. + * + * When the memory region is fragmented, all fragments but the first + * one must start on page boundary, and all but the last must end on + * page boundary. + */ + void *start; + unsigned int length; + /** + * Specifies the maximum number of operations that can be performed + * on the memory descriptor. An operation is any action that could + * possibly generate an event. In the usual case, the threshold value + * is decremented for each operation on the MD. When the threshold + * drops to zero, the MD becomes inactive and does not respond to + * operations. A threshold value of LNET_MD_THRESH_INF indicates that + * there is no bound on the number of operations that may be applied + * to a MD. + */ + int threshold; + /** + * Specifies the largest incoming request that the memory descriptor + * should respond to. When the unused portion of a MD (length - + * local offset) falls below this value, the MD becomes inactive and + * does not respond to further operations. This value is only used + * if the LNET_MD_MAX_SIZE option is set. + */ + int max_size; + /** + * Specifies the behavior of the memory descriptor. A bitwise OR + * of the following values can be used: + * - LNET_MD_OP_PUT: The LNet PUT operation is allowed on this MD. + * - LNET_MD_OP_GET: The LNet GET operation is allowed on this MD. + * - LNET_MD_MANAGE_REMOTE: The offset used in accessing the memory + * region is provided by the incoming request. By default, the + * offset is maintained locally. When maintained locally, the + * offset is incremented by the length of the request so that + * the next operation (PUT or GET) will access the next part of + * the memory region. Note that only one offset variable exists + * per memory descriptor. If both PUT and GET operations are + * performed on a memory descriptor, the offset is updated each time. + * - LNET_MD_TRUNCATE: The length provided in the incoming request can + * be reduced to match the memory available in the region (determined + * by subtracting the offset from the length of the memory region). + * By default, if the length in the incoming operation is greater + * than the amount of memory available, the operation is rejected. + * - LNET_MD_ACK_DISABLE: An acknowledgment should not be sent for + * incoming PUT operations, even if requested. By default, + * acknowledgments are sent for PUT operations that request an + * acknowledgment. Acknowledgments are never sent for GET operations. + * The data sent in the REPLY serves as an implicit acknowledgment. + * - LNET_MD_KIOV: The start and length fields specify an array of + * struct bio_vec. + * - LNET_MD_IOVEC: The start and length fields specify an array of + * struct iovec. + * - LNET_MD_MAX_SIZE: The max_size field is valid. + * + * Note: + * - LNET_MD_KIOV or LNET_MD_IOVEC allows for a scatter/gather + * capability for memory descriptors. They can't be both set. + * - When LNET_MD_MAX_SIZE is set, the total length of the memory + * region (i.e. sum of all fragment lengths) must not be less than + * \a max_size. + */ + unsigned int options; + /** + * A user-specified value that is associated with the memory + * descriptor. The value does not need to be a pointer, but must fit + * in the space used by a pointer. This value is recorded in events + * associated with operations on this MD. + */ + void *user_ptr; + /** + * A handle for the event queue used to log the operations performed on + * the memory region. If this argument is a NULL handle (i.e. nullified + * by LNetInvalidateHandle()), operations performed on this memory + * descriptor are not logged. + */ + struct lnet_handle_eq eq_handle; +}; + +/* + * Max Transfer Unit (minimum supported everywhere). + * CAVEAT EMPTOR, with multinet (i.e. routers forwarding between networks) + * these limits are system wide and not interface-local. + */ +#define LNET_MTU_BITS 20 +#define LNET_MTU (1 << LNET_MTU_BITS) + +/** limit on the number of fragments in discontiguous MDs */ +#define LNET_MAX_IOV 256 + +/** + * Options for the MD structure. See lnet_md::options. + */ +#define LNET_MD_OP_PUT (1 << 0) +/** See lnet_md::options. */ +#define LNET_MD_OP_GET (1 << 1) +/** See lnet_md::options. */ +#define LNET_MD_MANAGE_REMOTE (1 << 2) +/* unused (1 << 3) */ +/** See lnet_md::options. */ +#define LNET_MD_TRUNCATE (1 << 4) +/** See lnet_md::options. */ +#define LNET_MD_ACK_DISABLE (1 << 5) +/** See lnet_md::options. */ +#define LNET_MD_IOVEC (1 << 6) +/** See lnet_md::options. */ +#define LNET_MD_MAX_SIZE (1 << 7) +/** See lnet_md::options. */ +#define LNET_MD_KIOV (1 << 8) + +/* For compatibility with Cray Portals */ +#define LNET_MD_PHYS 0 + +/** Infinite threshold on MD operations. See lnet_md::threshold */ +#define LNET_MD_THRESH_INF (-1) + +/** @} lnet_md */ + +/** \addtogroup lnet_eq + * @{ + */ + +/** + * Six types of events can be logged in an event queue. + */ +enum lnet_event_kind { + /** An incoming GET operation has completed on the MD. */ + LNET_EVENT_GET = 1, + /** + * An incoming PUT operation has completed on the MD. The + * underlying layers will not alter the memory (on behalf of this + * operation) once this event has been logged. + */ + LNET_EVENT_PUT, + /** + * A REPLY operation has completed. This event is logged after the + * data (if any) from the REPLY has been written into the MD. + */ + LNET_EVENT_REPLY, + /** An acknowledgment has been received. */ + LNET_EVENT_ACK, + /** + * An outgoing send (PUT or GET) operation has completed. This event + * is logged after the entire buffer has been sent and it is safe for + * the caller to reuse the buffer. + * + * Note: + * - The LNET_EVENT_SEND doesn't guarantee message delivery. It can + * happen even when the message has not yet been put out on wire. + * - It's unsafe to assume that in an outgoing GET operation + * the LNET_EVENT_SEND event would happen before the + * LNET_EVENT_REPLY event. The same holds for LNET_EVENT_SEND and + * LNET_EVENT_ACK events in an outgoing PUT operation. + */ + LNET_EVENT_SEND, + /** + * A MD has been unlinked. Note that LNetMDUnlink() does not + * necessarily trigger an LNET_EVENT_UNLINK event. + * \see LNetMDUnlink + */ + LNET_EVENT_UNLINK, +}; + +#define LNET_SEQ_GT(a, b) (((signed long)((a) - (b))) > 0) + +/** + * Information about an event on a MD. + */ +struct lnet_event { + /** The identifier (nid, pid) of the target. */ + struct lnet_process_id target; + /** The identifier (nid, pid) of the initiator. */ + struct lnet_process_id initiator; + /** + * The NID of the immediate sender. If the request has been forwarded + * by routers, this is the NID of the last hop; otherwise it's the + * same as the initiator. + */ + lnet_nid_t sender; + /** Indicates the type of the event. */ + enum lnet_event_kind type; + /** The portal table index specified in the request */ + unsigned int pt_index; + /** A copy of the match bits specified in the request. */ + __u64 match_bits; + /** The length (in bytes) specified in the request. */ + unsigned int rlength; + /** + * The length (in bytes) of the data that was manipulated by the + * operation. For truncated operations, the manipulated length will be + * the number of bytes specified by the MD (possibly with an offset, + * see lnet_md). For all other operations, the manipulated length + * will be the length of the requested operation, i.e. rlength. + */ + unsigned int mlength; + /** + * The handle to the MD associated with the event. The handle may be + * invalid if the MD has been unlinked. + */ + struct lnet_handle_md md_handle; + /** + * A snapshot of the state of the MD immediately after the event has + * been processed. In particular, the threshold field in md will + * reflect the value of the threshold after the operation occurred. + */ + struct lnet_md md; + /** + * 64 bits of out-of-band user data. Only valid for LNET_EVENT_PUT. + * \see LNetPut + */ + __u64 hdr_data; + /** + * Indicates the completion status of the operation. It's 0 for + * successful operations, otherwise it's an error code. + */ + int status; + /** + * Indicates whether the MD has been unlinked. Note that: + * - An event with unlinked set is the last event on the MD. + * - This field is also set for an explicit LNET_EVENT_UNLINK event. + * \see LNetMDUnlink + */ + int unlinked; + /** + * The displacement (in bytes) into the memory region that the + * operation used. The offset can be determined by the operation for + * a remote managed MD or by the local MD. + * \see lnet_md::options + */ + unsigned int offset; + /** + * The sequence number for this event. Sequence numbers are unique + * to each event. + */ + volatile unsigned long sequence; +}; + +/** + * Event queue handler function type. + * + * The EQ handler runs for each event that is deposited into the EQ. The + * handler is supplied with a pointer to the event that triggered the + * handler invocation. + * + * The handler must not block, must be reentrant, and must not call any LNet + * API functions. It should return as quickly as possible. + */ +typedef void (*lnet_eq_handler_t)(struct lnet_event *event); +#define LNET_EQ_HANDLER_NONE NULL +/** @} lnet_eq */ + +/** \addtogroup lnet_data + * @{ + */ + +/** + * Specify whether an acknowledgment should be sent by target when the PUT + * operation completes (i.e., when the data has been written to a MD of the + * target process). + * + * \see lnet_md::options for the discussion on LNET_MD_ACK_DISABLE by which + * acknowledgments can be disabled for a MD. + */ +enum lnet_ack_req { + /** Request an acknowledgment */ + LNET_ACK_REQ, + /** Request that no acknowledgment should be generated. */ + LNET_NOACK_REQ +}; +/** @} lnet_data */ + +/** @} lnet */ +#endif diff --git a/drivers/staging/lustre/include/uapi/linux/lnet/lnetctl.h b/drivers/staging/lustre/include/uapi/linux/lnet/lnetctl.h new file mode 100644 index 000000000000..d9da625d70de --- /dev/null +++ b/drivers/staging/lustre/include/uapi/linux/lnet/lnetctl.h @@ -0,0 +1,134 @@ +/* + * This file is part of Portals, http://www.sf.net/projects/lustre/ + * + * Portals is free software; you can redistribute it and/or + * modify it under the terms of version 2 of the GNU General Public + * License as published by the Free Software Foundation. + * + * Portals is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * header for lnet ioctl + */ +#ifndef _LNETCTL_H_ +#define _LNETCTL_H_ + +#include <uapi/linux/lnet/lnet-types.h> + +/** \addtogroup lnet_fault_simulation + * @{ + */ + +enum { + LNET_CTL_DROP_ADD, + LNET_CTL_DROP_DEL, + LNET_CTL_DROP_RESET, + LNET_CTL_DROP_LIST, + LNET_CTL_DELAY_ADD, + LNET_CTL_DELAY_DEL, + LNET_CTL_DELAY_RESET, + LNET_CTL_DELAY_LIST, +}; + +#define LNET_ACK_BIT (1 << 0) +#define LNET_PUT_BIT (1 << 1) +#define LNET_GET_BIT (1 << 2) +#define LNET_REPLY_BIT (1 << 3) + +/** ioctl parameter for LNet fault simulation */ +struct lnet_fault_attr { + /** + * source NID of drop rule + * LNET_NID_ANY is wildcard for all sources + * 255.255.255.255@net is wildcard for all addresses from @net + */ + lnet_nid_t fa_src; + /** destination NID of drop rule, see \a dr_src for details */ + lnet_nid_t fa_dst; + /** + * Portal mask to drop, -1 means all portals, for example: + * fa_ptl_mask = (1 << _LDLM_CB_REQUEST_PORTAL ) | + * (1 << LDLM_CANCEL_REQUEST_PORTAL) + * + * If it is non-zero then only PUT and GET will be filtered, otherwise + * there is no portal filter, all matched messages will be checked. + */ + __u64 fa_ptl_mask; + /** + * message types to drop, for example: + * dra_type = LNET_DROP_ACK_BIT | LNET_DROP_PUT_BIT + * + * If it is non-zero then only specified message types are filtered, + * otherwise all message types will be checked. + */ + __u32 fa_msg_mask; + union { + /** message drop simulation */ + struct { + /** drop rate of this rule */ + __u32 da_rate; + /** + * time interval of message drop, it is exclusive + * with da_rate + */ + __u32 da_interval; + } drop; + /** message latency simulation */ + struct { + __u32 la_rate; + /** + * time interval of message delay, it is exclusive + * with la_rate + */ + __u32 la_interval; + /** latency to delay */ + __u32 la_latency; + } delay; + __u64 space[8]; + } u; +}; + +/** fault simluation stats */ +struct lnet_fault_stat { + /** total # matched messages */ + __u64 fs_count; + /** # dropped LNET_MSG_PUT by this rule */ + __u64 fs_put; + /** # dropped LNET_MSG_ACK by this rule */ + __u64 fs_ack; + /** # dropped LNET_MSG_GET by this rule */ + __u64 fs_get; + /** # dropped LNET_MSG_REPLY by this rule */ + __u64 fs_reply; + union { + struct { + /** total # dropped messages */ + __u64 ds_dropped; + } drop; + struct { + /** total # delayed messages */ + __u64 ls_delayed; + } delay; + __u64 space[8]; + } u; +}; + +/** @} lnet_fault_simulation */ + +#define LNET_DEV_ID 0 +#define LNET_DEV_PATH "/dev/lnet" +#define LNET_DEV_MAJOR 10 +#define LNET_DEV_MINOR 240 +#define OBD_DEV_ID 1 +#define OBD_DEV_NAME "obd" +#define OBD_DEV_PATH "/dev/" OBD_DEV_NAME +#define OBD_DEV_MAJOR 10 +#define OBD_DEV_MINOR 241 +#define SMFS_DEV_ID 2 +#define SMFS_DEV_PATH "/dev/snapdev" +#define SMFS_DEV_MAJOR 10 +#define SMFS_DEV_MINOR 242 + +#endif diff --git a/drivers/staging/lustre/include/uapi/linux/lnet/lnetst.h b/drivers/staging/lustre/include/uapi/linux/lnet/lnetst.h new file mode 100644 index 000000000000..a4f9ff01d458 --- /dev/null +++ b/drivers/staging/lustre/include/uapi/linux/lnet/lnetst.h @@ -0,0 +1,556 @@ +/* + * GPL HEADER START + * + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 only, + * as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License version 2 for more details (a copy is included + * in the LICENSE file that accompanied this code). + * + * You should have received a copy of the GNU General Public License + * version 2 along with this program; If not, see + * http://www.gnu.org/licenses/gpl-2.0.html + * + * GPL HEADER END + */ +/* + * Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved. + * Use is subject to license terms. + * + * Copyright (c) 2011 - 2015, Intel Corporation. + */ +/* + * This file is part of Lustre, http://www.lustre.org/ + * Lustre is a trademark of Seagate, Inc. + * + * lnet/include/lnet/lnetst.h + * + * Author: Liang Zhen <liang.zhen@intel.com> + */ + +#ifndef __LNET_ST_H__ +#define __LNET_ST_H__ + +#include <linux/types.h> + +#define LST_FEAT_NONE (0) +#define LST_FEAT_BULK_LEN (1 << 0) /* enable variable page size */ + +#define LST_FEATS_EMPTY (LST_FEAT_NONE) +#define LST_FEATS_MASK (LST_FEAT_NONE | LST_FEAT_BULK_LEN) + +#define LST_NAME_SIZE 32 /* max name buffer length */ + +#define LSTIO_DEBUG 0xC00 /* debug */ +#define LSTIO_SESSION_NEW 0xC01 /* create session */ +#define LSTIO_SESSION_END 0xC02 /* end session */ +#define LSTIO_SESSION_INFO 0xC03 /* query session */ +#define LSTIO_GROUP_ADD 0xC10 /* add group */ +#define LSTIO_GROUP_LIST 0xC11 /* list all groups in session */ +#define LSTIO_GROUP_INFO 0xC12 /* query default information of + * specified group + */ +#define LSTIO_GROUP_DEL 0xC13 /* delete group */ +#define LSTIO_NODES_ADD 0xC14 /* add nodes to specified group */ +#define LSTIO_GROUP_UPDATE 0xC15 /* update group */ +#define LSTIO_BATCH_ADD 0xC20 /* add batch */ +#define LSTIO_BATCH_START 0xC21 /* start batch */ +#define LSTIO_BATCH_STOP 0xC22 /* stop batch */ +#define LSTIO_BATCH_DEL 0xC23 /* delete batch */ +#define LSTIO_BATCH_LIST 0xC24 /* show all batches in the session */ +#define LSTIO_BATCH_INFO 0xC25 /* show defail of specified batch */ +#define LSTIO_TEST_ADD 0xC26 /* add test (to batch) */ +#define LSTIO_BATCH_QUERY 0xC27 /* query batch status */ +#define LSTIO_STAT_QUERY 0xC30 /* get stats */ + +struct lst_sid { + lnet_nid_t ses_nid; /* nid of console node */ + __u64 ses_stamp; /* time stamp */ +}; /*** session id */ + +extern struct lst_sid LST_INVALID_SID; + +struct lst_bid { + __u64 bat_id; /* unique id in session */ +}; /*** batch id (group of tests) */ + +/* Status of test node */ +#define LST_NODE_ACTIVE 0x1 /* node in this session */ +#define LST_NODE_BUSY 0x2 /* node is taken by other session */ +#define LST_NODE_DOWN 0x4 /* node is down */ +#define LST_NODE_UNKNOWN 0x8 /* node not in session */ + +struct lstcon_node_ent { + struct lnet_process_id nde_id; /* id of node */ + int nde_state; /* state of node */ +}; /*** node entry, for list_group command */ + +struct lstcon_ndlist_ent { + int nle_nnode; /* # of nodes */ + int nle_nactive; /* # of active nodes */ + int nle_nbusy; /* # of busy nodes */ + int nle_ndown; /* # of down nodes */ + int nle_nunknown; /* # of unknown nodes */ +}; /*** node_list entry, for list_batch command */ + +struct lstcon_test_ent { + int tse_type; /* test type */ + int tse_loop; /* loop count */ + int tse_concur; /* concurrency of test */ +}; /* test summary entry, for + * list_batch command + */ + +struct lstcon_batch_ent { + int bae_state; /* batch status */ + int bae_timeout; /* batch timeout */ + int bae_ntest; /* # of tests in the batch */ +}; /* batch summary entry, for + * list_batch command + */ + +struct lstcon_test_batch_ent { + struct lstcon_ndlist_ent tbe_cli_nle; /* client (group) node_list + * entry + */ + struct lstcon_ndlist_ent tbe_srv_nle; /* server (group) node_list + * entry + */ + union { + struct lstcon_test_ent tbe_test; /* test entry */ + struct lstcon_batch_ent tbe_batch;/* batch entry */ + } u; +}; /* test/batch verbose information entry, + * for list_batch command + */ + +struct lstcon_rpc_ent { + struct list_head rpe_link; /* link chain */ + struct lnet_process_id rpe_peer; /* peer's id */ + struct timeval rpe_stamp; /* time stamp of RPC */ + int rpe_state; /* peer's state */ + int rpe_rpc_errno; /* RPC errno */ + + struct lst_sid rpe_sid; /* peer's session id */ + int rpe_fwk_errno; /* framework errno */ + int rpe_priv[4]; /* private data */ + char rpe_payload[0]; /* private reply payload */ +}; + +struct lstcon_trans_stat { + int trs_rpc_stat[4]; /* RPCs stat (0: total 1: failed + * 2: finished + * 4: reserved + */ + int trs_rpc_errno; /* RPC errno */ + int trs_fwk_stat[8]; /* framework stat */ + int trs_fwk_errno; /* errno of the first remote error */ + void *trs_fwk_private; /* private framework stat */ +}; + +static inline int +lstcon_rpc_stat_total(struct lstcon_trans_stat *stat, int inc) +{ + return inc ? ++stat->trs_rpc_stat[0] : stat->trs_rpc_stat[0]; +} + +static inline int +lstcon_rpc_stat_success(struct lstcon_trans_stat *stat, int inc) +{ + return inc ? ++stat->trs_rpc_stat[1] : stat->trs_rpc_stat[1]; +} + +static inline int +lstcon_rpc_stat_failure(struct lstcon_trans_stat *stat, int inc) +{ + return inc ? ++stat->trs_rpc_stat[2] : stat->trs_rpc_stat[2]; +} + +static inline int +lstcon_sesop_stat_success(struct lstcon_trans_stat *stat, int inc) +{ + return inc ? ++stat->trs_fwk_stat[0] : stat->trs_fwk_stat[0]; +} + +static inline int +lstcon_sesop_stat_failure(struct lstcon_trans_stat *stat, int inc) +{ + return inc ? ++stat->trs_fwk_stat[1] : stat->trs_fwk_stat[1]; +} + +static inline int +lstcon_sesqry_stat_active(struct lstcon_trans_stat *stat, int inc) +{ + return inc ? ++stat->trs_fwk_stat[0] : stat->trs_fwk_stat[0]; +} + +static inline int +lstcon_sesqry_stat_busy(struct lstcon_trans_stat *stat, int inc) +{ + return inc ? ++stat->trs_fwk_stat[1] : stat->trs_fwk_stat[1]; +} + +static inline int +lstcon_sesqry_stat_unknown(struct lstcon_trans_stat *stat, int inc) +{ + return inc ? ++stat->trs_fwk_stat[2] : stat->trs_fwk_stat[2]; +} + +static inline int +lstcon_tsbop_stat_success(struct lstcon_trans_stat *stat, int inc) +{ + return inc ? ++stat->trs_fwk_stat[0] : stat->trs_fwk_stat[0]; +} + +static inline int +lstcon_tsbop_stat_failure(struct lstcon_trans_stat *stat, int inc) +{ + return inc ? ++stat->trs_fwk_stat[1] : stat->trs_fwk_stat[1]; +} + +static inline int +lstcon_tsbqry_stat_idle(struct lstcon_trans_stat *stat, int inc) +{ + return inc ? ++stat->trs_fwk_stat[0] : stat->trs_fwk_stat[0]; +} + +static inline int +lstcon_tsbqry_stat_run(struct lstcon_trans_stat *stat, int inc) +{ + return inc ? ++stat->trs_fwk_stat[1] : stat->trs_fwk_stat[1]; +} + +static inline int +lstcon_tsbqry_stat_failure(struct lstcon_trans_stat *stat, int inc) +{ + return inc ? ++stat->trs_fwk_stat[2] : stat->trs_fwk_stat[2]; +} + +static inline int +lstcon_statqry_stat_success(struct lstcon_trans_stat *stat, int inc) +{ + return inc ? ++stat->trs_fwk_stat[0] : stat->trs_fwk_stat[0]; +} + +static inline int +lstcon_statqry_stat_failure(struct lstcon_trans_stat *stat, int inc) +{ + return inc ? ++stat->trs_fwk_stat[1] : stat->trs_fwk_stat[1]; +} + +/* create a session */ +struct lstio_session_new_args { + int lstio_ses_key; /* IN: local key */ + int lstio_ses_timeout; /* IN: session timeout */ + int lstio_ses_force; /* IN: force create ? */ + /** IN: session features */ + unsigned int lstio_ses_feats; + struct lst_sid __user *lstio_ses_idp; /* OUT: session id */ + int lstio_ses_nmlen; /* IN: name length */ + char __user *lstio_ses_namep; /* IN: session name */ +}; + +/* query current session */ +struct lstio_session_info_args { + struct lst_sid __user *lstio_ses_idp; /* OUT: session id */ + int __user *lstio_ses_keyp; /* OUT: local key */ + /** OUT: session features */ + unsigned int __user *lstio_ses_featp; + struct lstcon_ndlist_ent __user *lstio_ses_ndinfo;/* OUT: */ + int lstio_ses_nmlen; /* IN: name length */ + char __user *lstio_ses_namep; /* OUT: session name */ +}; + +/* delete a session */ +struct lstio_session_end_args { + int lstio_ses_key; /* IN: session key */ +}; + +#define LST_OPC_SESSION 1 +#define LST_OPC_GROUP 2 +#define LST_OPC_NODES 3 +#define LST_OPC_BATCHCLI 4 +#define LST_OPC_BATCHSRV 5 + +struct lstio_debug_args { + int lstio_dbg_key; /* IN: session key */ + int lstio_dbg_type; /* IN: debug + * session|batch| + * group|nodes list + */ + int lstio_dbg_flags; /* IN: reserved debug + * flags + */ + int lstio_dbg_timeout; /* IN: timeout of + * debug + */ + int lstio_dbg_nmlen; /* IN: len of name */ + char __user *lstio_dbg_namep; /* IN: name of + * group|batch + */ + int lstio_dbg_count; /* IN: # of test nodes + * to debug + */ + struct lnet_process_id __user *lstio_dbg_idsp; /* IN: id of test + * nodes + */ + struct list_head __user *lstio_dbg_resultp; /* OUT: list head of + * result buffer + */ +}; + +struct lstio_group_add_args { + int lstio_grp_key; /* IN: session key */ + int lstio_grp_nmlen; /* IN: name length */ + char __user *lstio_grp_namep; /* IN: group name */ +}; + +struct lstio_group_del_args { + int lstio_grp_key; /* IN: session key */ + int lstio_grp_nmlen; /* IN: name length */ + char __user *lstio_grp_namep; /* IN: group name */ +}; + +#define LST_GROUP_CLEAN 1 /* remove inactive nodes in the group */ +#define LST_GROUP_REFRESH 2 /* refresh inactive nodes + * in the group + */ +#define LST_GROUP_RMND 3 /* delete nodes from the group */ + +struct lstio_group_update_args { + int lstio_grp_key; /* IN: session key */ + int lstio_grp_opc; /* IN: OPC */ + int lstio_grp_args; /* IN: arguments */ + int lstio_grp_nmlen; /* IN: name length */ + char __user *lstio_grp_namep; /* IN: group name */ + int lstio_grp_count; /* IN: # of nodes id */ + struct lnet_process_id __user *lstio_grp_idsp; /* IN: array of nodes */ + struct list_head __user *lstio_grp_resultp; /* OUT: list head of + * result buffer + */ +}; + +struct lstio_group_nodes_args { + int lstio_grp_key; /* IN: session key */ + int lstio_grp_nmlen; /* IN: name length */ + char __user *lstio_grp_namep; /* IN: group name */ + int lstio_grp_count; /* IN: # of nodes */ + /** OUT: session features */ + unsigned int __user *lstio_grp_featp; + struct lnet_process_id __user *lstio_grp_idsp; /* IN: nodes */ + struct list_head __user *lstio_grp_resultp; /* OUT: list head of + * result buffer + */ +}; + +struct lstio_group_list_args { + int lstio_grp_key; /* IN: session key */ + int lstio_grp_idx; /* IN: group idx */ + int lstio_grp_nmlen; /* IN: name len */ + char __user *lstio_grp_namep; /* OUT: name */ +}; + +struct lstio_group_info_args { + int lstio_grp_key; /* IN: session key */ + int lstio_grp_nmlen; /* IN: name len */ + char __user *lstio_grp_namep; /* IN: name */ + struct lstcon_ndlist_ent __user *lstio_grp_entp;/* OUT: description + * of group + */ + int __user *lstio_grp_idxp; /* IN/OUT: node index */ + int __user *lstio_grp_ndentp; /* IN/OUT: # of nodent */ + struct lstcon_node_ent __user *lstio_grp_dentsp;/* OUT: nodent array */ +}; + +#define LST_DEFAULT_BATCH "batch" /* default batch name */ + +struct lstio_batch_add_args { + int lstio_bat_key; /* IN: session key */ + int lstio_bat_nmlen; /* IN: name length */ + char __user *lstio_bat_namep; /* IN: batch name */ +}; + +struct lstio_batch_del_args { + int lstio_bat_key; /* IN: session key */ + int lstio_bat_nmlen; /* IN: name length */ + char __user *lstio_bat_namep; /* IN: batch name */ +}; + +struct lstio_batch_run_args { + int lstio_bat_key; /* IN: session key */ + int lstio_bat_timeout; /* IN: timeout for + * the batch + */ + int lstio_bat_nmlen; /* IN: name length */ + char __user *lstio_bat_namep; /* IN: batch name */ + struct list_head __user *lstio_bat_resultp; /* OUT: list head of + * result buffer + */ +}; + +struct lstio_batch_stop_args { + int lstio_bat_key; /* IN: session key */ + int lstio_bat_force; /* IN: abort unfinished + * test RPC + */ + int lstio_bat_nmlen; /* IN: name length */ + char __user *lstio_bat_namep; /* IN: batch name */ + struct list_head __user *lstio_bat_resultp; /* OUT: list head of + * result buffer + */ +}; + +struct lstio_batch_query_args { + int lstio_bat_key; /* IN: session key */ + int lstio_bat_testidx; /* IN: test index */ + int lstio_bat_client; /* IN: we testing + * client? + */ + int lstio_bat_timeout; /* IN: timeout for + * waiting + */ + int lstio_bat_nmlen; /* IN: name length */ + char __user *lstio_bat_namep; /* IN: batch name */ + struct list_head __user *lstio_bat_resultp; /* OUT: list head of + * result buffer + */ +}; + +struct lstio_batch_list_args { + int lstio_bat_key; /* IN: session key */ + int lstio_bat_idx; /* IN: index */ + int lstio_bat_nmlen; /* IN: name length */ + char __user *lstio_bat_namep; /* IN: batch name */ +}; + +struct lstio_batch_info_args { + int lstio_bat_key; /* IN: session key */ + int lstio_bat_nmlen; /* IN: name length */ + char __user *lstio_bat_namep; /* IN: name */ + int lstio_bat_server; /* IN: query server + * or not + */ + int lstio_bat_testidx; /* IN: test index */ + struct lstcon_test_batch_ent __user *lstio_bat_entp;/* OUT: batch ent */ + + int __user *lstio_bat_idxp; /* IN/OUT: index of node */ + int __user *lstio_bat_ndentp; /* IN/OUT: # of nodent */ + struct lstcon_node_ent __user *lstio_bat_dentsp;/* array of nodent */ +}; + +/* add stat in session */ +struct lstio_stat_args { + int lstio_sta_key; /* IN: session key */ + int lstio_sta_timeout; /* IN: timeout for + * stat request + */ + int lstio_sta_nmlen; /* IN: group name + * length + */ + char __user *lstio_sta_namep; /* IN: group name */ + int lstio_sta_count; /* IN: # of pid */ + struct lnet_process_id __user *lstio_sta_idsp; /* IN: pid */ + struct list_head __user *lstio_sta_resultp; /* OUT: list head of + * result buffer + */ +}; + +enum lst_test_type { + LST_TEST_BULK = 1, + LST_TEST_PING = 2 +}; + +/* create a test in a batch */ +#define LST_MAX_CONCUR 1024 /* Max concurrency of test */ + +struct lstio_test_args { + int lstio_tes_key; /* IN: session key */ + int lstio_tes_bat_nmlen; /* IN: batch name len */ + char __user *lstio_tes_bat_name; /* IN: batch name */ + int lstio_tes_type; /* IN: test type */ + int lstio_tes_oneside; /* IN: one sided test */ + int lstio_tes_loop; /* IN: loop count */ + int lstio_tes_concur; /* IN: concurrency */ + + int lstio_tes_dist; /* IN: node distribution in + * destination groups + */ + int lstio_tes_span; /* IN: node span in + * destination groups + */ + int lstio_tes_sgrp_nmlen; /* IN: source group + * name length + */ + char __user *lstio_tes_sgrp_name; /* IN: group name */ + int lstio_tes_dgrp_nmlen; /* IN: destination group + * name length + */ + char __user *lstio_tes_dgrp_name; /* IN: group name */ + + int lstio_tes_param_len; /* IN: param buffer len */ + void __user *lstio_tes_param; /* IN: parameter for specified + * test: lstio_bulk_param_t, + * lstio_ping_param_t, + * ... more + */ + int __user *lstio_tes_retp; /* OUT: private returned + * value + */ + struct list_head __user *lstio_tes_resultp;/* OUT: list head of + * result buffer + */ +}; + +enum lst_brw_type { + LST_BRW_READ = 1, + LST_BRW_WRITE = 2 +}; + +enum lst_brw_flags { + LST_BRW_CHECK_NONE = 1, + LST_BRW_CHECK_SIMPLE = 2, + LST_BRW_CHECK_FULL = 3 +}; + +struct lst_test_bulk_param { + int blk_opc; /* bulk operation code */ + int blk_size; /* size (bytes) */ + int blk_time; /* time of running the test*/ + int blk_flags; /* reserved flags */ + int blk_cli_off; /* bulk offset on client */ + int blk_srv_off; /* reserved: bulk offset on server */ +}; + +struct lst_test_ping_param { + int png_size; /* size of ping message */ + int png_time; /* time */ + int png_loop; /* loop */ + int png_flags; /* reserved flags */ +}; + +struct srpc_counters { + __u32 errors; + __u32 rpcs_sent; + __u32 rpcs_rcvd; + __u32 rpcs_dropped; + __u32 rpcs_expired; + __u64 bulk_get; + __u64 bulk_put; +} WIRE_ATTR; + +struct sfw_counters { + /** milliseconds since current session started */ + __u32 running_ms; + __u32 active_batches; + __u32 zombie_sessions; + __u32 brw_errors; + __u32 ping_errors; +} WIRE_ATTR; + +#endif diff --git a/drivers/staging/lustre/include/uapi/linux/lnet/nidstr.h b/drivers/staging/lustre/include/uapi/linux/lnet/nidstr.h new file mode 100644 index 000000000000..882074ed6021 --- /dev/null +++ b/drivers/staging/lustre/include/uapi/linux/lnet/nidstr.h @@ -0,0 +1,119 @@ +/* + * GPL HEADER START + * + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 only, + * as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License version 2 for more details (a copy is included + * in the LICENSE file that accompanied this code). + * + * You should have received a copy of the GNU General Public License + * version 2 along with this program; If not, see + * http://www.gnu.org/licenses/gpl-2.0.html + * + * GPL HEADER END + */ +/* + * Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved. + * Use is subject to license terms. + * + * Copyright (c) 2011, 2015, Intel Corporation. + */ +#ifndef _LNET_NIDSTRINGS_H +#define _LNET_NIDSTRINGS_H + +#include <uapi/linux/lnet/lnet-types.h> + +/** + * Lustre Network Driver types. + */ +enum { + /* + * Only add to these values (i.e. don't ever change or redefine them): + * network addresses depend on them... + */ + QSWLND = 1, + SOCKLND = 2, + GMLND = 3, + PTLLND = 4, + O2IBLND = 5, + CIBLND = 6, + OPENIBLND = 7, + IIBLND = 8, + LOLND = 9, + RALND = 10, + VIBLND = 11, + MXLND = 12, + GNILND = 13, + GNIIPLND = 14, +}; + +struct list_head; + +#define LNET_NIDSTR_COUNT 1024 /* # of nidstrings */ +#define LNET_NIDSTR_SIZE 32 /* size of each one (see below for usage) */ + +/* support decl needed by both kernel and user space */ +char *libcfs_next_nidstring(void); +int libcfs_isknown_lnd(__u32 lnd); +char *libcfs_lnd2modname(__u32 lnd); +char *libcfs_lnd2str_r(__u32 lnd, char *buf, size_t buf_size); +static inline char *libcfs_lnd2str(__u32 lnd) +{ + return libcfs_lnd2str_r(lnd, libcfs_next_nidstring(), + LNET_NIDSTR_SIZE); +} + +int libcfs_str2lnd(const char *str); +char *libcfs_net2str_r(__u32 net, char *buf, size_t buf_size); +static inline char *libcfs_net2str(__u32 net) +{ + return libcfs_net2str_r(net, libcfs_next_nidstring(), + LNET_NIDSTR_SIZE); +} + +char *libcfs_nid2str_r(lnet_nid_t nid, char *buf, size_t buf_size); +static inline char *libcfs_nid2str(lnet_nid_t nid) +{ + return libcfs_nid2str_r(nid, libcfs_next_nidstring(), + LNET_NIDSTR_SIZE); +} + +__u32 libcfs_str2net(const char *str); +lnet_nid_t libcfs_str2nid(const char *str); +int libcfs_str2anynid(lnet_nid_t *nid, const char *str); +char *libcfs_id2str(struct lnet_process_id id); +void cfs_free_nidlist(struct list_head *list); +int cfs_parse_nidlist(char *str, int len, struct list_head *list); +int cfs_print_nidlist(char *buffer, int count, struct list_head *list); +int cfs_match_nid(lnet_nid_t nid, struct list_head *list); + +int cfs_ip_addr_parse(char *str, int len, struct list_head *list); +int cfs_ip_addr_match(__u32 addr, struct list_head *list); +bool cfs_nidrange_is_contiguous(struct list_head *nidlist); +void cfs_nidrange_find_min_max(struct list_head *nidlist, char *min_nid, + char *max_nid, size_t nidstr_length); + +struct netstrfns { + __u32 nf_type; + char *nf_name; + char *nf_modname; + void (*nf_addr2str)(__u32 addr, char *str, size_t size); + int (*nf_str2addr)(const char *str, int nob, __u32 *addr); + int (*nf_parse_addrlist)(char *str, int len, + struct list_head *list); + int (*nf_print_addrlist)(char *buffer, int count, + struct list_head *list); + int (*nf_match_addr)(__u32 addr, struct list_head *list); + bool (*nf_is_contiguous)(struct list_head *nidlist); + void (*nf_min_max)(struct list_head *nidlist, __u32 *min_nid, + __u32 *max_nid); +}; + +#endif /* _LNET_NIDSTRINGS_H */ diff --git a/drivers/staging/lustre/include/uapi/linux/lnet/socklnd.h b/drivers/staging/lustre/include/uapi/linux/lnet/socklnd.h new file mode 100644 index 000000000000..6453e053fa99 --- /dev/null +++ b/drivers/staging/lustre/include/uapi/linux/lnet/socklnd.h @@ -0,0 +1,44 @@ +/* + * GPL HEADER START + * + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 only, + * as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License version 2 for more details (a copy is included + * in the LICENSE file that accompanied this code). + * + * You should have received a copy of the GNU General Public License + * version 2 along with this program; If not, see + * http://www.gnu.org/licenses/gpl-2.0.html + * + * GPL HEADER END + */ +/* + * Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved. + * Use is subject to license terms. + */ +/* + * This file is part of Lustre, http://www.lustre.org/ + * Lustre is a trademark of Sun Microsystems, Inc. + * + * #defines shared between socknal implementation and utilities + */ +#ifndef __UAPI_LNET_SOCKLND_H__ +#define __UAPI_LNET_SOCKLND_H__ + +#define SOCKLND_CONN_NONE (-1) +#define SOCKLND_CONN_ANY 0 +#define SOCKLND_CONN_CONTROL 1 +#define SOCKLND_CONN_BULK_IN 2 +#define SOCKLND_CONN_BULK_OUT 3 +#define SOCKLND_CONN_NTYPES 4 + +#define SOCKLND_CONN_ACK SOCKLND_CONN_BULK_IN + +#endif diff --git a/drivers/staging/lustre/include/uapi/linux/lustre/lustre_cfg.h b/drivers/staging/lustre/include/uapi/linux/lustre/lustre_cfg.h new file mode 100644 index 000000000000..11b51d93f64c --- /dev/null +++ b/drivers/staging/lustre/include/uapi/linux/lustre/lustre_cfg.h @@ -0,0 +1,261 @@ +/* + * GPL HEADER START + * + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 only, + * as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License version 2 for more details (a copy is included + * in the LICENSE file that accompanied this code). + * + * You should have received a copy of the GNU General Public License + * version 2 along with this program; If not, see + * http://www.gnu.org/licenses/gpl-2.0.html + * + * GPL HEADER END + */ +/* + * Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved. + * Use is subject to license terms. + * + * Copyright (c) 2012, Intel Corporation. + */ +/* + * This file is part of Lustre, http://www.lustre.org/ + * Lustre is a trademark of Sun Microsystems, Inc. + */ + +#ifndef _UAPI_LUSTRE_CFG_H_ +#define _UAPI_LUSTRE_CFG_H_ + +#include <linux/errno.h> +#include <linux/kernel.h> +#include <uapi/linux/lustre/lustre_user.h> + +/** \defgroup cfg cfg + * + * @{ + */ + +/* + * 1cf6 + * lcfG + */ +#define LUSTRE_CFG_VERSION 0x1cf60001 +#define LUSTRE_CFG_MAX_BUFCOUNT 8 + +#define LCFG_HDR_SIZE(count) \ + __ALIGN_KERNEL(offsetof(struct lustre_cfg, lcfg_buflens[(count)]), 8) + +/** If the LCFG_REQUIRED bit is set in a configuration command, + * then the client is required to understand this parameter + * in order to mount the filesystem. If it does not understand + * a REQUIRED command the client mount will fail. + */ +#define LCFG_REQUIRED 0x0001000 + +enum lcfg_command_type { + LCFG_ATTACH = 0x00cf001, /**< create a new obd instance */ + LCFG_DETACH = 0x00cf002, /**< destroy obd instance */ + LCFG_SETUP = 0x00cf003, /**< call type-specific setup */ + LCFG_CLEANUP = 0x00cf004, /**< call type-specific cleanup + */ + LCFG_ADD_UUID = 0x00cf005, /**< add a nid to a niduuid */ + LCFG_DEL_UUID = 0x00cf006, /**< remove a nid from + * a niduuid + */ + LCFG_MOUNTOPT = 0x00cf007, /**< create a profile + * (mdc, osc) + */ + LCFG_DEL_MOUNTOPT = 0x00cf008, /**< destroy a profile */ + LCFG_SET_TIMEOUT = 0x00cf009, /**< set obd_timeout */ + LCFG_SET_UPCALL = 0x00cf00a, /**< deprecated */ + LCFG_ADD_CONN = 0x00cf00b, /**< add a failover niduuid to + * an obd + */ + LCFG_DEL_CONN = 0x00cf00c, /**< remove a failover niduuid */ + LCFG_LOV_ADD_OBD = 0x00cf00d, /**< add an osc to a lov */ + LCFG_LOV_DEL_OBD = 0x00cf00e, /**< remove an osc from a lov */ + LCFG_PARAM = 0x00cf00f, /**< set a proc parameter */ + LCFG_MARKER = 0x00cf010, /**< metadata about next + * cfg rec + */ + LCFG_LOG_START = 0x00ce011, /**< mgc only, process a + * cfg log + */ + LCFG_LOG_END = 0x00ce012, /**< stop processing updates */ + LCFG_LOV_ADD_INA = 0x00ce013, /**< like LOV_ADD_OBD, + * inactive + */ + LCFG_ADD_MDC = 0x00cf014, /**< add an mdc to a lmv */ + LCFG_DEL_MDC = 0x00cf015, /**< remove an mdc from a lmv */ + LCFG_SPTLRPC_CONF = 0x00ce016, /**< security */ + LCFG_POOL_NEW = 0x00ce020, /**< create an ost pool name */ + LCFG_POOL_ADD = 0x00ce021, /**< add an ost to a pool */ + LCFG_POOL_REM = 0x00ce022, /**< remove an ost from a pool */ + LCFG_POOL_DEL = 0x00ce023, /**< destroy an ost pool name */ + LCFG_SET_LDLM_TIMEOUT = 0x00ce030, /**< set ldlm_timeout */ + LCFG_PRE_CLEANUP = 0x00cf031, /**< call type-specific pre + * cleanup cleanup + */ + LCFG_SET_PARAM = 0x00ce032, /**< use set_param syntax to set + * a proc parameters + */ +}; + +struct lustre_cfg_bufs { + void *lcfg_buf[LUSTRE_CFG_MAX_BUFCOUNT]; + __u32 lcfg_buflen[LUSTRE_CFG_MAX_BUFCOUNT]; + __u32 lcfg_bufcount; +}; + +struct lustre_cfg { + __u32 lcfg_version; + __u32 lcfg_command; + + __u32 lcfg_num; + __u32 lcfg_flags; + __u64 lcfg_nid; + __u32 lcfg_nal; /* not used any more */ + + __u32 lcfg_bufcount; + __u32 lcfg_buflens[0]; +}; + +enum cfg_record_type { + PORTALS_CFG_TYPE = 1, + LUSTRE_CFG_TYPE = 123, +}; + +#define LUSTRE_CFG_BUFLEN(lcfg, idx) \ + ((lcfg)->lcfg_bufcount <= (idx) ? 0 : (lcfg)->lcfg_buflens[(idx)]) + +static inline void lustre_cfg_bufs_set(struct lustre_cfg_bufs *bufs, + __u32 index, void *buf, __u32 buflen) +{ + if (index >= LUSTRE_CFG_MAX_BUFCOUNT) + return; + + if (!bufs) + return; + + if (bufs->lcfg_bufcount <= index) + bufs->lcfg_bufcount = index + 1; + + bufs->lcfg_buf[index] = buf; + bufs->lcfg_buflen[index] = buflen; +} + +static inline void lustre_cfg_bufs_set_string(struct lustre_cfg_bufs *bufs, + __u32 index, char *str) +{ + lustre_cfg_bufs_set(bufs, index, str, str ? strlen(str) + 1 : 0); +} + +static inline void lustre_cfg_bufs_reset(struct lustre_cfg_bufs *bufs, + char *name) +{ + memset((bufs), 0, sizeof(*bufs)); + if (name) + lustre_cfg_bufs_set_string(bufs, 0, name); +} + +static inline void *lustre_cfg_buf(struct lustre_cfg *lcfg, __u32 index) +{ + __u32 i; + size_t offset; + __u32 bufcount; + + if (!lcfg) + return NULL; + + bufcount = lcfg->lcfg_bufcount; + if (index >= bufcount) + return NULL; + + offset = LCFG_HDR_SIZE(lcfg->lcfg_bufcount); + for (i = 0; i < index; i++) + offset += __ALIGN_KERNEL(lcfg->lcfg_buflens[i], 8); + return (char *)lcfg + offset; +} + +static inline void lustre_cfg_bufs_init(struct lustre_cfg_bufs *bufs, + struct lustre_cfg *lcfg) +{ + __u32 i; + + bufs->lcfg_bufcount = lcfg->lcfg_bufcount; + for (i = 0; i < bufs->lcfg_bufcount; i++) { + bufs->lcfg_buflen[i] = lcfg->lcfg_buflens[i]; + bufs->lcfg_buf[i] = lustre_cfg_buf(lcfg, i); + } +} + +static inline __u32 lustre_cfg_len(__u32 bufcount, __u32 *buflens) +{ + __u32 i; + __u32 len; + + len = LCFG_HDR_SIZE(bufcount); + for (i = 0; i < bufcount; i++) + len += __ALIGN_KERNEL(buflens[i], 8); + + return __ALIGN_KERNEL(len, 8); +} + +static inline void lustre_cfg_init(struct lustre_cfg *lcfg, int cmd, + struct lustre_cfg_bufs *bufs) +{ + char *ptr; + __u32 i; + + lcfg->lcfg_version = LUSTRE_CFG_VERSION; + lcfg->lcfg_command = cmd; + lcfg->lcfg_bufcount = bufs->lcfg_bufcount; + + ptr = (char *)lcfg + LCFG_HDR_SIZE(lcfg->lcfg_bufcount); + for (i = 0; i < lcfg->lcfg_bufcount; i++) { + lcfg->lcfg_buflens[i] = bufs->lcfg_buflen[i]; + if (bufs->lcfg_buf[i]) { + memcpy(ptr, bufs->lcfg_buf[i], bufs->lcfg_buflen[i]); + ptr += __ALIGN_KERNEL(bufs->lcfg_buflen[i], 8); + } + } +} + +static inline int lustre_cfg_sanity_check(void *buf, size_t len) +{ + struct lustre_cfg *lcfg = (struct lustre_cfg *)buf; + + if (!lcfg) + return -EINVAL; + + /* check that the first bits of the struct are valid */ + if (len < LCFG_HDR_SIZE(0)) + return -EINVAL; + + if (lcfg->lcfg_version != LUSTRE_CFG_VERSION) + return -EINVAL; + + if (lcfg->lcfg_bufcount >= LUSTRE_CFG_MAX_BUFCOUNT) + return -EINVAL; + + /* check that the buflens are valid */ + if (len < LCFG_HDR_SIZE(lcfg->lcfg_bufcount)) + return -EINVAL; + + /* make sure all the pointers point inside the data */ + if (len < lustre_cfg_len(lcfg->lcfg_bufcount, lcfg->lcfg_buflens)) + return -EINVAL; + + return 0; +} + +/** @} cfg */ + +#endif /* _UAPI_LUSTRE_CFG_H_ */ diff --git a/drivers/staging/lustre/include/uapi/linux/lustre/lustre_fid.h b/drivers/staging/lustre/include/uapi/linux/lustre/lustre_fid.h new file mode 100644 index 000000000000..2e7a8d103777 --- /dev/null +++ b/drivers/staging/lustre/include/uapi/linux/lustre/lustre_fid.h @@ -0,0 +1,293 @@ +/* + * GPL HEADER START + * + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 only, + * as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License version 2 for more details (a copy is included + * in the LICENSE file that accompanied this code). + * + * You should have received a copy of the GNU General Public License + * version 2 along with this program; If not, see + * http://www.gnu.org/licenses/gpl-2.0.html + * + * GPL HEADER END + */ +/* + * Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved. + * Use is subject to license terms. + * + * Copyright (c) 2011, 2014, Intel Corporation. + * + * Copyright 2016 Cray Inc, all rights reserved. + * Author: Ben Evans. + * + * all fid manipulation functions go here + * + * FIDS are globally unique within a Lustre filessytem, and are made up + * of three parts: sequence, Object ID, and version. + * + */ +#ifndef _UAPI_LUSTRE_FID_H_ +#define _UAPI_LUSTRE_FID_H_ + +#include <uapi/linux/lustre/lustre_idl.h> + +/** returns fid object sequence */ +static inline __u64 fid_seq(const struct lu_fid *fid) +{ + return fid->f_seq; +} + +/** returns fid object id */ +static inline __u32 fid_oid(const struct lu_fid *fid) +{ + return fid->f_oid; +} + +/** returns fid object version */ +static inline __u32 fid_ver(const struct lu_fid *fid) +{ + return fid->f_ver; +} + +static inline void fid_zero(struct lu_fid *fid) +{ + memset(fid, 0, sizeof(*fid)); +} + +static inline __u64 fid_ver_oid(const struct lu_fid *fid) +{ + return (__u64)fid_ver(fid) << 32 | fid_oid(fid); +} + +static inline bool fid_seq_is_mdt0(__u64 seq) +{ + return seq == FID_SEQ_OST_MDT0; +} + +static inline bool fid_seq_is_mdt(__u64 seq) +{ + return seq == FID_SEQ_OST_MDT0 || seq >= FID_SEQ_NORMAL; +}; + +static inline bool fid_seq_is_echo(__u64 seq) +{ + return seq == FID_SEQ_ECHO; +} + +static inline bool fid_is_echo(const struct lu_fid *fid) +{ + return fid_seq_is_echo(fid_seq(fid)); +} + +static inline bool fid_seq_is_llog(__u64 seq) +{ + return seq == FID_SEQ_LLOG; +} + +static inline bool fid_is_llog(const struct lu_fid *fid) +{ + /* file with OID == 0 is not llog but contains last oid */ + return fid_seq_is_llog(fid_seq(fid)) && fid_oid(fid) > 0; +} + +static inline bool fid_seq_is_rsvd(__u64 seq) +{ + return seq > FID_SEQ_OST_MDT0 && seq <= FID_SEQ_RSVD; +}; + +static inline bool fid_seq_is_special(__u64 seq) +{ + return seq == FID_SEQ_SPECIAL; +}; + +static inline bool fid_seq_is_local_file(__u64 seq) +{ + return seq == FID_SEQ_LOCAL_FILE || + seq == FID_SEQ_LOCAL_NAME; +}; + +static inline bool fid_seq_is_root(__u64 seq) +{ + return seq == FID_SEQ_ROOT; +} + +static inline bool fid_seq_is_dot(__u64 seq) +{ + return seq == FID_SEQ_DOT_LUSTRE; +} + +static inline bool fid_seq_is_default(__u64 seq) +{ + return seq == FID_SEQ_LOV_DEFAULT; +} + +static inline bool fid_is_mdt0(const struct lu_fid *fid) +{ + return fid_seq_is_mdt0(fid_seq(fid)); +} + +/** + * Check if a fid is igif or not. + * \param fid the fid to be tested. + * \return true if the fid is an igif; otherwise false. + */ +static inline bool fid_seq_is_igif(__u64 seq) +{ + return seq >= FID_SEQ_IGIF && seq <= FID_SEQ_IGIF_MAX; +} + +static inline bool fid_is_igif(const struct lu_fid *fid) +{ + return fid_seq_is_igif(fid_seq(fid)); +} + +/** + * Check if a fid is idif or not. + * \param fid the fid to be tested. + * \return true if the fid is an idif; otherwise false. + */ +static inline bool fid_seq_is_idif(__u64 seq) +{ + return seq >= FID_SEQ_IDIF && seq <= FID_SEQ_IDIF_MAX; +} + +static inline bool fid_is_idif(const struct lu_fid *fid) +{ + return fid_seq_is_idif(fid_seq(fid)); +} + +static inline bool fid_is_local_file(const struct lu_fid *fid) +{ + return fid_seq_is_local_file(fid_seq(fid)); +} + +static inline bool fid_seq_is_norm(__u64 seq) +{ + return (seq >= FID_SEQ_NORMAL); +} + +static inline bool fid_is_norm(const struct lu_fid *fid) +{ + return fid_seq_is_norm(fid_seq(fid)); +} + +/* convert an OST objid into an IDIF FID SEQ number */ +static inline __u64 fid_idif_seq(__u64 id, __u32 ost_idx) +{ + return FID_SEQ_IDIF | (ost_idx << 16) | ((id >> 32) & 0xffff); +} + +/* convert a packed IDIF FID into an OST objid */ +static inline __u64 fid_idif_id(__u64 seq, __u32 oid, __u32 ver) +{ + return ((__u64)ver << 48) | ((seq & 0xffff) << 32) | oid; +} + +static inline __u32 idif_ost_idx(__u64 seq) +{ + return (seq >> 16) & 0xffff; +} + +/* extract ost index from IDIF FID */ +static inline __u32 fid_idif_ost_idx(const struct lu_fid *fid) +{ + return idif_ost_idx(fid_seq(fid)); +} + +/** + * Get inode number from an igif. + * \param fid an igif to get inode number from. + * \return inode number for the igif. + */ +static inline ino_t lu_igif_ino(const struct lu_fid *fid) +{ + return fid_seq(fid); +} + +/** + * Get inode generation from an igif. + * \param fid an igif to get inode generation from. + * \return inode generation for the igif. + */ +static inline __u32 lu_igif_gen(const struct lu_fid *fid) +{ + return fid_oid(fid); +} + +/** + * Build igif from the inode number/generation. + */ +static inline void lu_igif_build(struct lu_fid *fid, __u32 ino, __u32 gen) +{ + fid->f_seq = ino; + fid->f_oid = gen; + fid->f_ver = 0; +} + +/* + * Fids are transmitted across network (in the sender byte-ordering), + * and stored on disk in big-endian order. + */ +static inline void fid_cpu_to_le(struct lu_fid *dst, const struct lu_fid *src) +{ + dst->f_seq = __cpu_to_le64(fid_seq(src)); + dst->f_oid = __cpu_to_le32(fid_oid(src)); + dst->f_ver = __cpu_to_le32(fid_ver(src)); +} + +static inline void fid_le_to_cpu(struct lu_fid *dst, const struct lu_fid *src) +{ + dst->f_seq = __le64_to_cpu(fid_seq(src)); + dst->f_oid = __le32_to_cpu(fid_oid(src)); + dst->f_ver = __le32_to_cpu(fid_ver(src)); +} + +static inline void fid_cpu_to_be(struct lu_fid *dst, const struct lu_fid *src) +{ + dst->f_seq = __cpu_to_be64(fid_seq(src)); + dst->f_oid = __cpu_to_be32(fid_oid(src)); + dst->f_ver = __cpu_to_be32(fid_ver(src)); +} + +static inline void fid_be_to_cpu(struct lu_fid *dst, const struct lu_fid *src) +{ + dst->f_seq = __be64_to_cpu(fid_seq(src)); + dst->f_oid = __be32_to_cpu(fid_oid(src)); + dst->f_ver = __be32_to_cpu(fid_ver(src)); +} + +static inline bool fid_is_sane(const struct lu_fid *fid) +{ + return fid && ((fid_seq(fid) >= FID_SEQ_START && !fid_ver(fid)) || + fid_is_igif(fid) || fid_is_idif(fid) || + fid_seq_is_rsvd(fid_seq(fid))); +} + +static inline bool lu_fid_eq(const struct lu_fid *f0, const struct lu_fid *f1) +{ + return !memcmp(f0, f1, sizeof(*f0)); +} + +static inline int lu_fid_cmp(const struct lu_fid *f0, + const struct lu_fid *f1) +{ + if (fid_seq(f0) != fid_seq(f1)) + return fid_seq(f0) > fid_seq(f1) ? 1 : -1; + + if (fid_oid(f0) != fid_oid(f1)) + return fid_oid(f0) > fid_oid(f1) ? 1 : -1; + + if (fid_ver(f0) != fid_ver(f1)) + return fid_ver(f0) > fid_ver(f1) ? 1 : -1; + + return 0; +} +#endif diff --git a/drivers/staging/lustre/include/uapi/linux/lustre/lustre_fiemap.h b/drivers/staging/lustre/include/uapi/linux/lustre/lustre_fiemap.h new file mode 100644 index 000000000000..f5214dc33c60 --- /dev/null +++ b/drivers/staging/lustre/include/uapi/linux/lustre/lustre_fiemap.h @@ -0,0 +1,72 @@ +/* + * GPL HEADER START + * + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 only, + * as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License version 2 for more details (a copy is included + * in the LICENSE file that accompanied this code). + * + * You should have received a copy of the GNU General Public License + * version 2 along with this program; If not, see + * http://www.gnu.org/licenses/gpl-2.0.html + * + * GPL HEADER END + */ +/* + * Copyright (c) 2008, 2010, Oracle and/or its affiliates. All rights reserved. + * Use is subject to license terms. + * + * Copyright (c) 2014, 2015, Intel Corporation. + */ +/* + * This file is part of Lustre, http://www.lustre.org/ + * Lustre is a trademark of Sun Microsystems, Inc. + * + * FIEMAP data structures and flags. This header file will be used until + * fiemap.h is available in the upstream kernel. + * + * Author: Kalpak Shah <kalpak.shah@sun.com> + * Author: Andreas Dilger <adilger@sun.com> + */ + +#ifndef _LUSTRE_FIEMAP_H +#define _LUSTRE_FIEMAP_H + +#include <stddef.h> +#include <linux/fiemap.h> + +/* XXX: We use fiemap_extent::fe_reserved[0] */ +#define fe_device fe_reserved[0] + +static inline size_t fiemap_count_to_size(size_t extent_count) +{ + return sizeof(struct fiemap) + extent_count * + sizeof(struct fiemap_extent); +} + +static inline unsigned fiemap_size_to_count(size_t array_size) +{ + return (array_size - sizeof(struct fiemap)) / + sizeof(struct fiemap_extent); +} + +#define FIEMAP_FLAG_DEVICE_ORDER 0x40000000 /* return device ordered mapping */ + +#ifdef FIEMAP_FLAGS_COMPAT +#undef FIEMAP_FLAGS_COMPAT +#endif + +/* Lustre specific flags - use a high bit, don't conflict with upstream flag */ +#define FIEMAP_EXTENT_NO_DIRECT 0x40000000 /* Data mapping undefined */ +#define FIEMAP_EXTENT_NET 0x80000000 /* Data stored remotely. + * Sets NO_DIRECT flag + */ + +#endif /* _LUSTRE_FIEMAP_H */ diff --git a/drivers/staging/lustre/include/uapi/linux/lustre/lustre_idl.h b/drivers/staging/lustre/include/uapi/linux/lustre/lustre_idl.h new file mode 100644 index 000000000000..aac98dbcf6e3 --- /dev/null +++ b/drivers/staging/lustre/include/uapi/linux/lustre/lustre_idl.h @@ -0,0 +1,2688 @@ +/* + * GPL HEADER START + * + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 only, + * as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License version 2 for more details (a copy is included + * in the LICENSE file that accompanied this code). + * + * You should have received a copy of the GNU General Public License + * version 2 along with this program; If not, see + * http://www.gnu.org/licenses/gpl-2.0.html + * + * GPL HEADER END + */ +/* + * Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved. + * Use is subject to license terms. + * + * Copyright (c) 2011, 2015, Intel Corporation. + */ +/* + * This file is part of Lustre, http://www.lustre.org/ + * Lustre is a trademark of Sun Microsystems, Inc. + * + * Lustre wire protocol definitions. + */ + +/** \defgroup lustreidl lustreidl + * + * Lustre wire protocol definitions. + * + * ALL structs passing over the wire should be declared here. Structs + * that are used in interfaces with userspace should go in lustre_user.h. + * + * All structs being declared here should be built from simple fixed-size + * types (__u8, __u16, __u32, __u64) or be built from other types or + * structs also declared in this file. Similarly, all flags and magic + * values in those structs should also be declared here. This ensures + * that the Lustre wire protocol is not influenced by external dependencies. + * + * The only other acceptable items in this file are VERY SIMPLE accessor + * functions to avoid callers grubbing inside the structures. Nothing that + * depends on external functions or definitions should be in here. + * + * Structs must be properly aligned to put 64-bit values on an 8-byte + * boundary. Any structs being added here must also be added to + * utils/wirecheck.c and "make newwiretest" run to regenerate the + * utils/wiretest.c sources. This allows us to verify that wire structs + * have the proper alignment/size on all architectures. + * + * DO NOT CHANGE any of the structs, flags, values declared here and used + * in released Lustre versions. Some structs may have padding fields that + * can be used. Some structs might allow addition at the end (verify this + * in the code to ensure that new/old clients that see this larger struct + * do not fail, otherwise you need to implement protocol compatibility). + * + * @{ + */ + +#ifndef _LUSTRE_IDL_H_ +#define _LUSTRE_IDL_H_ + +#include <asm/byteorder.h> +#include <linux/types.h> + +#include <uapi/linux/lnet/lnet-types.h> +/* Defn's shared with user-space. */ +#include <uapi/linux/lustre/lustre_user.h> +#include <uapi/linux/lustre/lustre_ver.h> + +/* + * GENERAL STUFF + */ +/* FOO_REQUEST_PORTAL is for incoming requests on the FOO + * FOO_REPLY_PORTAL is for incoming replies on the FOO + * FOO_BULK_PORTAL is for incoming bulk on the FOO + */ + +/* Lustre service names are following the format + * service name + MDT + seq name + */ +#define LUSTRE_MDT_MAXNAMELEN 80 + +#define CONNMGR_REQUEST_PORTAL 1 +#define CONNMGR_REPLY_PORTAL 2 +/*#define OSC_REQUEST_PORTAL 3 */ +#define OSC_REPLY_PORTAL 4 +/*#define OSC_BULK_PORTAL 5 */ +#define OST_IO_PORTAL 6 +#define OST_CREATE_PORTAL 7 +#define OST_BULK_PORTAL 8 +/*#define MDC_REQUEST_PORTAL 9 */ +#define MDC_REPLY_PORTAL 10 +/*#define MDC_BULK_PORTAL 11 */ +#define MDS_REQUEST_PORTAL 12 +/*#define MDS_REPLY_PORTAL 13 */ +#define MDS_BULK_PORTAL 14 +#define LDLM_CB_REQUEST_PORTAL 15 +#define LDLM_CB_REPLY_PORTAL 16 +#define LDLM_CANCEL_REQUEST_PORTAL 17 +#define LDLM_CANCEL_REPLY_PORTAL 18 +/*#define PTLBD_REQUEST_PORTAL 19 */ +/*#define PTLBD_REPLY_PORTAL 20 */ +/*#define PTLBD_BULK_PORTAL 21 */ +#define MDS_SETATTR_PORTAL 22 +#define MDS_READPAGE_PORTAL 23 +#define OUT_PORTAL 24 + +#define MGC_REPLY_PORTAL 25 +#define MGS_REQUEST_PORTAL 26 +#define MGS_REPLY_PORTAL 27 +#define OST_REQUEST_PORTAL 28 +#define FLD_REQUEST_PORTAL 29 +#define SEQ_METADATA_PORTAL 30 +#define SEQ_DATA_PORTAL 31 +#define SEQ_CONTROLLER_PORTAL 32 +#define MGS_BULK_PORTAL 33 + +/* Portal 63 is reserved for the Cray Inc DVS - nic@cray.com, roe@cray.com, + * n8851@cray.com + */ + +/* packet types */ +#define PTL_RPC_MSG_REQUEST 4711 +#define PTL_RPC_MSG_ERR 4712 +#define PTL_RPC_MSG_REPLY 4713 + +/* DON'T use swabbed values of MAGIC as magic! */ +#define LUSTRE_MSG_MAGIC_V2 0x0BD00BD3 +#define LUSTRE_MSG_MAGIC_V2_SWABBED 0xD30BD00B + +#define LUSTRE_MSG_MAGIC LUSTRE_MSG_MAGIC_V2 + +#define PTLRPC_MSG_VERSION 0x00000003 +#define LUSTRE_VERSION_MASK 0xffff0000 +#define LUSTRE_OBD_VERSION 0x00010000 +#define LUSTRE_MDS_VERSION 0x00020000 +#define LUSTRE_OST_VERSION 0x00030000 +#define LUSTRE_DLM_VERSION 0x00040000 +#define LUSTRE_LOG_VERSION 0x00050000 +#define LUSTRE_MGS_VERSION 0x00060000 + +/** + * Describes a range of sequence, lsr_start is included but lsr_end is + * not in the range. + * Same structure is used in fld module where lsr_index field holds mdt id + * of the home mdt. + */ +struct lu_seq_range { + __u64 lsr_start; + __u64 lsr_end; + __u32 lsr_index; + __u32 lsr_flags; +}; + +struct lu_seq_range_array { + __u32 lsra_count; + __u32 lsra_padding; + struct lu_seq_range lsra_lsr[0]; +}; + +#define LU_SEQ_RANGE_MDT 0x0 +#define LU_SEQ_RANGE_OST 0x1 +#define LU_SEQ_RANGE_ANY 0x3 + +#define LU_SEQ_RANGE_MASK 0x3 + +/** \defgroup lu_fid lu_fid + * @{ + */ + +/** + * Flags for lustre_mdt_attrs::lma_compat and lustre_mdt_attrs::lma_incompat. + * Deprecated since HSM and SOM attributes are now stored in separate on-disk + * xattr. + */ +enum lma_compat { + LMAC_HSM = 0x00000001, +/* LMAC_SOM = 0x00000002, obsolete since 2.8.0 */ + LMAC_NOT_IN_OI = 0x00000004, /* the object does NOT need OI mapping */ + LMAC_FID_ON_OST = 0x00000008, /* For OST-object, its OI mapping is + * under /O/<seq>/d<x>. + */ +}; + +/** + * Masks for all features that should be supported by a Lustre version to + * access a specific file. + * This information is stored in lustre_mdt_attrs::lma_incompat. + */ +enum lma_incompat { + LMAI_RELEASED = 0x00000001, /* file is released */ + LMAI_AGENT = 0x00000002, /* agent inode */ + LMAI_REMOTE_PARENT = 0x00000004, /* the parent of the object + * is on the remote MDT + */ +}; + +#define LMA_INCOMPAT_SUPP (LMAI_AGENT | LMAI_REMOTE_PARENT) + +/** + * fid constants + */ +enum { + /** LASTID file has zero OID */ + LUSTRE_FID_LASTID_OID = 0UL, + /** initial fid id value */ + LUSTRE_FID_INIT_OID = 1UL +}; + +/* copytool uses a 32b bitmask field to encode archive-Ids during register + * with MDT thru kuc. + * archive num = 0 => all + * archive num from 1 to 32 + */ +#define LL_HSM_MAX_ARCHIVE (sizeof(__u32) * 8) + +/** + * Note that reserved SEQ numbers below 12 will conflict with ldiskfs + * inodes in the IGIF namespace, so these reserved SEQ numbers can be + * used for other purposes and not risk collisions with existing inodes. + * + * Different FID Format + * http://wiki.old.lustre.org/index.php/Architecture_-_Interoperability_fids_zfs + */ +enum fid_seq { + FID_SEQ_OST_MDT0 = 0, + FID_SEQ_LLOG = 1, /* unnamed llogs */ + FID_SEQ_ECHO = 2, + FID_SEQ_OST_MDT1 = 3, + FID_SEQ_OST_MAX = 9, /* Max MDT count before OST_on_FID */ + FID_SEQ_LLOG_NAME = 10, /* named llogs */ + FID_SEQ_RSVD = 11, + FID_SEQ_IGIF = 12, + FID_SEQ_IGIF_MAX = 0x0ffffffffULL, + FID_SEQ_IDIF = 0x100000000ULL, + FID_SEQ_IDIF_MAX = 0x1ffffffffULL, + /* Normal FID sequence starts from this value, i.e. 1<<33 */ + FID_SEQ_START = 0x200000000ULL, + /* sequence for local pre-defined FIDs listed in local_oid */ + FID_SEQ_LOCAL_FILE = 0x200000001ULL, + FID_SEQ_DOT_LUSTRE = 0x200000002ULL, + /* sequence is used for local named objects FIDs generated + * by local_object_storage library + */ + FID_SEQ_LOCAL_NAME = 0x200000003ULL, + /* Because current FLD will only cache the fid sequence, instead + * of oid on the client side, if the FID needs to be exposed to + * clients sides, it needs to make sure all of fids under one + * sequence will be located in one MDT. + */ + FID_SEQ_SPECIAL = 0x200000004ULL, + FID_SEQ_QUOTA = 0x200000005ULL, + FID_SEQ_QUOTA_GLB = 0x200000006ULL, + FID_SEQ_ROOT = 0x200000007ULL, /* Located on MDT0 */ + FID_SEQ_NORMAL = 0x200000400ULL, + FID_SEQ_LOV_DEFAULT = 0xffffffffffffffffULL +}; + +#define OBIF_OID_MAX_BITS 32 +#define OBIF_MAX_OID (1ULL << OBIF_OID_MAX_BITS) +#define OBIF_OID_MASK ((1ULL << OBIF_OID_MAX_BITS) - 1) +#define IDIF_OID_MAX_BITS 48 +#define IDIF_MAX_OID (1ULL << IDIF_OID_MAX_BITS) +#define IDIF_OID_MASK ((1ULL << IDIF_OID_MAX_BITS) - 1) + +/** OID for FID_SEQ_SPECIAL */ +enum special_oid { + /* Big Filesystem Lock to serialize rename operations */ + FID_OID_SPECIAL_BFL = 1UL, +}; + +/** OID for FID_SEQ_DOT_LUSTRE */ +enum dot_lustre_oid { + FID_OID_DOT_LUSTRE = 1UL, + FID_OID_DOT_LUSTRE_OBF = 2UL, +}; + +/** OID for FID_SEQ_ROOT */ +enum root_oid { + FID_OID_ROOT = 1UL, + FID_OID_ECHO_ROOT = 2UL, +}; + +/** @} lu_fid */ + +/** \defgroup lu_dir lu_dir + * @{ + */ + +/** + * Enumeration of possible directory entry attributes. + * + * Attributes follow directory entry header in the order they appear in this + * enumeration. + */ +enum lu_dirent_attrs { + LUDA_FID = 0x0001, + LUDA_TYPE = 0x0002, + LUDA_64BITHASH = 0x0004, +}; + +/** + * Layout of readdir pages, as transmitted on wire. + */ +struct lu_dirent { + /** valid if LUDA_FID is set. */ + struct lu_fid lde_fid; + /** a unique entry identifier: a hash or an offset. */ + __u64 lde_hash; + /** total record length, including all attributes. */ + __u16 lde_reclen; + /** name length */ + __u16 lde_namelen; + /** optional variable size attributes following this entry. + * taken from enum lu_dirent_attrs. + */ + __u32 lde_attrs; + /** name is followed by the attributes indicated in ->ldp_attrs, in + * their natural order. After the last attribute, padding bytes are + * added to make ->lde_reclen a multiple of 8. + */ + char lde_name[0]; +}; + +/* + * Definitions of optional directory entry attributes formats. + * + * Individual attributes do not have their length encoded in a generic way. It + * is assumed that consumer of an attribute knows its format. This means that + * it is impossible to skip over an unknown attribute, except by skipping over all + * remaining attributes (by using ->lde_reclen), which is not too + * constraining, because new server versions will append new attributes at + * the end of an entry. + */ + +/** + * Fid directory attribute: a fid of an object referenced by the entry. This + * will be almost always requested by the client and supplied by the server. + * + * Aligned to 8 bytes. + */ +/* To have compatibility with 1.8, lets have fid in lu_dirent struct. */ + +/** + * File type. + * + * Aligned to 2 bytes. + */ +struct luda_type { + __u16 lt_type; +}; + +#ifndef IFSHIFT +#define IFSHIFT 12 +#endif + +#ifndef IFTODT +#define IFTODT(type) (((type) & S_IFMT) >> IFSHIFT) +#endif +#ifndef DTTOIF +#define DTTOIF(dirtype) ((dirtype) << IFSHIFT) +#endif + +struct lu_dirpage { + __le64 ldp_hash_start; + __le64 ldp_hash_end; + __le32 ldp_flags; + __le32 ldp_pad0; + struct lu_dirent ldp_entries[0]; +}; + +enum lu_dirpage_flags { + /** + * dirpage contains no entry. + */ + LDF_EMPTY = 1 << 0, + /** + * last entry's lde_hash equals ldp_hash_end. + */ + LDF_COLLIDE = 1 << 1 +}; + +static inline struct lu_dirent *lu_dirent_start(struct lu_dirpage *dp) +{ + if (__le32_to_cpu(dp->ldp_flags) & LDF_EMPTY) + return NULL; + else + return dp->ldp_entries; +} + +static inline struct lu_dirent *lu_dirent_next(struct lu_dirent *ent) +{ + struct lu_dirent *next; + + if (__le16_to_cpu(ent->lde_reclen) != 0) + next = ((void *)ent) + __le16_to_cpu(ent->lde_reclen); + else + next = NULL; + + return next; +} + +static inline size_t lu_dirent_calc_size(size_t namelen, __u16 attr) +{ + size_t size; + + if (attr & LUDA_TYPE) { + const size_t align = sizeof(struct luda_type) - 1; + + size = (sizeof(struct lu_dirent) + namelen + align) & ~align; + size += sizeof(struct luda_type); + } else { + size = sizeof(struct lu_dirent) + namelen; + } + + return (size + 7) & ~7; +} + +#define MDS_DIR_END_OFF 0xfffffffffffffffeULL + +/** + * MDS_READPAGE page size + * + * This is the directory page size packed in MDS_READPAGE RPC. + * It's different than PAGE_SIZE because the client needs to + * access the struct lu_dirpage header packed at the beginning of + * the "page" and without this there isn't any way to know find the + * lu_dirpage header is if client and server PAGE_SIZE differ. + */ +#define LU_PAGE_SHIFT 12 +#define LU_PAGE_SIZE (1UL << LU_PAGE_SHIFT) +#define LU_PAGE_MASK (~(LU_PAGE_SIZE - 1)) + +#define LU_PAGE_COUNT (1 << (PAGE_SHIFT - LU_PAGE_SHIFT)) + +/** @} lu_dir */ + +struct lustre_handle { + __u64 cookie; +}; + +#define DEAD_HANDLE_MAGIC 0xdeadbeefcafebabeULL + +static inline bool lustre_handle_is_used(const struct lustre_handle *lh) +{ + return lh->cookie != 0ull; +} + +static inline bool lustre_handle_equal(const struct lustre_handle *lh1, + const struct lustre_handle *lh2) +{ + return lh1->cookie == lh2->cookie; +} + +static inline void lustre_handle_copy(struct lustre_handle *tgt, + const struct lustre_handle *src) +{ + tgt->cookie = src->cookie; +} + +/* flags for lm_flags */ +#define MSGHDR_AT_SUPPORT 0x1 +#define MSGHDR_CKSUM_INCOMPAT18 0x2 + +#define lustre_msg lustre_msg_v2 +/* we depend on this structure to be 8-byte aligned */ +/* this type is only endian-adjusted in lustre_unpack_msg() */ +struct lustre_msg_v2 { + __u32 lm_bufcount; + __u32 lm_secflvr; + __u32 lm_magic; + __u32 lm_repsize; + __u32 lm_cksum; + __u32 lm_flags; + __u32 lm_padding_2; + __u32 lm_padding_3; + __u32 lm_buflens[0]; +}; + +/* without gss, ptlrpc_body is put at the first buffer. */ +#define PTLRPC_NUM_VERSIONS 4 + +struct ptlrpc_body_v3 { + struct lustre_handle pb_handle; + __u32 pb_type; + __u32 pb_version; + __u32 pb_opc; + __u32 pb_status; + __u64 pb_last_xid; /* highest replied XID without lower unreplied XID */ + __u16 pb_tag; /* virtual slot idx for multiple modifying RPCs */ + __u16 pb_padding0; + __u32 pb_padding1; + __u64 pb_last_committed; + __u64 pb_transno; + __u32 pb_flags; + __u32 pb_op_flags; + __u32 pb_conn_cnt; + __u32 pb_timeout; /* for req, the deadline, for rep, the service est */ + __u32 pb_service_time; /* for rep, actual service time */ + __u32 pb_limit; + __u64 pb_slv; + /* VBR: pre-versions */ + __u64 pb_pre_versions[PTLRPC_NUM_VERSIONS]; + __u64 pb_mbits; /**< match bits for bulk request */ + /* padding for future needs */ + __u64 pb_padding64_0; + __u64 pb_padding64_1; + __u64 pb_padding64_2; + char pb_jobid[LUSTRE_JOBID_SIZE]; +}; + +#define ptlrpc_body ptlrpc_body_v3 + +struct ptlrpc_body_v2 { + struct lustre_handle pb_handle; + __u32 pb_type; + __u32 pb_version; + __u32 pb_opc; + __u32 pb_status; + __u64 pb_last_xid; /* highest replied XID without lower unreplied XID */ + __u16 pb_tag; /* virtual slot idx for multiple modifying RPCs */ + __u16 pb_padding0; + __u32 pb_padding1; + __u64 pb_last_committed; + __u64 pb_transno; + __u32 pb_flags; + __u32 pb_op_flags; + __u32 pb_conn_cnt; + __u32 pb_timeout; /* for req, the deadline, for rep, the service est */ + __u32 pb_service_time; /* for rep, actual service time, also used for + * net_latency of req + */ + __u32 pb_limit; + __u64 pb_slv; + /* VBR: pre-versions */ + __u64 pb_pre_versions[PTLRPC_NUM_VERSIONS]; + __u64 pb_mbits; /**< unused in V2 */ + /* padding for future needs */ + __u64 pb_padding64_0; + __u64 pb_padding64_1; + __u64 pb_padding64_2; +}; + +/* message body offset for lustre_msg_v2 */ +/* ptlrpc body offset in all request/reply messages */ +#define MSG_PTLRPC_BODY_OFF 0 + +/* normal request/reply message record offset */ +#define REQ_REC_OFF 1 +#define REPLY_REC_OFF 1 + +/* ldlm request message body offset */ +#define DLM_LOCKREQ_OFF 1 /* lockreq offset */ +#define DLM_REQ_REC_OFF 2 /* normal dlm request record offset */ + +/* ldlm intent lock message body offset */ +#define DLM_INTENT_IT_OFF 2 /* intent lock it offset */ +#define DLM_INTENT_REC_OFF 3 /* intent lock record offset */ + +/* ldlm reply message body offset */ +#define DLM_LOCKREPLY_OFF 1 /* lockrep offset */ +#define DLM_REPLY_REC_OFF 2 /* reply record offset */ + +/** only use in req->rq_{req,rep}_swab_mask */ +#define MSG_PTLRPC_HEADER_OFF 31 + +/* Flags that are operation-specific go in the top 16 bits. */ +#define MSG_OP_FLAG_MASK 0xffff0000 +#define MSG_OP_FLAG_SHIFT 16 + +/* Flags that apply to all requests are in the bottom 16 bits */ +#define MSG_GEN_FLAG_MASK 0x0000ffff +#define MSG_LAST_REPLAY 0x0001 +#define MSG_RESENT 0x0002 +#define MSG_REPLAY 0x0004 +/* #define MSG_AT_SUPPORT 0x0008 + * This was used in early prototypes of adaptive timeouts, and while there + * shouldn't be any users of that code there also isn't a need for using this + * bits. Defer usage until at least 1.10 to avoid potential conflict. + */ +#define MSG_DELAY_REPLAY 0x0010 +#define MSG_VERSION_REPLAY 0x0020 +#define MSG_REQ_REPLAY_DONE 0x0040 +#define MSG_LOCK_REPLAY_DONE 0x0080 + +/* + * Flags for all connect opcodes (MDS_CONNECT, OST_CONNECT) + */ + +#define MSG_CONNECT_RECOVERING 0x00000001 +#define MSG_CONNECT_RECONNECT 0x00000002 +#define MSG_CONNECT_REPLAYABLE 0x00000004 +/*#define MSG_CONNECT_PEER 0x8 */ +#define MSG_CONNECT_LIBCLIENT 0x00000010 +#define MSG_CONNECT_INITIAL 0x00000020 +#define MSG_CONNECT_ASYNC 0x00000040 +#define MSG_CONNECT_NEXT_VER 0x00000080 /* use next version of lustre_msg */ +#define MSG_CONNECT_TRANSNO 0x00000100 /* report transno */ + +/* Connect flags */ +#define OBD_CONNECT_RDONLY 0x1ULL /*client has read-only access*/ +#define OBD_CONNECT_INDEX 0x2ULL /*connect specific LOV idx */ +#define OBD_CONNECT_MDS 0x4ULL /*connect from MDT to OST */ +#define OBD_CONNECT_GRANT 0x8ULL /*OSC gets grant at connect */ +#define OBD_CONNECT_SRVLOCK 0x10ULL /*server takes locks for cli */ +#define OBD_CONNECT_VERSION 0x20ULL /*Lustre versions in ocd */ +#define OBD_CONNECT_REQPORTAL 0x40ULL /*Separate non-IO req portal */ +#define OBD_CONNECT_ACL 0x80ULL /*access control lists */ +#define OBD_CONNECT_XATTR 0x100ULL /*client use extended attr */ +#define OBD_CONNECT_CROW 0x200ULL /*MDS+OST create obj on write*/ +#define OBD_CONNECT_TRUNCLOCK 0x400ULL /*locks on server for punch */ +#define OBD_CONNECT_TRANSNO 0x800ULL /*replay sends init transno */ +#define OBD_CONNECT_IBITS 0x1000ULL /*support for inodebits locks*/ +#define OBD_CONNECT_JOIN 0x2000ULL /*files can be concatenated. + *We do not support JOIN FILE + *anymore, reserve this flags + *just for preventing such bit + *to be reused. + */ +#define OBD_CONNECT_ATTRFID 0x4000ULL /*Server can GetAttr By Fid*/ +#define OBD_CONNECT_NODEVOH 0x8000ULL /*No open hndl on specl nodes*/ +#define OBD_CONNECT_RMT_CLIENT 0x10000ULL /* Remote client, never used + * in production. Removed in + * 2.9. Keep this flag to + * avoid reuse. + */ +#define OBD_CONNECT_RMT_CLIENT_FORCE 0x20000ULL /* Remote client by force, + * never used in production. + * Removed in 2.9. Keep this + * flag to avoid reuse + */ +#define OBD_CONNECT_BRW_SIZE 0x40000ULL /*Max bytes per rpc */ +#define OBD_CONNECT_QUOTA64 0x80000ULL /*Not used since 2.4 */ +#define OBD_CONNECT_MDS_CAPA 0x100000ULL /*MDS capability */ +#define OBD_CONNECT_OSS_CAPA 0x200000ULL /*OSS capability */ +#define OBD_CONNECT_CANCELSET 0x400000ULL /*Early batched cancels. */ +#define OBD_CONNECT_SOM 0x800000ULL /*Size on MDS */ +#define OBD_CONNECT_AT 0x1000000ULL /*client uses AT */ +#define OBD_CONNECT_LRU_RESIZE 0x2000000ULL /*LRU resize feature. */ +#define OBD_CONNECT_MDS_MDS 0x4000000ULL /*MDS-MDS connection */ +#define OBD_CONNECT_REAL 0x8000000ULL /* obsolete since 2.8 */ +#define OBD_CONNECT_CHANGE_QS 0x10000000ULL /*Not used since 2.4 */ +#define OBD_CONNECT_CKSUM 0x20000000ULL /*support several cksum algos*/ +#define OBD_CONNECT_FID 0x40000000ULL /*FID is supported by server */ +#define OBD_CONNECT_VBR 0x80000000ULL /*version based recovery */ +#define OBD_CONNECT_LOV_V3 0x100000000ULL /*client supports LOV v3 EA */ +#define OBD_CONNECT_GRANT_SHRINK 0x200000000ULL /* support grant shrink */ +#define OBD_CONNECT_SKIP_ORPHAN 0x400000000ULL /* don't reuse orphan objids */ +#define OBD_CONNECT_MAX_EASIZE 0x800000000ULL /* preserved for large EA */ +#define OBD_CONNECT_FULL20 0x1000000000ULL /* it is 2.0 client */ +#define OBD_CONNECT_LAYOUTLOCK 0x2000000000ULL /* client uses layout lock */ +#define OBD_CONNECT_64BITHASH 0x4000000000ULL /* client supports 64-bits + * directory hash + */ +#define OBD_CONNECT_MAXBYTES 0x8000000000ULL /* max stripe size */ +#define OBD_CONNECT_IMP_RECOV 0x10000000000ULL /* imp recovery support */ +#define OBD_CONNECT_JOBSTATS 0x20000000000ULL /* jobid in ptlrpc_body */ +#define OBD_CONNECT_UMASK 0x40000000000ULL /* create uses client umask */ +#define OBD_CONNECT_EINPROGRESS 0x80000000000ULL /* client handles -EINPROGRESS + * RPC error properly + */ +#define OBD_CONNECT_GRANT_PARAM 0x100000000000ULL/* extra grant params used for + * finer space reservation + */ +#define OBD_CONNECT_FLOCK_OWNER 0x200000000000ULL /* for the fixed 1.8 + * policy and 2.x server + */ +#define OBD_CONNECT_LVB_TYPE 0x400000000000ULL /* variable type of LVB */ +#define OBD_CONNECT_NANOSEC_TIME 0x800000000000ULL /* nanosecond timestamps */ +#define OBD_CONNECT_LIGHTWEIGHT 0x1000000000000ULL/* lightweight connection */ +#define OBD_CONNECT_SHORTIO 0x2000000000000ULL/* short io */ +#define OBD_CONNECT_PINGLESS 0x4000000000000ULL/* pings not required */ +#define OBD_CONNECT_FLOCK_DEAD 0x8000000000000ULL/* flock deadlock detection */ +#define OBD_CONNECT_DISP_STRIPE 0x10000000000000ULL/*create stripe disposition*/ +#define OBD_CONNECT_OPEN_BY_FID 0x20000000000000ULL /* open by fid won't pack + * name in request + */ +#define OBD_CONNECT_LFSCK 0x40000000000000ULL/* support online LFSCK */ +#define OBD_CONNECT_UNLINK_CLOSE 0x100000000000000ULL/* close file in unlink */ +#define OBD_CONNECT_MULTIMODRPCS 0x200000000000000ULL /* support multiple modify + * RPCs in parallel + */ +#define OBD_CONNECT_DIR_STRIPE 0x400000000000000ULL/* striped DNE dir */ +#define OBD_CONNECT_SUBTREE 0x800000000000000ULL /* fileset mount */ +#define OBD_CONNECT_LOCK_AHEAD 0x1000000000000000ULL /* lock ahead */ +/** bulk matchbits is sent within ptlrpc_body */ +#define OBD_CONNECT_BULK_MBITS 0x2000000000000000ULL +#define OBD_CONNECT_OBDOPACK 0x4000000000000000ULL /* compact OUT obdo */ +#define OBD_CONNECT_FLAGS2 0x8000000000000000ULL /* second flags word */ + +/* XXX README XXX: + * Please DO NOT add flag values here before first ensuring that this same + * flag value is not in use on some other branch. Please clear any such + * changes with senior engineers before starting to use a new flag. Then, + * submit a small patch against EVERY branch that ONLY adds the new flag, + * updates obd_connect_names[] for lprocfs_rd_connect_flags(), adds the + * flag to check_obd_connect_data(), and updates wiretests accordingly, so it + * can be approved and landed easily to reserve the flag for future use. + */ + +/* The MNE_SWAB flag is overloading the MDS_MDS bit only for the MGS + * connection. It is a temporary bug fix for Imperative Recovery interop + * between 2.2 and 2.3 x86/ppc nodes, and can be removed when interop for + * 2.2 clients/servers is no longer needed. LU-1252/LU-1644. + */ +#define OBD_CONNECT_MNE_SWAB OBD_CONNECT_MDS_MDS + +#define OCD_HAS_FLAG(ocd, flg) \ + (!!((ocd)->ocd_connect_flags & OBD_CONNECT_##flg)) + +/* Features required for this version of the client to work with server */ +#define CLIENT_CONNECT_MDT_REQD (OBD_CONNECT_IBITS | OBD_CONNECT_FID | \ + OBD_CONNECT_FULL20) + +/* This structure is used for both request and reply. + * + * If we eventually have separate connect data for different types, which we + * almost certainly will, then perhaps we stick a union in here. + */ +struct obd_connect_data { + __u64 ocd_connect_flags; /* OBD_CONNECT_* per above */ + __u32 ocd_version; /* lustre release version number */ + __u32 ocd_grant; /* initial cache grant amount (bytes) */ + __u32 ocd_index; /* LOV index to connect to */ + __u32 ocd_brw_size; /* Maximum BRW size in bytes */ + __u64 ocd_ibits_known; /* inode bits this client understands */ + __u8 ocd_blocksize; /* log2 of the backend filesystem blocksize */ + __u8 ocd_inodespace; /* log2 of the per-inode space consumption */ + __u16 ocd_grant_extent; /* per-extent grant overhead, in 1K blocks */ + __u32 ocd_unused; /* also fix lustre_swab_connect */ + __u64 ocd_transno; /* first transno from client to be replayed */ + __u32 ocd_group; /* MDS group on OST */ + __u32 ocd_cksum_types; /* supported checksum algorithms */ + __u32 ocd_max_easize; /* How big LOV EA can be on MDS */ + __u32 ocd_instance; /* instance # of this target */ + __u64 ocd_maxbytes; /* Maximum stripe size in bytes */ + /* Fields after ocd_maxbytes are only accessible by the receiver + * if the corresponding flag in ocd_connect_flags is set. Accessing + * any field after ocd_maxbytes on the receiver without a valid flag + * may result in out-of-bound memory access and kernel oops. + */ + __u16 ocd_maxmodrpcs; /* Maximum modify RPCs in parallel */ + __u16 padding0; /* added 2.1.0. also fix lustre_swab_connect */ + __u32 padding1; /* added 2.1.0. also fix lustre_swab_connect */ + __u64 ocd_connect_flags2; + __u64 padding3; /* added 2.1.0. also fix lustre_swab_connect */ + __u64 padding4; /* added 2.1.0. also fix lustre_swab_connect */ + __u64 padding5; /* added 2.1.0. also fix lustre_swab_connect */ + __u64 padding6; /* added 2.1.0. also fix lustre_swab_connect */ + __u64 padding7; /* added 2.1.0. also fix lustre_swab_connect */ + __u64 padding8; /* added 2.1.0. also fix lustre_swab_connect */ + __u64 padding9; /* added 2.1.0. also fix lustre_swab_connect */ + __u64 paddingA; /* added 2.1.0. also fix lustre_swab_connect */ + __u64 paddingB; /* added 2.1.0. also fix lustre_swab_connect */ + __u64 paddingC; /* added 2.1.0. also fix lustre_swab_connect */ + __u64 paddingD; /* added 2.1.0. also fix lustre_swab_connect */ + __u64 paddingE; /* added 2.1.0. also fix lustre_swab_connect */ + __u64 paddingF; /* added 2.1.0. also fix lustre_swab_connect */ +}; + +/* XXX README XXX: + * Please DO NOT use any fields here before first ensuring that this same + * field is not in use on some other branch. Please clear any such changes + * with senior engineers before starting to use a new field. Then, submit + * a small patch against EVERY branch that ONLY adds the new field along with + * the matching OBD_CONNECT flag, so that can be approved and landed easily to + * reserve the flag for future use. + */ + +/* + * Supported checksum algorithms. Up to 32 checksum types are supported. + * (32-bit mask stored in obd_connect_data::ocd_cksum_types) + * Please update DECLARE_CKSUM_NAME/OBD_CKSUM_ALL in obd.h when adding a new + * algorithm and also the OBD_FL_CKSUM* flags. + */ +enum cksum_type { + OBD_CKSUM_CRC32 = 0x00000001, + OBD_CKSUM_ADLER = 0x00000002, + OBD_CKSUM_CRC32C = 0x00000004, +}; + +/* + * OST requests: OBDO & OBD request records + */ + +/* opcodes */ +enum ost_cmd { + OST_REPLY = 0, /* reply ? */ + OST_GETATTR = 1, + OST_SETATTR = 2, + OST_READ = 3, + OST_WRITE = 4, + OST_CREATE = 5, + OST_DESTROY = 6, + OST_GET_INFO = 7, + OST_CONNECT = 8, + OST_DISCONNECT = 9, + OST_PUNCH = 10, + OST_OPEN = 11, + OST_CLOSE = 12, + OST_STATFS = 13, + OST_SYNC = 16, + OST_SET_INFO = 17, + OST_QUOTACHECK = 18, /* not used since 2.4 */ + OST_QUOTACTL = 19, + OST_QUOTA_ADJUST_QUNIT = 20, /* not used since 2.4 */ + OST_LAST_OPC +}; +#define OST_FIRST_OPC OST_REPLY + +enum obdo_flags { + OBD_FL_INLINEDATA = 0x00000001, + OBD_FL_OBDMDEXISTS = 0x00000002, + OBD_FL_DELORPHAN = 0x00000004, /* if set in o_flags delete orphans */ + OBD_FL_NORPC = 0x00000008, /* set in o_flags do in OSC not OST */ + OBD_FL_IDONLY = 0x00000010, /* set in o_flags only adjust obj id*/ + OBD_FL_RECREATE_OBJS = 0x00000020, /* recreate missing obj */ + OBD_FL_DEBUG_CHECK = 0x00000040, /* echo client/server debug check */ + OBD_FL_NO_USRQUOTA = 0x00000100, /* the object's owner is over quota */ + OBD_FL_NO_GRPQUOTA = 0x00000200, /* the object's group is over quota */ + OBD_FL_CREATE_CROW = 0x00000400, /* object should be create on write */ + OBD_FL_SRVLOCK = 0x00000800, /* delegate DLM locking to server */ + OBD_FL_CKSUM_CRC32 = 0x00001000, /* CRC32 checksum type */ + OBD_FL_CKSUM_ADLER = 0x00002000, /* ADLER checksum type */ + OBD_FL_CKSUM_CRC32C = 0x00004000, /* CRC32C checksum type */ + OBD_FL_CKSUM_RSVD2 = 0x00008000, /* for future cksum types */ + OBD_FL_CKSUM_RSVD3 = 0x00010000, /* for future cksum types */ + OBD_FL_SHRINK_GRANT = 0x00020000, /* object shrink the grant */ + OBD_FL_MMAP = 0x00040000, /* object is mmapped on the client. + * XXX: obsoleted - reserved for old + * clients prior than 2.2 + */ + OBD_FL_RECOV_RESEND = 0x00080000, /* recoverable resent */ + OBD_FL_NOSPC_BLK = 0x00100000, /* no more block space on OST */ + OBD_FL_FLUSH = 0x00200000, /* flush pages on the OST */ + OBD_FL_SHORT_IO = 0x00400000, /* short io request */ + + /* Note that while these checksum values are currently separate bits, + * in 2.x we can actually allow all values from 1-31 if we wanted. + */ + OBD_FL_CKSUM_ALL = OBD_FL_CKSUM_CRC32 | OBD_FL_CKSUM_ADLER | + OBD_FL_CKSUM_CRC32C, + + /* mask for local-only flag, which won't be sent over network */ + OBD_FL_LOCAL_MASK = 0xF0000000, +}; + +/* + * All LOV EA magics should have the same postfix, if some new version + * Lustre instroduces new LOV EA magic, then when down-grade to an old + * Lustre, even though the old version system does not recognizes such + * new magic, it still can distinguish the corrupted cases by checking + * the magic's postfix. + */ +#define LOV_MAGIC_MAGIC 0x0BD0 +#define LOV_MAGIC_MASK 0xFFFF + +#define LOV_MAGIC_V1 (0x0BD10000 | LOV_MAGIC_MAGIC) +#define LOV_MAGIC_JOIN_V1 (0x0BD20000 | LOV_MAGIC_MAGIC) +#define LOV_MAGIC_V3 (0x0BD30000 | LOV_MAGIC_MAGIC) +#define LOV_MAGIC_MIGRATE (0x0BD40000 | LOV_MAGIC_MAGIC) +/* reserved for specifying OSTs */ +#define LOV_MAGIC_SPECIFIC (0x0BD50000 | LOV_MAGIC_MAGIC) +#define LOV_MAGIC LOV_MAGIC_V1 + +/* + * magic for fully defined striping + * the idea is that we should have different magics for striping "hints" + * (struct lov_user_md_v[13]) and defined ready-to-use striping (struct + * lov_mds_md_v[13]). at the moment the magics are used in wire protocol, + * we can't just change it w/o long way preparation, but we still need a + * mechanism to allow LOD to differentiate hint versus ready striping. + * so, at the moment we do a trick: MDT knows what to expect from request + * depending on the case (replay uses ready striping, non-replay req uses + * hints), so MDT replaces magic with appropriate one and now LOD can + * easily understand what's inside -bzzz + */ +#define LOV_MAGIC_V1_DEF 0x0CD10BD0 +#define LOV_MAGIC_V3_DEF 0x0CD30BD0 + +#define lov_pattern(pattern) (pattern & ~LOV_PATTERN_F_MASK) +#define lov_pattern_flags(pattern) (pattern & LOV_PATTERN_F_MASK) + +#define lov_ost_data lov_ost_data_v1 +struct lov_ost_data_v1 { /* per-stripe data structure (little-endian)*/ + struct ost_id l_ost_oi; /* OST object ID */ + __u32 l_ost_gen; /* generation of this l_ost_idx */ + __u32 l_ost_idx; /* OST index in LOV (lov_tgt_desc->tgts) */ +}; + +#define lov_mds_md lov_mds_md_v1 +struct lov_mds_md_v1 { /* LOV EA mds/wire data (little-endian) */ + __u32 lmm_magic; /* magic number = LOV_MAGIC_V1 */ + __u32 lmm_pattern; /* LOV_PATTERN_RAID0, LOV_PATTERN_RAID1 */ + struct ost_id lmm_oi; /* LOV object ID */ + __u32 lmm_stripe_size; /* size of stripe in bytes */ + /* lmm_stripe_count used to be __u32 */ + __u16 lmm_stripe_count; /* num stripes in use for this object */ + __u16 lmm_layout_gen; /* layout generation number */ + struct lov_ost_data_v1 lmm_objects[0]; /* per-stripe data */ +}; + +#define MAX_MD_SIZE \ + (sizeof(struct lov_mds_md) + 4 * sizeof(struct lov_ost_data)) +#define MIN_MD_SIZE \ + (sizeof(struct lov_mds_md) + 1 * sizeof(struct lov_ost_data)) + +#define XATTR_NAME_ACL_ACCESS "system.posix_acl_access" +#define XATTR_NAME_ACL_DEFAULT "system.posix_acl_default" +#define XATTR_USER_PREFIX "user." +#define XATTR_TRUSTED_PREFIX "trusted." +#define XATTR_SECURITY_PREFIX "security." +#define XATTR_LUSTRE_PREFIX "lustre." + +#define XATTR_NAME_LOV "trusted.lov" +#define XATTR_NAME_LMA "trusted.lma" +#define XATTR_NAME_LMV "trusted.lmv" +#define XATTR_NAME_DEFAULT_LMV "trusted.dmv" +#define XATTR_NAME_LINK "trusted.link" +#define XATTR_NAME_FID "trusted.fid" +#define XATTR_NAME_VERSION "trusted.version" +#define XATTR_NAME_SOM "trusted.som" +#define XATTR_NAME_HSM "trusted.hsm" +#define XATTR_NAME_LFSCK_NAMESPACE "trusted.lfsck_namespace" + +struct lov_mds_md_v3 { /* LOV EA mds/wire data (little-endian) */ + __u32 lmm_magic; /* magic number = LOV_MAGIC_V3 */ + __u32 lmm_pattern; /* LOV_PATTERN_RAID0, LOV_PATTERN_RAID1 */ + struct ost_id lmm_oi; /* LOV object ID */ + __u32 lmm_stripe_size; /* size of stripe in bytes */ + /* lmm_stripe_count used to be __u32 */ + __u16 lmm_stripe_count; /* num stripes in use for this object */ + __u16 lmm_layout_gen; /* layout generation number */ + char lmm_pool_name[LOV_MAXPOOLNAME + 1]; /* must be 32bit aligned */ + struct lov_ost_data_v1 lmm_objects[0]; /* per-stripe data */ +}; + +static inline __u32 lov_mds_md_size(__u16 stripes, __u32 lmm_magic) +{ + if (lmm_magic == LOV_MAGIC_V3) + return sizeof(struct lov_mds_md_v3) + + stripes * sizeof(struct lov_ost_data_v1); + else + return sizeof(struct lov_mds_md_v1) + + stripes * sizeof(struct lov_ost_data_v1); +} + +static inline __u32 +lov_mds_md_max_stripe_count(size_t buf_size, __u32 lmm_magic) +{ + switch (lmm_magic) { + case LOV_MAGIC_V1: { + struct lov_mds_md_v1 lmm; + + if (buf_size < sizeof(lmm)) + return 0; + + return (buf_size - sizeof(lmm)) / sizeof(lmm.lmm_objects[0]); + } + case LOV_MAGIC_V3: { + struct lov_mds_md_v3 lmm; + + if (buf_size < sizeof(lmm)) + return 0; + + return (buf_size - sizeof(lmm)) / sizeof(lmm.lmm_objects[0]); + } + default: + return 0; + } +} + +#define OBD_MD_FLID (0x00000001ULL) /* object ID */ +#define OBD_MD_FLATIME (0x00000002ULL) /* access time */ +#define OBD_MD_FLMTIME (0x00000004ULL) /* data modification time */ +#define OBD_MD_FLCTIME (0x00000008ULL) /* change time */ +#define OBD_MD_FLSIZE (0x00000010ULL) /* size */ +#define OBD_MD_FLBLOCKS (0x00000020ULL) /* allocated blocks count */ +#define OBD_MD_FLBLKSZ (0x00000040ULL) /* block size */ +#define OBD_MD_FLMODE (0x00000080ULL) /* access bits (mode & ~S_IFMT) */ +#define OBD_MD_FLTYPE (0x00000100ULL) /* object type (mode & S_IFMT) */ +#define OBD_MD_FLUID (0x00000200ULL) /* user ID */ +#define OBD_MD_FLGID (0x00000400ULL) /* group ID */ +#define OBD_MD_FLFLAGS (0x00000800ULL) /* flags word */ +#define OBD_MD_FLNLINK (0x00002000ULL) /* link count */ +#define OBD_MD_FLGENER (0x00004000ULL) /* generation number */ +/*#define OBD_MD_FLINLINE (0x00008000ULL) inline data. used until 1.6.5 */ +#define OBD_MD_FLRDEV (0x00010000ULL) /* device number */ +#define OBD_MD_FLEASIZE (0x00020000ULL) /* extended attribute data */ +#define OBD_MD_LINKNAME (0x00040000ULL) /* symbolic link target */ +#define OBD_MD_FLHANDLE (0x00080000ULL) /* file/lock handle */ +#define OBD_MD_FLCKSUM (0x00100000ULL) /* bulk data checksum */ +#define OBD_MD_FLQOS (0x00200000ULL) /* quality of service stats */ +/*#define OBD_MD_FLOSCOPQ (0x00400000ULL) osc opaque data, never used */ +/* OBD_MD_FLCOOKIE (0x00800000ULL) obsolete in 2.8 */ +#define OBD_MD_FLGROUP (0x01000000ULL) /* group */ +#define OBD_MD_FLFID (0x02000000ULL) /* ->ost write inline fid */ +#define OBD_MD_FLEPOCH (0x04000000ULL) /* ->ost write with ioepoch */ + /* ->mds if epoch opens or closes + */ +#define OBD_MD_FLGRANT (0x08000000ULL) /* ost preallocation space grant */ +#define OBD_MD_FLDIREA (0x10000000ULL) /* dir's extended attribute data */ +#define OBD_MD_FLUSRQUOTA (0x20000000ULL) /* over quota flags sent from ost */ +#define OBD_MD_FLGRPQUOTA (0x40000000ULL) /* over quota flags sent from ost */ +#define OBD_MD_FLMODEASIZE (0x80000000ULL) /* EA size will be changed */ + +#define OBD_MD_MDS (0x0000000100000000ULL) /* where an inode lives on */ +#define OBD_MD_REINT (0x0000000200000000ULL) /* reintegrate oa */ +#define OBD_MD_MEA (0x0000000400000000ULL) /* CMD split EA */ +#define OBD_MD_TSTATE (0x0000000800000000ULL) /* transient state field */ + +#define OBD_MD_FLXATTR (0x0000001000000000ULL) /* xattr */ +#define OBD_MD_FLXATTRLS (0x0000002000000000ULL) /* xattr list */ +#define OBD_MD_FLXATTRRM (0x0000004000000000ULL) /* xattr remove */ +#define OBD_MD_FLACL (0x0000008000000000ULL) /* ACL */ +/* OBD_MD_FLRMTPERM (0x0000010000000000ULL) remote perm, obsolete */ +#define OBD_MD_FLMDSCAPA (0x0000020000000000ULL) /* MDS capability */ +#define OBD_MD_FLOSSCAPA (0x0000040000000000ULL) /* OSS capability */ +#define OBD_MD_FLCKSPLIT (0x0000080000000000ULL) /* Check split on server */ +#define OBD_MD_FLCROSSREF (0x0000100000000000ULL) /* Cross-ref case */ +#define OBD_MD_FLGETATTRLOCK (0x0000200000000000ULL) /* Get IOEpoch attributes + * under lock; for xattr + * requests means the + * client holds the lock + */ +#define OBD_MD_FLOBJCOUNT (0x0000400000000000ULL) /* for multiple destroy */ + +/* OBD_MD_FLRMTLSETFACL (0x0001000000000000ULL) lfs lsetfacl, obsolete */ +/* OBD_MD_FLRMTLGETFACL (0x0002000000000000ULL) lfs lgetfacl, obsolete */ +/* OBD_MD_FLRMTRSETFACL (0x0004000000000000ULL) lfs rsetfacl, obsolete */ +/* OBD_MD_FLRMTRGETFACL (0x0008000000000000ULL) lfs rgetfacl, obsolete */ + +#define OBD_MD_FLDATAVERSION (0x0010000000000000ULL) /* iversion sum */ +#define OBD_MD_CLOSE_INTENT_EXECED (0x0020000000000000ULL) /* close intent + * executed + */ + +#define OBD_MD_DEFAULT_MEA (0x0040000000000000ULL) /* default MEA */ + +#define OBD_MD_FLGETATTR (OBD_MD_FLID | OBD_MD_FLATIME | OBD_MD_FLMTIME | \ + OBD_MD_FLCTIME | OBD_MD_FLSIZE | OBD_MD_FLBLKSZ | \ + OBD_MD_FLMODE | OBD_MD_FLTYPE | OBD_MD_FLUID | \ + OBD_MD_FLGID | OBD_MD_FLFLAGS | OBD_MD_FLNLINK | \ + OBD_MD_FLGENER | OBD_MD_FLRDEV | OBD_MD_FLGROUP) + +#define OBD_MD_FLXATTRALL (OBD_MD_FLXATTR | OBD_MD_FLXATTRLS) + +/* don't forget obdo_fid which is way down at the bottom so it can + * come after the definition of llog_cookie + */ + +enum hss_valid { + HSS_SETMASK = 0x01, + HSS_CLEARMASK = 0x02, + HSS_ARCHIVE_ID = 0x04, +}; + +struct hsm_state_set { + __u32 hss_valid; + __u32 hss_archive_id; + __u64 hss_setmask; + __u64 hss_clearmask; +}; + +/* ost_body.data values for OST_BRW */ + +#define OBD_BRW_READ 0x01 +#define OBD_BRW_WRITE 0x02 +#define OBD_BRW_RWMASK (OBD_BRW_READ | OBD_BRW_WRITE) +#define OBD_BRW_SYNC 0x08 /* this page is a part of synchronous + * transfer and is not accounted in + * the grant. + */ +#define OBD_BRW_CHECK 0x10 +#define OBD_BRW_FROM_GRANT 0x20 /* the osc manages this under llite */ +#define OBD_BRW_GRANTED 0x40 /* the ost manages this */ +#define OBD_BRW_NOCACHE 0x80 /* this page is a part of non-cached IO */ +#define OBD_BRW_NOQUOTA 0x100 +#define OBD_BRW_SRVLOCK 0x200 /* Client holds no lock over this page */ +#define OBD_BRW_ASYNC 0x400 /* Server may delay commit to disk */ +#define OBD_BRW_MEMALLOC 0x800 /* Client runs in the "kswapd" context */ +#define OBD_BRW_OVER_USRQUOTA 0x1000 /* Running out of user quota */ +#define OBD_BRW_OVER_GRPQUOTA 0x2000 /* Running out of group quota */ +#define OBD_BRW_SOFT_SYNC 0x4000 /* This flag notifies the server + * that the client is running low on + * space for unstable pages; asking + * it to sync quickly + */ + +#define OBD_OBJECT_EOF LUSTRE_EOF + +#define OST_MIN_PRECREATE 32 +#define OST_MAX_PRECREATE 20000 + +struct obd_ioobj { + struct ost_id ioo_oid; /* object ID, if multi-obj BRW */ + __u32 ioo_max_brw; /* low 16 bits were o_mode before 2.4, + * now (PTLRPC_BULK_OPS_COUNT - 1) in + * high 16 bits in 2.4 and later + */ + __u32 ioo_bufcnt; /* number of niobufs for this object */ +}; + +/* + * NOTE: IOOBJ_MAX_BRW_BITS defines the _offset_ of the max_brw field in + * ioo_max_brw, NOT the maximum number of bits in PTLRPC_BULK_OPS_BITS. + * That said, ioo_max_brw is a 32-bit field so the limit is also 16 bits. + */ +#define IOOBJ_MAX_BRW_BITS 16 +#define ioobj_max_brw_get(ioo) (((ioo)->ioo_max_brw >> IOOBJ_MAX_BRW_BITS) + 1) +#define ioobj_max_brw_set(ioo, num) \ +do { (ioo)->ioo_max_brw = ((num) - 1) << IOOBJ_MAX_BRW_BITS; } while (0) + +/* multiple of 8 bytes => can array */ +struct niobuf_remote { + __u64 rnb_offset; + __u32 rnb_len; + __u32 rnb_flags; +}; + +/* lock value block communicated between the filter and llite */ + +/* OST_LVB_ERR_INIT is needed because the return code in rc is + * negative, i.e. because ((MASK + rc) & MASK) != MASK. + */ +#define OST_LVB_ERR_INIT 0xffbadbad80000000ULL +#define OST_LVB_ERR_MASK 0xffbadbad00000000ULL +#define OST_LVB_IS_ERR(blocks) \ + ((blocks & OST_LVB_ERR_MASK) == OST_LVB_ERR_MASK) +#define OST_LVB_SET_ERR(blocks, rc) \ + do { blocks = OST_LVB_ERR_INIT + rc; } while (0) +#define OST_LVB_GET_ERR(blocks) (int)(blocks - OST_LVB_ERR_INIT) + +struct ost_lvb_v1 { + __u64 lvb_size; + __s64 lvb_mtime; + __s64 lvb_atime; + __s64 lvb_ctime; + __u64 lvb_blocks; +}; + +struct ost_lvb { + __u64 lvb_size; + __s64 lvb_mtime; + __s64 lvb_atime; + __s64 lvb_ctime; + __u64 lvb_blocks; + __u32 lvb_mtime_ns; + __u32 lvb_atime_ns; + __u32 lvb_ctime_ns; + __u32 lvb_padding; +}; + +/* + * lquota data structures + */ + +/* The lquota_id structure is a union of all the possible identifier types that + * can be used with quota, this includes: + * - 64-bit user ID + * - 64-bit group ID + * - a FID which can be used for per-directory quota in the future + */ +union lquota_id { + struct lu_fid qid_fid; /* FID for per-directory quota */ + __u64 qid_uid; /* user identifier */ + __u64 qid_gid; /* group identifier */ +}; + +/* quotactl management */ +struct obd_quotactl { + __u32 qc_cmd; + __u32 qc_type; /* see Q_* flag below */ + __u32 qc_id; + __u32 qc_stat; + struct obd_dqinfo qc_dqinfo; + struct obd_dqblk qc_dqblk; +}; + +#define Q_COPY(out, in, member) (out)->member = (in)->member + +#define QCTL_COPY(out, in) \ +do { \ + Q_COPY(out, in, qc_cmd); \ + Q_COPY(out, in, qc_type); \ + Q_COPY(out, in, qc_id); \ + Q_COPY(out, in, qc_stat); \ + Q_COPY(out, in, qc_dqinfo); \ + Q_COPY(out, in, qc_dqblk); \ +} while (0) + +/* Data structures associated with the quota locks */ + +/* Glimpse descriptor used for the index & per-ID quota locks */ +struct ldlm_gl_lquota_desc { + union lquota_id gl_id; /* quota ID subject to the glimpse */ + __u64 gl_flags; /* see LQUOTA_FL* below */ + __u64 gl_ver; /* new index version */ + __u64 gl_hardlimit; /* new hardlimit or qunit value */ + __u64 gl_softlimit; /* new softlimit */ + __u64 gl_time; + __u64 gl_pad2; +}; + +/* quota glimpse flags */ +#define LQUOTA_FL_EDQUOT 0x1 /* user/group out of quota space on QMT */ + +/* LVB used with quota (global and per-ID) locks */ +struct lquota_lvb { + __u64 lvb_flags; /* see LQUOTA_FL* above */ + __u64 lvb_id_may_rel; /* space that might be released later */ + __u64 lvb_id_rel; /* space released by the slave for this ID */ + __u64 lvb_id_qunit; /* current qunit value */ + __u64 lvb_pad1; +}; + +/* op codes */ +enum quota_cmd { + QUOTA_DQACQ = 601, + QUOTA_DQREL = 602, + QUOTA_LAST_OPC +}; +#define QUOTA_FIRST_OPC QUOTA_DQACQ + +/* + * MDS REQ RECORDS + */ + +/* opcodes */ +enum mds_cmd { + MDS_GETATTR = 33, + MDS_GETATTR_NAME = 34, + MDS_CLOSE = 35, + MDS_REINT = 36, + MDS_READPAGE = 37, + MDS_CONNECT = 38, + MDS_DISCONNECT = 39, + MDS_GETSTATUS = 40, + MDS_STATFS = 41, + MDS_PIN = 42, /* obsolete, never used in a release */ + MDS_UNPIN = 43, /* obsolete, never used in a release */ + MDS_SYNC = 44, + MDS_DONE_WRITING = 45, /* obsolete since 2.8.0 */ + MDS_SET_INFO = 46, + MDS_QUOTACHECK = 47, /* not used since 2.4 */ + MDS_QUOTACTL = 48, + MDS_GETXATTR = 49, + MDS_SETXATTR = 50, /* obsolete, now it's MDS_REINT op */ + MDS_WRITEPAGE = 51, + MDS_IS_SUBDIR = 52, /* obsolete, never used in a release */ + MDS_GET_INFO = 53, + MDS_HSM_STATE_GET = 54, + MDS_HSM_STATE_SET = 55, + MDS_HSM_ACTION = 56, + MDS_HSM_PROGRESS = 57, + MDS_HSM_REQUEST = 58, + MDS_HSM_CT_REGISTER = 59, + MDS_HSM_CT_UNREGISTER = 60, + MDS_SWAP_LAYOUTS = 61, + MDS_LAST_OPC +}; + +#define MDS_FIRST_OPC MDS_GETATTR + +/* + * Do not exceed 63 + */ + +enum mdt_reint_cmd { + REINT_SETATTR = 1, + REINT_CREATE = 2, + REINT_LINK = 3, + REINT_UNLINK = 4, + REINT_RENAME = 5, + REINT_OPEN = 6, + REINT_SETXATTR = 7, + REINT_RMENTRY = 8, + REINT_MIGRATE = 9, + REINT_MAX +}; + +/* the disposition of the intent outlines what was executed */ +#define DISP_IT_EXECD 0x00000001 +#define DISP_LOOKUP_EXECD 0x00000002 +#define DISP_LOOKUP_NEG 0x00000004 +#define DISP_LOOKUP_POS 0x00000008 +#define DISP_OPEN_CREATE 0x00000010 +#define DISP_OPEN_OPEN 0x00000020 +#define DISP_ENQ_COMPLETE 0x00400000 /* obsolete and unused */ +#define DISP_ENQ_OPEN_REF 0x00800000 +#define DISP_ENQ_CREATE_REF 0x01000000 +#define DISP_OPEN_LOCK 0x02000000 +#define DISP_OPEN_LEASE 0x04000000 +#define DISP_OPEN_STRIPE 0x08000000 +#define DISP_OPEN_DENY 0x10000000 + +/* INODE LOCK PARTS */ +#define MDS_INODELOCK_LOOKUP 0x000001 /* For namespace, dentry etc, and also + * was used to protect permission (mode, + * owner, group etc) before 2.4. + */ +#define MDS_INODELOCK_UPDATE 0x000002 /* size, links, timestamps */ +#define MDS_INODELOCK_OPEN 0x000004 /* For opened files */ +#define MDS_INODELOCK_LAYOUT 0x000008 /* for layout */ + +/* The PERM bit is added int 2.4, and it is used to protect permission(mode, + * owner, group, acl etc), so to separate the permission from LOOKUP lock. + * Because for remote directories(in DNE), these locks will be granted by + * different MDTs(different ldlm namespace). + * + * For local directory, MDT will always grant UPDATE_LOCK|PERM_LOCK together. + * For Remote directory, the master MDT, where the remote directory is, will + * grant UPDATE_LOCK|PERM_LOCK, and the remote MDT, where the name entry is, + * will grant LOOKUP_LOCK. + */ +#define MDS_INODELOCK_PERM 0x000010 +#define MDS_INODELOCK_XATTR 0x000020 /* extended attributes */ + +#define MDS_INODELOCK_MAXSHIFT 5 +/* This FULL lock is useful to take on unlink sort of operations */ +#define MDS_INODELOCK_FULL ((1 << (MDS_INODELOCK_MAXSHIFT + 1)) - 1) + +/* NOTE: until Lustre 1.8.7/2.1.1 the fid_ver() was packed into name[2], + * but was moved into name[1] along with the OID to avoid consuming the + * name[2,3] fields that need to be used for the quota id (also a FID). + */ +enum { + LUSTRE_RES_ID_SEQ_OFF = 0, + LUSTRE_RES_ID_VER_OID_OFF = 1, + LUSTRE_RES_ID_WAS_VER_OFF = 2, /* see note above */ + LUSTRE_RES_ID_QUOTA_SEQ_OFF = 2, + LUSTRE_RES_ID_QUOTA_VER_OID_OFF = 3, + LUSTRE_RES_ID_HSH_OFF = 3 +}; + +#define MDS_STATUS_CONN 1 +#define MDS_STATUS_LOV 2 + +/* these should be identical to their EXT4_*_FL counterparts, they are + * redefined here only to avoid dragging in fs/ext4/ext4.h + */ +#define LUSTRE_SYNC_FL 0x00000008 /* Synchronous updates */ +#define LUSTRE_IMMUTABLE_FL 0x00000010 /* Immutable file */ +#define LUSTRE_APPEND_FL 0x00000020 /* writes to file may only append */ +#define LUSTRE_NODUMP_FL 0x00000040 /* do not dump file */ +#define LUSTRE_NOATIME_FL 0x00000080 /* do not update atime */ +#define LUSTRE_INDEX_FL 0x00001000 /* hash-indexed directory */ +#define LUSTRE_DIRSYNC_FL 0x00010000 /* dirsync behaviour (dir only) */ +#define LUSTRE_TOPDIR_FL 0x00020000 /* Top of directory hierarchies*/ +#define LUSTRE_DIRECTIO_FL 0x00100000 /* Use direct i/o */ +#define LUSTRE_INLINE_DATA_FL 0x10000000 /* Inode has inline data. */ + +/* Convert wire LUSTRE_*_FL to corresponding client local VFS S_* values + * for the client inode i_flags. The LUSTRE_*_FL are the Lustre wire + * protocol equivalents of LDISKFS_*_FL values stored on disk, while + * the S_* flags are kernel-internal values that change between kernel + * versions. These flags are set/cleared via FSFILT_IOC_{GET,SET}_FLAGS. + * See b=16526 for a full history. + */ +static inline int ll_ext_to_inode_flags(int flags) +{ + return (((flags & LUSTRE_SYNC_FL) ? S_SYNC : 0) | + ((flags & LUSTRE_NOATIME_FL) ? S_NOATIME : 0) | + ((flags & LUSTRE_APPEND_FL) ? S_APPEND : 0) | + ((flags & LUSTRE_DIRSYNC_FL) ? S_DIRSYNC : 0) | + ((flags & LUSTRE_IMMUTABLE_FL) ? S_IMMUTABLE : 0)); +} + +static inline int ll_inode_to_ext_flags(int iflags) +{ + return (((iflags & S_SYNC) ? LUSTRE_SYNC_FL : 0) | + ((iflags & S_NOATIME) ? LUSTRE_NOATIME_FL : 0) | + ((iflags & S_APPEND) ? LUSTRE_APPEND_FL : 0) | + ((iflags & S_DIRSYNC) ? LUSTRE_DIRSYNC_FL : 0) | + ((iflags & S_IMMUTABLE) ? LUSTRE_IMMUTABLE_FL : 0)); +} + +/* 64 possible states */ +enum md_transient_state { + MS_RESTORE = (1 << 0), /* restore is running */ +}; + +struct mdt_body { + struct lu_fid mbo_fid1; + struct lu_fid mbo_fid2; + struct lustre_handle mbo_handle; + __u64 mbo_valid; + __u64 mbo_size; /* Offset, in the case of MDS_READPAGE */ + __s64 mbo_mtime; + __s64 mbo_atime; + __s64 mbo_ctime; + __u64 mbo_blocks; /* XID, in the case of MDS_READPAGE */ + __u64 mbo_ioepoch; + __u64 mbo_t_state; /* transient file state defined in + * enum md_transient_state + * was "ino" until 2.4.0 + */ + __u32 mbo_fsuid; + __u32 mbo_fsgid; + __u32 mbo_capability; + __u32 mbo_mode; + __u32 mbo_uid; + __u32 mbo_gid; + __u32 mbo_flags; /* LUSTRE_*_FL file attributes */ + __u32 mbo_rdev; + __u32 mbo_nlink; /* #bytes to read in the case of MDS_READPAGE */ + __u32 mbo_unused2; /* was "generation" until 2.4.0 */ + __u32 mbo_suppgid; + __u32 mbo_eadatasize; + __u32 mbo_aclsize; + __u32 mbo_max_mdsize; + __u32 mbo_unused3; /* was max_cookiesize until 2.8 */ + __u32 mbo_uid_h; /* high 32-bits of uid, for FUID */ + __u32 mbo_gid_h; /* high 32-bits of gid, for FUID */ + __u32 mbo_padding_5; /* also fix lustre_swab_mdt_body */ + __u64 mbo_padding_6; + __u64 mbo_padding_7; + __u64 mbo_padding_8; + __u64 mbo_padding_9; + __u64 mbo_padding_10; +}; /* 216 */ + +struct mdt_ioepoch { + struct lustre_handle mio_handle; + __u64 mio_unused1; /* was ioepoch */ + __u32 mio_unused2; /* was flags */ + __u32 mio_padding; +}; + +/* permissions for md_perm.mp_perm */ +enum { + CFS_SETUID_PERM = 0x01, + CFS_SETGID_PERM = 0x02, + CFS_SETGRP_PERM = 0x04, +}; + +struct mdt_rec_setattr { + __u32 sa_opcode; + __u32 sa_cap; + __u32 sa_fsuid; + __u32 sa_fsuid_h; + __u32 sa_fsgid; + __u32 sa_fsgid_h; + __u32 sa_suppgid; + __u32 sa_suppgid_h; + __u32 sa_padding_1; + __u32 sa_padding_1_h; + struct lu_fid sa_fid; + __u64 sa_valid; + __u32 sa_uid; + __u32 sa_gid; + __u64 sa_size; + __u64 sa_blocks; + __s64 sa_mtime; + __s64 sa_atime; + __s64 sa_ctime; + __u32 sa_attr_flags; + __u32 sa_mode; + __u32 sa_bias; /* some operation flags */ + __u32 sa_padding_3; + __u32 sa_padding_4; + __u32 sa_padding_5; +}; + +/* + * Attribute flags used in mdt_rec_setattr::sa_valid. + * The kernel's #defines for ATTR_* should not be used over the network + * since the client and MDS may run different kernels (see bug 13828) + * Therefore, we should only use MDS_ATTR_* attributes for sa_valid. + */ +#define MDS_ATTR_MODE 0x1ULL /* = 1 */ +#define MDS_ATTR_UID 0x2ULL /* = 2 */ +#define MDS_ATTR_GID 0x4ULL /* = 4 */ +#define MDS_ATTR_SIZE 0x8ULL /* = 8 */ +#define MDS_ATTR_ATIME 0x10ULL /* = 16 */ +#define MDS_ATTR_MTIME 0x20ULL /* = 32 */ +#define MDS_ATTR_CTIME 0x40ULL /* = 64 */ +#define MDS_ATTR_ATIME_SET 0x80ULL /* = 128 */ +#define MDS_ATTR_MTIME_SET 0x100ULL /* = 256 */ +#define MDS_ATTR_FORCE 0x200ULL /* = 512, Not a change, but a change it */ +#define MDS_ATTR_ATTR_FLAG 0x400ULL /* = 1024 */ +#define MDS_ATTR_KILL_SUID 0x800ULL /* = 2048 */ +#define MDS_ATTR_KILL_SGID 0x1000ULL /* = 4096 */ +#define MDS_ATTR_CTIME_SET 0x2000ULL /* = 8192 */ +#define MDS_ATTR_FROM_OPEN 0x4000ULL /* = 16384, called from open path, + * ie O_TRUNC + */ +#define MDS_ATTR_BLOCKS 0x8000ULL /* = 32768 */ + +#define MDS_FMODE_CLOSED 00000000 +#define MDS_FMODE_EXEC 00000004 +/* MDS_FMODE_EPOCH 01000000 obsolete since 2.8.0 */ +/* MDS_FMODE_TRUNC 02000000 obsolete since 2.8.0 */ +/* MDS_FMODE_SOM 04000000 obsolete since 2.8.0 */ + +#define MDS_OPEN_CREATED 00000010 +#define MDS_OPEN_CROSS 00000020 + +#define MDS_OPEN_CREAT 00000100 +#define MDS_OPEN_EXCL 00000200 +#define MDS_OPEN_TRUNC 00001000 +#define MDS_OPEN_APPEND 00002000 +#define MDS_OPEN_SYNC 00010000 +#define MDS_OPEN_DIRECTORY 00200000 + +#define MDS_OPEN_BY_FID 040000000 /* open_by_fid for known object */ +#define MDS_OPEN_DELAY_CREATE 0100000000 /* delay initial object create */ +#define MDS_OPEN_OWNEROVERRIDE 0200000000 /* NFSD rw-reopen ro file for owner */ +#define MDS_OPEN_JOIN_FILE 0400000000 /* open for join file. + * We do not support JOIN FILE + * anymore, reserve this flags + * just for preventing such bit + * to be reused. + */ + +#define MDS_OPEN_LOCK 04000000000 /* This open requires open lock */ +#define MDS_OPEN_HAS_EA 010000000000 /* specify object create pattern */ +#define MDS_OPEN_HAS_OBJS 020000000000 /* Just set the EA the obj exist */ +#define MDS_OPEN_NORESTORE 0100000000000ULL /* Do not restore file at open */ +#define MDS_OPEN_NEWSTRIPE 0200000000000ULL /* New stripe needed (restripe or + * hsm restore) */ +#define MDS_OPEN_VOLATILE 0400000000000ULL /* File is volatile = created + unlinked */ +#define MDS_OPEN_LEASE 01000000000000ULL /* Open the file and grant lease + * delegation, succeed if it's not + * being opened with conflict mode. + */ +#define MDS_OPEN_RELEASE 02000000000000ULL /* Open the file for HSM release */ + +#define MDS_OPEN_FL_INTERNAL (MDS_OPEN_HAS_EA | MDS_OPEN_HAS_OBJS | \ + MDS_OPEN_OWNEROVERRIDE | MDS_OPEN_LOCK | \ + MDS_OPEN_BY_FID | MDS_OPEN_LEASE | \ + MDS_OPEN_RELEASE) + +enum mds_op_bias { + MDS_CHECK_SPLIT = 1 << 0, + MDS_CROSS_REF = 1 << 1, + MDS_VTX_BYPASS = 1 << 2, + MDS_PERM_BYPASS = 1 << 3, +/* MDS_SOM = 1 << 4, obsolete since 2.8.0 */ + MDS_QUOTA_IGNORE = 1 << 5, + MDS_CLOSE_CLEANUP = 1 << 6, + MDS_KEEP_ORPHAN = 1 << 7, + MDS_RECOV_OPEN = 1 << 8, + MDS_DATA_MODIFIED = 1 << 9, + MDS_CREATE_VOLATILE = 1 << 10, + MDS_OWNEROVERRIDE = 1 << 11, + MDS_HSM_RELEASE = 1 << 12, + MDS_RENAME_MIGRATE = 1 << 13, + MDS_CLOSE_LAYOUT_SWAP = 1 << 14, +}; + +/* instance of mdt_reint_rec */ +struct mdt_rec_create { + __u32 cr_opcode; + __u32 cr_cap; + __u32 cr_fsuid; + __u32 cr_fsuid_h; + __u32 cr_fsgid; + __u32 cr_fsgid_h; + __u32 cr_suppgid1; + __u32 cr_suppgid1_h; + __u32 cr_suppgid2; + __u32 cr_suppgid2_h; + struct lu_fid cr_fid1; + struct lu_fid cr_fid2; + struct lustre_handle cr_old_handle; /* handle in case of open replay */ + __s64 cr_time; + __u64 cr_rdev; + __u64 cr_ioepoch; + __u64 cr_padding_1; /* rr_blocks */ + __u32 cr_mode; + __u32 cr_bias; + /* use of helpers set/get_mrc_cr_flags() is needed to access + * 64 bits cr_flags [cr_flags_l, cr_flags_h], this is done to + * extend cr_flags size without breaking 1.8 compat + */ + __u32 cr_flags_l; /* for use with open, low 32 bits */ + __u32 cr_flags_h; /* for use with open, high 32 bits */ + __u32 cr_umask; /* umask for create */ + __u32 cr_padding_4; /* rr_padding_4 */ +}; + +/* instance of mdt_reint_rec */ +struct mdt_rec_link { + __u32 lk_opcode; + __u32 lk_cap; + __u32 lk_fsuid; + __u32 lk_fsuid_h; + __u32 lk_fsgid; + __u32 lk_fsgid_h; + __u32 lk_suppgid1; + __u32 lk_suppgid1_h; + __u32 lk_suppgid2; + __u32 lk_suppgid2_h; + struct lu_fid lk_fid1; + struct lu_fid lk_fid2; + __s64 lk_time; + __u64 lk_padding_1; /* rr_atime */ + __u64 lk_padding_2; /* rr_ctime */ + __u64 lk_padding_3; /* rr_size */ + __u64 lk_padding_4; /* rr_blocks */ + __u32 lk_bias; + __u32 lk_padding_5; /* rr_mode */ + __u32 lk_padding_6; /* rr_flags */ + __u32 lk_padding_7; /* rr_padding_2 */ + __u32 lk_padding_8; /* rr_padding_3 */ + __u32 lk_padding_9; /* rr_padding_4 */ +}; + +/* instance of mdt_reint_rec */ +struct mdt_rec_unlink { + __u32 ul_opcode; + __u32 ul_cap; + __u32 ul_fsuid; + __u32 ul_fsuid_h; + __u32 ul_fsgid; + __u32 ul_fsgid_h; + __u32 ul_suppgid1; + __u32 ul_suppgid1_h; + __u32 ul_suppgid2; + __u32 ul_suppgid2_h; + struct lu_fid ul_fid1; + struct lu_fid ul_fid2; + __s64 ul_time; + __u64 ul_padding_2; /* rr_atime */ + __u64 ul_padding_3; /* rr_ctime */ + __u64 ul_padding_4; /* rr_size */ + __u64 ul_padding_5; /* rr_blocks */ + __u32 ul_bias; + __u32 ul_mode; + __u32 ul_padding_6; /* rr_flags */ + __u32 ul_padding_7; /* rr_padding_2 */ + __u32 ul_padding_8; /* rr_padding_3 */ + __u32 ul_padding_9; /* rr_padding_4 */ +}; + +/* instance of mdt_reint_rec */ +struct mdt_rec_rename { + __u32 rn_opcode; + __u32 rn_cap; + __u32 rn_fsuid; + __u32 rn_fsuid_h; + __u32 rn_fsgid; + __u32 rn_fsgid_h; + __u32 rn_suppgid1; + __u32 rn_suppgid1_h; + __u32 rn_suppgid2; + __u32 rn_suppgid2_h; + struct lu_fid rn_fid1; + struct lu_fid rn_fid2; + __s64 rn_time; + __u64 rn_padding_1; /* rr_atime */ + __u64 rn_padding_2; /* rr_ctime */ + __u64 rn_padding_3; /* rr_size */ + __u64 rn_padding_4; /* rr_blocks */ + __u32 rn_bias; /* some operation flags */ + __u32 rn_mode; /* cross-ref rename has mode */ + __u32 rn_padding_5; /* rr_flags */ + __u32 rn_padding_6; /* rr_padding_2 */ + __u32 rn_padding_7; /* rr_padding_3 */ + __u32 rn_padding_8; /* rr_padding_4 */ +}; + +/* instance of mdt_reint_rec */ +struct mdt_rec_setxattr { + __u32 sx_opcode; + __u32 sx_cap; + __u32 sx_fsuid; + __u32 sx_fsuid_h; + __u32 sx_fsgid; + __u32 sx_fsgid_h; + __u32 sx_suppgid1; + __u32 sx_suppgid1_h; + __u32 sx_suppgid2; + __u32 sx_suppgid2_h; + struct lu_fid sx_fid; + __u64 sx_padding_1; /* These three are rr_fid2 */ + __u32 sx_padding_2; + __u32 sx_padding_3; + __u64 sx_valid; + __s64 sx_time; + __u64 sx_padding_5; /* rr_ctime */ + __u64 sx_padding_6; /* rr_size */ + __u64 sx_padding_7; /* rr_blocks */ + __u32 sx_size; + __u32 sx_flags; + __u32 sx_padding_8; /* rr_flags */ + __u32 sx_padding_9; /* rr_padding_2 */ + __u32 sx_padding_10; /* rr_padding_3 */ + __u32 sx_padding_11; /* rr_padding_4 */ +}; + +/* + * mdt_rec_reint is the template for all mdt_reint_xxx structures. + * Do NOT change the size of various members, otherwise the value + * will be broken in lustre_swab_mdt_rec_reint(). + * + * If you add new members in other mdt_reint_xxx structures and need to use the + * rr_padding_x fields, then update lustre_swab_mdt_rec_reint() also. + */ +struct mdt_rec_reint { + __u32 rr_opcode; + __u32 rr_cap; + __u32 rr_fsuid; + __u32 rr_fsuid_h; + __u32 rr_fsgid; + __u32 rr_fsgid_h; + __u32 rr_suppgid1; + __u32 rr_suppgid1_h; + __u32 rr_suppgid2; + __u32 rr_suppgid2_h; + struct lu_fid rr_fid1; + struct lu_fid rr_fid2; + __s64 rr_mtime; + __s64 rr_atime; + __s64 rr_ctime; + __u64 rr_size; + __u64 rr_blocks; + __u32 rr_bias; + __u32 rr_mode; + __u32 rr_flags; + __u32 rr_flags_h; + __u32 rr_umask; + __u32 rr_padding_4; /* also fix lustre_swab_mdt_rec_reint */ +}; + +/* lmv structures */ +struct lmv_desc { + __u32 ld_tgt_count; /* how many MDS's */ + __u32 ld_active_tgt_count; /* how many active */ + __u32 ld_default_stripe_count; /* how many objects are used */ + __u32 ld_pattern; /* default hash pattern */ + __u64 ld_default_hash_size; + __u64 ld_padding_1; /* also fix lustre_swab_lmv_desc */ + __u32 ld_padding_2; /* also fix lustre_swab_lmv_desc */ + __u32 ld_qos_maxage; /* in second */ + __u32 ld_padding_3; /* also fix lustre_swab_lmv_desc */ + __u32 ld_padding_4; /* also fix lustre_swab_lmv_desc */ + struct obd_uuid ld_uuid; +}; + +/* LMV layout EA, and it will be stored both in master and slave object */ +struct lmv_mds_md_v1 { + __u32 lmv_magic; + __u32 lmv_stripe_count; + __u32 lmv_master_mdt_index; /* On master object, it is master + * MDT index, on slave object, it + * is stripe index of the slave obj + */ + __u32 lmv_hash_type; /* dir stripe policy, i.e. indicate + * which hash function to be used, + * Note: only lower 16 bits is being + * used for now. Higher 16 bits will + * be used to mark the object status, + * for example migrating or dead. + */ + __u32 lmv_layout_version; /* Used for directory restriping */ + __u32 lmv_padding1; + __u64 lmv_padding2; + __u64 lmv_padding3; + char lmv_pool_name[LOV_MAXPOOLNAME + 1];/* pool name */ + struct lu_fid lmv_stripe_fids[0]; /* FIDs for each stripe */ +}; + +#define LMV_MAGIC_V1 0x0CD20CD0 /* normal stripe lmv magic */ +#define LMV_MAGIC LMV_MAGIC_V1 + +/* #define LMV_USER_MAGIC 0x0CD30CD0 */ +#define LMV_MAGIC_STRIPE 0x0CD40CD0 /* magic for dir sub_stripe */ + +/* + *Right now only the lower part(0-16bits) of lmv_hash_type is being used, + * and the higher part will be the flag to indicate the status of object, + * for example the object is being migrated. And the hash function + * might be interpreted differently with different flags. + */ +#define LMV_HASH_TYPE_MASK 0x0000ffff + +#define LMV_HASH_FLAG_MIGRATION 0x80000000 +#define LMV_HASH_FLAG_DEAD 0x40000000 + +/** + * The FNV-1a hash algorithm is as follows: + * hash = FNV_offset_basis + * for each octet_of_data to be hashed + * hash = hash XOR octet_of_data + * hash = hash × FNV_prime + * return hash + * http://en.wikipedia.org/wiki/Fowler–Noll–Vo_hash_function#FNV-1a_hash + * + * http://www.isthe.com/chongo/tech/comp/fnv/index.html#FNV-reference-source + * FNV_prime is 2^40 + 2^8 + 0xb3 = 0x100000001b3ULL + **/ +#define LUSTRE_FNV_1A_64_PRIME 0x100000001b3ULL +#define LUSTRE_FNV_1A_64_OFFSET_BIAS 0xcbf29ce484222325ULL +static inline __u64 lustre_hash_fnv_1a_64(const void *buf, size_t size) +{ + __u64 hash = LUSTRE_FNV_1A_64_OFFSET_BIAS; + const unsigned char *p = buf; + size_t i; + + for (i = 0; i < size; i++) { + hash ^= p[i]; + hash *= LUSTRE_FNV_1A_64_PRIME; + } + + return hash; +} + +union lmv_mds_md { + __u32 lmv_magic; + struct lmv_mds_md_v1 lmv_md_v1; + struct lmv_user_md lmv_user_md; +}; + +static inline ssize_t lmv_mds_md_size(int stripe_count, unsigned int lmm_magic) +{ + ssize_t len = -EINVAL; + + switch (lmm_magic) { + case LMV_MAGIC_V1: { + struct lmv_mds_md_v1 *lmm1; + + len = sizeof(*lmm1); + len += stripe_count * sizeof(lmm1->lmv_stripe_fids[0]); + break; } + default: + break; + } + return len; +} + +static inline int lmv_mds_md_stripe_count_get(const union lmv_mds_md *lmm) +{ + switch (__le32_to_cpu(lmm->lmv_magic)) { + case LMV_MAGIC_V1: + return __le32_to_cpu(lmm->lmv_md_v1.lmv_stripe_count); + case LMV_USER_MAGIC: + return __le32_to_cpu(lmm->lmv_user_md.lum_stripe_count); + default: + return -EINVAL; + } +} + +enum fld_rpc_opc { + FLD_QUERY = 900, + FLD_READ = 901, + FLD_LAST_OPC, + FLD_FIRST_OPC = FLD_QUERY +}; + +enum seq_rpc_opc { + SEQ_QUERY = 700, + SEQ_LAST_OPC, + SEQ_FIRST_OPC = SEQ_QUERY +}; + +enum seq_op { + SEQ_ALLOC_SUPER = 0, + SEQ_ALLOC_META = 1 +}; + +enum fld_op { + FLD_CREATE = 0, + FLD_DELETE = 1, + FLD_LOOKUP = 2, +}; + +/* + * LOV data structures + */ + +#define LOV_MAX_UUID_BUFFER_SIZE 8192 +/* The size of the buffer the lov/mdc reserves for the + * array of UUIDs returned by the MDS. With the current + * protocol, this will limit the max number of OSTs per LOV + */ + +#define LOV_DESC_MAGIC 0xB0CCDE5C +#define LOV_DESC_QOS_MAXAGE_DEFAULT 5 /* Seconds */ +#define LOV_DESC_STRIPE_SIZE_DEFAULT (1 << LNET_MTU_BITS) + +/* LOV settings descriptor (should only contain static info) */ +struct lov_desc { + __u32 ld_tgt_count; /* how many OBD's */ + __u32 ld_active_tgt_count; /* how many active */ + __u32 ld_default_stripe_count; /* how many objects are used */ + __u32 ld_pattern; /* default PATTERN_RAID0 */ + __u64 ld_default_stripe_size; /* in bytes */ + __u64 ld_default_stripe_offset; /* in bytes */ + __u32 ld_padding_0; /* unused */ + __u32 ld_qos_maxage; /* in second */ + __u32 ld_padding_1; /* also fix lustre_swab_lov_desc */ + __u32 ld_padding_2; /* also fix lustre_swab_lov_desc */ + struct obd_uuid ld_uuid; +}; + +#define ld_magic ld_active_tgt_count /* for swabbing from llogs */ + +/* + * LDLM requests: + */ +/* opcodes -- MUST be distinct from OST/MDS opcodes */ +enum ldlm_cmd { + LDLM_ENQUEUE = 101, + LDLM_CONVERT = 102, + LDLM_CANCEL = 103, + LDLM_BL_CALLBACK = 104, + LDLM_CP_CALLBACK = 105, + LDLM_GL_CALLBACK = 106, + LDLM_SET_INFO = 107, + LDLM_LAST_OPC +}; +#define LDLM_FIRST_OPC LDLM_ENQUEUE + +#define RES_NAME_SIZE 4 +struct ldlm_res_id { + __u64 name[RES_NAME_SIZE]; +}; + +#define DLDLMRES "[%#llx:%#llx:%#llx].%llx" +#define PLDLMRES(res) (res)->lr_name.name[0], (res)->lr_name.name[1], \ + (res)->lr_name.name[2], (res)->lr_name.name[3] + +/* lock types */ +enum ldlm_mode { + LCK_MINMODE = 0, + LCK_EX = 1, + LCK_PW = 2, + LCK_PR = 4, + LCK_CW = 8, + LCK_CR = 16, + LCK_NL = 32, + LCK_GROUP = 64, + LCK_COS = 128, + LCK_MAXMODE +}; + +#define LCK_MODE_NUM 8 + +enum ldlm_type { + LDLM_PLAIN = 10, + LDLM_EXTENT = 11, + LDLM_FLOCK = 12, + LDLM_IBITS = 13, + LDLM_MAX_TYPE +}; + +#define LDLM_MIN_TYPE LDLM_PLAIN + +struct ldlm_extent { + __u64 start; + __u64 end; + __u64 gid; +}; + +struct ldlm_inodebits { + __u64 bits; +}; + +struct ldlm_flock_wire { + __u64 lfw_start; + __u64 lfw_end; + __u64 lfw_owner; + __u32 lfw_padding; + __u32 lfw_pid; +}; + +/* it's important that the fields of the ldlm_extent structure match + * the first fields of the ldlm_flock structure because there is only + * one ldlm_swab routine to process the ldlm_policy_data_t union. if + * this ever changes we will need to swab the union differently based + * on the resource type. + */ + +union ldlm_wire_policy_data { + struct ldlm_extent l_extent; + struct ldlm_flock_wire l_flock; + struct ldlm_inodebits l_inodebits; +}; + +union ldlm_gl_desc { + struct ldlm_gl_lquota_desc lquota_desc; +}; + +enum ldlm_intent_flags { + IT_OPEN = 0x00000001, + IT_CREAT = 0x00000002, + IT_OPEN_CREAT = 0x00000003, + IT_READDIR = 0x00000004, + IT_GETATTR = 0x00000008, + IT_LOOKUP = 0x00000010, + IT_UNLINK = 0x00000020, + IT_TRUNC = 0x00000040, + IT_GETXATTR = 0x00000080, + IT_EXEC = 0x00000100, + IT_PIN = 0x00000200, + IT_LAYOUT = 0x00000400, + IT_QUOTA_DQACQ = 0x00000800, + IT_QUOTA_CONN = 0x00001000, + IT_SETXATTR = 0x00002000, +}; + +struct ldlm_intent { + __u64 opc; +}; + +struct ldlm_resource_desc { + enum ldlm_type lr_type; + __u32 lr_padding; /* also fix lustre_swab_ldlm_resource_desc */ + struct ldlm_res_id lr_name; +}; + +struct ldlm_lock_desc { + struct ldlm_resource_desc l_resource; + enum ldlm_mode l_req_mode; + enum ldlm_mode l_granted_mode; + union ldlm_wire_policy_data l_policy_data; +}; + +#define LDLM_LOCKREQ_HANDLES 2 +#define LDLM_ENQUEUE_CANCEL_OFF 1 + +struct ldlm_request { + __u32 lock_flags; + __u32 lock_count; + struct ldlm_lock_desc lock_desc; + struct lustre_handle lock_handle[LDLM_LOCKREQ_HANDLES]; +}; + +struct ldlm_reply { + __u32 lock_flags; + __u32 lock_padding; /* also fix lustre_swab_ldlm_reply */ + struct ldlm_lock_desc lock_desc; + struct lustre_handle lock_handle; + __u64 lock_policy_res1; + __u64 lock_policy_res2; +}; + +#define ldlm_flags_to_wire(flags) ((__u32)(flags)) +#define ldlm_flags_from_wire(flags) ((__u64)(flags)) + +/* + * Opcodes for mountconf (mgs and mgc) + */ +enum mgs_cmd { + MGS_CONNECT = 250, + MGS_DISCONNECT, + MGS_EXCEPTION, /* node died, etc. */ + MGS_TARGET_REG, /* whenever target starts up */ + MGS_TARGET_DEL, + MGS_SET_INFO, + MGS_CONFIG_READ, + MGS_LAST_OPC +}; +#define MGS_FIRST_OPC MGS_CONNECT + +#define MGS_PARAM_MAXLEN 1024 +#define KEY_SET_INFO "set_info" + +struct mgs_send_param { + char mgs_param[MGS_PARAM_MAXLEN]; +}; + +/* We pass this info to the MGS so it can write config logs */ +#define MTI_NAME_MAXLEN 64 +#define MTI_PARAM_MAXLEN 4096 +#define MTI_NIDS_MAX 32 +struct mgs_target_info { + __u32 mti_lustre_ver; + __u32 mti_stripe_index; + __u32 mti_config_ver; + __u32 mti_flags; + __u32 mti_nid_count; + __u32 mti_instance; /* Running instance of target */ + char mti_fsname[MTI_NAME_MAXLEN]; + char mti_svname[MTI_NAME_MAXLEN]; + char mti_uuid[sizeof(struct obd_uuid)]; + __u64 mti_nids[MTI_NIDS_MAX]; /* host nids (lnet_nid_t)*/ + char mti_params[MTI_PARAM_MAXLEN]; +}; + +struct mgs_nidtbl_entry { + __u64 mne_version; /* table version of this entry */ + __u32 mne_instance; /* target instance # */ + __u32 mne_index; /* target index */ + __u32 mne_length; /* length of this entry - by bytes */ + __u8 mne_type; /* target type LDD_F_SV_TYPE_OST/MDT */ + __u8 mne_nid_type; /* type of nid(mbz). for ipv6. */ + __u8 mne_nid_size; /* size of each NID, by bytes */ + __u8 mne_nid_count; /* # of NIDs in buffer */ + union { + lnet_nid_t nids[0]; /* variable size buffer for NIDs. */ + } u; +}; + +struct mgs_config_body { + char mcb_name[MTI_NAME_MAXLEN]; /* logname */ + __u64 mcb_offset; /* next index of config log to request */ + __u16 mcb_type; /* type of log: CONFIG_T_[CONFIG|RECOVER] */ + __u8 mcb_reserved; + __u8 mcb_bits; /* bits unit size of config log */ + __u32 mcb_units; /* # of units for bulk transfer */ +}; + +struct mgs_config_res { + __u64 mcr_offset; /* index of last config log */ + __u64 mcr_size; /* size of the log */ +}; + +/* Config marker flags (in config log) */ +#define CM_START 0x01 +#define CM_END 0x02 +#define CM_SKIP 0x04 +#define CM_UPGRADE146 0x08 +#define CM_EXCLUDE 0x10 +#define CM_START_SKIP (CM_START | CM_SKIP) + +struct cfg_marker { + __u32 cm_step; /* aka config version */ + __u32 cm_flags; + __u32 cm_vers; /* lustre release version number */ + __u32 cm_padding; /* 64 bit align */ + __s64 cm_createtime; /*when this record was first created */ + __s64 cm_canceltime; /*when this record is no longer valid*/ + char cm_tgtname[MTI_NAME_MAXLEN]; + char cm_comment[MTI_NAME_MAXLEN]; +}; + +/* + * Opcodes for multiple servers. + */ + +enum obd_cmd { + OBD_PING = 400, + OBD_LOG_CANCEL, + OBD_QC_CALLBACK, /* not used since 2.4 */ + OBD_IDX_READ, + OBD_LAST_OPC +}; +#define OBD_FIRST_OPC OBD_PING + +/** + * llog contexts indices. + * + * There is compatibility problem with indexes below, they are not + * continuous and must keep their numbers for compatibility needs. + * See LU-5218 for details. + */ +enum llog_ctxt_id { + LLOG_CONFIG_ORIG_CTXT = 0, + LLOG_CONFIG_REPL_CTXT = 1, + LLOG_MDS_OST_ORIG_CTXT = 2, + LLOG_MDS_OST_REPL_CTXT = 3, /* kept just to avoid re-assignment */ + LLOG_SIZE_ORIG_CTXT = 4, + LLOG_SIZE_REPL_CTXT = 5, + LLOG_TEST_ORIG_CTXT = 8, + LLOG_TEST_REPL_CTXT = 9, /* kept just to avoid re-assignment */ + LLOG_CHANGELOG_ORIG_CTXT = 12, /**< changelog generation on mdd */ + LLOG_CHANGELOG_REPL_CTXT = 13, /**< changelog access on clients */ + /* for multiple changelog consumers */ + LLOG_CHANGELOG_USER_ORIG_CTXT = 14, + LLOG_AGENT_ORIG_CTXT = 15, /**< agent requests generation on cdt */ + LLOG_MAX_CTXTS +}; + +/** Identifier for a single log object */ +struct llog_logid { + struct ost_id lgl_oi; + __u32 lgl_ogen; +} __packed; + +/** Records written to the CATALOGS list */ +#define CATLIST "CATALOGS" +struct llog_catid { + struct llog_logid lci_logid; + __u32 lci_padding1; + __u32 lci_padding2; + __u32 lci_padding3; +} __packed; + +/* Log data record types - there is no specific reason that these need to + * be related to the RPC opcodes, but no reason not to (may be handy later?) + */ +#define LLOG_OP_MAGIC 0x10600000 +#define LLOG_OP_MASK 0xfff00000 + +enum llog_op_type { + LLOG_PAD_MAGIC = LLOG_OP_MAGIC | 0x00000, + OST_SZ_REC = LLOG_OP_MAGIC | 0x00f00, + /* OST_RAID1_REC = LLOG_OP_MAGIC | 0x01000, never used */ + MDS_UNLINK_REC = LLOG_OP_MAGIC | 0x10000 | (MDS_REINT << 8) | + REINT_UNLINK, /* obsolete after 2.5.0 */ + MDS_UNLINK64_REC = LLOG_OP_MAGIC | 0x90000 | (MDS_REINT << 8) | + REINT_UNLINK, + /* MDS_SETATTR_REC = LLOG_OP_MAGIC | 0x12401, obsolete 1.8.0 */ + MDS_SETATTR64_REC = LLOG_OP_MAGIC | 0x90000 | (MDS_REINT << 8) | + REINT_SETATTR, + OBD_CFG_REC = LLOG_OP_MAGIC | 0x20000, + /* PTL_CFG_REC = LLOG_OP_MAGIC | 0x30000, obsolete 1.4.0 */ + LLOG_GEN_REC = LLOG_OP_MAGIC | 0x40000, + /* LLOG_JOIN_REC = LLOG_OP_MAGIC | 0x50000, obsolete 1.8.0 */ + CHANGELOG_REC = LLOG_OP_MAGIC | 0x60000, + CHANGELOG_USER_REC = LLOG_OP_MAGIC | 0x70000, + HSM_AGENT_REC = LLOG_OP_MAGIC | 0x80000, + LLOG_HDR_MAGIC = LLOG_OP_MAGIC | 0x45539, + LLOG_LOGID_MAGIC = LLOG_OP_MAGIC | 0x4553b, +}; + +#define LLOG_REC_HDR_NEEDS_SWABBING(r) \ + (((r)->lrh_type & __swab32(LLOG_OP_MASK)) == __swab32(LLOG_OP_MAGIC)) + +/** Log record header - stored in little endian order. + * Each record must start with this struct, end with a llog_rec_tail, + * and be a multiple of 256 bits in size. + */ +struct llog_rec_hdr { + __u32 lrh_len; + __u32 lrh_index; + __u32 lrh_type; + __u32 lrh_id; +}; + +struct llog_rec_tail { + __u32 lrt_len; + __u32 lrt_index; +}; + +/* Where data follow just after header */ +#define REC_DATA(ptr) \ + ((void *)((char *)ptr + sizeof(struct llog_rec_hdr))) + +#define REC_DATA_LEN(rec) \ + (rec->lrh_len - sizeof(struct llog_rec_hdr) - \ + sizeof(struct llog_rec_tail)) + +struct llog_logid_rec { + struct llog_rec_hdr lid_hdr; + struct llog_logid lid_id; + __u32 lid_padding1; + __u64 lid_padding2; + __u64 lid_padding3; + struct llog_rec_tail lid_tail; +} __packed; + +struct llog_unlink_rec { + struct llog_rec_hdr lur_hdr; + __u64 lur_oid; + __u32 lur_oseq; + __u32 lur_count; + struct llog_rec_tail lur_tail; +} __packed; + +struct llog_unlink64_rec { + struct llog_rec_hdr lur_hdr; + struct lu_fid lur_fid; + __u32 lur_count; /* to destroy the lost precreated */ + __u32 lur_padding1; + __u64 lur_padding2; + __u64 lur_padding3; + struct llog_rec_tail lur_tail; +} __packed; + +struct llog_setattr64_rec { + struct llog_rec_hdr lsr_hdr; + struct ost_id lsr_oi; + __u32 lsr_uid; + __u32 lsr_uid_h; + __u32 lsr_gid; + __u32 lsr_gid_h; + __u64 lsr_valid; + struct llog_rec_tail lsr_tail; +} __packed; + +struct llog_size_change_rec { + struct llog_rec_hdr lsc_hdr; + struct ll_fid lsc_fid; + __u32 lsc_ioepoch; + __u32 lsc_padding1; + __u64 lsc_padding2; + __u64 lsc_padding3; + struct llog_rec_tail lsc_tail; +} __packed; + +/* changelog llog name, needed by client replicators */ +#define CHANGELOG_CATALOG "changelog_catalog" + +struct changelog_setinfo { + __u64 cs_recno; + __u32 cs_id; +} __packed; + +/** changelog record */ +struct llog_changelog_rec { + struct llog_rec_hdr cr_hdr; + struct changelog_rec cr; /**< Variable length field */ + struct llog_rec_tail cr_do_not_use; /**< for_sizezof_only */ +} __packed; + +struct llog_changelog_user_rec { + struct llog_rec_hdr cur_hdr; + __u32 cur_id; + __u32 cur_padding; + __u64 cur_endrec; + struct llog_rec_tail cur_tail; +} __packed; + +enum agent_req_status { + ARS_WAITING, + ARS_STARTED, + ARS_FAILED, + ARS_CANCELED, + ARS_SUCCEED, +}; + +static inline const char *agent_req_status2name(const enum agent_req_status ars) +{ + switch (ars) { + case ARS_WAITING: + return "WAITING"; + case ARS_STARTED: + return "STARTED"; + case ARS_FAILED: + return "FAILED"; + case ARS_CANCELED: + return "CANCELED"; + case ARS_SUCCEED: + return "SUCCEED"; + default: + return "UNKNOWN"; + } +} + +struct llog_agent_req_rec { + struct llog_rec_hdr arr_hdr; /**< record header */ + __u32 arr_status; /**< status of the request */ + /* must match enum + * agent_req_status + */ + __u32 arr_archive_id; /**< backend archive number */ + __u64 arr_flags; /**< req flags */ + __u64 arr_compound_id;/**< compound cookie */ + __u64 arr_req_create; /**< req. creation time */ + __u64 arr_req_change; /**< req. status change time */ + struct hsm_action_item arr_hai; /**< req. to the agent */ + struct llog_rec_tail arr_tail; /**< record tail for_sizezof_only */ +} __packed; + +/* Old llog gen for compatibility */ +struct llog_gen { + __u64 mnt_cnt; + __u64 conn_cnt; +} __packed; + +struct llog_gen_rec { + struct llog_rec_hdr lgr_hdr; + struct llog_gen lgr_gen; + __u64 padding1; + __u64 padding2; + __u64 padding3; + struct llog_rec_tail lgr_tail; +}; + +/* flags for the logs */ +enum llog_flag { + LLOG_F_ZAP_WHEN_EMPTY = 0x1, + LLOG_F_IS_CAT = 0x2, + LLOG_F_IS_PLAIN = 0x4, + LLOG_F_EXT_JOBID = 0x8, + LLOG_F_IS_FIXSIZE = 0x10, + + /* + * Note: Flags covered by LLOG_F_EXT_MASK will be inherited from + * catlog to plain log, so do not add LLOG_F_IS_FIXSIZE here, + * because the catlog record is usually fixed size, but its plain + * log record can be variable + */ + LLOG_F_EXT_MASK = LLOG_F_EXT_JOBID, +}; + +/* On-disk header structure of each log object, stored in little endian order */ +#define LLOG_MIN_CHUNK_SIZE 8192 +#define LLOG_HEADER_SIZE (96) /* sizeof (llog_log_hdr) + + * sizeof(llh_tail) - sizeof(llh_bitmap) + */ +#define LLOG_BITMAP_BYTES (LLOG_MIN_CHUNK_SIZE - LLOG_HEADER_SIZE) +#define LLOG_MIN_REC_SIZE (24) /* round(llog_rec_hdr + llog_rec_tail) */ + +/* flags for the logs */ +struct llog_log_hdr { + struct llog_rec_hdr llh_hdr; + __s64 llh_timestamp; + __u32 llh_count; + __u32 llh_bitmap_offset; + __u32 llh_size; + __u32 llh_flags; + __u32 llh_cat_idx; + /* for a catalog the first plain slot is next to it */ + struct obd_uuid llh_tgtuuid; + __u32 llh_reserved[LLOG_HEADER_SIZE / sizeof(__u32) - 23]; + /* These fields must always be at the end of the llog_log_hdr. + * Note: llh_bitmap size is variable because llog chunk size could be + * bigger than LLOG_MIN_CHUNK_SIZE, i.e. sizeof(llog_log_hdr) > 8192 + * bytes, and the real size is stored in llh_hdr.lrh_len, which means + * llh_tail should only be referred by LLOG_HDR_TAIL(). + * But this structure is also used by client/server llog interface + * (see llog_client.c), it will be kept in its original way to avoid + * compatibility issue. + */ + __u32 llh_bitmap[LLOG_BITMAP_BYTES / sizeof(__u32)]; + struct llog_rec_tail llh_tail; +} __packed; + +#undef LLOG_HEADER_SIZE +#undef LLOG_BITMAP_BYTES + +#define LLOG_HDR_BITMAP_SIZE(llh) (__u32)((llh->llh_hdr.lrh_len - \ + llh->llh_bitmap_offset - \ + sizeof(llh->llh_tail)) * 8) +#define LLOG_HDR_BITMAP(llh) (__u32 *)((char *)(llh) + \ + (llh)->llh_bitmap_offset) +#define LLOG_HDR_TAIL(llh) ((struct llog_rec_tail *)((char *)llh + \ + llh->llh_hdr.lrh_len - \ + sizeof(llh->llh_tail))) + +/** log cookies are used to reference a specific log file and a record + * therein + */ +struct llog_cookie { + struct llog_logid lgc_lgl; + __u32 lgc_subsys; + __u32 lgc_index; + __u32 lgc_padding; +} __packed; + +/** llog protocol */ +enum llogd_rpc_ops { + LLOG_ORIGIN_HANDLE_CREATE = 501, + LLOG_ORIGIN_HANDLE_NEXT_BLOCK = 502, + LLOG_ORIGIN_HANDLE_READ_HEADER = 503, + LLOG_ORIGIN_HANDLE_WRITE_REC = 504, + LLOG_ORIGIN_HANDLE_CLOSE = 505, + LLOG_ORIGIN_CONNECT = 506, + LLOG_CATINFO = 507, /* deprecated */ + LLOG_ORIGIN_HANDLE_PREV_BLOCK = 508, + LLOG_ORIGIN_HANDLE_DESTROY = 509, /* for destroy llog object*/ + LLOG_LAST_OPC, + LLOG_FIRST_OPC = LLOG_ORIGIN_HANDLE_CREATE +}; + +struct llogd_body { + struct llog_logid lgd_logid; + __u32 lgd_ctxt_idx; + __u32 lgd_llh_flags; + __u32 lgd_index; + __u32 lgd_saved_index; + __u32 lgd_len; + __u64 lgd_cur_offset; +} __packed; + +struct llogd_conn_body { + struct llog_gen lgdc_gen; + struct llog_logid lgdc_logid; + __u32 lgdc_ctxt_idx; +} __packed; + +/* Note: 64-bit types are 64-bit aligned in structure */ +struct obdo { + __u64 o_valid; /* hot fields in this obdo */ + struct ost_id o_oi; + __u64 o_parent_seq; + __u64 o_size; /* o_size-o_blocks == ost_lvb */ + __s64 o_mtime; + __s64 o_atime; + __s64 o_ctime; + __u64 o_blocks; /* brw: cli sent cached bytes */ + __u64 o_grant; + + /* 32-bit fields start here: keep an even number of them via padding */ + __u32 o_blksize; /* optimal IO blocksize */ + __u32 o_mode; /* brw: cli sent cache remain */ + __u32 o_uid; + __u32 o_gid; + __u32 o_flags; + __u32 o_nlink; /* brw: checksum */ + __u32 o_parent_oid; + __u32 o_misc; /* brw: o_dropped */ + + __u64 o_ioepoch; /* epoch in ost writes */ + __u32 o_stripe_idx; /* holds stripe idx */ + __u32 o_parent_ver; + struct lustre_handle o_handle; /* brw: lock handle to prolong locks + */ + struct llog_cookie o_lcookie; /* destroy: unlink cookie from MDS, + * obsolete in 2.8, reused in OSP + */ + __u32 o_uid_h; + __u32 o_gid_h; + + __u64 o_data_version; /* getattr: sum of iversion for + * each stripe. + * brw: grant space consumed on + * the client for the write + */ + __u64 o_padding_4; + __u64 o_padding_5; + __u64 o_padding_6; +}; + +#define o_dirty o_blocks +#define o_undirty o_mode +#define o_dropped o_misc +#define o_cksum o_nlink +#define o_grant_used o_data_version + +/* request structure for OST's */ +struct ost_body { + struct obdo oa; +}; + +/* Key for FIEMAP to be used in get_info calls */ +struct ll_fiemap_info_key { + char lfik_name[8]; + struct obdo lfik_oa; + struct fiemap lfik_fiemap; +}; + +/* security opcodes */ +enum sec_cmd { + SEC_CTX_INIT = 801, + SEC_CTX_INIT_CONT = 802, + SEC_CTX_FINI = 803, + SEC_LAST_OPC, + SEC_FIRST_OPC = SEC_CTX_INIT +}; + +/* + * capa related definitions + */ +#define CAPA_HMAC_MAX_LEN 64 +#define CAPA_HMAC_KEY_MAX_LEN 56 + +/* NB take care when changing the sequence of elements this struct, + * because the offset info is used in find_capa() + */ +struct lustre_capa { + struct lu_fid lc_fid; /** fid */ + __u64 lc_opc; /** operations allowed */ + __u64 lc_uid; /** file owner */ + __u64 lc_gid; /** file group */ + __u32 lc_flags; /** HMAC algorithm & flags */ + __u32 lc_keyid; /** key# used for the capability */ + __u32 lc_timeout; /** capa timeout value (sec) */ +/* FIXME: y2038 time_t overflow: */ + __u32 lc_expiry; /** expiry time (sec) */ + __u8 lc_hmac[CAPA_HMAC_MAX_LEN]; /** HMAC */ +} __packed; + +/** lustre_capa::lc_opc */ +enum { + CAPA_OPC_BODY_WRITE = 1 << 0, /**< write object data */ + CAPA_OPC_BODY_READ = 1 << 1, /**< read object data */ + CAPA_OPC_INDEX_LOOKUP = 1 << 2, /**< lookup object fid */ + CAPA_OPC_INDEX_INSERT = 1 << 3, /**< insert object fid */ + CAPA_OPC_INDEX_DELETE = 1 << 4, /**< delete object fid */ + CAPA_OPC_OSS_WRITE = 1 << 5, /**< write oss object data */ + CAPA_OPC_OSS_READ = 1 << 6, /**< read oss object data */ + CAPA_OPC_OSS_TRUNC = 1 << 7, /**< truncate oss object */ + CAPA_OPC_OSS_DESTROY = 1 << 8, /**< destroy oss object */ + CAPA_OPC_META_WRITE = 1 << 9, /**< write object meta data */ + CAPA_OPC_META_READ = 1 << 10, /**< read object meta data */ +}; + +#define CAPA_OPC_OSS_RW (CAPA_OPC_OSS_READ | CAPA_OPC_OSS_WRITE) +#define CAPA_OPC_MDS_ONLY \ + (CAPA_OPC_BODY_WRITE | CAPA_OPC_BODY_READ | CAPA_OPC_INDEX_LOOKUP | \ + CAPA_OPC_INDEX_INSERT | CAPA_OPC_INDEX_DELETE) +#define CAPA_OPC_OSS_ONLY \ + (CAPA_OPC_OSS_WRITE | CAPA_OPC_OSS_READ | CAPA_OPC_OSS_TRUNC | \ + CAPA_OPC_OSS_DESTROY) +#define CAPA_OPC_MDS_DEFAULT ~CAPA_OPC_OSS_ONLY +#define CAPA_OPC_OSS_DEFAULT ~(CAPA_OPC_MDS_ONLY | CAPA_OPC_OSS_ONLY) + +struct lustre_capa_key { + __u64 lk_seq; /**< mds# */ + __u32 lk_keyid; /**< key# */ + __u32 lk_padding; + __u8 lk_key[CAPA_HMAC_KEY_MAX_LEN]; /**< key */ +} __packed; + +/** The link ea holds 1 \a link_ea_entry for each hardlink */ +#define LINK_EA_MAGIC 0x11EAF1DFUL +struct link_ea_header { + __u32 leh_magic; + __u32 leh_reccount; + __u64 leh_len; /* total size */ + __u32 leh_overflow_time; + __u32 leh_padding; +}; + +/** Hardlink data is name and parent fid. + * Stored in this crazy struct for maximum packing and endian-neutrality + */ +struct link_ea_entry { + /** __u16 stored big-endian, unaligned */ + unsigned char lee_reclen[2]; + unsigned char lee_parent_fid[sizeof(struct lu_fid)]; + char lee_name[0]; +} __packed; + +/** fid2path request/reply structure */ +struct getinfo_fid2path { + struct lu_fid gf_fid; + __u64 gf_recno; + __u32 gf_linkno; + __u32 gf_pathlen; + char gf_path[0]; +} __packed; + +/** path2parent request/reply structures */ +struct getparent { + struct lu_fid gp_fid; /**< parent FID */ + __u32 gp_linkno; /**< hardlink number */ + __u32 gp_name_size; /**< size of the name field */ + char gp_name[0]; /**< zero-terminated link name */ +} __packed; + +enum { + LAYOUT_INTENT_ACCESS = 0, + LAYOUT_INTENT_READ = 1, + LAYOUT_INTENT_WRITE = 2, + LAYOUT_INTENT_GLIMPSE = 3, + LAYOUT_INTENT_TRUNC = 4, + LAYOUT_INTENT_RELEASE = 5, + LAYOUT_INTENT_RESTORE = 6 +}; + +/* enqueue layout lock with intent */ +struct layout_intent { + __u32 li_opc; /* intent operation for enqueue, read, write etc */ + __u32 li_flags; + __u64 li_start; + __u64 li_end; +}; + +/** + * On the wire version of hsm_progress structure. + * + * Contains the userspace hsm_progress and some internal fields. + */ +struct hsm_progress_kernel { + /* Field taken from struct hsm_progress */ + struct lu_fid hpk_fid; + __u64 hpk_cookie; + struct hsm_extent hpk_extent; + __u16 hpk_flags; + __u16 hpk_errval; /* positive val */ + __u32 hpk_padding1; + /* Additional fields */ + __u64 hpk_data_version; + __u64 hpk_padding2; +} __packed; + +/** layout swap request structure + * fid1 and fid2 are in mdt_body + */ +struct mdc_swap_layouts { + __u64 msl_flags; +} __packed; + +struct close_data { + struct lustre_handle cd_handle; + struct lu_fid cd_fid; + __u64 cd_data_version; + __u64 cd_reserved[8]; +}; + +#endif +/** @} lustreidl */ diff --git a/drivers/staging/lustre/include/uapi/linux/lustre/lustre_ioctl.h b/drivers/staging/lustre/include/uapi/linux/lustre/lustre_ioctl.h new file mode 100644 index 000000000000..9590864e0b50 --- /dev/null +++ b/drivers/staging/lustre/include/uapi/linux/lustre/lustre_ioctl.h @@ -0,0 +1,231 @@ +/* + * GPL HEADER START + * + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 only, + * as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License version 2 for more details (a copy is included + * in the LICENSE file that accompanied this code). + * + * You should have received a copy of the GNU General Public License + * version 2 along with this program; If not, see + * http://www.gnu.org/licenses/gpl-2.0.html + * + * GPL HEADER END + */ +/* + * Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved. + * Use is subject to license terms. + * + * Copyright (c) 2011, 2015, Intel Corporation. + */ +#ifndef _UAPI_LUSTRE_IOCTL_H_ +#define _UAPI_LUSTRE_IOCTL_H_ + +#include <linux/ioctl.h> +#include <linux/kernel.h> +#include <linux/types.h> +#include <uapi/linux/lustre/lustre_idl.h> + +#if !defined(__KERNEL__) && !defined(LUSTRE_UTILS) +# error This file is for Lustre internal use only. +#endif + +enum md_echo_cmd { + ECHO_MD_CREATE = 1, /* Open/Create file on MDT */ + ECHO_MD_MKDIR = 2, /* Mkdir on MDT */ + ECHO_MD_DESTROY = 3, /* Unlink file on MDT */ + ECHO_MD_RMDIR = 4, /* Rmdir on MDT */ + ECHO_MD_LOOKUP = 5, /* Lookup on MDT */ + ECHO_MD_GETATTR = 6, /* Getattr on MDT */ + ECHO_MD_SETATTR = 7, /* Setattr on MDT */ + ECHO_MD_ALLOC_FID = 8, /* Get FIDs from MDT */ +}; + +#define OBD_DEV_ID 1 +#define OBD_DEV_NAME "obd" +#define OBD_DEV_PATH "/dev/" OBD_DEV_NAME +#define OBD_DEV_MAJOR 10 +#define OBD_DEV_MINOR 241 + +#define OBD_IOCTL_VERSION 0x00010004 +#define OBD_DEV_BY_DEVNAME 0xffffd0de + +struct obd_ioctl_data { + __u32 ioc_len; + __u32 ioc_version; + + union { + __u64 ioc_cookie; + __u64 ioc_u64_1; + }; + union { + __u32 ioc_conn1; + __u32 ioc_u32_1; + }; + union { + __u32 ioc_conn2; + __u32 ioc_u32_2; + }; + + struct obdo ioc_obdo1; + struct obdo ioc_obdo2; + + __u64 ioc_count; + __u64 ioc_offset; + __u32 ioc_dev; + __u32 ioc_command; + + __u64 ioc_nid; + __u32 ioc_nal; + __u32 ioc_type; + + /* buffers the kernel will treat as user pointers */ + __u32 ioc_plen1; + char __user *ioc_pbuf1; + __u32 ioc_plen2; + char __user *ioc_pbuf2; + + /* inline buffers for various arguments */ + __u32 ioc_inllen1; + char *ioc_inlbuf1; + __u32 ioc_inllen2; + char *ioc_inlbuf2; + __u32 ioc_inllen3; + char *ioc_inlbuf3; + __u32 ioc_inllen4; + char *ioc_inlbuf4; + + char ioc_bulk[0]; +}; + +struct obd_ioctl_hdr { + __u32 ioc_len; + __u32 ioc_version; +}; + +static inline __u32 obd_ioctl_packlen(struct obd_ioctl_data *data) +{ + __u32 len = __ALIGN_KERNEL(sizeof(*data), 8); + + len += __ALIGN_KERNEL(data->ioc_inllen1, 8); + len += __ALIGN_KERNEL(data->ioc_inllen2, 8); + len += __ALIGN_KERNEL(data->ioc_inllen3, 8); + len += __ALIGN_KERNEL(data->ioc_inllen4, 8); + + return len; +} + +/* + * OBD_IOC_DATA_TYPE is only for compatibility reasons with older + * Linux Lustre user tools. New ioctls should NOT use this macro as + * the ioctl "size". Instead the ioctl should get a "size" argument + * which is the actual data type used by the ioctl, to ensure the + * ioctl interface is versioned correctly. + */ +#define OBD_IOC_DATA_TYPE long + +/* IOC_LDLM_TEST _IOWR('f', 40, long) */ +/* IOC_LDLM_DUMP _IOWR('f', 41, long) */ +/* IOC_LDLM_REGRESS_START _IOWR('f', 42, long) */ +/* IOC_LDLM_REGRESS_STOP _IOWR('f', 43, long) */ + +#define OBD_IOC_CREATE _IOWR('f', 101, OBD_IOC_DATA_TYPE) +#define OBD_IOC_DESTROY _IOW('f', 104, OBD_IOC_DATA_TYPE) +/* OBD_IOC_PREALLOCATE _IOWR('f', 105, OBD_IOC_DATA_TYPE) */ + +#define OBD_IOC_SETATTR _IOW('f', 107, OBD_IOC_DATA_TYPE) +#define OBD_IOC_GETATTR _IOWR('f', 108, OBD_IOC_DATA_TYPE) +#define OBD_IOC_READ _IOWR('f', 109, OBD_IOC_DATA_TYPE) +#define OBD_IOC_WRITE _IOWR('f', 110, OBD_IOC_DATA_TYPE) + +#define OBD_IOC_STATFS _IOWR('f', 113, OBD_IOC_DATA_TYPE) +#define OBD_IOC_SYNC _IOW('f', 114, OBD_IOC_DATA_TYPE) +/* OBD_IOC_READ2 _IOWR('f', 115, OBD_IOC_DATA_TYPE) */ +/* OBD_IOC_FORMAT _IOWR('f', 116, OBD_IOC_DATA_TYPE) */ +/* OBD_IOC_PARTITION _IOWR('f', 117, OBD_IOC_DATA_TYPE) */ +/* OBD_IOC_COPY _IOWR('f', 120, OBD_IOC_DATA_TYPE) */ +/* OBD_IOC_MIGR _IOWR('f', 121, OBD_IOC_DATA_TYPE) */ +/* OBD_IOC_PUNCH _IOWR('f', 122, OBD_IOC_DATA_TYPE) */ + +/* OBD_IOC_MODULE_DEBUG _IOWR('f', 124, OBD_IOC_DATA_TYPE) */ +#define OBD_IOC_BRW_READ _IOWR('f', 125, OBD_IOC_DATA_TYPE) +#define OBD_IOC_BRW_WRITE _IOWR('f', 126, OBD_IOC_DATA_TYPE) +#define OBD_IOC_NAME2DEV _IOWR('f', 127, OBD_IOC_DATA_TYPE) +#define OBD_IOC_UUID2DEV _IOWR('f', 130, OBD_IOC_DATA_TYPE) +#define OBD_IOC_GETNAME _IOWR('f', 131, OBD_IOC_DATA_TYPE) +#define OBD_IOC_GETMDNAME _IOR('f', 131, char[MAX_OBD_NAME]) +#define OBD_IOC_GETDTNAME OBD_IOC_GETNAME +#define OBD_IOC_LOV_GET_CONFIG _IOWR('f', 132, OBD_IOC_DATA_TYPE) +#define OBD_IOC_CLIENT_RECOVER _IOW('f', 133, OBD_IOC_DATA_TYPE) +#define OBD_IOC_PING_TARGET _IOW('f', 136, OBD_IOC_DATA_TYPE) + +/* OBD_IOC_DEC_FS_USE_COUNT _IO('f', 139) */ +#define OBD_IOC_NO_TRANSNO _IOW('f', 140, OBD_IOC_DATA_TYPE) +#define OBD_IOC_SET_READONLY _IOW('f', 141, OBD_IOC_DATA_TYPE) +#define OBD_IOC_ABORT_RECOVERY _IOR('f', 142, OBD_IOC_DATA_TYPE) +/* OBD_IOC_ROOT_SQUASH _IOWR('f', 143, OBD_IOC_DATA_TYPE) */ +#define OBD_GET_VERSION _IOWR('f', 144, OBD_IOC_DATA_TYPE) +/* OBD_IOC_GSS_SUPPORT _IOWR('f', 145, OBD_IOC_DATA_TYPE) */ +/* OBD_IOC_CLOSE_UUID _IOWR('f', 147, OBD_IOC_DATA_TYPE) */ +#define OBD_IOC_CHANGELOG_SEND _IOW('f', 148, OBD_IOC_DATA_TYPE) +#define OBD_IOC_GETDEVICE _IOWR('f', 149, OBD_IOC_DATA_TYPE) +#define OBD_IOC_FID2PATH _IOWR('f', 150, OBD_IOC_DATA_TYPE) +/* lustre/lustre_user.h 151-153 */ +/* OBD_IOC_LOV_SETSTRIPE 154 LL_IOC_LOV_SETSTRIPE */ +/* OBD_IOC_LOV_GETSTRIPE 155 LL_IOC_LOV_GETSTRIPE */ +/* OBD_IOC_LOV_SETEA 156 LL_IOC_LOV_SETEA */ +/* lustre/lustre_user.h 157-159 */ +/* OBD_IOC_QUOTACHECK _IOW('f', 160, int) */ +/* OBD_IOC_POLL_QUOTACHECK _IOR('f', 161, struct if_quotacheck *) */ +#define OBD_IOC_QUOTACTL _IOWR('f', 162, struct if_quotactl) +/* lustre/lustre_user.h 163-176 */ +#define OBD_IOC_CHANGELOG_REG _IOW('f', 177, struct obd_ioctl_data) +#define OBD_IOC_CHANGELOG_DEREG _IOW('f', 178, struct obd_ioctl_data) +#define OBD_IOC_CHANGELOG_CLEAR _IOW('f', 179, struct obd_ioctl_data) +/* OBD_IOC_RECORD _IOWR('f', 180, OBD_IOC_DATA_TYPE) */ +/* OBD_IOC_ENDRECORD _IOWR('f', 181, OBD_IOC_DATA_TYPE) */ +/* OBD_IOC_PARSE _IOWR('f', 182, OBD_IOC_DATA_TYPE) */ +/* OBD_IOC_DORECORD _IOWR('f', 183, OBD_IOC_DATA_TYPE) */ +#define OBD_IOC_PROCESS_CFG _IOWR('f', 184, OBD_IOC_DATA_TYPE) +/* OBD_IOC_DUMP_LOG _IOWR('f', 185, OBD_IOC_DATA_TYPE) */ +/* OBD_IOC_CLEAR_LOG _IOWR('f', 186, OBD_IOC_DATA_TYPE) */ +#define OBD_IOC_PARAM _IOW('f', 187, OBD_IOC_DATA_TYPE) +#define OBD_IOC_POOL _IOWR('f', 188, OBD_IOC_DATA_TYPE) +#define OBD_IOC_REPLACE_NIDS _IOWR('f', 189, OBD_IOC_DATA_TYPE) + +#define OBD_IOC_CATLOGLIST _IOWR('f', 190, OBD_IOC_DATA_TYPE) +#define OBD_IOC_LLOG_INFO _IOWR('f', 191, OBD_IOC_DATA_TYPE) +#define OBD_IOC_LLOG_PRINT _IOWR('f', 192, OBD_IOC_DATA_TYPE) +#define OBD_IOC_LLOG_CANCEL _IOWR('f', 193, OBD_IOC_DATA_TYPE) +#define OBD_IOC_LLOG_REMOVE _IOWR('f', 194, OBD_IOC_DATA_TYPE) +#define OBD_IOC_LLOG_CHECK _IOWR('f', 195, OBD_IOC_DATA_TYPE) +/* OBD_IOC_LLOG_CATINFO _IOWR('f', 196, OBD_IOC_DATA_TYPE) */ +#define OBD_IOC_NODEMAP _IOWR('f', 197, OBD_IOC_DATA_TYPE) + +/* ECHO_IOC_GET_STRIPE _IOWR('f', 200, OBD_IOC_DATA_TYPE) */ +/* ECHO_IOC_SET_STRIPE _IOWR('f', 201, OBD_IOC_DATA_TYPE) */ +/* ECHO_IOC_ENQUEUE _IOWR('f', 202, OBD_IOC_DATA_TYPE) */ +/* ECHO_IOC_CANCEL _IOWR('f', 203, OBD_IOC_DATA_TYPE) */ + +#define OBD_IOC_GET_OBJ_VERSION _IOR('f', 210, OBD_IOC_DATA_TYPE) + +/* lustre/lustre_user.h 212-217 */ +#define OBD_IOC_GET_MNTOPT _IOW('f', 220, mntopt_t) +#define OBD_IOC_ECHO_MD _IOR('f', 221, struct obd_ioctl_data) +#define OBD_IOC_ECHO_ALLOC_SEQ _IOWR('f', 222, struct obd_ioctl_data) +#define OBD_IOC_START_LFSCK _IOWR('f', 230, OBD_IOC_DATA_TYPE) +#define OBD_IOC_STOP_LFSCK _IOW('f', 231, OBD_IOC_DATA_TYPE) +#define OBD_IOC_QUERY_LFSCK _IOR('f', 232, struct obd_ioctl_data) +/* lustre/lustre_user.h 240-249 */ +/* LIBCFS_IOC_DEBUG_MASK 250 */ + +#define IOC_OSC_SET_ACTIVE _IOWR('h', 21, void *) + +#endif /* _UAPI_LUSTRE_IOCTL_H_ */ diff --git a/drivers/staging/lustre/include/uapi/linux/lustre/lustre_kernelcomm.h b/drivers/staging/lustre/include/uapi/linux/lustre/lustre_kernelcomm.h new file mode 100644 index 000000000000..94dadbe8e069 --- /dev/null +++ b/drivers/staging/lustre/include/uapi/linux/lustre/lustre_kernelcomm.h @@ -0,0 +1,94 @@ +/* + * GPL HEADER START + * + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 only, + * as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License version 2 for more details (a copy is included + * in the LICENSE file that accompanied this code). + * + * You should have received a copy of the GNU General Public License + * version 2 along with this program; If not, see + * http://www.gnu.org/licenses/gpl-2.0.html + * + * GPL HEADER END + */ +/* + * Copyright (c) 2009, 2010, Oracle and/or its affiliates. All rights reserved. + * Use is subject to license terms. + * + * Copyright (c) 2013, Intel Corporation. + */ +/* + * This file is part of Lustre, http://www.lustre.org/ + * + * Author: Nathan Rutman <nathan.rutman@sun.com> + * + * Kernel <-> userspace communication routines. + * The definitions below are used in the kernel and userspace. + */ + +#ifndef __UAPI_LUSTRE_KERNELCOMM_H__ +#define __UAPI_LUSTRE_KERNELCOMM_H__ + +#include <linux/types.h> + +/* KUC message header. + * All current and future KUC messages should use this header. + * To avoid having to include Lustre headers from libcfs, define this here. + */ +struct kuc_hdr { + __u16 kuc_magic; + /* Each new Lustre feature should use a different transport */ + __u8 kuc_transport; + __u8 kuc_flags; + /* Message type or opcode, transport-specific */ + __u16 kuc_msgtype; + /* Including header */ + __u16 kuc_msglen; +} __aligned(sizeof(__u64)); + +#define KUC_CHANGELOG_MSG_MAXSIZE (sizeof(struct kuc_hdr) + CR_MAXSIZE) + +#define KUC_MAGIC 0x191C /*Lustre9etLinC */ + +/* kuc_msgtype values are defined in each transport */ +enum kuc_transport_type { + KUC_TRANSPORT_GENERIC = 1, + KUC_TRANSPORT_HSM = 2, + KUC_TRANSPORT_CHANGELOG = 3, +}; + +enum kuc_generic_message_type { + KUC_MSG_SHUTDOWN = 1, +}; + +/* KUC Broadcast Groups. This determines which userspace process hears which + * messages. Mutliple transports may be used within a group, or multiple + * groups may use the same transport. Broadcast + * groups need not be used if e.g. a UID is specified instead; + * use group 0 to signify unicast. + */ +#define KUC_GRP_HSM 0x02 +#define KUC_GRP_MAX KUC_GRP_HSM + +#define LK_FLG_STOP 0x01 +#define LK_NOFD -1U + +/* kernelcomm control structure, passed from userspace to kernel */ +struct lustre_kernelcomm { + __u32 lk_wfd; + __u32 lk_rfd; + __u32 lk_uid; + __u32 lk_group; + __u32 lk_data; + __u32 lk_flags; +} __packed; + +#endif /* __UAPI_LUSTRE_KERNELCOMM_H__ */ diff --git a/drivers/staging/lustre/include/uapi/linux/lustre/lustre_ostid.h b/drivers/staging/lustre/include/uapi/linux/lustre/lustre_ostid.h new file mode 100644 index 000000000000..3343b602219b --- /dev/null +++ b/drivers/staging/lustre/include/uapi/linux/lustre/lustre_ostid.h @@ -0,0 +1,236 @@ +/* + * GPL HEADER START + * + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 only, + * as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License version 2 for more details (a copy is included + * in the LICENSE file that accompanied this code). + * + * You should have received a copy of the GNU General Public License + * version 2 along with this program; If not, see + * http://www.gnu.org/licenses/gpl-2.0.html + * + * GPL HEADER END + */ +/* + * Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved. + * Use is subject to license terms. + * + * Copyright (c) 2011, 2014, Intel Corporation. + * + * Copyright 2015 Cray Inc, all rights reserved. + * Author: Ben Evans. + * + * Define ost_id associated functions + */ + +#ifndef _UAPI_LUSTRE_OSTID_H_ +#define _UAPI_LUSTRE_OSTID_H_ + +#include <linux/errno.h> +#include <uapi/linux/lustre/lustre_fid.h> + +static inline __u64 lmm_oi_id(const struct ost_id *oi) +{ + return oi->oi.oi_id; +} + +static inline __u64 lmm_oi_seq(const struct ost_id *oi) +{ + return oi->oi.oi_seq; +} + +static inline void lmm_oi_set_seq(struct ost_id *oi, __u64 seq) +{ + oi->oi.oi_seq = seq; +} + +static inline void lmm_oi_set_id(struct ost_id *oi, __u64 oid) +{ + oi->oi.oi_id = oid; +} + +static inline void lmm_oi_le_to_cpu(struct ost_id *dst_oi, + const struct ost_id *src_oi) +{ + dst_oi->oi.oi_id = __le64_to_cpu(src_oi->oi.oi_id); + dst_oi->oi.oi_seq = __le64_to_cpu(src_oi->oi.oi_seq); +} + +static inline void lmm_oi_cpu_to_le(struct ost_id *dst_oi, + const struct ost_id *src_oi) +{ + dst_oi->oi.oi_id = __cpu_to_le64(src_oi->oi.oi_id); + dst_oi->oi.oi_seq = __cpu_to_le64(src_oi->oi.oi_seq); +} + +/* extract OST sequence (group) from a wire ost_id (id/seq) pair */ +static inline __u64 ostid_seq(const struct ost_id *ostid) +{ + if (fid_seq_is_mdt0(ostid->oi.oi_seq)) + return FID_SEQ_OST_MDT0; + + if (fid_seq_is_default(ostid->oi.oi_seq)) + return FID_SEQ_LOV_DEFAULT; + + if (fid_is_idif(&ostid->oi_fid)) + return FID_SEQ_OST_MDT0; + + return fid_seq(&ostid->oi_fid); +} + +/* extract OST objid from a wire ost_id (id/seq) pair */ +static inline __u64 ostid_id(const struct ost_id *ostid) +{ + if (fid_seq_is_mdt0(ostid->oi.oi_seq)) + return ostid->oi.oi_id & IDIF_OID_MASK; + + if (fid_seq_is_default(ostid->oi.oi_seq)) + return ostid->oi.oi_id; + + if (fid_is_idif(&ostid->oi_fid)) + return fid_idif_id(fid_seq(&ostid->oi_fid), + fid_oid(&ostid->oi_fid), 0); + + return fid_oid(&ostid->oi_fid); +} + +static inline void ostid_set_seq(struct ost_id *oi, __u64 seq) +{ + if (fid_seq_is_mdt0(seq) || fid_seq_is_default(seq)) { + oi->oi.oi_seq = seq; + } else { + oi->oi_fid.f_seq = seq; + /* + * Note: if f_oid + f_ver is zero, we need init it + * to be 1, otherwise, ostid_seq will treat this + * as old ostid (oi_seq == 0) + */ + if (!oi->oi_fid.f_oid && !oi->oi_fid.f_ver) + oi->oi_fid.f_oid = LUSTRE_FID_INIT_OID; + } +} + +static inline void ostid_set_seq_mdt0(struct ost_id *oi) +{ + ostid_set_seq(oi, FID_SEQ_OST_MDT0); +} + +static inline void ostid_set_seq_echo(struct ost_id *oi) +{ + ostid_set_seq(oi, FID_SEQ_ECHO); +} + +static inline void ostid_set_seq_llog(struct ost_id *oi) +{ + ostid_set_seq(oi, FID_SEQ_LLOG); +} + +static inline void ostid_cpu_to_le(const struct ost_id *src_oi, + struct ost_id *dst_oi) +{ + if (fid_seq_is_mdt0(src_oi->oi.oi_seq)) { + dst_oi->oi.oi_id = __cpu_to_le64(src_oi->oi.oi_id); + dst_oi->oi.oi_seq = __cpu_to_le64(src_oi->oi.oi_seq); + } else { + fid_cpu_to_le(&dst_oi->oi_fid, &src_oi->oi_fid); + } +} + +static inline void ostid_le_to_cpu(const struct ost_id *src_oi, + struct ost_id *dst_oi) +{ + if (fid_seq_is_mdt0(src_oi->oi.oi_seq)) { + dst_oi->oi.oi_id = __le64_to_cpu(src_oi->oi.oi_id); + dst_oi->oi.oi_seq = __le64_to_cpu(src_oi->oi.oi_seq); + } else { + fid_le_to_cpu(&dst_oi->oi_fid, &src_oi->oi_fid); + } +} + +/** + * Sigh, because pre-2.4 uses + * struct lov_mds_md_v1 { + * ........ + * __u64 lmm_object_id; + * __u64 lmm_object_seq; + * ...... + * } + * to identify the LOV(MDT) object, and lmm_object_seq will + * be normal_fid, which make it hard to combine these conversion + * to ostid_to FID. so we will do lmm_oi/fid conversion separately + * + * We can tell the lmm_oi by this way, + * 1.8: lmm_object_id = {inode}, lmm_object_gr = 0 + * 2.1: lmm_object_id = {oid < 128k}, lmm_object_seq = FID_SEQ_NORMAL + * 2.4: lmm_oi.f_seq = FID_SEQ_NORMAL, lmm_oi.f_oid = {oid < 128k}, + * lmm_oi.f_ver = 0 + * + * But currently lmm_oi/lsm_oi does not have any "real" usages, + * except for printing some information, and the user can always + * get the real FID from LMA, besides this multiple case check might + * make swab more complicate. So we will keep using id/seq for lmm_oi. + */ + +static inline void fid_to_lmm_oi(const struct lu_fid *fid, + struct ost_id *oi) +{ + oi->oi.oi_id = fid_oid(fid); + oi->oi.oi_seq = fid_seq(fid); +} + +/** + * Unpack an OST object id/seq (group) into a FID. This is needed for + * converting all obdo, lmm, lsm, etc. 64-bit id/seq pairs into proper + * FIDs. Note that if an id/seq is already in FID/IDIF format it will + * be passed through unchanged. Only legacy OST objects in "group 0" + * will be mapped into the IDIF namespace so that they can fit into the + * struct lu_fid fields without loss. + */ +static inline int ostid_to_fid(struct lu_fid *fid, const struct ost_id *ostid, + __u32 ost_idx) +{ + __u64 seq = ostid_seq(ostid); + + if (ost_idx > 0xffff) + return -EBADF; + + if (fid_seq_is_mdt0(seq)) { + __u64 oid = ostid_id(ostid); + + /* This is a "legacy" (old 1.x/2.early) OST object in "group 0" + * that we map into the IDIF namespace. It allows up to 2^48 + * objects per OST, as this is the object namespace that has + * been in production for years. This can handle create rates + * of 1M objects/s/OST for 9 years, or combinations thereof. + */ + if (oid >= IDIF_MAX_OID) + return -EBADF; + + fid->f_seq = fid_idif_seq(oid, ost_idx); + /* truncate to 32 bits by assignment */ + fid->f_oid = oid; + /* in theory, not currently used */ + fid->f_ver = oid >> 48; + } else if (!fid_seq_is_default(seq)) { + /* This is either an IDIF object, which identifies objects + * across all OSTs, or a regular FID. The IDIF namespace + * maps legacy OST objects into the FID namespace. In both + * cases, we just pass the FID through, no conversion needed. + */ + if (ostid->oi_fid.f_ver) + return -EBADF; + + *fid = ostid->oi_fid; + } + + return 0; +} +#endif /* _UAPI_LUSTRE_OSTID_H_ */ diff --git a/drivers/staging/lustre/include/uapi/linux/lustre/lustre_param.h b/drivers/staging/lustre/include/uapi/linux/lustre/lustre_param.h new file mode 100644 index 000000000000..1eab2ceca338 --- /dev/null +++ b/drivers/staging/lustre/include/uapi/linux/lustre/lustre_param.h @@ -0,0 +1,94 @@ +/* + * GPL HEADER START + * + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 only, + * as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License version 2 for more details (a copy is included + * in the LICENSE file that accompanied this code). + * + * You should have received a copy of the GNU General Public License + * version 2 along with this program; If not, see + * http://www.gnu.org/licenses/gpl-2.0.html + * + * GPL HEADER END + */ +/* + * Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved. + * Use is subject to license terms. + * + * Copyright (c) 2011, 2015, Intel Corporation. + */ +/* + * This file is part of Lustre, http://www.lustre.org/ + * Lustre is a trademark of Sun Microsystems, Inc. + * + * User-settable parameter keys + * + * Author: Nathan Rutman <nathan@clusterfs.com> + */ + +#ifndef _UAPI_LUSTRE_PARAM_H_ +#define _UAPI_LUSTRE_PARAM_H_ + +/** \defgroup param param + * + * @{ + */ + +/****************** User-settable parameter keys *********************/ +/* e.g. + * tunefs.lustre --param="failover.node=192.168.0.13@tcp0" /dev/sda + * lctl conf_param testfs-OST0000 failover.node=3@elan,192.168.0.3@tcp0 + * ... testfs-MDT0000.lov.stripesize=4M + * ... testfs-OST0000.ost.client_cache_seconds=15 + * ... testfs.sys.timeout=<secs> + * ... testfs.llite.max_read_ahead_mb=16 + */ + +/* System global or special params not handled in obd's proc + * See mgs_write_log_sys() + */ +#define PARAM_TIMEOUT "timeout=" /* global */ +#define PARAM_LDLM_TIMEOUT "ldlm_timeout=" /* global */ +#define PARAM_AT_MIN "at_min=" /* global */ +#define PARAM_AT_MAX "at_max=" /* global */ +#define PARAM_AT_EXTRA "at_extra=" /* global */ +#define PARAM_AT_EARLY_MARGIN "at_early_margin=" /* global */ +#define PARAM_AT_HISTORY "at_history=" /* global */ +#define PARAM_JOBID_VAR "jobid_var=" /* global */ +#define PARAM_MGSNODE "mgsnode=" /* only at mounttime */ +#define PARAM_FAILNODE "failover.node=" /* add failover nid */ +#define PARAM_FAILMODE "failover.mode=" /* initial mount only */ +#define PARAM_ACTIVE "active=" /* activate/deactivate */ +#define PARAM_NETWORK "network=" /* bind on nid */ +#define PARAM_ID_UPCALL "identity_upcall=" /* identity upcall */ + +/* Prefixes for parameters handled by obd's proc methods (XXX_process_config) */ +#define PARAM_OST "ost." +#define PARAM_OSD "osd." +#define PARAM_OSC "osc." +#define PARAM_MDT "mdt." +#define PARAM_HSM "mdt.hsm." +#define PARAM_MDD "mdd." +#define PARAM_MDC "mdc." +#define PARAM_LLITE "llite." +#define PARAM_LOV "lov." +#define PARAM_LOD "lod." +#define PARAM_OSP "osp." +#define PARAM_SYS "sys." /* global */ +#define PARAM_SRPC "srpc." +#define PARAM_SRPC_FLVR "srpc.flavor." +#define PARAM_SRPC_UDESC "srpc.udesc.cli2mdt" +#define PARAM_SEC "security." +#define PARAM_QUOTA "quota." /* global */ + +/** @} param */ + +#endif /* _UAPI_LUSTRE_PARAM_H_ */ diff --git a/drivers/staging/lustre/include/uapi/linux/lustre/lustre_user.h b/drivers/staging/lustre/include/uapi/linux/lustre/lustre_user.h new file mode 100644 index 000000000000..5e332e3af68a --- /dev/null +++ b/drivers/staging/lustre/include/uapi/linux/lustre/lustre_user.h @@ -0,0 +1,1325 @@ +/* + * GPL HEADER START + * + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 only, + * as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License version 2 for more details (a copy is included + * in the LICENSE file that accompanied this code). + * + * You should have received a copy of the GNU General Public License + * version 2 along with this program; If not, see + * http://www.gnu.org/licenses/gpl-2.0.html + * + * GPL HEADER END + */ +/* + * Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved. + * Use is subject to license terms. + * + * Copyright (c) 2010, 2015, Intel Corporation. + */ +/* + * This file is part of Lustre, http://www.lustre.org/ + * Lustre is a trademark of Sun Microsystems, Inc. + * + * lustre/include/lustre/lustre_user.h + * + * Lustre public user-space interface definitions. + */ + +#ifndef _LUSTRE_USER_H +#define _LUSTRE_USER_H + +/** \defgroup lustreuser lustreuser + * + * @{ + */ + +#ifdef __KERNEL__ +# include <linux/fs.h> +# include <linux/quota.h> +# include <linux/sched/signal.h> +# include <linux/string.h> /* snprintf() */ +# include <linux/version.h> +#else /* !__KERNEL__ */ +# define NEED_QUOTA_DEFS +# include <stdio.h> /* snprintf() */ +# include <string.h> +# include <sys/quota.h> +# include <sys/stat.h> +#endif /* __KERNEL__ */ +#include <uapi/linux/lustre/lustre_fiemap.h> + +/* + * We need to always use 64bit version because the structure + * is shared across entire cluster where 32bit and 64bit machines + * are co-existing. + */ +#if __BITS_PER_LONG != 64 || defined(__ARCH_WANT_STAT64) +typedef struct stat64 lstat_t; +#define lstat_f lstat64 +#define fstat_f fstat64 +#define fstatat_f fstatat64 +#else +typedef struct stat lstat_t; +#define lstat_f lstat +#define fstat_f fstat +#define fstatat_f fstatat +#endif + +#define HAVE_LOV_USER_MDS_DATA + +#define LUSTRE_EOF 0xffffffffffffffffULL + +/* for statfs() */ +#define LL_SUPER_MAGIC 0x0BD00BD0 + +#ifndef FSFILT_IOC_GETFLAGS +#define FSFILT_IOC_GETFLAGS _IOR('f', 1, long) +#define FSFILT_IOC_SETFLAGS _IOW('f', 2, long) +#define FSFILT_IOC_GETVERSION _IOR('f', 3, long) +#define FSFILT_IOC_SETVERSION _IOW('f', 4, long) +#define FSFILT_IOC_GETVERSION_OLD _IOR('v', 1, long) +#define FSFILT_IOC_SETVERSION_OLD _IOW('v', 2, long) +#endif + +/* FIEMAP flags supported by Lustre */ +#define LUSTRE_FIEMAP_FLAGS_COMPAT (FIEMAP_FLAG_SYNC | FIEMAP_FLAG_DEVICE_ORDER) + +enum obd_statfs_state { + OS_STATE_DEGRADED = 0x00000001, /**< RAID degraded/rebuilding */ + OS_STATE_READONLY = 0x00000002, /**< filesystem is read-only */ + OS_STATE_RDONLY_1 = 0x00000004, /**< obsolete 1.6, was EROFS=30 */ + OS_STATE_RDONLY_2 = 0x00000008, /**< obsolete 1.6, was EROFS=30 */ + OS_STATE_RDONLY_3 = 0x00000010, /**< obsolete 1.6, was EROFS=30 */ +}; + +struct obd_statfs { + __u64 os_type; + __u64 os_blocks; + __u64 os_bfree; + __u64 os_bavail; + __u64 os_files; + __u64 os_ffree; + __u8 os_fsid[40]; + __u32 os_bsize; + __u32 os_namelen; + __u64 os_maxbytes; + __u32 os_state; /**< obd_statfs_state OS_STATE_* flag */ + __u32 os_fprecreated; /* objs available now to the caller */ + /* used in QoS code to find preferred OSTs */ + __u32 os_spare2; + __u32 os_spare3; + __u32 os_spare4; + __u32 os_spare5; + __u32 os_spare6; + __u32 os_spare7; + __u32 os_spare8; + __u32 os_spare9; +}; + +/** + * File IDentifier. + * + * FID is a cluster-wide unique identifier of a file or an object (stripe). + * FIDs are never reused. + **/ +struct lu_fid { + /** + * FID sequence. Sequence is a unit of migration: all files (objects) + * with FIDs from a given sequence are stored on the same server. + * Lustre should support 2^64 objects, so even if each sequence + * has only a single object we can still enumerate 2^64 objects. + **/ + __u64 f_seq; + /* FID number within sequence. */ + __u32 f_oid; + /** + * FID version, used to distinguish different versions (in the sense + * of snapshots, etc.) of the same file system object. Not currently + * used. + **/ + __u32 f_ver; +}; + +static inline bool fid_is_zero(const struct lu_fid *fid) +{ + return !fid->f_seq && !fid->f_oid; +} + +struct filter_fid { + struct lu_fid ff_parent; /* ff_parent.f_ver == file stripe number */ +}; + +/* keep this one for compatibility */ +struct filter_fid_old { + struct lu_fid ff_parent; + __u64 ff_objid; + __u64 ff_seq; +}; + +/* Userspace should treat lu_fid as opaque, and only use the following methods + * to print or parse them. Other functions (e.g. compare, swab) could be moved + * here from lustre_idl.h if needed. + */ +struct lu_fid; + +/** + * Following struct for object attributes, that will be kept inode's EA. + * Introduced in 2.0 release (please see b15993, for details) + * Added to all objects since Lustre 2.4 as contains self FID + */ +struct lustre_mdt_attrs { + /** + * Bitfield for supported data in this structure. From enum lma_compat. + * lma_self_fid and lma_flags are always available. + */ + __u32 lma_compat; + /** + * Per-file incompat feature list. Lustre version should support all + * flags set in this field. The supported feature mask is available in + * LMA_INCOMPAT_SUPP. + */ + __u32 lma_incompat; + /** FID of this inode */ + struct lu_fid lma_self_fid; +}; + +/** + * Prior to 2.4, the LMA structure also included SOM attributes which has since + * been moved to a dedicated xattr + * lma_flags was also removed because of lma_compat/incompat fields. + */ +#define LMA_OLD_SIZE (sizeof(struct lustre_mdt_attrs) + 5 * sizeof(__u64)) + +/** + * OST object IDentifier. + */ +struct ost_id { + union { + struct { + __u64 oi_id; + __u64 oi_seq; + } oi; + struct lu_fid oi_fid; + }; +}; + +#define DOSTID "%#llx:%llu" +#define POSTID(oi) ostid_seq(oi), ostid_id(oi) + +/* + * The ioctl naming rules: + * LL_* - works on the currently opened filehandle instead of parent dir + * *_OBD_* - gets data for both OSC or MDC (LOV, LMV indirectly) + * *_MDC_* - gets/sets data related to MDC + * *_LOV_* - gets/sets data related to OSC/LOV + * *FILE* - called on parent dir and passes in a filename + * *STRIPE* - set/get lov_user_md + * *INFO - set/get lov_user_mds_data + */ +/* lustre_ioctl.h 101-150 */ +#define LL_IOC_GETFLAGS _IOR('f', 151, long) +#define LL_IOC_SETFLAGS _IOW('f', 152, long) +#define LL_IOC_CLRFLAGS _IOW('f', 153, long) +#define LL_IOC_LOV_SETSTRIPE _IOW('f', 154, long) +#define LL_IOC_LOV_GETSTRIPE _IOW('f', 155, long) +#define LL_IOC_LOV_SETEA _IOW('f', 156, long) +/* LL_IOC_RECREATE_OBJ 157 obsolete */ +/* LL_IOC_RECREATE_FID 158 obsolete */ +#define LL_IOC_GROUP_LOCK _IOW('f', 158, long) +#define LL_IOC_GROUP_UNLOCK _IOW('f', 159, long) +/* #define LL_IOC_QUOTACHECK 160 OBD_IOC_QUOTACHECK */ +/* #define LL_IOC_POLL_QUOTACHECK 161 OBD_IOC_POLL_QUOTACHECK */ +/* #define LL_IOC_QUOTACTL 162 OBD_IOC_QUOTACTL */ +#define IOC_OBD_STATFS _IOWR('f', 164, struct obd_statfs *) +/* IOC_LOV_GETINFO 165 obsolete */ +#define LL_IOC_FLUSHCTX _IOW('f', 166, long) +/* LL_IOC_RMTACL 167 obsolete */ +#define LL_IOC_GETOBDCOUNT _IOR('f', 168, long) +#define LL_IOC_LLOOP_ATTACH _IOWR('f', 169, long) +#define LL_IOC_LLOOP_DETACH _IOWR('f', 170, long) +#define LL_IOC_LLOOP_INFO _IOWR('f', 171, struct lu_fid) +#define LL_IOC_LLOOP_DETACH_BYDEV _IOWR('f', 172, long) +#define LL_IOC_PATH2FID _IOR('f', 173, long) +#define LL_IOC_GET_CONNECT_FLAGS _IOWR('f', 174, __u64 *) +#define LL_IOC_GET_MDTIDX _IOR('f', 175, int) + +/* lustre_ioctl.h 177-210 */ +#define LL_IOC_HSM_STATE_GET _IOR('f', 211, struct hsm_user_state) +#define LL_IOC_HSM_STATE_SET _IOW('f', 212, struct hsm_state_set) +#define LL_IOC_HSM_CT_START _IOW('f', 213, struct lustre_kernelcomm) +#define LL_IOC_HSM_COPY_START _IOW('f', 214, struct hsm_copy *) +#define LL_IOC_HSM_COPY_END _IOW('f', 215, struct hsm_copy *) +#define LL_IOC_HSM_PROGRESS _IOW('f', 216, struct hsm_user_request) +#define LL_IOC_HSM_REQUEST _IOW('f', 217, struct hsm_user_request) +#define LL_IOC_DATA_VERSION _IOR('f', 218, struct ioc_data_version) +#define LL_IOC_LOV_SWAP_LAYOUTS _IOW('f', 219, \ + struct lustre_swap_layouts) +#define LL_IOC_HSM_ACTION _IOR('f', 220, \ + struct hsm_current_action) +/* see <lustre_lib.h> for ioctl numbers 221-232 */ + +#define LL_IOC_LMV_SETSTRIPE _IOWR('f', 240, struct lmv_user_md) +#define LL_IOC_LMV_GETSTRIPE _IOWR('f', 241, struct lmv_user_md) +#define LL_IOC_SET_LEASE _IOWR('f', 243, long) +#define LL_IOC_GET_LEASE _IO('f', 244) +#define LL_IOC_HSM_IMPORT _IOWR('f', 245, struct hsm_user_import) +#define LL_IOC_LMV_SET_DEFAULT_STRIPE _IOWR('f', 246, struct lmv_user_md) +#define LL_IOC_MIGRATE _IOR('f', 247, int) +#define LL_IOC_FID2MDTIDX _IOWR('f', 248, struct lu_fid) +#define LL_IOC_GETPARENT _IOWR('f', 249, struct getparent) + +/* Lease types for use as arg and return of LL_IOC_{GET,SET}_LEASE ioctl. */ +enum ll_lease_type { + LL_LEASE_RDLCK = 0x1, + LL_LEASE_WRLCK = 0x2, + LL_LEASE_UNLCK = 0x4, +}; + +#define LL_STATFS_LMV 1 +#define LL_STATFS_LOV 2 +#define LL_STATFS_NODELAY 4 + +#define IOC_MDC_TYPE 'i' +#define IOC_MDC_LOOKUP _IOWR(IOC_MDC_TYPE, 20, struct obd_device *) +#define IOC_MDC_GETFILESTRIPE _IOWR(IOC_MDC_TYPE, 21, struct lov_user_md *) +#define IOC_MDC_GETFILEINFO _IOWR(IOC_MDC_TYPE, 22, struct lov_user_mds_data *) +#define LL_IOC_MDC_GETINFO _IOWR(IOC_MDC_TYPE, 23, struct lov_user_mds_data *) + +#define MAX_OBD_NAME 128 /* If this changes, a NEW ioctl must be added */ + +/* Define O_LOV_DELAY_CREATE to be a mask that is not useful for regular + * files, but are unlikely to be used in practice and are not harmful if + * used incorrectly. O_NOCTTY and FASYNC are only meaningful for character + * devices and are safe for use on new files (See LU-812, LU-4209). + */ +#define O_LOV_DELAY_CREATE (O_NOCTTY | FASYNC) + +#define LL_FILE_IGNORE_LOCK 0x00000001 +#define LL_FILE_GROUP_LOCKED 0x00000002 +#define LL_FILE_READAHEA 0x00000004 +#define LL_FILE_LOCKED_DIRECTIO 0x00000008 /* client-side locks with dio */ +#define LL_FILE_LOCKLESS_IO 0x00000010 /* server-side locks with cio */ +#define LL_FILE_RMTACL 0x00000020 + +#define LOV_USER_MAGIC_V1 0x0BD10BD0 +#define LOV_USER_MAGIC LOV_USER_MAGIC_V1 +#define LOV_USER_MAGIC_JOIN_V1 0x0BD20BD0 +#define LOV_USER_MAGIC_V3 0x0BD30BD0 +/* 0x0BD40BD0 is occupied by LOV_MAGIC_MIGRATE */ +#define LOV_USER_MAGIC_SPECIFIC 0x0BD50BD0 /* for specific OSTs */ + +#define LMV_USER_MAGIC 0x0CD30CD0 /*default lmv magic*/ + +#define LOV_PATTERN_RAID0 0x001 +#define LOV_PATTERN_RAID1 0x002 +#define LOV_PATTERN_FIRST 0x100 +#define LOV_PATTERN_CMOBD 0x200 + +#define LOV_PATTERN_F_MASK 0xffff0000 +#define LOV_PATTERN_F_HOLE 0x40000000 /* there is hole in LOV EA */ +#define LOV_PATTERN_F_RELEASED 0x80000000 /* HSM released file */ + +#define LOV_MAXPOOLNAME 15 +#define LOV_POOLNAMEF "%.15s" + +#define LOV_MIN_STRIPE_BITS 16 /* maximum PAGE_SIZE (ia64), power of 2 */ +#define LOV_MIN_STRIPE_SIZE (1 << LOV_MIN_STRIPE_BITS) +#define LOV_MAX_STRIPE_COUNT_OLD 160 +/* This calculation is crafted so that input of 4096 will result in 160 + * which in turn is equal to old maximal stripe count. + * XXX: In fact this is too simplified for now, what it also need is to get + * ea_type argument to clearly know how much space each stripe consumes. + * + * The limit of 12 pages is somewhat arbitrary, but is a reasonably large + * allocation that is sufficient for the current generation of systems. + * + * (max buffer size - lov+rpc header) / sizeof(struct lov_ost_data_v1) + */ +#define LOV_MAX_STRIPE_COUNT 2000 /* ((12 * 4096 - 256) / 24) */ +#define LOV_ALL_STRIPES 0xffff /* only valid for directories */ +#define LOV_V1_INSANE_STRIPE_COUNT 65532 /* maximum stripe count bz13933 */ + +#define XATTR_LUSTRE_PREFIX "lustre." +#define XATTR_LUSTRE_LOV "lustre.lov" + +#define lov_user_ost_data lov_user_ost_data_v1 +struct lov_user_ost_data_v1 { /* per-stripe data structure */ + struct ost_id l_ost_oi; /* OST object ID */ + __u32 l_ost_gen; /* generation of this OST index */ + __u32 l_ost_idx; /* OST index in LOV */ +} __packed; + +#define lov_user_md lov_user_md_v1 +struct lov_user_md_v1 { /* LOV EA user data (host-endian) */ + __u32 lmm_magic; /* magic number = LOV_USER_MAGIC_V1 */ + __u32 lmm_pattern; /* LOV_PATTERN_RAID0, LOV_PATTERN_RAID1 */ + struct ost_id lmm_oi; /* LOV object ID */ + __u32 lmm_stripe_size; /* size of stripe in bytes */ + __u16 lmm_stripe_count; /* num stripes in use for this object */ + union { + __u16 lmm_stripe_offset; /* starting stripe offset in + * lmm_objects, use when writing + */ + __u16 lmm_layout_gen; /* layout generation number + * used when reading + */ + }; + struct lov_user_ost_data_v1 lmm_objects[0]; /* per-stripe data */ +} __attribute__((packed, __may_alias__)); + +struct lov_user_md_v3 { /* LOV EA user data (host-endian) */ + __u32 lmm_magic; /* magic number = LOV_USER_MAGIC_V3 */ + __u32 lmm_pattern; /* LOV_PATTERN_RAID0, LOV_PATTERN_RAID1 */ + struct ost_id lmm_oi; /* LOV object ID */ + __u32 lmm_stripe_size; /* size of stripe in bytes */ + __u16 lmm_stripe_count; /* num stripes in use for this object */ + union { + __u16 lmm_stripe_offset; /* starting stripe offset in + * lmm_objects, use when writing + */ + __u16 lmm_layout_gen; /* layout generation number + * used when reading + */ + }; + char lmm_pool_name[LOV_MAXPOOLNAME + 1]; /* pool name */ + struct lov_user_ost_data_v1 lmm_objects[0]; /* per-stripe data */ +} __packed; + +static inline __u32 lov_user_md_size(__u16 stripes, __u32 lmm_magic) +{ + if (lmm_magic == LOV_USER_MAGIC_V1) + return sizeof(struct lov_user_md_v1) + + stripes * sizeof(struct lov_user_ost_data_v1); + return sizeof(struct lov_user_md_v3) + + stripes * sizeof(struct lov_user_ost_data_v1); +} + +/* Compile with -D_LARGEFILE64_SOURCE or -D_GNU_SOURCE (or #define) to + * use this. It is unsafe to #define those values in this header as it + * is possible the application has already #included <sys/stat.h>. */ +#ifdef HAVE_LOV_USER_MDS_DATA +#define lov_user_mds_data lov_user_mds_data_v1 +struct lov_user_mds_data_v1 { + lstat_t lmd_st; /* MDS stat struct */ + struct lov_user_md_v1 lmd_lmm; /* LOV EA V1 user data */ +} __packed; + +struct lov_user_mds_data_v3 { + lstat_t lmd_st; /* MDS stat struct */ + struct lov_user_md_v3 lmd_lmm; /* LOV EA V3 user data */ +} __packed; +#endif + +struct lmv_user_mds_data { + struct lu_fid lum_fid; + __u32 lum_padding; + __u32 lum_mds; +}; + +enum lmv_hash_type { + LMV_HASH_TYPE_UNKNOWN = 0, /* 0 is reserved for testing purpose */ + LMV_HASH_TYPE_ALL_CHARS = 1, + LMV_HASH_TYPE_FNV_1A_64 = 2, +}; + +#define LMV_HASH_NAME_ALL_CHARS "all_char" +#define LMV_HASH_NAME_FNV_1A_64 "fnv_1a_64" + +/* + * Got this according to how get LOV_MAX_STRIPE_COUNT, see above, + * (max buffer size - lmv+rpc header) / sizeof(struct lmv_user_mds_data) + */ +#define LMV_MAX_STRIPE_COUNT 2000 /* ((12 * 4096 - 256) / 24) */ +#define lmv_user_md lmv_user_md_v1 +struct lmv_user_md_v1 { + __u32 lum_magic; /* must be the first field */ + __u32 lum_stripe_count; /* dirstripe count */ + __u32 lum_stripe_offset; /* MDT idx for default dirstripe */ + __u32 lum_hash_type; /* Dir stripe policy */ + __u32 lum_type; /* LMV type: default or normal */ + __u32 lum_padding1; + __u32 lum_padding2; + __u32 lum_padding3; + char lum_pool_name[LOV_MAXPOOLNAME + 1]; + struct lmv_user_mds_data lum_objects[0]; +} __packed; + +static inline int lmv_user_md_size(int stripes, int lmm_magic) +{ + return sizeof(struct lmv_user_md) + + stripes * sizeof(struct lmv_user_mds_data); +} + +struct ll_recreate_obj { + __u64 lrc_id; + __u32 lrc_ost_idx; +}; + +struct ll_fid { + __u64 id; /* holds object id */ + __u32 generation; /* holds object generation */ + __u32 f_type; /* holds object type or stripe idx when passing it to + * OST for saving into EA. */ +}; + +#define UUID_MAX 40 +struct obd_uuid { + char uuid[UUID_MAX]; +}; + +static inline bool obd_uuid_equals(const struct obd_uuid *u1, + const struct obd_uuid *u2) +{ + return strcmp((char *)u1->uuid, (char *)u2->uuid) == 0; +} + +static inline int obd_uuid_empty(struct obd_uuid *uuid) +{ + return uuid->uuid[0] == '\0'; +} + +static inline void obd_str2uuid(struct obd_uuid *uuid, const char *tmp) +{ + strncpy((char *)uuid->uuid, tmp, sizeof(*uuid)); + uuid->uuid[sizeof(*uuid) - 1] = '\0'; +} + +/* For printf's only, make sure uuid is terminated */ +static inline char *obd_uuid2str(const struct obd_uuid *uuid) +{ + if (!uuid) + return NULL; + + if (uuid->uuid[sizeof(*uuid) - 1] != '\0') { + /* Obviously not safe, but for printfs, no real harm done... + * we're always null-terminated, even in a race. + */ + static char temp[sizeof(*uuid)]; + + memcpy(temp, uuid->uuid, sizeof(*uuid) - 1); + temp[sizeof(*uuid) - 1] = '\0'; + return temp; + } + return (char *)(uuid->uuid); +} + +/* Extract fsname from uuid (or target name) of a target + * e.g. (myfs-OST0007_UUID -> myfs) + * see also deuuidify. + */ +static inline void obd_uuid2fsname(char *buf, char *uuid, int buflen) +{ + char *p; + + strncpy(buf, uuid, buflen - 1); + buf[buflen - 1] = '\0'; + p = strrchr(buf, '-'); + if (p) + *p = '\0'; +} + +/* printf display format + * * usage: printf("file FID is "DFID"\n", PFID(fid)); + */ +#define FID_NOBRACE_LEN 40 +#define FID_LEN (FID_NOBRACE_LEN + 2) +#define DFID_NOBRACE "%#llx:0x%x:0x%x" +#define DFID "[" DFID_NOBRACE "]" +#define PFID(fid) (unsigned long long)(fid)->f_seq, (fid)->f_oid, (fid)->f_ver + +/* scanf input parse format for fids in DFID_NOBRACE format + * Need to strip '[' from DFID format first or use "["SFID"]" at caller. + * usage: sscanf(fidstr, SFID, RFID(&fid)); + */ +#define SFID "0x%llx:0x%x:0x%x" +#define RFID(fid) &((fid)->f_seq), &((fid)->f_oid), &((fid)->f_ver) + +/********* Quotas **********/ + +#define Q_QUOTACHECK 0x800100 /* deprecated as of 2.4 */ +#define Q_INITQUOTA 0x800101 /* deprecated as of 2.4 */ +#define Q_GETOINFO 0x800102 /* get obd quota info */ +#define Q_GETOQUOTA 0x800103 /* get obd quotas */ +#define Q_FINVALIDATE 0x800104 /* deprecated as of 2.4 */ + +/* these must be explicitly translated into linux Q_* in ll_dir_ioctl */ +#define LUSTRE_Q_QUOTAON 0x800002 /* deprecated as of 2.4 */ +#define LUSTRE_Q_QUOTAOFF 0x800003 /* deprecated as of 2.4 */ +#define LUSTRE_Q_GETINFO 0x800005 /* get information about quota files */ +#define LUSTRE_Q_SETINFO 0x800006 /* set information about quota files */ +#define LUSTRE_Q_GETQUOTA 0x800007 /* get user quota structure */ +#define LUSTRE_Q_SETQUOTA 0x800008 /* set user quota structure */ +/* lustre-specific control commands */ +#define LUSTRE_Q_INVALIDATE 0x80000b /* deprecated as of 2.4 */ +#define LUSTRE_Q_FINVALIDATE 0x80000c /* deprecated as of 2.4 */ + +#define UGQUOTA 2 /* set both USRQUOTA and GRPQUOTA */ + +#define IDENTITY_DOWNCALL_MAGIC 0x6d6dd629 + +/* permission */ +#define N_PERMS_MAX 64 + +struct perm_downcall_data { + __u64 pdd_nid; + __u32 pdd_perm; + __u32 pdd_padding; +}; + +struct identity_downcall_data { + __u32 idd_magic; + __u32 idd_err; + __u32 idd_uid; + __u32 idd_gid; + __u32 idd_nperms; + __u32 idd_ngroups; + struct perm_downcall_data idd_perms[N_PERMS_MAX]; + __u32 idd_groups[0]; +}; + +/* lustre volatile file support + * file name header: .^L^S^T^R:volatile" + */ +#define LUSTRE_VOLATILE_HDR ".\x0c\x13\x14\x12:VOLATILE" +#define LUSTRE_VOLATILE_HDR_LEN 14 +/* hdr + MDT index */ +#define LUSTRE_VOLATILE_IDX LUSTRE_VOLATILE_HDR":%.4X:" + +enum lustre_quota_version { + LUSTRE_QUOTA_V2 = 1 +}; + +/* XXX: same as if_dqinfo struct in kernel */ +struct obd_dqinfo { + __u64 dqi_bgrace; + __u64 dqi_igrace; + __u32 dqi_flags; + __u32 dqi_valid; +}; + +/* XXX: same as if_dqblk struct in kernel, plus one padding */ +struct obd_dqblk { + __u64 dqb_bhardlimit; + __u64 dqb_bsoftlimit; + __u64 dqb_curspace; + __u64 dqb_ihardlimit; + __u64 dqb_isoftlimit; + __u64 dqb_curinodes; + __u64 dqb_btime; + __u64 dqb_itime; + __u32 dqb_valid; + __u32 dqb_padding; +}; + +enum { + QC_GENERAL = 0, + QC_MDTIDX = 1, + QC_OSTIDX = 2, + QC_UUID = 3 +}; + +struct if_quotactl { + __u32 qc_cmd; + __u32 qc_type; + __u32 qc_id; + __u32 qc_stat; + __u32 qc_valid; + __u32 qc_idx; + struct obd_dqinfo qc_dqinfo; + struct obd_dqblk qc_dqblk; + char obd_type[16]; + struct obd_uuid obd_uuid; +}; + +/* swap layout flags */ +#define SWAP_LAYOUTS_CHECK_DV1 (1 << 0) +#define SWAP_LAYOUTS_CHECK_DV2 (1 << 1) +#define SWAP_LAYOUTS_KEEP_MTIME (1 << 2) +#define SWAP_LAYOUTS_KEEP_ATIME (1 << 3) +#define SWAP_LAYOUTS_CLOSE (1 << 4) + +/* Swap XATTR_NAME_HSM as well, only on the MDT so far */ +#define SWAP_LAYOUTS_MDS_HSM (1 << 31) +struct lustre_swap_layouts { + __u64 sl_flags; + __u32 sl_fd; + __u32 sl_gid; + __u64 sl_dv1; + __u64 sl_dv2; +}; + +/********* Changelogs **********/ +/** Changelog record types */ +enum changelog_rec_type { + CL_MARK = 0, + CL_CREATE = 1, /* namespace */ + CL_MKDIR = 2, /* namespace */ + CL_HARDLINK = 3, /* namespace */ + CL_SOFTLINK = 4, /* namespace */ + CL_MKNOD = 5, /* namespace */ + CL_UNLINK = 6, /* namespace */ + CL_RMDIR = 7, /* namespace */ + CL_RENAME = 8, /* namespace */ + CL_EXT = 9, /* namespace extended record (2nd half of rename) */ + CL_OPEN = 10, /* not currently used */ + CL_CLOSE = 11, /* may be written to log only with mtime change */ + CL_LAYOUT = 12, /* file layout/striping modified */ + CL_TRUNC = 13, + CL_SETATTR = 14, + CL_XATTR = 15, + CL_HSM = 16, /* HSM specific events, see flags */ + CL_MTIME = 17, /* Precedence: setattr > mtime > ctime > atime */ + CL_CTIME = 18, + CL_ATIME = 19, + CL_LAST +}; + +static inline const char *changelog_type2str(int type) +{ + static const char *changelog_str[] = { + "MARK", "CREAT", "MKDIR", "HLINK", "SLINK", "MKNOD", "UNLNK", + "RMDIR", "RENME", "RNMTO", "OPEN", "CLOSE", "LYOUT", "TRUNC", + "SATTR", "XATTR", "HSM", "MTIME", "CTIME", "ATIME", + }; + + if (type >= 0 && type < CL_LAST) + return changelog_str[type]; + return NULL; +} + +/* per-record flags */ +#define CLF_FLAGSHIFT 12 +#define CLF_FLAGMASK ((1U << CLF_FLAGSHIFT) - 1) +#define CLF_VERMASK (~CLF_FLAGMASK) +enum changelog_rec_flags { + CLF_VERSION = 0x1000, + CLF_RENAME = 0x2000, + CLF_JOBID = 0x4000, + CLF_SUPPORTED = CLF_VERSION | CLF_RENAME | CLF_JOBID +}; + +/* Anything under the flagmask may be per-type (if desired) */ +/* Flags for unlink */ +#define CLF_UNLINK_LAST 0x0001 /* Unlink of last hardlink */ +#define CLF_UNLINK_HSM_EXISTS 0x0002 /* File has something in HSM */ + /* HSM cleaning needed */ +/* Flags for rename */ +#define CLF_RENAME_LAST 0x0001 /* rename unlink last hardlink of + * target + */ +#define CLF_RENAME_LAST_EXISTS 0x0002 /* rename unlink last hardlink of target + * has an archive in backend + */ + +/* Flags for HSM */ +/* 12b used (from high weight to low weight): + * 2b for flags + * 3b for event + * 7b for error code + */ +#define CLF_HSM_ERR_L 0 /* HSM return code, 7 bits */ +#define CLF_HSM_ERR_H 6 +#define CLF_HSM_EVENT_L 7 /* HSM event, 3 bits, see enum hsm_event */ +#define CLF_HSM_EVENT_H 9 +#define CLF_HSM_FLAG_L 10 /* HSM flags, 2 bits, 1 used, 1 spare */ +#define CLF_HSM_FLAG_H 11 +#define CLF_HSM_SPARE_L 12 /* 4 spare bits */ +#define CLF_HSM_SPARE_H 15 +#define CLF_HSM_LAST 15 + +/* Remove bits higher than _h, then extract the value + * between _h and _l by shifting lower weigth to bit 0. + */ +#define CLF_GET_BITS(_b, _h, _l) (((_b << (CLF_HSM_LAST - _h)) & 0xFFFF) \ + >> (CLF_HSM_LAST - _h + _l)) + +#define CLF_HSM_SUCCESS 0x00 +#define CLF_HSM_MAXERROR 0x7E +#define CLF_HSM_ERROVERFLOW 0x7F + +#define CLF_HSM_DIRTY 1 /* file is dirty after HSM request end */ + +/* 3 bits field => 8 values allowed */ +enum hsm_event { + HE_ARCHIVE = 0, + HE_RESTORE = 1, + HE_CANCEL = 2, + HE_RELEASE = 3, + HE_REMOVE = 4, + HE_STATE = 5, + HE_SPARE1 = 6, + HE_SPARE2 = 7, +}; + +static inline enum hsm_event hsm_get_cl_event(__u16 flags) +{ + return CLF_GET_BITS(flags, CLF_HSM_EVENT_H, CLF_HSM_EVENT_L); +} + +static inline void hsm_set_cl_event(int *flags, enum hsm_event he) +{ + *flags |= (he << CLF_HSM_EVENT_L); +} + +static inline __u16 hsm_get_cl_flags(int flags) +{ + return CLF_GET_BITS(flags, CLF_HSM_FLAG_H, CLF_HSM_FLAG_L); +} + +static inline void hsm_set_cl_flags(int *flags, int bits) +{ + *flags |= (bits << CLF_HSM_FLAG_L); +} + +static inline int hsm_get_cl_error(int flags) +{ + return CLF_GET_BITS(flags, CLF_HSM_ERR_H, CLF_HSM_ERR_L); +} + +static inline void hsm_set_cl_error(int *flags, int error) +{ + *flags |= (error << CLF_HSM_ERR_L); +} + +enum changelog_send_flag { + /* Not yet implemented */ + CHANGELOG_FLAG_FOLLOW = 0x01, + /* + * Blocking IO makes sense in case of slow user parsing of the records, + * but it also prevents us from cleaning up if the records are not + * consumed. + */ + CHANGELOG_FLAG_BLOCK = 0x02, + /* Pack jobid into the changelog records if available. */ + CHANGELOG_FLAG_JOBID = 0x04, +}; + +#define CR_MAXSIZE cfs_size_round(2 * NAME_MAX + 2 + \ + changelog_rec_offset(CLF_SUPPORTED)) + +/* 31 usable bytes string + null terminator. */ +#define LUSTRE_JOBID_SIZE 32 + +/* + * This is the minimal changelog record. It can contain extensions + * such as rename fields or process jobid. Its exact content is described + * by the cr_flags. + * + * Extensions are packed in the same order as their corresponding flags. + */ +struct changelog_rec { + __u16 cr_namelen; + __u16 cr_flags; /**< \a changelog_rec_flags */ + __u32 cr_type; /**< \a changelog_rec_type */ + __u64 cr_index; /**< changelog record number */ + __u64 cr_prev; /**< last index for this target fid */ + __u64 cr_time; + union { + struct lu_fid cr_tfid; /**< target fid */ + __u32 cr_markerflags; /**< CL_MARK flags */ + }; + struct lu_fid cr_pfid; /**< parent fid */ +} __packed; + +/* Changelog extension for RENAME. */ +struct changelog_ext_rename { + struct lu_fid cr_sfid; /**< source fid, or zero */ + struct lu_fid cr_spfid; /**< source parent fid, or zero */ +}; + +/* Changelog extension to include JOBID. */ +struct changelog_ext_jobid { + char cr_jobid[LUSTRE_JOBID_SIZE]; /**< zero-terminated string. */ +}; + +static inline size_t changelog_rec_offset(enum changelog_rec_flags crf) +{ + size_t size = sizeof(struct changelog_rec); + + if (crf & CLF_RENAME) + size += sizeof(struct changelog_ext_rename); + + if (crf & CLF_JOBID) + size += sizeof(struct changelog_ext_jobid); + + return size; +} + +static inline size_t changelog_rec_size(struct changelog_rec *rec) +{ + return changelog_rec_offset(rec->cr_flags); +} + +static inline size_t changelog_rec_varsize(struct changelog_rec *rec) +{ + return changelog_rec_size(rec) - sizeof(*rec) + rec->cr_namelen; +} + +static inline +struct changelog_ext_rename *changelog_rec_rename(struct changelog_rec *rec) +{ + enum changelog_rec_flags crf = rec->cr_flags & CLF_VERSION; + + return (struct changelog_ext_rename *)((char *)rec + + changelog_rec_offset(crf)); +} + +/* The jobid follows the rename extension, if present */ +static inline +struct changelog_ext_jobid *changelog_rec_jobid(struct changelog_rec *rec) +{ + enum changelog_rec_flags crf = rec->cr_flags & + (CLF_VERSION | CLF_RENAME); + + return (struct changelog_ext_jobid *)((char *)rec + + changelog_rec_offset(crf)); +} + +/* The name follows the rename and jobid extensions, if present */ +static inline char *changelog_rec_name(struct changelog_rec *rec) +{ + return (char *)rec + changelog_rec_offset(rec->cr_flags & + CLF_SUPPORTED); +} + +static inline size_t changelog_rec_snamelen(struct changelog_rec *rec) +{ + return rec->cr_namelen - strlen(changelog_rec_name(rec)) - 1; +} + +static inline char *changelog_rec_sname(struct changelog_rec *rec) +{ + char *cr_name = changelog_rec_name(rec); + + return cr_name + strlen(cr_name) + 1; +} + +/** + * Remap a record to the desired format as specified by the crf flags. + * The record must be big enough to contain the final remapped version. + * Superfluous extension fields are removed and missing ones are added + * and zeroed. The flags of the record are updated accordingly. + * + * The jobid and rename extensions can be added to a record, to match the + * format an application expects, typically. In this case, the newly added + * fields will be zeroed. + * The Jobid field can be removed, to guarantee compatibility with older + * clients that don't expect this field in the records they process. + * + * The following assumptions are being made: + * - CLF_RENAME will not be removed + * - CLF_JOBID will not be added without CLF_RENAME being added too + * + * @param[in,out] rec The record to remap. + * @param[in] crf_wanted Flags describing the desired extensions. + */ +static inline void changelog_remap_rec(struct changelog_rec *rec, + enum changelog_rec_flags crf_wanted) +{ + char *jid_mov, *rnm_mov; + + crf_wanted &= CLF_SUPPORTED; + + if ((rec->cr_flags & CLF_SUPPORTED) == crf_wanted) + return; + + /* First move the variable-length name field */ + memmove((char *)rec + changelog_rec_offset(crf_wanted), + changelog_rec_name(rec), rec->cr_namelen); + + /* Locations of jobid and rename extensions in the remapped record */ + jid_mov = (char *)rec + + changelog_rec_offset(crf_wanted & ~CLF_JOBID); + rnm_mov = (char *)rec + + changelog_rec_offset(crf_wanted & ~(CLF_JOBID | CLF_RENAME)); + + /* Move the extension fields to the desired positions */ + if ((crf_wanted & CLF_JOBID) && (rec->cr_flags & CLF_JOBID)) + memmove(jid_mov, changelog_rec_jobid(rec), + sizeof(struct changelog_ext_jobid)); + + if ((crf_wanted & CLF_RENAME) && (rec->cr_flags & CLF_RENAME)) + memmove(rnm_mov, changelog_rec_rename(rec), + sizeof(struct changelog_ext_rename)); + + /* Clear newly added fields */ + if ((crf_wanted & CLF_JOBID) && !(rec->cr_flags & CLF_JOBID)) + memset(jid_mov, 0, sizeof(struct changelog_ext_jobid)); + + if ((crf_wanted & CLF_RENAME) && !(rec->cr_flags & CLF_RENAME)) + memset(rnm_mov, 0, sizeof(struct changelog_ext_rename)); + + /* Update the record's flags accordingly */ + rec->cr_flags = (rec->cr_flags & CLF_FLAGMASK) | crf_wanted; +} + +struct ioc_changelog { + __u64 icc_recno; + __u32 icc_mdtindex; + __u32 icc_id; + __u32 icc_flags; +}; + +enum changelog_message_type { + CL_RECORD = 10, /* message is a changelog_rec */ + CL_EOF = 11, /* at end of current changelog */ +}; + +/********* Misc **********/ + +struct ioc_data_version { + __u64 idv_version; + __u64 idv_flags; /* See LL_DV_xxx */ +}; + +#define LL_DV_RD_FLUSH (1 << 0) /* Flush dirty pages from clients */ +#define LL_DV_WR_FLUSH (1 << 1) /* Flush all caching pages from clients */ + +#ifndef offsetof +# define offsetof(typ, memb) ((unsigned long)((char *)&(((typ *)0)->memb))) +#endif + +#define dot_lustre_name ".lustre" + +/********* HSM **********/ + +/** HSM per-file state + * See HSM_FLAGS below. + */ +enum hsm_states { + HS_NONE = 0x00000000, + HS_EXISTS = 0x00000001, + HS_DIRTY = 0x00000002, + HS_RELEASED = 0x00000004, + HS_ARCHIVED = 0x00000008, + HS_NORELEASE = 0x00000010, + HS_NOARCHIVE = 0x00000020, + HS_LOST = 0x00000040, +}; + +/* HSM user-setable flags. */ +#define HSM_USER_MASK (HS_NORELEASE | HS_NOARCHIVE | HS_DIRTY) + +/* Other HSM flags. */ +#define HSM_STATUS_MASK (HS_EXISTS | HS_LOST | HS_RELEASED | HS_ARCHIVED) + +/* + * All HSM-related possible flags that could be applied to a file. + * This should be kept in sync with hsm_states. + */ +#define HSM_FLAGS_MASK (HSM_USER_MASK | HSM_STATUS_MASK) + +/** + * HSM request progress state + */ +enum hsm_progress_states { + HPS_WAITING = 1, + HPS_RUNNING = 2, + HPS_DONE = 3, +}; + +#define HPS_NONE 0 + +static inline char *hsm_progress_state2name(enum hsm_progress_states s) +{ + switch (s) { + case HPS_WAITING: return "waiting"; + case HPS_RUNNING: return "running"; + case HPS_DONE: return "done"; + default: return "unknown"; + } +} + +struct hsm_extent { + __u64 offset; + __u64 length; +} __packed; + +/** + * Current HSM states of a Lustre file. + * + * This structure purpose is to be sent to user-space mainly. It describes the + * current HSM flags and in-progress action. + */ +struct hsm_user_state { + /** Current HSM states, from enum hsm_states. */ + __u32 hus_states; + __u32 hus_archive_id; + /** The current undergoing action, if there is one */ + __u32 hus_in_progress_state; + __u32 hus_in_progress_action; + struct hsm_extent hus_in_progress_location; + char hus_extended_info[]; +}; + +struct hsm_state_set_ioc { + struct lu_fid hssi_fid; + __u64 hssi_setmask; + __u64 hssi_clearmask; +}; + +/* + * This structure describes the current in-progress action for a file. + * it is returned to user space and send over the wire + */ +struct hsm_current_action { + /** The current undergoing action, if there is one */ + /* state is one of hsm_progress_states */ + __u32 hca_state; + /* action is one of hsm_user_action */ + __u32 hca_action; + struct hsm_extent hca_location; +}; + +/***** HSM user requests ******/ +/* User-generated (lfs/ioctl) request types */ +enum hsm_user_action { + HUA_NONE = 1, /* no action (noop) */ + HUA_ARCHIVE = 10, /* copy to hsm */ + HUA_RESTORE = 11, /* prestage */ + HUA_RELEASE = 12, /* drop ost objects */ + HUA_REMOVE = 13, /* remove from archive */ + HUA_CANCEL = 14 /* cancel a request */ +}; + +static inline char *hsm_user_action2name(enum hsm_user_action a) +{ + switch (a) { + case HUA_NONE: return "NOOP"; + case HUA_ARCHIVE: return "ARCHIVE"; + case HUA_RESTORE: return "RESTORE"; + case HUA_RELEASE: return "RELEASE"; + case HUA_REMOVE: return "REMOVE"; + case HUA_CANCEL: return "CANCEL"; + default: return "UNKNOWN"; + } +} + +/* + * List of hr_flags (bit field) + */ +#define HSM_FORCE_ACTION 0x0001 +/* used by CT, connot be set by user */ +#define HSM_GHOST_COPY 0x0002 + +/** + * Contains all the fixed part of struct hsm_user_request. + * + */ +struct hsm_request { + __u32 hr_action; /* enum hsm_user_action */ + __u32 hr_archive_id; /* archive id, used only with HUA_ARCHIVE */ + __u64 hr_flags; /* request flags */ + __u32 hr_itemcount; /* item count in hur_user_item vector */ + __u32 hr_data_len; +}; + +struct hsm_user_item { + struct lu_fid hui_fid; + struct hsm_extent hui_extent; +} __packed; + +struct hsm_user_request { + struct hsm_request hur_request; + struct hsm_user_item hur_user_item[0]; + /* extra data blob at end of struct (after all + * hur_user_items), only use helpers to access it + */ +} __packed; + +/** Return pointer to data field in a hsm user request */ +static inline void *hur_data(struct hsm_user_request *hur) +{ + return &hur->hur_user_item[hur->hur_request.hr_itemcount]; +} + +/** + * Compute the current length of the provided hsm_user_request. This returns -1 + * instead of an errno because ssize_t is defined to be only [ -1, SSIZE_MAX ] + * + * return -1 on bounds check error. + */ +static inline ssize_t hur_len(struct hsm_user_request *hur) +{ + __u64 size; + + /* can't overflow a __u64 since hr_itemcount is only __u32 */ + size = offsetof(struct hsm_user_request, hur_user_item[0]) + + (__u64)hur->hur_request.hr_itemcount * + sizeof(hur->hur_user_item[0]) + hur->hur_request.hr_data_len; + + if (size != (ssize_t)size) + return -1; + + return size; +} + +/****** HSM RPCs to copytool *****/ +/* Message types the copytool may receive */ +enum hsm_message_type { + HMT_ACTION_LIST = 100, /* message is a hsm_action_list */ +}; + +/* Actions the copytool may be instructed to take for a given action_item */ +enum hsm_copytool_action { + HSMA_NONE = 10, /* no action */ + HSMA_ARCHIVE = 20, /* arbitrary offset */ + HSMA_RESTORE = 21, + HSMA_REMOVE = 22, + HSMA_CANCEL = 23 +}; + +static inline char *hsm_copytool_action2name(enum hsm_copytool_action a) +{ + switch (a) { + case HSMA_NONE: return "NOOP"; + case HSMA_ARCHIVE: return "ARCHIVE"; + case HSMA_RESTORE: return "RESTORE"; + case HSMA_REMOVE: return "REMOVE"; + case HSMA_CANCEL: return "CANCEL"; + default: return "UNKNOWN"; + } +} + +/* Copytool item action description */ +struct hsm_action_item { + __u32 hai_len; /* valid size of this struct */ + __u32 hai_action; /* hsm_copytool_action, but use known size */ + struct lu_fid hai_fid; /* Lustre FID to operated on */ + struct lu_fid hai_dfid; /* fid used for data access */ + struct hsm_extent hai_extent; /* byte range to operate on */ + __u64 hai_cookie; /* action cookie from coordinator */ + __u64 hai_gid; /* grouplock id */ + char hai_data[0]; /* variable length */ +} __packed; + +/* + * helper function which print in hexa the first bytes of + * hai opaque field + * \param hai [IN] record to print + * \param buffer [OUT] output buffer + * \param len [IN] max buffer len + * \retval buffer + */ +static inline char *hai_dump_data_field(struct hsm_action_item *hai, + char *buffer, size_t len) +{ + int i, data_len; + char *ptr; + + ptr = buffer; + data_len = hai->hai_len - sizeof(*hai); + for (i = 0; (i < data_len) && (len > 2); i++) { + snprintf(ptr, 3, "%02X", (unsigned char)hai->hai_data[i]); + ptr += 2; + len -= 2; + } + + *ptr = '\0'; + + return buffer; +} + +/* Copytool action list */ +#define HAL_VERSION 1 +#define HAL_MAXSIZE LNET_MTU /* bytes, used in userspace only */ +struct hsm_action_list { + __u32 hal_version; + __u32 hal_count; /* number of hai's to follow */ + __u64 hal_compound_id; /* returned by coordinator */ + __u64 hal_flags; + __u32 hal_archive_id; /* which archive backend */ + __u32 padding1; + char hal_fsname[0]; /* null-terminated */ + /* struct hsm_action_item[hal_count] follows, aligned on 8-byte + * boundaries. See hai_first + */ +} __packed; + +#ifndef HAVE_CFS_SIZE_ROUND +static inline int cfs_size_round(int val) +{ + return (val + 7) & (~0x7); +} + +#define HAVE_CFS_SIZE_ROUND +#endif + +/* Return pointer to first hai in action list */ +static inline struct hsm_action_item *hai_first(struct hsm_action_list *hal) +{ + return (struct hsm_action_item *)(hal->hal_fsname + + cfs_size_round(strlen(hal-> \ + hal_fsname) + + 1)); +} + +/* Return pointer to next hai */ +static inline struct hsm_action_item *hai_next(struct hsm_action_item *hai) +{ + return (struct hsm_action_item *)((char *)hai + + cfs_size_round(hai->hai_len)); +} + +/* Return size of an hsm_action_list */ +static inline int hal_size(struct hsm_action_list *hal) +{ + int i, sz; + struct hsm_action_item *hai; + + sz = sizeof(*hal) + cfs_size_round(strlen(hal->hal_fsname) + 1); + hai = hai_first(hal); + for (i = 0; i < hal->hal_count; i++, hai = hai_next(hai)) + sz += cfs_size_round(hai->hai_len); + + return sz; +} + +/* HSM file import + * describe the attributes to be set on imported file + */ +struct hsm_user_import { + __u64 hui_size; + __u64 hui_atime; + __u64 hui_mtime; + __u32 hui_atime_ns; + __u32 hui_mtime_ns; + __u32 hui_uid; + __u32 hui_gid; + __u32 hui_mode; + __u32 hui_archive_id; +}; + +/* Copytool progress reporting */ +#define HP_FLAG_COMPLETED 0x01 +#define HP_FLAG_RETRY 0x02 + +struct hsm_progress { + struct lu_fid hp_fid; + __u64 hp_cookie; + struct hsm_extent hp_extent; + __u16 hp_flags; + __u16 hp_errval; /* positive val */ + __u32 padding; +}; + +struct hsm_copy { + __u64 hc_data_version; + __u16 hc_flags; + __u16 hc_errval; /* positive val */ + __u32 padding; + struct hsm_action_item hc_hai; +}; + +/** @} lustreuser */ + +#endif /* _LUSTRE_USER_H */ diff --git a/drivers/staging/lustre/include/uapi/linux/lustre/lustre_ver.h b/drivers/staging/lustre/include/uapi/linux/lustre/lustre_ver.h new file mode 100644 index 000000000000..19c9135e2273 --- /dev/null +++ b/drivers/staging/lustre/include/uapi/linux/lustre/lustre_ver.h @@ -0,0 +1,27 @@ +#ifndef _LUSTRE_VER_H_ +#define _LUSTRE_VER_H_ + +#define LUSTRE_MAJOR 2 +#define LUSTRE_MINOR 6 +#define LUSTRE_PATCH 99 +#define LUSTRE_FIX 0 +#define LUSTRE_VERSION_STRING "2.6.99" + +#define OBD_OCD_VERSION(major, minor, patch, fix) \ + (((major) << 24) + ((minor) << 16) + ((patch) << 8) + (fix)) + +#define OBD_OCD_VERSION_MAJOR(version) ((int)((version) >> 24) & 255) +#define OBD_OCD_VERSION_MINOR(version) ((int)((version) >> 16) & 255) +#define OBD_OCD_VERSION_PATCH(version) ((int)((version) >> 8) & 255) +#define OBD_OCD_VERSION_FIX(version) ((int)((version) >> 0) & 255) + +#define LUSTRE_VERSION_CODE \ + OBD_OCD_VERSION(LUSTRE_MAJOR, LUSTRE_MINOR, LUSTRE_PATCH, LUSTRE_FIX) + +/* + * If lustre version of client and servers it connects to differs by more + * than this amount, client would issue a warning. + */ +#define LUSTRE_VERSION_OFFSET_WARN OBD_OCD_VERSION(0, 4, 0, 0) + +#endif |