aboutsummaryrefslogtreecommitdiffstats
path: root/include
diff options
context:
space:
mode:
Diffstat (limited to 'include')
-rw-r--r--include/linux/cred.h2
-rw-r--r--include/linux/drbd.h81
-rw-r--r--include/linux/drbd_genl.h378
-rw-r--r--include/linux/drbd_genl_api.h55
-rw-r--r--include/linux/drbd_limits.h90
-rw-r--r--include/linux/drbd_nl.h163
-rw-r--r--include/linux/drbd_tag_magic.h84
-rw-r--r--include/linux/fs.h2
-rw-r--r--include/linux/genhd.h8
-rw-r--r--include/linux/genl_magic_func.h422
-rw-r--r--include/linux/genl_magic_struct.h277
-rw-r--r--include/linux/idr.h11
-rw-r--r--include/linux/ipc_namespace.h9
-rw-r--r--include/linux/loop.h3
-rw-r--r--include/linux/lru_cache.h67
-rw-r--r--include/linux/mnt_namespace.h3
-rw-r--r--include/linux/nsproxy.h2
-rw-r--r--include/linux/pid_namespace.h11
-rw-r--r--include/linux/proc_fs.h26
-rw-r--r--include/linux/user_namespace.h10
-rw-r--r--include/linux/utsname.h7
-rw-r--r--include/linux/wait.h164
-rw-r--r--include/net/net_namespace.h2
23 files changed, 1533 insertions, 344 deletions
diff --git a/include/linux/cred.h b/include/linux/cred.h
index 0142aacb70b7..abb2cd50f6b2 100644
--- a/include/linux/cred.h
+++ b/include/linux/cred.h
@@ -344,10 +344,8 @@ static inline void put_cred(const struct cred *_cred)
extern struct user_namespace init_user_ns;
#ifdef CONFIG_USER_NS
#define current_user_ns() (current_cred_xxx(user_ns))
-#define task_user_ns(task) (task_cred_xxx((task), user_ns))
#else
#define current_user_ns() (&init_user_ns)
-#define task_user_ns(task) (&init_user_ns)
#endif
diff --git a/include/linux/drbd.h b/include/linux/drbd.h
index 47e3d4850584..0c5a18ec322c 100644
--- a/include/linux/drbd.h
+++ b/include/linux/drbd.h
@@ -51,12 +51,11 @@
#endif
-
extern const char *drbd_buildtag(void);
-#define REL_VERSION "8.3.13"
-#define API_VERSION 88
+#define REL_VERSION "8.4.2"
+#define API_VERSION 1
#define PRO_VERSION_MIN 86
-#define PRO_VERSION_MAX 96
+#define PRO_VERSION_MAX 101
enum drbd_io_error_p {
@@ -66,7 +65,8 @@ enum drbd_io_error_p {
};
enum drbd_fencing_p {
- FP_DONT_CARE,
+ FP_NOT_AVAIL = -1, /* Not a policy */
+ FP_DONT_CARE = 0,
FP_RESOURCE,
FP_STONITH
};
@@ -102,6 +102,20 @@ enum drbd_on_congestion {
OC_DISCONNECT,
};
+enum drbd_read_balancing {
+ RB_PREFER_LOCAL,
+ RB_PREFER_REMOTE,
+ RB_ROUND_ROBIN,
+ RB_LEAST_PENDING,
+ RB_CONGESTED_REMOTE,
+ RB_32K_STRIPING,
+ RB_64K_STRIPING,
+ RB_128K_STRIPING,
+ RB_256K_STRIPING,
+ RB_512K_STRIPING,
+ RB_1M_STRIPING,
+};
+
/* KEEP the order, do not delete or insert. Only append. */
enum drbd_ret_code {
ERR_CODE_BASE = 100,
@@ -122,7 +136,7 @@ enum drbd_ret_code {
ERR_AUTH_ALG = 120,
ERR_AUTH_ALG_ND = 121,
ERR_NOMEM = 122,
- ERR_DISCARD = 123,
+ ERR_DISCARD_IMPOSSIBLE = 123,
ERR_DISK_CONFIGURED = 124,
ERR_NET_CONFIGURED = 125,
ERR_MANDATORY_TAG = 126,
@@ -130,8 +144,8 @@ enum drbd_ret_code {
ERR_INTR = 129, /* EINTR */
ERR_RESIZE_RESYNC = 130,
ERR_NO_PRIMARY = 131,
- ERR_SYNC_AFTER = 132,
- ERR_SYNC_AFTER_CYCLE = 133,
+ ERR_RESYNC_AFTER = 132,
+ ERR_RESYNC_AFTER_CYCLE = 133,
ERR_PAUSE_IS_SET = 134,
ERR_PAUSE_IS_CLEAR = 135,
ERR_PACKET_NR = 137,
@@ -155,6 +169,14 @@ enum drbd_ret_code {
ERR_CONG_NOT_PROTO_A = 155,
ERR_PIC_AFTER_DEP = 156,
ERR_PIC_PEER_DEP = 157,
+ ERR_RES_NOT_KNOWN = 158,
+ ERR_RES_IN_USE = 159,
+ ERR_MINOR_CONFIGURED = 160,
+ ERR_MINOR_EXISTS = 161,
+ ERR_INVALID_REQUEST = 162,
+ ERR_NEED_APV_100 = 163,
+ ERR_NEED_ALLOW_TWO_PRI = 164,
+ ERR_MD_UNCLEAN = 165,
/* insert new ones above this line */
AFTER_LAST_ERR_CODE
@@ -296,7 +318,8 @@ enum drbd_state_rv {
SS_NOT_SUPPORTED = -17, /* drbd-8.2 only */
SS_IN_TRANSIENT_STATE = -18, /* Retry after the next state change */
SS_CONCURRENT_ST_CHG = -19, /* Concurrent cluster side state change! */
- SS_AFTER_LAST_ERROR = -20, /* Keep this at bottom */
+ SS_O_VOL_PEER_PRI = -20,
+ SS_AFTER_LAST_ERROR = -21, /* Keep this at bottom */
};
/* from drbd_strings.c */
@@ -313,7 +336,9 @@ extern const char *drbd_set_st_err_str(enum drbd_state_rv);
#define MDF_FULL_SYNC (1 << 3)
#define MDF_WAS_UP_TO_DATE (1 << 4)
#define MDF_PEER_OUT_DATED (1 << 5)
-#define MDF_CRASHED_PRIMARY (1 << 6)
+#define MDF_CRASHED_PRIMARY (1 << 6)
+#define MDF_AL_CLEAN (1 << 7)
+#define MDF_AL_DISABLED (1 << 8)
enum drbd_uuid_index {
UI_CURRENT,
@@ -333,37 +358,23 @@ enum drbd_timeout_flag {
#define UUID_JUST_CREATED ((__u64)4)
+/* magic numbers used in meta data and network packets */
#define DRBD_MAGIC 0x83740267
-#define BE_DRBD_MAGIC __constant_cpu_to_be32(DRBD_MAGIC)
#define DRBD_MAGIC_BIG 0x835a
-#define BE_DRBD_MAGIC_BIG __constant_cpu_to_be16(DRBD_MAGIC_BIG)
+#define DRBD_MAGIC_100 0x8620ec20
+
+#define DRBD_MD_MAGIC_07 (DRBD_MAGIC+3)
+#define DRBD_MD_MAGIC_08 (DRBD_MAGIC+4)
+#define DRBD_MD_MAGIC_84_UNCLEAN (DRBD_MAGIC+5)
+
+
+/* how I came up with this magic?
+ * base64 decode "actlog==" ;) */
+#define DRBD_AL_MAGIC 0x69cb65a2
/* these are of type "int" */
#define DRBD_MD_INDEX_INTERNAL -1
#define DRBD_MD_INDEX_FLEX_EXT -2
#define DRBD_MD_INDEX_FLEX_INT -3
-/* Start of the new netlink/connector stuff */
-
-#define DRBD_NL_CREATE_DEVICE 0x01
-#define DRBD_NL_SET_DEFAULTS 0x02
-
-
-/* For searching a vacant cn_idx value */
-#define CN_IDX_STEP 6977
-
-struct drbd_nl_cfg_req {
- int packet_type;
- unsigned int drbd_minor;
- int flags;
- unsigned short tag_list[];
-};
-
-struct drbd_nl_cfg_reply {
- int packet_type;
- unsigned int minor;
- int ret_code; /* enum ret_code or set_st_err_t */
- unsigned short tag_list[]; /* only used with get_* calls */
-};
-
#endif
diff --git a/include/linux/drbd_genl.h b/include/linux/drbd_genl.h
new file mode 100644
index 000000000000..d0d8fac8a6e4
--- /dev/null
+++ b/include/linux/drbd_genl.h
@@ -0,0 +1,378 @@
+/*
+ * General overview:
+ * full generic netlink message:
+ * |nlmsghdr|genlmsghdr|<payload>
+ *
+ * payload:
+ * |optional fixed size family header|<sequence of netlink attributes>
+ *
+ * sequence of netlink attributes:
+ * I chose to have all "top level" attributes NLA_NESTED,
+ * corresponding to some real struct.
+ * So we have a sequence of |tla, len|<nested nla sequence>
+ *
+ * nested nla sequence:
+ * may be empty, or contain a sequence of netlink attributes
+ * representing the struct fields.
+ *
+ * The tag number of any field (regardless of containing struct)
+ * will be available as T_ ## field_name,
+ * so you cannot have the same field name in two differnt structs.
+ *
+ * The tag numbers themselves are per struct, though,
+ * so should always begin at 1 (not 0, that is the special "NLA_UNSPEC" type,
+ * which we won't use here).
+ * The tag numbers are used as index in the respective nla_policy array.
+ *
+ * GENL_struct(tag_name, tag_number, struct name, struct fields) - struct and policy
+ * genl_magic_struct.h
+ * generates the struct declaration,
+ * generates an entry in the tla enum,
+ * genl_magic_func.h
+ * generates an entry in the static tla policy
+ * with .type = NLA_NESTED
+ * generates the static <struct_name>_nl_policy definition,
+ * and static conversion functions
+ *
+ * genl_magic_func.h
+ *
+ * GENL_mc_group(group)
+ * genl_magic_struct.h
+ * does nothing
+ * genl_magic_func.h
+ * defines and registers the mcast group,
+ * and provides a send helper
+ *
+ * GENL_notification(op_name, op_num, mcast_group, tla list)
+ * These are notifications to userspace.
+ *
+ * genl_magic_struct.h
+ * generates an entry in the genl_ops enum,
+ * genl_magic_func.h
+ * does nothing
+ *
+ * mcast group: the name of the mcast group this notification should be
+ * expected on
+ * tla list: the list of expected top level attributes,
+ * for documentation and sanity checking.
+ *
+ * GENL_op(op_name, op_num, flags and handler, tla list) - "genl operations"
+ * These are requests from userspace.
+ *
+ * _op and _notification share the same "number space",
+ * op_nr will be assigned to "genlmsghdr->cmd"
+ *
+ * genl_magic_struct.h
+ * generates an entry in the genl_ops enum,
+ * genl_magic_func.h
+ * generates an entry in the static genl_ops array,
+ * and static register/unregister functions to
+ * genl_register_family_with_ops().
+ *
+ * flags and handler:
+ * GENL_op_init( .doit = x, .dumpit = y, .flags = something)
+ * GENL_doit(x) => .dumpit = NULL, .flags = GENL_ADMIN_PERM
+ * tla list: the list of expected top level attributes,
+ * for documentation and sanity checking.
+ */
+
+/*
+ * STRUCTS
+ */
+
+/* this is sent kernel -> userland on various error conditions, and contains
+ * informational textual info, which is supposedly human readable.
+ * The computer relevant return code is in the drbd_genlmsghdr.
+ */
+GENL_struct(DRBD_NLA_CFG_REPLY, 1, drbd_cfg_reply,
+ /* "arbitrary" size strings, nla_policy.len = 0 */
+ __str_field(1, DRBD_GENLA_F_MANDATORY, info_text, 0)
+)
+
+/* Configuration requests typically need a context to operate on.
+ * Possible keys are device minor (fits in the drbd_genlmsghdr),
+ * the replication link (aka connection) name,
+ * and/or the replication group (aka resource) name,
+ * and the volume id within the resource. */
+GENL_struct(DRBD_NLA_CFG_CONTEXT, 2, drbd_cfg_context,
+ __u32_field(1, DRBD_GENLA_F_MANDATORY, ctx_volume)
+ __str_field(2, DRBD_GENLA_F_MANDATORY, ctx_resource_name, 128)
+ __bin_field(3, DRBD_GENLA_F_MANDATORY, ctx_my_addr, 128)
+ __bin_field(4, DRBD_GENLA_F_MANDATORY, ctx_peer_addr, 128)
+)
+
+GENL_struct(DRBD_NLA_DISK_CONF, 3, disk_conf,
+ __str_field(1, DRBD_F_REQUIRED | DRBD_F_INVARIANT, backing_dev, 128)
+ __str_field(2, DRBD_F_REQUIRED | DRBD_F_INVARIANT, meta_dev, 128)
+ __s32_field(3, DRBD_F_REQUIRED | DRBD_F_INVARIANT, meta_dev_idx)
+
+ /* use the resize command to try and change the disk_size */
+ __u64_field(4, DRBD_GENLA_F_MANDATORY | DRBD_F_INVARIANT, disk_size)
+ /* we could change the max_bio_bvecs,
+ * but it won't propagate through the stack */
+ __u32_field(5, DRBD_GENLA_F_MANDATORY | DRBD_F_INVARIANT, max_bio_bvecs)
+
+ __u32_field_def(6, DRBD_GENLA_F_MANDATORY, on_io_error, DRBD_ON_IO_ERROR_DEF)
+ __u32_field_def(7, DRBD_GENLA_F_MANDATORY, fencing, DRBD_FENCING_DEF)
+
+ __u32_field_def(8, DRBD_GENLA_F_MANDATORY, resync_rate, DRBD_RESYNC_RATE_DEF)
+ __s32_field_def(9, DRBD_GENLA_F_MANDATORY, resync_after, DRBD_MINOR_NUMBER_DEF)
+ __u32_field_def(10, DRBD_GENLA_F_MANDATORY, al_extents, DRBD_AL_EXTENTS_DEF)
+ __u32_field_def(11, DRBD_GENLA_F_MANDATORY, c_plan_ahead, DRBD_C_PLAN_AHEAD_DEF)
+ __u32_field_def(12, DRBD_GENLA_F_MANDATORY, c_delay_target, DRBD_C_DELAY_TARGET_DEF)
+ __u32_field_def(13, DRBD_GENLA_F_MANDATORY, c_fill_target, DRBD_C_FILL_TARGET_DEF)
+ __u32_field_def(14, DRBD_GENLA_F_MANDATORY, c_max_rate, DRBD_C_MAX_RATE_DEF)
+ __u32_field_def(15, DRBD_GENLA_F_MANDATORY, c_min_rate, DRBD_C_MIN_RATE_DEF)
+
+ __flg_field_def(16, DRBD_GENLA_F_MANDATORY, disk_barrier, DRBD_DISK_BARRIER_DEF)
+ __flg_field_def(17, DRBD_GENLA_F_MANDATORY, disk_flushes, DRBD_DISK_FLUSHES_DEF)
+ __flg_field_def(18, DRBD_GENLA_F_MANDATORY, disk_drain, DRBD_DISK_DRAIN_DEF)
+ __flg_field_def(19, DRBD_GENLA_F_MANDATORY, md_flushes, DRBD_MD_FLUSHES_DEF)
+ __u32_field_def(20, DRBD_GENLA_F_MANDATORY, disk_timeout, DRBD_DISK_TIMEOUT_DEF)
+ __u32_field_def(21, 0 /* OPTIONAL */, read_balancing, DRBD_READ_BALANCING_DEF)
+ /* 9: __u32_field_def(22, DRBD_GENLA_F_MANDATORY, unplug_watermark, DRBD_UNPLUG_WATERMARK_DEF) */
+ __flg_field_def(23, 0 /* OPTIONAL */, al_updates, DRBD_AL_UPDATES_DEF)
+)
+
+GENL_struct(DRBD_NLA_RESOURCE_OPTS, 4, res_opts,
+ __str_field_def(1, DRBD_GENLA_F_MANDATORY, cpu_mask, 32)
+ __u32_field_def(2, DRBD_GENLA_F_MANDATORY, on_no_data, DRBD_ON_NO_DATA_DEF)
+)
+
+GENL_struct(DRBD_NLA_NET_CONF, 5, net_conf,
+ __str_field_def(1, DRBD_GENLA_F_MANDATORY | DRBD_F_SENSITIVE,
+ shared_secret, SHARED_SECRET_MAX)
+ __str_field_def(2, DRBD_GENLA_F_MANDATORY, cram_hmac_alg, SHARED_SECRET_MAX)
+ __str_field_def(3, DRBD_GENLA_F_MANDATORY, integrity_alg, SHARED_SECRET_MAX)
+ __str_field_def(4, DRBD_GENLA_F_MANDATORY, verify_alg, SHARED_SECRET_MAX)
+ __str_field_def(5, DRBD_GENLA_F_MANDATORY, csums_alg, SHARED_SECRET_MAX)
+ __u32_field_def(6, DRBD_GENLA_F_MANDATORY, wire_protocol, DRBD_PROTOCOL_DEF)
+ __u32_field_def(7, DRBD_GENLA_F_MANDATORY, connect_int, DRBD_CONNECT_INT_DEF)
+ __u32_field_def(8, DRBD_GENLA_F_MANDATORY, timeout, DRBD_TIMEOUT_DEF)
+ __u32_field_def(9, DRBD_GENLA_F_MANDATORY, ping_int, DRBD_PING_INT_DEF)
+ __u32_field_def(10, DRBD_GENLA_F_MANDATORY, ping_timeo, DRBD_PING_TIMEO_DEF)
+ __u32_field_def(11, DRBD_GENLA_F_MANDATORY, sndbuf_size, DRBD_SNDBUF_SIZE_DEF)
+ __u32_field_def(12, DRBD_GENLA_F_MANDATORY, rcvbuf_size, DRBD_RCVBUF_SIZE_DEF)
+ __u32_field_def(13, DRBD_GENLA_F_MANDATORY, ko_count, DRBD_KO_COUNT_DEF)
+ __u32_field_def(14, DRBD_GENLA_F_MANDATORY, max_buffers, DRBD_MAX_BUFFERS_DEF)
+ __u32_field_def(15, DRBD_GENLA_F_MANDATORY, max_epoch_size, DRBD_MAX_EPOCH_SIZE_DEF)
+ __u32_field_def(16, DRBD_GENLA_F_MANDATORY, unplug_watermark, DRBD_UNPLUG_WATERMARK_DEF)
+ __u32_field_def(17, DRBD_GENLA_F_MANDATORY, after_sb_0p, DRBD_AFTER_SB_0P_DEF)
+ __u32_field_def(18, DRBD_GENLA_F_MANDATORY, after_sb_1p, DRBD_AFTER_SB_1P_DEF)
+ __u32_field_def(19, DRBD_GENLA_F_MANDATORY, after_sb_2p, DRBD_AFTER_SB_2P_DEF)
+ __u32_field_def(20, DRBD_GENLA_F_MANDATORY, rr_conflict, DRBD_RR_CONFLICT_DEF)
+ __u32_field_def(21, DRBD_GENLA_F_MANDATORY, on_congestion, DRBD_ON_CONGESTION_DEF)
+ __u32_field_def(22, DRBD_GENLA_F_MANDATORY, cong_fill, DRBD_CONG_FILL_DEF)
+ __u32_field_def(23, DRBD_GENLA_F_MANDATORY, cong_extents, DRBD_CONG_EXTENTS_DEF)
+ __flg_field_def(24, DRBD_GENLA_F_MANDATORY, two_primaries, DRBD_ALLOW_TWO_PRIMARIES_DEF)
+ __flg_field(25, DRBD_GENLA_F_MANDATORY | DRBD_F_INVARIANT, discard_my_data)
+ __flg_field_def(26, DRBD_GENLA_F_MANDATORY, tcp_cork, DRBD_TCP_CORK_DEF)
+ __flg_field_def(27, DRBD_GENLA_F_MANDATORY, always_asbp, DRBD_ALWAYS_ASBP_DEF)
+ __flg_field(28, DRBD_GENLA_F_MANDATORY | DRBD_F_INVARIANT, tentative)
+ __flg_field_def(29, DRBD_GENLA_F_MANDATORY, use_rle, DRBD_USE_RLE_DEF)
+ /* 9: __u32_field_def(30, DRBD_GENLA_F_MANDATORY, fencing_policy, DRBD_FENCING_DEF) */
+)
+
+GENL_struct(DRBD_NLA_SET_ROLE_PARMS, 6, set_role_parms,
+ __flg_field(1, DRBD_GENLA_F_MANDATORY, assume_uptodate)
+)
+
+GENL_struct(DRBD_NLA_RESIZE_PARMS, 7, resize_parms,
+ __u64_field(1, DRBD_GENLA_F_MANDATORY, resize_size)
+ __flg_field(2, DRBD_GENLA_F_MANDATORY, resize_force)
+ __flg_field(3, DRBD_GENLA_F_MANDATORY, no_resync)
+)
+
+GENL_struct(DRBD_NLA_STATE_INFO, 8, state_info,
+ /* the reason of the broadcast,
+ * if this is an event triggered broadcast. */
+ __u32_field(1, DRBD_GENLA_F_MANDATORY, sib_reason)
+ __u32_field(2, DRBD_F_REQUIRED, current_state)
+ __u64_field(3, DRBD_GENLA_F_MANDATORY, capacity)
+ __u64_field(4, DRBD_GENLA_F_MANDATORY, ed_uuid)
+
+ /* These are for broadcast from after state change work.
+ * prev_state and new_state are from the moment the state change took
+ * place, new_state is not neccessarily the same as current_state,
+ * there may have been more state changes since. Which will be
+ * broadcasted soon, in their respective after state change work. */
+ __u32_field(5, DRBD_GENLA_F_MANDATORY, prev_state)
+ __u32_field(6, DRBD_GENLA_F_MANDATORY, new_state)
+
+ /* if we have a local disk: */
+ __bin_field(7, DRBD_GENLA_F_MANDATORY, uuids, (UI_SIZE*sizeof(__u64)))
+ __u32_field(8, DRBD_GENLA_F_MANDATORY, disk_flags)
+ __u64_field(9, DRBD_GENLA_F_MANDATORY, bits_total)
+ __u64_field(10, DRBD_GENLA_F_MANDATORY, bits_oos)
+ /* and in case resync or online verify is active */
+ __u64_field(11, DRBD_GENLA_F_MANDATORY, bits_rs_total)
+ __u64_field(12, DRBD_GENLA_F_MANDATORY, bits_rs_failed)
+
+ /* for pre and post notifications of helper execution */
+ __str_field(13, DRBD_GENLA_F_MANDATORY, helper, 32)
+ __u32_field(14, DRBD_GENLA_F_MANDATORY, helper_exit_code)
+
+ __u64_field(15, 0, send_cnt)
+ __u64_field(16, 0, recv_cnt)
+ __u64_field(17, 0, read_cnt)
+ __u64_field(18, 0, writ_cnt)
+ __u64_field(19, 0, al_writ_cnt)
+ __u64_field(20, 0, bm_writ_cnt)
+ __u32_field(21, 0, ap_bio_cnt)
+ __u32_field(22, 0, ap_pending_cnt)
+ __u32_field(23, 0, rs_pending_cnt)
+)
+
+GENL_struct(DRBD_NLA_START_OV_PARMS, 9, start_ov_parms,
+ __u64_field(1, DRBD_GENLA_F_MANDATORY, ov_start_sector)
+ __u64_field(2, DRBD_GENLA_F_MANDATORY, ov_stop_sector)
+)
+
+GENL_struct(DRBD_NLA_NEW_C_UUID_PARMS, 10, new_c_uuid_parms,
+ __flg_field(1, DRBD_GENLA_F_MANDATORY, clear_bm)
+)
+
+GENL_struct(DRBD_NLA_TIMEOUT_PARMS, 11, timeout_parms,
+ __u32_field(1, DRBD_F_REQUIRED, timeout_type)
+)
+
+GENL_struct(DRBD_NLA_DISCONNECT_PARMS, 12, disconnect_parms,
+ __flg_field(1, DRBD_GENLA_F_MANDATORY, force_disconnect)
+)
+
+GENL_struct(DRBD_NLA_DETACH_PARMS, 13, detach_parms,
+ __flg_field(1, DRBD_GENLA_F_MANDATORY, force_detach)
+)
+
+/*
+ * Notifications and commands (genlmsghdr->cmd)
+ */
+GENL_mc_group(events)
+
+ /* kernel -> userspace announcement of changes */
+GENL_notification(
+ DRBD_EVENT, 1, events,
+ GENL_tla_expected(DRBD_NLA_CFG_CONTEXT, DRBD_F_REQUIRED)
+ GENL_tla_expected(DRBD_NLA_STATE_INFO, DRBD_F_REQUIRED)
+ GENL_tla_expected(DRBD_NLA_NET_CONF, DRBD_GENLA_F_MANDATORY)
+ GENL_tla_expected(DRBD_NLA_DISK_CONF, DRBD_GENLA_F_MANDATORY)
+ GENL_tla_expected(DRBD_NLA_SYNCER_CONF, DRBD_GENLA_F_MANDATORY)
+)
+
+ /* query kernel for specific or all info */
+GENL_op(
+ DRBD_ADM_GET_STATUS, 2,
+ GENL_op_init(
+ .doit = drbd_adm_get_status,
+ .dumpit = drbd_adm_get_status_all,
+ /* anyone may ask for the status,
+ * it is broadcasted anyways */
+ ),
+ /* To select the object .doit.
+ * Or a subset of objects in .dumpit. */
+ GENL_tla_expected(DRBD_NLA_CFG_CONTEXT, DRBD_GENLA_F_MANDATORY)
+)
+
+ /* add DRBD minor devices as volumes to resources */
+GENL_op(DRBD_ADM_NEW_MINOR, 5, GENL_doit(drbd_adm_add_minor),
+ GENL_tla_expected(DRBD_NLA_CFG_CONTEXT, DRBD_F_REQUIRED))
+GENL_op(DRBD_ADM_DEL_MINOR, 6, GENL_doit(drbd_adm_delete_minor),
+ GENL_tla_expected(DRBD_NLA_CFG_CONTEXT, DRBD_F_REQUIRED))
+
+ /* add or delete resources */
+GENL_op(DRBD_ADM_NEW_RESOURCE, 7, GENL_doit(drbd_adm_new_resource),
+ GENL_tla_expected(DRBD_NLA_CFG_CONTEXT, DRBD_F_REQUIRED))
+GENL_op(DRBD_ADM_DEL_RESOURCE, 8, GENL_doit(drbd_adm_del_resource),
+ GENL_tla_expected(DRBD_NLA_CFG_CONTEXT, DRBD_F_REQUIRED))
+
+GENL_op(DRBD_ADM_RESOURCE_OPTS, 9,
+ GENL_doit(drbd_adm_resource_opts),
+ GENL_tla_expected(DRBD_NLA_CFG_CONTEXT, DRBD_F_REQUIRED)
+ GENL_tla_expected(DRBD_NLA_RESOURCE_OPTS, DRBD_GENLA_F_MANDATORY)
+)
+
+GENL_op(
+ DRBD_ADM_CONNECT, 10,
+ GENL_doit(drbd_adm_connect),
+ GENL_tla_expected(DRBD_NLA_CFG_CONTEXT, DRBD_F_REQUIRED)
+ GENL_tla_expected(DRBD_NLA_NET_CONF, DRBD_F_REQUIRED)
+)
+
+GENL_op(
+ DRBD_ADM_CHG_NET_OPTS, 29,
+ GENL_doit(drbd_adm_net_opts),
+ GENL_tla_expected(DRBD_NLA_CFG_CONTEXT, DRBD_F_REQUIRED)
+ GENL_tla_expected(DRBD_NLA_NET_CONF, DRBD_F_REQUIRED)
+)
+
+GENL_op(DRBD_ADM_DISCONNECT, 11, GENL_doit(drbd_adm_disconnect),
+ GENL_tla_expected(DRBD_NLA_CFG_CONTEXT, DRBD_F_REQUIRED))
+
+GENL_op(DRBD_ADM_ATTACH, 12,
+ GENL_doit(drbd_adm_attach),
+ GENL_tla_expected(DRBD_NLA_CFG_CONTEXT, DRBD_F_REQUIRED)
+ GENL_tla_expected(DRBD_NLA_DISK_CONF, DRBD_F_REQUIRED)
+)
+
+GENL_op(DRBD_ADM_CHG_DISK_OPTS, 28,
+ GENL_doit(drbd_adm_disk_opts),
+ GENL_tla_expected(DRBD_NLA_CFG_CONTEXT, DRBD_F_REQUIRED)
+ GENL_tla_expected(DRBD_NLA_DISK_OPTS, DRBD_F_REQUIRED)
+)
+
+GENL_op(
+ DRBD_ADM_RESIZE, 13,
+ GENL_doit(drbd_adm_resize),
+ GENL_tla_expected(DRBD_NLA_CFG_CONTEXT, DRBD_F_REQUIRED)
+ GENL_tla_expected(DRBD_NLA_RESIZE_PARMS, DRBD_GENLA_F_MANDATORY)
+)
+
+GENL_op(
+ DRBD_ADM_PRIMARY, 14,
+ GENL_doit(drbd_adm_set_role),
+ GENL_tla_expected(DRBD_NLA_CFG_CONTEXT, DRBD_F_REQUIRED)
+ GENL_tla_expected(DRBD_NLA_SET_ROLE_PARMS, DRBD_F_REQUIRED)
+)
+
+GENL_op(
+ DRBD_ADM_SECONDARY, 15,
+ GENL_doit(drbd_adm_set_role),
+ GENL_tla_expected(DRBD_NLA_CFG_CONTEXT, DRBD_F_REQUIRED)
+ GENL_tla_expected(DRBD_NLA_SET_ROLE_PARMS, DRBD_F_REQUIRED)
+)
+
+GENL_op(
+ DRBD_ADM_NEW_C_UUID, 16,
+ GENL_doit(drbd_adm_new_c_uuid),
+ GENL_tla_expected(DRBD_NLA_CFG_CONTEXT, DRBD_F_REQUIRED)
+ GENL_tla_expected(DRBD_NLA_NEW_C_UUID_PARMS, DRBD_GENLA_F_MANDATORY)
+)
+
+GENL_op(
+ DRBD_ADM_START_OV, 17,
+ GENL_doit(drbd_adm_start_ov),
+ GENL_tla_expected(DRBD_NLA_START_OV_PARMS, DRBD_GENLA_F_MANDATORY)
+)
+
+GENL_op(DRBD_ADM_DETACH, 18, GENL_doit(drbd_adm_detach),
+ GENL_tla_expected(DRBD_NLA_CFG_CONTEXT, DRBD_F_REQUIRED)
+ GENL_tla_expected(DRBD_NLA_DETACH_PARMS, DRBD_GENLA_F_MANDATORY))
+
+GENL_op(DRBD_ADM_INVALIDATE, 19, GENL_doit(drbd_adm_invalidate),
+ GENL_tla_expected(DRBD_NLA_CFG_CONTEXT, DRBD_F_REQUIRED))
+GENL_op(DRBD_ADM_INVAL_PEER, 20, GENL_doit(drbd_adm_invalidate_peer),
+ GENL_tla_expected(DRBD_NLA_CFG_CONTEXT, DRBD_F_REQUIRED))
+GENL_op(DRBD_ADM_PAUSE_SYNC, 21, GENL_doit(drbd_adm_pause_sync),
+ GENL_tla_expected(DRBD_NLA_CFG_CONTEXT, DRBD_F_REQUIRED))
+GENL_op(DRBD_ADM_RESUME_SYNC, 22, GENL_doit(drbd_adm_resume_sync),
+ GENL_tla_expected(DRBD_NLA_CFG_CONTEXT, DRBD_F_REQUIRED))
+GENL_op(DRBD_ADM_SUSPEND_IO, 23, GENL_doit(drbd_adm_suspend_io),
+ GENL_tla_expected(DRBD_NLA_CFG_CONTEXT, DRBD_F_REQUIRED))
+GENL_op(DRBD_ADM_RESUME_IO, 24, GENL_doit(drbd_adm_resume_io),
+ GENL_tla_expected(DRBD_NLA_CFG_CONTEXT, DRBD_F_REQUIRED))
+GENL_op(DRBD_ADM_OUTDATE, 25, GENL_doit(drbd_adm_outdate),
+ GENL_tla_expected(DRBD_NLA_CFG_CONTEXT, DRBD_F_REQUIRED))
+GENL_op(DRBD_ADM_GET_TIMEOUT_TYPE, 26, GENL_doit(drbd_adm_get_timeout_type),
+ GENL_tla_expected(DRBD_NLA_CFG_CONTEXT, DRBD_F_REQUIRED))
+GENL_op(DRBD_ADM_DOWN, 27, GENL_doit(drbd_adm_down),
+ GENL_tla_expected(DRBD_NLA_CFG_CONTEXT, DRBD_F_REQUIRED))
diff --git a/include/linux/drbd_genl_api.h b/include/linux/drbd_genl_api.h
new file mode 100644
index 000000000000..9ef50d51e34e
--- /dev/null
+++ b/include/linux/drbd_genl_api.h
@@ -0,0 +1,55 @@
+#ifndef DRBD_GENL_STRUCT_H
+#define DRBD_GENL_STRUCT_H
+
+/**
+ * struct drbd_genlmsghdr - DRBD specific header used in NETLINK_GENERIC requests
+ * @minor:
+ * For admin requests (user -> kernel): which minor device to operate on.
+ * For (unicast) replies or informational (broadcast) messages
+ * (kernel -> user): which minor device the information is about.
+ * If we do not operate on minors, but on connections or resources,
+ * the minor value shall be (~0), and the attribute DRBD_NLA_CFG_CONTEXT
+ * is used instead.
+ * @flags: possible operation modifiers (relevant only for user->kernel):
+ * DRBD_GENL_F_SET_DEFAULTS
+ * @volume:
+ * When creating a new minor (adding it to a resource), the resource needs
+ * to know which volume number within the resource this is supposed to be.
+ * The volume number corresponds to the same volume number on the remote side,
+ * whereas the minor number on the remote side may be different
+ * (union with flags).
+ * @ret_code: kernel->userland unicast cfg reply return code (union with flags);
+ */
+struct drbd_genlmsghdr {
+ __u32 minor;
+ union {
+ __u32 flags;
+ __s32 ret_code;
+ };
+};
+
+/* To be used in drbd_genlmsghdr.flags */
+enum {
+ DRBD_GENL_F_SET_DEFAULTS = 1,
+};
+
+enum drbd_state_info_bcast_reason {
+ SIB_GET_STATUS_REPLY = 1,
+ SIB_STATE_CHANGE = 2,
+ SIB_HELPER_PRE = 3,
+ SIB_HELPER_POST = 4,
+ SIB_SYNC_PROGRESS = 5,
+};
+
+/* hack around predefined gcc/cpp "linux=1",
+ * we cannot possibly include <1/drbd_genl.h> */
+#undef linux
+
+#include <linux/drbd.h>
+#define GENL_MAGIC_VERSION API_VERSION
+#define GENL_MAGIC_FAMILY drbd
+#define GENL_MAGIC_FAMILY_HDRSZ sizeof(struct drbd_genlmsghdr)
+#define GENL_MAGIC_INCLUDE_FILE <linux/drbd_genl.h>
+#include <linux/genl_magic_struct.h>
+
+#endif
diff --git a/include/linux/drbd_limits.h b/include/linux/drbd_limits.h
index fb670bf603f7..1fa19c5f5e64 100644
--- a/include/linux/drbd_limits.h
+++ b/include/linux/drbd_limits.h
@@ -16,29 +16,37 @@
#define DEBUG_RANGE_CHECK 0
#define DRBD_MINOR_COUNT_MIN 1
-#define DRBD_MINOR_COUNT_MAX 256
+#define DRBD_MINOR_COUNT_MAX 255
#define DRBD_MINOR_COUNT_DEF 32
+#define DRBD_MINOR_COUNT_SCALE '1'
+
+#define DRBD_VOLUME_MAX 65535
#define DRBD_DIALOG_REFRESH_MIN 0
#define DRBD_DIALOG_REFRESH_MAX 600
+#define DRBD_DIALOG_REFRESH_SCALE '1'
/* valid port number */
#define DRBD_PORT_MIN 1
#define DRBD_PORT_MAX 0xffff
+#define DRBD_PORT_SCALE '1'
/* startup { */
/* if you want more than 3.4 days, disable */
#define DRBD_WFC_TIMEOUT_MIN 0
#define DRBD_WFC_TIMEOUT_MAX 300000
#define DRBD_WFC_TIMEOUT_DEF 0
+#define DRBD_WFC_TIMEOUT_SCALE '1'
#define DRBD_DEGR_WFC_TIMEOUT_MIN 0
#define DRBD_DEGR_WFC_TIMEOUT_MAX 300000
#define DRBD_DEGR_WFC_TIMEOUT_DEF 0
+#define DRBD_DEGR_WFC_TIMEOUT_SCALE '1'
#define DRBD_OUTDATED_WFC_TIMEOUT_MIN 0
#define DRBD_OUTDATED_WFC_TIMEOUT_MAX 300000
#define DRBD_OUTDATED_WFC_TIMEOUT_DEF 0
+#define DRBD_OUTDATED_WFC_TIMEOUT_SCALE '1'
/* }*/
/* net { */
@@ -47,75 +55,91 @@
#define DRBD_TIMEOUT_MIN 1
#define DRBD_TIMEOUT_MAX 600
#define DRBD_TIMEOUT_DEF 60 /* 6 seconds */
+#define DRBD_TIMEOUT_SCALE '1'
/* If backing disk takes longer than disk_timeout, mark the disk as failed */
#define DRBD_DISK_TIMEOUT_MIN 0 /* 0 = disabled */
#define DRBD_DISK_TIMEOUT_MAX 6000 /* 10 Minutes */
#define DRBD_DISK_TIMEOUT_DEF 0 /* disabled */
+#define DRBD_DISK_TIMEOUT_SCALE '1'
/* active connection retries when C_WF_CONNECTION */
#define DRBD_CONNECT_INT_MIN 1
#define DRBD_CONNECT_INT_MAX 120
#define DRBD_CONNECT_INT_DEF 10 /* seconds */
+#define DRBD_CONNECT_INT_SCALE '1'
/* keep-alive probes when idle */
#define DRBD_PING_INT_MIN 1
#define DRBD_PING_INT_MAX 120
#define DRBD_PING_INT_DEF 10
+#define DRBD_PING_INT_SCALE '1'
/* timeout for the ping packets.*/
#define DRBD_PING_TIMEO_MIN 1
#define DRBD_PING_TIMEO_MAX 300
#define DRBD_PING_TIMEO_DEF 5
+#define DRBD_PING_TIMEO_SCALE '1'
/* max number of write requests between write barriers */
#define DRBD_MAX_EPOCH_SIZE_MIN 1
#define DRBD_MAX_EPOCH_SIZE_MAX 20000
#define DRBD_MAX_EPOCH_SIZE_DEF 2048
+#define DRBD_MAX_EPOCH_SIZE_SCALE '1'
/* I don't think that a tcp send buffer of more than 10M is useful */
#define DRBD_SNDBUF_SIZE_MIN 0
#define DRBD_SNDBUF_SIZE_MAX (10<<20)
#define DRBD_SNDBUF_SIZE_DEF 0
+#define DRBD_SNDBUF_SIZE_SCALE '1'
#define DRBD_RCVBUF_SIZE_MIN 0
#define DRBD_RCVBUF_SIZE_MAX (10<<20)
#define DRBD_RCVBUF_SIZE_DEF 0
+#define DRBD_RCVBUF_SIZE_SCALE '1'
/* @4k PageSize -> 128kB - 512MB */
#define DRBD_MAX_BUFFERS_MIN 32
#define DRBD_MAX_BUFFERS_MAX 131072
#define DRBD_MAX_BUFFERS_DEF 2048
+#define DRBD_MAX_BUFFERS_SCALE '1'
/* @4k PageSize -> 4kB - 512MB */
#define DRBD_UNPLUG_WATERMARK_MIN 1
#define DRBD_UNPLUG_WATERMARK_MAX 131072
#define DRBD_UNPLUG_WATERMARK_DEF (DRBD_MAX_BUFFERS_DEF/16)
+#define DRBD_UNPLUG_WATERMARK_SCALE '1'
/* 0 is disabled.
* 200 should be more than enough even for very short timeouts */
#define DRBD_KO_COUNT_MIN 0
#define DRBD_KO_COUNT_MAX 200
-#define DRBD_KO_COUNT_DEF 0
+#define DRBD_KO_COUNT_DEF 7
+#define DRBD_KO_COUNT_SCALE '1'
/* } */
/* syncer { */
/* FIXME allow rate to be zero? */
-#define DRBD_RATE_MIN 1
+#define DRBD_RESYNC_RATE_MIN 1
/* channel bonding 10 GbE, or other hardware */
-#define DRBD_RATE_MAX (4 << 20)
-#define DRBD_RATE_DEF 250 /* kb/second */
+#define DRBD_RESYNC_RATE_MAX (4 << 20)
+#define DRBD_RESYNC_RATE_DEF 250
+#define DRBD_RESYNC_RATE_SCALE 'k' /* kilobytes */
/* less than 7 would hit performance unnecessarily.
- * 3833 is the largest prime that still does fit
- * into 64 sectors of activity log */
+ * 919 slots context information per transaction,
+ * 32k activity log, 4k transaction size,
+ * one transaction in flight:
+ * 919 * 7 = 6433 */
#define DRBD_AL_EXTENTS_MIN 7
-#define DRBD_AL_EXTENTS_MAX 3833
-#define DRBD_AL_EXTENTS_DEF 127
+#define DRBD_AL_EXTENTS_MAX 6433
+#define DRBD_AL_EXTENTS_DEF 1237
+#define DRBD_AL_EXTENTS_SCALE '1'
-#define DRBD_AFTER_MIN -1
-#define DRBD_AFTER_MAX 255
-#define DRBD_AFTER_DEF -1
+#define DRBD_MINOR_NUMBER_MIN -1
+#define DRBD_MINOR_NUMBER_MAX ((1 << 20) - 1)
+#define DRBD_MINOR_NUMBER_DEF -1
+#define DRBD_MINOR_NUMBER_SCALE '1'
/* } */
@@ -124,11 +148,12 @@
* the upper limit with 64bit kernel, enough ram and flexible meta data
* is 1 PiB, currently. */
/* DRBD_MAX_SECTORS */
-#define DRBD_DISK_SIZE_SECT_MIN 0
-#define DRBD_DISK_SIZE_SECT_MAX (1 * (2LLU << 40))
-#define DRBD_DISK_SIZE_SECT_DEF 0 /* = disabled = no user size... */
+#define DRBD_DISK_SIZE_MIN 0
+#define DRBD_DISK_SIZE_MAX (1 * (2LLU << 40))
+#define DRBD_DISK_SIZE_DEF 0 /* = disabled = no user size... */
+#define DRBD_DISK_SIZE_SCALE 's' /* sectors */
-#define DRBD_ON_IO_ERROR_DEF EP_PASS_ON
+#define DRBD_ON_IO_ERROR_DEF EP_DETACH
#define DRBD_FENCING_DEF FP_DONT_CARE
#define DRBD_AFTER_SB_0P_DEF ASB_DISCONNECT
#define DRBD_AFTER_SB_1P_DEF ASB_DISCONNECT
@@ -136,38 +161,59 @@
#define DRBD_RR_CONFLICT_DEF ASB_DISCONNECT
#define DRBD_ON_NO_DATA_DEF OND_IO_ERROR
#define DRBD_ON_CONGESTION_DEF OC_BLOCK
+#define DRBD_READ_BALANCING_DEF RB_PREFER_LOCAL
#define DRBD_MAX_BIO_BVECS_MIN 0
#define DRBD_MAX_BIO_BVECS_MAX 128
#define DRBD_MAX_BIO_BVECS_DEF 0
+#define DRBD_MAX_BIO_BVECS_SCALE '1'
#define DRBD_C_PLAN_AHEAD_MIN 0
#define DRBD_C_PLAN_AHEAD_MAX 300
-#define DRBD_C_PLAN_AHEAD_DEF 0 /* RS rate controller disabled by default */
+#define DRBD_C_PLAN_AHEAD_DEF 20
+#define DRBD_C_PLAN_AHEAD_SCALE '1'
#define DRBD_C_DELAY_TARGET_MIN 1
#define DRBD_C_DELAY_TARGET_MAX 100
#define DRBD_C_DELAY_TARGET_DEF 10
+#define DRBD_C_DELAY_TARGET_SCALE '1'
#define DRBD_C_FILL_TARGET_MIN 0
#define DRBD_C_FILL_TARGET_MAX (1<<20) /* 500MByte in sec */
-#define DRBD_C_FILL_TARGET_DEF 0 /* By default disabled -> controlled by delay_target */
+#define DRBD_C_FILL_TARGET_DEF 100 /* Try to place 50KiB in socket send buffer during resync */
+#define DRBD_C_FILL_TARGET_SCALE 's' /* sectors */
-#define DRBD_C_MAX_RATE_MIN 250 /* kByte/sec */
+#define DRBD_C_MAX_RATE_MIN 250
#define DRBD_C_MAX_RATE_MAX (4 << 20)
#define DRBD_C_MAX_RATE_DEF 102400
+#define DRBD_C_MAX_RATE_SCALE 'k' /* kilobytes */
-#define DRBD_C_MIN_RATE_MIN 0 /* kByte/sec */
+#define DRBD_C_MIN_RATE_MIN 0
#define DRBD_C_MIN_RATE_MAX (4 << 20)
-#define DRBD_C_MIN_RATE_DEF 4096
+#define DRBD_C_MIN_RATE_DEF 250
+#define DRBD_C_MIN_RATE_SCALE 'k' /* kilobytes */
#define DRBD_CONG_FILL_MIN 0
#define DRBD_CONG_FILL_MAX (10<<21) /* 10GByte in sectors */
#define DRBD_CONG_FILL_DEF 0
+#define DRBD_CONG_FILL_SCALE 's' /* sectors */
#define DRBD_CONG_EXTENTS_MIN DRBD_AL_EXTENTS_MIN
#define DRBD_CONG_EXTENTS_MAX DRBD_AL_EXTENTS_MAX
#define DRBD_CONG_EXTENTS_DEF DRBD_AL_EXTENTS_DEF
+#define DRBD_CONG_EXTENTS_SCALE DRBD_AL_EXTENTS_SCALE
+
+#define DRBD_PROTOCOL_DEF DRBD_PROT_C
+
+#define DRBD_DISK_BARRIER_DEF 0
+#define DRBD_DISK_FLUSHES_DEF 1
+#define DRBD_DISK_DRAIN_DEF 1
+#define DRBD_MD_FLUSHES_DEF 1
+#define DRBD_TCP_CORK_DEF 1
+#define DRBD_AL_UPDATES_DEF 1
+
+#define DRBD_ALLOW_TWO_PRIMARIES_DEF 0
+#define DRBD_ALWAYS_ASBP_DEF 0
+#define DRBD_USE_RLE_DEF 1
-#undef RANGE
#endif
diff --git a/include/linux/drbd_nl.h b/include/linux/drbd_nl.h
deleted file mode 100644
index a8706f08ab36..000000000000
--- a/include/linux/drbd_nl.h
+++ /dev/null
@@ -1,163 +0,0 @@
-/*
- PAKET( name,
- TYPE ( pn, pr, member )
- ...
- )
-
- You may never reissue one of the pn arguments
-*/
-
-#if !defined(NL_PACKET) || !defined(NL_STRING) || !defined(NL_INTEGER) || !defined(NL_BIT) || !defined(NL_INT64)
-#error "The macros NL_PACKET, NL_STRING, NL_INTEGER, NL_INT64 and NL_BIT needs to be defined"
-#endif
-
-NL_PACKET(primary, 1,
- NL_BIT( 1, T_MAY_IGNORE, primary_force)
-)
-
-NL_PACKET(secondary, 2, )
-
-NL_PACKET(disk_conf, 3,
- NL_INT64( 2, T_MAY_IGNORE, disk_size)
- NL_STRING( 3, T_MANDATORY, backing_dev, 128)
- NL_STRING( 4, T_MANDATORY, meta_dev, 128)
- NL_INTEGER( 5, T_MANDATORY, meta_dev_idx)
- NL_INTEGER( 6, T_MAY_IGNORE, on_io_error)
- NL_INTEGER( 7, T_MAY_IGNORE, fencing)
- NL_BIT( 37, T_MAY_IGNORE, use_bmbv)
- NL_BIT( 53, T_MAY_IGNORE, no_disk_flush)
- NL_BIT( 54, T_MAY_IGNORE, no_md_flush)
- /* 55 max_bio_size was available in 8.2.6rc2 */
- NL_INTEGER( 56, T_MAY_IGNORE, max_bio_bvecs)
- NL_BIT( 57, T_MAY_IGNORE, no_disk_barrier)
- NL_BIT( 58, T_MAY_IGNORE, no_disk_drain)
- NL_INTEGER( 89, T_MAY_IGNORE, disk_timeout)
-)
-
-NL_PACKET(detach, 4,
- NL_BIT( 88, T_MANDATORY, detach_force)
-)
-
-NL_PACKET(net_conf, 5,
- NL_STRING( 8, T_MANDATORY, my_addr, 128)
- NL_STRING( 9, T_MANDATORY, peer_addr, 128)
- NL_STRING( 10, T_MAY_IGNORE, shared_secret, SHARED_SECRET_MAX)
- NL_STRING( 11, T_MAY_IGNORE, cram_hmac_alg, SHARED_SECRET_MAX)
- NL_STRING( 44, T_MAY_IGNORE, integrity_alg, SHARED_SECRET_MAX)
- NL_INTEGER( 14, T_MAY_IGNORE, timeout)
- NL_INTEGER( 15, T_MANDATORY, wire_protocol)
- NL_INTEGER( 16, T_MAY_IGNORE, try_connect_int)
- NL_INTEGER( 17, T_MAY_IGNORE, ping_int)
- NL_INTEGER( 18, T_MAY_IGNORE, max_epoch_size)
- NL_INTEGER( 19, T_MAY_IGNORE, max_buffers)
- NL_INTEGER( 20, T_MAY_IGNORE, unplug_watermark)
- NL_INTEGER( 21, T_MAY_IGNORE, sndbuf_size)
- NL_INTEGER( 22, T_MAY_IGNORE, ko_count)
- NL_INTEGER( 24, T_MAY_IGNORE, after_sb_0p)
- NL_INTEGER( 25, T_MAY_IGNORE, after_sb_1p)
- NL_INTEGER( 26, T_MAY_IGNORE, after_sb_2p)
- NL_INTEGER( 39, T_MAY_IGNORE, rr_conflict)
- NL_INTEGER( 40, T_MAY_IGNORE, ping_timeo)
- NL_INTEGER( 67, T_MAY_IGNORE, rcvbuf_size)
- NL_INTEGER( 81, T_MAY_IGNORE, on_congestion)
- NL_INTEGER( 82, T_MAY_IGNORE, cong_fill)
- NL_INTEGER( 83, T_MAY_IGNORE, cong_extents)
- /* 59 addr_family was available in GIT, never released */
- NL_BIT( 60, T_MANDATORY, mind_af)
- NL_BIT( 27, T_MAY_IGNORE, want_lose)
- NL_BIT( 28, T_MAY_IGNORE, two_primaries)
- NL_BIT( 41, T_MAY_IGNORE, always_asbp)
- NL_BIT( 61, T_MAY_IGNORE, no_cork)
- NL_BIT( 62, T_MANDATORY, auto_sndbuf_size)
- NL_BIT( 70, T_MANDATORY, dry_run)
-)
-
-NL_PACKET(disconnect, 6,
- NL_BIT( 84, T_MAY_IGNORE, force)
-)
-
-NL_PACKET(resize, 7,
- NL_INT64( 29, T_MAY_IGNORE, resize_size)
- NL_BIT( 68, T_MAY_IGNORE, resize_force)
- NL_BIT( 69, T_MANDATORY, no_resync)
-)
-
-NL_PACKET(syncer_conf, 8,
- NL_INTEGER( 30, T_MAY_IGNORE, rate)
- NL_INTEGER( 31, T_MAY_IGNORE, after)
- NL_INTEGER( 32, T_MAY_IGNORE, al_extents)
-/* NL_INTEGER( 71, T_MAY_IGNORE, dp_volume)
- * NL_INTEGER( 72, T_MAY_IGNORE, dp_interval)
- * NL_INTEGER( 73, T_MAY_IGNORE, throttle_th)
- * NL_INTEGER( 74, T_MAY_IGNORE, hold_off_th)
- * feature will be reimplemented differently with 8.3.9 */
- NL_STRING( 52, T_MAY_IGNORE, verify_alg, SHARED_SECRET_MAX)
- NL_STRING( 51, T_MAY_IGNORE, cpu_mask, 32)
- NL_STRING( 64, T_MAY_IGNORE, csums_alg, SHARED_SECRET_MAX)
- NL_BIT( 65, T_MAY_IGNORE, use_rle)
- NL_INTEGER( 75, T_MAY_IGNORE, on_no_data)
- NL_INTEGER( 76, T_MAY_IGNORE, c_plan_ahead)
- NL_INTEGER( 77, T_MAY_IGNORE, c_delay_target)
- NL_INTEGER( 78, T_MAY_IGNORE, c_fill_target)
- NL_INTEGER( 79, T_MAY_IGNORE, c_max_rate)
- NL_INTEGER( 80, T_MAY_IGNORE, c_min_rate)
-)
-
-NL_PACKET(invalidate, 9, )
-NL_PACKET(invalidate_peer, 10, )
-NL_PACKET(pause_sync, 11, )
-NL_PACKET(resume_sync, 12, )
-NL_PACKET(suspend_io, 13, )
-NL_PACKET(resume_io, 14, )
-NL_PACKET(outdate, 15, )
-NL_PACKET(get_config, 16, )
-NL_PACKET(get_state, 17,
- NL_INTEGER( 33, T_MAY_IGNORE, state_i)
-)
-
-NL_PACKET(get_uuids, 18,
- NL_STRING( 34, T_MAY_IGNORE, uuids, (UI_SIZE*sizeof(__u64)))
- NL_INTEGER( 35, T_MAY_IGNORE, uuids_flags)
-)
-
-NL_PACKET(get_timeout_flag, 19,
- NL_BIT( 36, T_MAY_IGNORE, use_degraded)
-)
-
-NL_PACKET(call_helper, 20,
- NL_STRING( 38, T_MAY_IGNORE, helper, 32)
-)
-
-/* Tag nr 42 already allocated in drbd-8.1 development. */
-
-NL_PACKET(sync_progress, 23,
- NL_INTEGER( 43, T_MAY_IGNORE, sync_progress)
-)
-
-NL_PACKET(dump_ee, 24,
- NL_STRING( 45, T_MAY_IGNORE, dump_ee_reason, 32)
- NL_STRING( 46, T_MAY_IGNORE, seen_digest, SHARED_SECRET_MAX)
- NL_STRING( 47, T_MAY_IGNORE, calc_digest, SHARED_SECRET_MAX)
- NL_INT64( 48, T_MAY_IGNORE, ee_sector)
- NL_INT64( 49, T_MAY_IGNORE, ee_block_id)
- NL_STRING( 50, T_MAY_IGNORE, ee_data, 32 << 10)
-)
-
-NL_PACKET(start_ov, 25,
- NL_INT64( 66, T_MAY_IGNORE, start_sector)
-)
-
-NL_PACKET(new_c_uuid, 26,
- NL_BIT( 63, T_MANDATORY, clear_bm)
-)
-
-#ifdef NL_RESPONSE
-NL_RESPONSE(return_code_only, 27)
-#endif
-
-#undef NL_PACKET
-#undef NL_INTEGER
-#undef NL_INT64
-#undef NL_BIT
-#undef NL_STRING
-#undef NL_RESPONSE
diff --git a/include/linux/drbd_tag_magic.h b/include/linux/drbd_tag_magic.h
deleted file mode 100644
index 82de1f9e48b1..000000000000
--- a/include/linux/drbd_tag_magic.h
+++ /dev/null
@@ -1,84 +0,0 @@
-#ifndef DRBD_TAG_MAGIC_H
-#define DRBD_TAG_MAGIC_H
-
-#define TT_END 0
-#define TT_REMOVED 0xE000
-
-/* declare packet_type enums */
-enum packet_types {
-#define NL_PACKET(name, number, fields) P_ ## name = number,
-#define NL_RESPONSE(name, number) P_ ## name = number,
-#define NL_INTEGER(pn, pr, member)
-#define NL_INT64(pn, pr, member)
-#define NL_BIT(pn, pr, member)
-#define NL_STRING(pn, pr, member, len)
-#include <linux/drbd_nl.h>
- P_nl_after_last_packet,
-};
-
-/* These struct are used to deduce the size of the tag lists: */
-#define NL_PACKET(name, number, fields) \
- struct name ## _tag_len_struct { fields };
-#define NL_INTEGER(pn, pr, member) \
- int member; int tag_and_len ## member;
-#define NL_INT64(pn, pr, member) \
- __u64 member; int tag_and_len ## member;
-#define NL_BIT(pn, pr, member) \
- unsigned char member:1; int tag_and_len ## member;
-#define NL_STRING(pn, pr, member, len) \
- unsigned char member[len]; int member ## _len; \
- int tag_and_len ## member;
-#include <linux/drbd_nl.h>
-
-/* declare tag-list-sizes */
-static const int tag_list_sizes[] = {
-#define NL_PACKET(name, number, fields) 2 fields ,
-#define NL_INTEGER(pn, pr, member) + 4 + 4
-#define NL_INT64(pn, pr, member) + 4 + 8
-#define NL_BIT(pn, pr, member) + 4 + 1
-#define NL_STRING(pn, pr, member, len) + 4 + (len)
-#include <linux/drbd_nl.h>
-};
-
-/* The two highest bits are used for the tag type */
-#define TT_MASK 0xC000
-#define TT_INTEGER 0x0000
-#define TT_INT64 0x4000
-#define TT_BIT 0x8000
-#define TT_STRING 0xC000
-/* The next bit indicates if processing of the tag is mandatory */
-#define T_MANDATORY 0x2000
-#define T_MAY_IGNORE 0x0000
-#define TN_MASK 0x1fff
-/* The remaining 13 bits are used to enumerate the tags */
-
-#define tag_type(T) ((T) & TT_MASK)
-#define tag_number(T) ((T) & TN_MASK)
-
-/* declare tag enums */
-#define NL_PACKET(name, number, fields) fields
-enum drbd_tags {
-#define NL_INTEGER(pn, pr, member) T_ ## member = pn | TT_INTEGER | pr ,
-#define NL_INT64(pn, pr, member) T_ ## member = pn | TT_INT64 | pr ,
-#define NL_BIT(pn, pr, member) T_ ## member = pn | TT_BIT | pr ,
-#define NL_STRING(pn, pr, member, len) T_ ## member = pn | TT_STRING | pr ,
-#include <linux/drbd_nl.h>
-};
-
-struct tag {
- const char *name;
- int type_n_flags;
- int max_len;
-};
-
-/* declare tag names */
-#define NL_PACKET(name, number, fields) fields
-static const struct tag tag_descriptions[] = {
-#define NL_INTEGER(pn, pr, member) [ pn ] = { #member, TT_INTEGER | pr, sizeof(int) },
-#define NL_INT64(pn, pr, member) [ pn ] = { #member, TT_INT64 | pr, sizeof(__u64) },
-#define NL_BIT(pn, pr, member) [ pn ] = { #member, TT_BIT | pr, sizeof(int) },
-#define NL_STRING(pn, pr, member, len) [ pn ] = { #member, TT_STRING | pr, (len) },
-#include <linux/drbd_nl.h>
-};
-
-#endif
diff --git a/include/linux/fs.h b/include/linux/fs.h
index 5abf703d06ba..a823d4be38e7 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -1810,6 +1810,8 @@ struct file_system_type {
#define FS_REQUIRES_DEV 1
#define FS_BINARY_MOUNTDATA 2
#define FS_HAS_SUBTYPE 4
+#define FS_USERNS_MOUNT 8 /* Can be mounted by userns root */
+#define FS_USERNS_DEV_MOUNT 16 /* A userns mount does not imply MNT_NODEV */
#define FS_REVAL_DOT 16384 /* Check the paths ".", ".." for staleness */
#define FS_RENAME_DOES_D_MOVE 32768 /* FS will handle d_move() during rename() internally. */
struct dentry *(*mount) (struct file_system_type *, int,
diff --git a/include/linux/genhd.h b/include/linux/genhd.h
index 4f440b3e89fe..79b8bba19363 100644
--- a/include/linux/genhd.h
+++ b/include/linux/genhd.h
@@ -88,10 +88,14 @@ struct disk_stats {
};
#define PARTITION_META_INFO_VOLNAMELTH 64
-#define PARTITION_META_INFO_UUIDLTH 16
+/*
+ * Enough for the string representation of any kind of UUID plus NULL.
+ * EFI UUID is 36 characters. MSDOS UUID is 11 characters.
+ */
+#define PARTITION_META_INFO_UUIDLTH 37
struct partition_meta_info {
- u8 uuid[PARTITION_META_INFO_UUIDLTH]; /* always big endian */
+ char uuid[PARTITION_META_INFO_UUIDLTH];
u8 volname[PARTITION_META_INFO_VOLNAMELTH];
};
diff --git a/include/linux/genl_magic_func.h b/include/linux/genl_magic_func.h
new file mode 100644
index 000000000000..023bc346b877
--- /dev/null
+++ b/include/linux/genl_magic_func.h
@@ -0,0 +1,422 @@
+#ifndef GENL_MAGIC_FUNC_H
+#define GENL_MAGIC_FUNC_H
+
+#include <linux/genl_magic_struct.h>
+
+/*
+ * Magic: declare tla policy {{{1
+ * Magic: declare nested policies
+ * {{{2
+ */
+#undef GENL_mc_group
+#define GENL_mc_group(group)
+
+#undef GENL_notification
+#define GENL_notification(op_name, op_num, mcast_group, tla_list)
+
+#undef GENL_op
+#define GENL_op(op_name, op_num, handler, tla_list)
+
+#undef GENL_struct
+#define GENL_struct(tag_name, tag_number, s_name, s_fields) \
+ [tag_name] = { .type = NLA_NESTED },
+
+static struct nla_policy CONCAT_(GENL_MAGIC_FAMILY, _tla_nl_policy)[] = {
+#include GENL_MAGIC_INCLUDE_FILE
+};
+
+#undef GENL_struct
+#define GENL_struct(tag_name, tag_number, s_name, s_fields) \
+static struct nla_policy s_name ## _nl_policy[] __read_mostly = \
+{ s_fields };
+
+#undef __field
+#define __field(attr_nr, attr_flag, name, nla_type, _type, __get, \
+ __put, __is_signed) \
+ [attr_nr] = { .type = nla_type },
+
+#undef __array
+#define __array(attr_nr, attr_flag, name, nla_type, _type, maxlen, \
+ __get, __put, __is_signed) \
+ [attr_nr] = { .type = nla_type, \
+ .len = maxlen - (nla_type == NLA_NUL_STRING) },
+
+#include GENL_MAGIC_INCLUDE_FILE
+
+#ifndef __KERNEL__
+#ifndef pr_info
+#define pr_info(args...) fprintf(stderr, args);
+#endif
+#endif
+
+#ifdef GENL_MAGIC_DEBUG
+static void dprint_field(const char *dir, int nla_type,
+ const char *name, void *valp)
+{
+ __u64 val = valp ? *(__u32 *)valp : 1;
+ switch (nla_type) {
+ case NLA_U8: val = (__u8)val;
+ case NLA_U16: val = (__u16)val;
+ case NLA_U32: val = (__u32)val;
+ pr_info("%s attr %s: %d 0x%08x\n", dir,
+ name, (int)val, (unsigned)val);
+ break;
+ case NLA_U64:
+ val = *(__u64*)valp;
+ pr_info("%s attr %s: %lld 0x%08llx\n", dir,
+ name, (long long)val, (unsigned long long)val);
+ break;
+ case NLA_FLAG:
+ if (val)
+ pr_info("%s attr %s: set\n", dir, name);
+ break;
+ }
+}
+
+static void dprint_array(const char *dir, int nla_type,
+ const char *name, const char *val, unsigned len)
+{
+ switch (nla_type) {
+ case NLA_NUL_STRING:
+ if (len && val[len-1] == '\0')
+ len--;
+ pr_info("%s attr %s: [len:%u] '%s'\n", dir, name, len, val);
+ break;
+ default:
+ /* we can always show 4 byte,
+ * thats what nlattr are aligned to. */
+ pr_info("%s attr %s: [len:%u] %02x%02x%02x%02x ...\n",
+ dir, name, len, val[0], val[1], val[2], val[3]);
+ }
+}
+
+#define DPRINT_TLA(a, op, b) pr_info("%s %s %s\n", a, op, b);
+
+/* Name is a member field name of the struct s.
+ * If s is NULL (only parsing, no copy requested in *_from_attrs()),
+ * nla is supposed to point to the attribute containing the information
+ * corresponding to that struct member. */
+#define DPRINT_FIELD(dir, nla_type, name, s, nla) \
+ do { \
+ if (s) \
+ dprint_field(dir, nla_type, #name, &s->name); \
+ else if (nla) \
+ dprint_field(dir, nla_type, #name, \
+ (nla_type == NLA_FLAG) ? NULL \
+ : nla_data(nla)); \
+ } while (0)
+
+#define DPRINT_ARRAY(dir, nla_type, name, s, nla) \
+ do { \
+ if (s) \
+ dprint_array(dir, nla_type, #name, \
+ s->name, s->name ## _len); \
+ else if (nla) \
+ dprint_array(dir, nla_type, #name, \
+ nla_data(nla), nla_len(nla)); \
+ } while (0)
+#else
+#define DPRINT_TLA(a, op, b) do {} while (0)
+#define DPRINT_FIELD(dir, nla_type, name, s, nla) do {} while (0)
+#define DPRINT_ARRAY(dir, nla_type, name, s, nla) do {} while (0)
+#endif
+
+/*
+ * Magic: provide conversion functions {{{1
+ * populate struct from attribute table:
+ * {{{2
+ */
+
+/* processing of generic netlink messages is serialized.
+ * use one static buffer for parsing of nested attributes */
+static struct nlattr *nested_attr_tb[128];
+
+#ifndef BUILD_BUG_ON
+/* Force a compilation error if condition is true */
+#define BUILD_BUG_ON(condition) ((void)BUILD_BUG_ON_ZERO(condition))
+/* Force a compilation error if condition is true, but also produce a
+ result (of value 0 and type size_t), so the expression can be used
+ e.g. in a structure initializer (or where-ever else comma expressions
+ aren't permitted). */
+#define BUILD_BUG_ON_ZERO(e) (sizeof(struct { int:-!!(e); }))
+#define BUILD_BUG_ON_NULL(e) ((void *)sizeof(struct { int:-!!(e); }))
+#endif
+
+#undef GENL_struct
+#define GENL_struct(tag_name, tag_number, s_name, s_fields) \
+/* *_from_attrs functions are static, but potentially unused */ \
+static int __ ## s_name ## _from_attrs(struct s_name *s, \
+ struct genl_info *info, bool exclude_invariants) \
+{ \
+ const int maxtype = ARRAY_SIZE(s_name ## _nl_policy)-1; \
+ struct nlattr *tla = info->attrs[tag_number]; \
+ struct nlattr **ntb = nested_attr_tb; \
+ struct nlattr *nla; \
+ int err; \
+ BUILD_BUG_ON(ARRAY_SIZE(s_name ## _nl_policy) > ARRAY_SIZE(nested_attr_tb)); \
+ if (!tla) \
+ return -ENOMSG; \
+ DPRINT_TLA(#s_name, "<=-", #tag_name); \
+ err = drbd_nla_parse_nested(ntb, maxtype, tla, s_name ## _nl_policy); \
+ if (err) \
+ return err; \
+ \
+ s_fields \
+ return 0; \
+} __attribute__((unused)) \
+static int s_name ## _from_attrs(struct s_name *s, \
+ struct genl_info *info) \
+{ \
+ return __ ## s_name ## _from_attrs(s, info, false); \
+} __attribute__((unused)) \
+static int s_name ## _from_attrs_for_change(struct s_name *s, \
+ struct genl_info *info) \
+{ \
+ return __ ## s_name ## _from_attrs(s, info, true); \
+} __attribute__((unused)) \
+
+#define __assign(attr_nr, attr_flag, name, nla_type, type, assignment...) \
+ nla = ntb[attr_nr]; \
+ if (nla) { \
+ if (exclude_invariants && ((attr_flag) & DRBD_F_INVARIANT)) { \
+ pr_info("<< must not change invariant attr: %s\n", #name); \
+ return -EEXIST; \
+ } \
+ assignment; \
+ } else if (exclude_invariants && ((attr_flag) & DRBD_F_INVARIANT)) { \
+ /* attribute missing from payload, */ \
+ /* which was expected */ \
+ } else if ((attr_flag) & DRBD_F_REQUIRED) { \
+ pr_info("<< missing attr: %s\n", #name); \
+ return -ENOMSG; \
+ }
+
+#undef __field
+#define __field(attr_nr, attr_flag, name, nla_type, type, __get, __put, \
+ __is_signed) \
+ __assign(attr_nr, attr_flag, name, nla_type, type, \
+ if (s) \
+ s->name = __get(nla); \
+ DPRINT_FIELD("<<", nla_type, name, s, nla))
+
+/* validate_nla() already checked nla_len <= maxlen appropriately. */
+#undef __array
+#define __array(attr_nr, attr_flag, name, nla_type, type, maxlen, \
+ __get, __put, __is_signed) \
+ __assign(attr_nr, attr_flag, name, nla_type, type, \
+ if (s) \
+ s->name ## _len = \
+ __get(s->name, nla, maxlen); \
+ DPRINT_ARRAY("<<", nla_type, name, s, nla))
+
+#include GENL_MAGIC_INCLUDE_FILE
+
+#undef GENL_struct
+#define GENL_struct(tag_name, tag_number, s_name, s_fields)
+
+/*
+ * Magic: define op number to op name mapping {{{1
+ * {{{2
+ */
+const char *CONCAT_(GENL_MAGIC_FAMILY, _genl_cmd_to_str)(__u8 cmd)
+{
+ switch (cmd) {
+#undef GENL_op
+#define GENL_op(op_name, op_num, handler, tla_list) \
+ case op_num: return #op_name;
+#include GENL_MAGIC_INCLUDE_FILE
+ default:
+ return "unknown";
+ }
+}
+
+#ifdef __KERNEL__
+#include <linux/stringify.h>
+/*
+ * Magic: define genl_ops {{{1
+ * {{{2
+ */
+
+#undef GENL_op
+#define GENL_op(op_name, op_num, handler, tla_list) \
+{ \
+ handler \
+ .cmd = op_name, \
+ .policy = CONCAT_(GENL_MAGIC_FAMILY, _tla_nl_policy), \
+},
+
+#define ZZZ_genl_ops CONCAT_(GENL_MAGIC_FAMILY, _genl_ops)
+static struct genl_ops ZZZ_genl_ops[] __read_mostly = {
+#include GENL_MAGIC_INCLUDE_FILE
+};
+
+#undef GENL_op
+#define GENL_op(op_name, op_num, handler, tla_list)
+
+/*
+ * Define the genl_family, multicast groups, {{{1
+ * and provide register/unregister functions.
+ * {{{2
+ */
+#define ZZZ_genl_family CONCAT_(GENL_MAGIC_FAMILY, _genl_family)
+static struct genl_family ZZZ_genl_family __read_mostly = {
+ .id = GENL_ID_GENERATE,
+ .name = __stringify(GENL_MAGIC_FAMILY),
+ .version = GENL_MAGIC_VERSION,
+#ifdef GENL_MAGIC_FAMILY_HDRSZ
+ .hdrsize = NLA_ALIGN(GENL_MAGIC_FAMILY_HDRSZ),
+#endif
+ .maxattr = ARRAY_SIZE(drbd_tla_nl_policy)-1,
+};
+
+/*
+ * Magic: define multicast groups
+ * Magic: define multicast group registration helper
+ */
+#undef GENL_mc_group
+#define GENL_mc_group(group) \
+static struct genl_multicast_group \
+CONCAT_(GENL_MAGIC_FAMILY, _mcg_ ## group) __read_mostly = { \
+ .name = #group, \
+}; \
+static int CONCAT_(GENL_MAGIC_FAMILY, _genl_multicast_ ## group)( \
+ struct sk_buff *skb, gfp_t flags) \
+{ \
+ unsigned int group_id = \
+ CONCAT_(GENL_MAGIC_FAMILY, _mcg_ ## group).id; \
+ if (!group_id) \
+ return -EINVAL; \
+ return genlmsg_multicast(skb, 0, group_id, flags); \
+}
+
+#include GENL_MAGIC_INCLUDE_FILE
+
+int CONCAT_(GENL_MAGIC_FAMILY, _genl_register)(void)
+{
+ int err = genl_register_family_with_ops(&ZZZ_genl_family,
+ ZZZ_genl_ops, ARRAY_SIZE(ZZZ_genl_ops));
+ if (err)
+ return err;
+#undef GENL_mc_group
+#define GENL_mc_group(group) \
+ err = genl_register_mc_group(&ZZZ_genl_family, \
+ &CONCAT_(GENL_MAGIC_FAMILY, _mcg_ ## group)); \
+ if (err) \
+ goto fail; \
+ else \
+ pr_info("%s: mcg %s: %u\n", #group, \
+ __stringify(GENL_MAGIC_FAMILY), \
+ CONCAT_(GENL_MAGIC_FAMILY, _mcg_ ## group).id);
+
+#include GENL_MAGIC_INCLUDE_FILE
+
+#undef GENL_mc_group
+#define GENL_mc_group(group)
+ return 0;
+fail:
+ genl_unregister_family(&ZZZ_genl_family);
+ return err;
+}
+
+void CONCAT_(GENL_MAGIC_FAMILY, _genl_unregister)(void)
+{
+ genl_unregister_family(&ZZZ_genl_family);
+}
+
+/*
+ * Magic: provide conversion functions {{{1
+ * populate skb from struct.
+ * {{{2
+ */
+
+#undef GENL_op
+#define GENL_op(op_name, op_num, handler, tla_list)
+
+#undef GENL_struct
+#define GENL_struct(tag_name, tag_number, s_name, s_fields) \
+static int s_name ## _to_skb(struct sk_buff *skb, struct s_name *s, \
+ const bool exclude_sensitive) \
+{ \
+ struct nlattr *tla = nla_nest_start(skb, tag_number); \
+ if (!tla) \
+ goto nla_put_failure; \
+ DPRINT_TLA(#s_name, "-=>", #tag_name); \
+ s_fields \
+ nla_nest_end(skb, tla); \
+ return 0; \
+ \
+nla_put_failure: \
+ if (tla) \
+ nla_nest_cancel(skb, tla); \
+ return -EMSGSIZE; \
+} \
+static inline int s_name ## _to_priv_skb(struct sk_buff *skb, \
+ struct s_name *s) \
+{ \
+ return s_name ## _to_skb(skb, s, 0); \
+} \
+static inline int s_name ## _to_unpriv_skb(struct sk_buff *skb, \
+ struct s_name *s) \
+{ \
+ return s_name ## _to_skb(skb, s, 1); \
+}
+
+
+#undef __field
+#define __field(attr_nr, attr_flag, name, nla_type, type, __get, __put, \
+ __is_signed) \
+ if (!exclude_sensitive || !((attr_flag) & DRBD_F_SENSITIVE)) { \
+ DPRINT_FIELD(">>", nla_type, name, s, NULL); \
+ if (__put(skb, attr_nr, s->name)) \
+ goto nla_put_failure; \
+ }
+
+#undef __array
+#define __array(attr_nr, attr_flag, name, nla_type, type, maxlen, \
+ __get, __put, __is_signed) \
+ if (!exclude_sensitive || !((attr_flag) & DRBD_F_SENSITIVE)) { \
+ DPRINT_ARRAY(">>",nla_type, name, s, NULL); \
+ if (__put(skb, attr_nr, min_t(int, maxlen, \
+ s->name ## _len + (nla_type == NLA_NUL_STRING)),\
+ s->name)) \
+ goto nla_put_failure; \
+ }
+
+#include GENL_MAGIC_INCLUDE_FILE
+
+
+/* Functions for initializing structs to default values. */
+
+#undef __field
+#define __field(attr_nr, attr_flag, name, nla_type, type, __get, __put, \
+ __is_signed)
+#undef __array
+#define __array(attr_nr, attr_flag, name, nla_type, type, maxlen, \
+ __get, __put, __is_signed)
+#undef __u32_field_def
+#define __u32_field_def(attr_nr, attr_flag, name, default) \
+ x->name = default;
+#undef __s32_field_def
+#define __s32_field_def(attr_nr, attr_flag, name, default) \
+ x->name = default;
+#undef __flg_field_def
+#define __flg_field_def(attr_nr, attr_flag, name, default) \
+ x->name = default;
+#undef __str_field_def
+#define __str_field_def(attr_nr, attr_flag, name, maxlen) \
+ memset(x->name, 0, sizeof(x->name)); \
+ x->name ## _len = 0;
+#undef GENL_struct
+#define GENL_struct(tag_name, tag_number, s_name, s_fields) \
+static void set_ ## s_name ## _defaults(struct s_name *x) __attribute__((unused)); \
+static void set_ ## s_name ## _defaults(struct s_name *x) { \
+s_fields \
+}
+
+#include GENL_MAGIC_INCLUDE_FILE
+
+#endif /* __KERNEL__ */
+
+/* }}}1 */
+#endif /* GENL_MAGIC_FUNC_H */
+/* vim: set foldmethod=marker foldlevel=1 nofoldenable : */
diff --git a/include/linux/genl_magic_struct.h b/include/linux/genl_magic_struct.h
new file mode 100644
index 000000000000..eecd19b37001
--- /dev/null
+++ b/include/linux/genl_magic_struct.h
@@ -0,0 +1,277 @@
+#ifndef GENL_MAGIC_STRUCT_H
+#define GENL_MAGIC_STRUCT_H
+
+#ifndef GENL_MAGIC_FAMILY
+# error "you need to define GENL_MAGIC_FAMILY before inclusion"
+#endif
+
+#ifndef GENL_MAGIC_VERSION
+# error "you need to define GENL_MAGIC_VERSION before inclusion"
+#endif
+
+#ifndef GENL_MAGIC_INCLUDE_FILE
+# error "you need to define GENL_MAGIC_INCLUDE_FILE before inclusion"
+#endif
+
+#include <linux/genetlink.h>
+#include <linux/types.h>
+
+#define CONCAT__(a,b) a ## b
+#define CONCAT_(a,b) CONCAT__(a,b)
+
+extern int CONCAT_(GENL_MAGIC_FAMILY, _genl_register)(void);
+extern void CONCAT_(GENL_MAGIC_FAMILY, _genl_unregister)(void);
+
+/*
+ * Extension of genl attribute validation policies {{{2
+ */
+
+/*
+ * @DRBD_GENLA_F_MANDATORY: By default, netlink ignores attributes it does not
+ * know about. This flag can be set in nlattr->nla_type to indicate that this
+ * attribute must not be ignored.
+ *
+ * We check and remove this flag in drbd_nla_check_mandatory() before
+ * validating the attribute types and lengths via nla_parse_nested().
+ */
+#define DRBD_GENLA_F_MANDATORY (1 << 14)
+
+/*
+ * Flags specific to drbd and not visible at the netlink layer, used in
+ * <struct>_from_attrs and <struct>_to_skb:
+ *
+ * @DRBD_F_REQUIRED: Attribute is required; a request without this attribute is
+ * invalid.
+ *
+ * @DRBD_F_SENSITIVE: Attribute includes sensitive information and must not be
+ * included in unpriviledged get requests or broadcasts.
+ *
+ * @DRBD_F_INVARIANT: Attribute is set when an object is initially created, but
+ * cannot subsequently be changed.
+ */
+#define DRBD_F_REQUIRED (1 << 0)
+#define DRBD_F_SENSITIVE (1 << 1)
+#define DRBD_F_INVARIANT (1 << 2)
+
+#define __nla_type(x) ((__u16)((x) & NLA_TYPE_MASK & ~DRBD_GENLA_F_MANDATORY))
+
+/* }}}1
+ * MAGIC
+ * multi-include macro expansion magic starts here
+ */
+
+/* MAGIC helpers {{{2 */
+
+/* possible field types */
+#define __flg_field(attr_nr, attr_flag, name) \
+ __field(attr_nr, attr_flag, name, NLA_U8, char, \
+ nla_get_u8, nla_put_u8, false)
+#define __u8_field(attr_nr, attr_flag, name) \
+ __field(attr_nr, attr_flag, name, NLA_U8, unsigned char, \
+ nla_get_u8, nla_put_u8, false)
+#define __u16_field(attr_nr, attr_flag, name) \
+ __field(attr_nr, attr_flag, name, NLA_U16, __u16, \
+ nla_get_u16, nla_put_u16, false)
+#define __u32_field(attr_nr, attr_flag, name) \
+ __field(attr_nr, attr_flag, name, NLA_U32, __u32, \
+ nla_get_u32, nla_put_u32, false)
+#define __s32_field(attr_nr, attr_flag, name) \
+ __field(attr_nr, attr_flag, name, NLA_U32, __s32, \
+ nla_get_u32, nla_put_u32, true)
+#define __u64_field(attr_nr, attr_flag, name) \
+ __field(attr_nr, attr_flag, name, NLA_U64, __u64, \
+ nla_get_u64, nla_put_u64, false)
+#define __str_field(attr_nr, attr_flag, name, maxlen) \
+ __array(attr_nr, attr_flag, name, NLA_NUL_STRING, char, maxlen, \
+ nla_strlcpy, nla_put, false)
+#define __bin_field(attr_nr, attr_flag, name, maxlen) \
+ __array(attr_nr, attr_flag, name, NLA_BINARY, char, maxlen, \
+ nla_memcpy, nla_put, false)
+
+/* fields with default values */
+#define __flg_field_def(attr_nr, attr_flag, name, default) \
+ __flg_field(attr_nr, attr_flag, name)
+#define __u32_field_def(attr_nr, attr_flag, name, default) \
+ __u32_field(attr_nr, attr_flag, name)
+#define __s32_field_def(attr_nr, attr_flag, name, default) \
+ __s32_field(attr_nr, attr_flag, name)
+#define __str_field_def(attr_nr, attr_flag, name, maxlen) \
+ __str_field(attr_nr, attr_flag, name, maxlen)
+
+#define GENL_op_init(args...) args
+#define GENL_doit(handler) \
+ .doit = handler, \
+ .flags = GENL_ADMIN_PERM,
+#define GENL_dumpit(handler) \
+ .dumpit = handler, \
+ .flags = GENL_ADMIN_PERM,
+
+/* }}}1
+ * Magic: define the enum symbols for genl_ops
+ * Magic: define the enum symbols for top level attributes
+ * Magic: define the enum symbols for nested attributes
+ * {{{2
+ */
+
+#undef GENL_struct
+#define GENL_struct(tag_name, tag_number, s_name, s_fields)
+
+#undef GENL_mc_group
+#define GENL_mc_group(group)
+
+#undef GENL_notification
+#define GENL_notification(op_name, op_num, mcast_group, tla_list) \
+ op_name = op_num,
+
+#undef GENL_op
+#define GENL_op(op_name, op_num, handler, tla_list) \
+ op_name = op_num,
+
+enum {
+#include GENL_MAGIC_INCLUDE_FILE
+};
+
+#undef GENL_notification
+#define GENL_notification(op_name, op_num, mcast_group, tla_list)
+
+#undef GENL_op
+#define GENL_op(op_name, op_num, handler, attr_list)
+
+#undef GENL_struct
+#define GENL_struct(tag_name, tag_number, s_name, s_fields) \
+ tag_name = tag_number,
+
+enum {
+#include GENL_MAGIC_INCLUDE_FILE
+};
+
+#undef GENL_struct
+#define GENL_struct(tag_name, tag_number, s_name, s_fields) \
+enum { \
+ s_fields \
+};
+
+#undef __field
+#define __field(attr_nr, attr_flag, name, nla_type, type, \
+ __get, __put, __is_signed) \
+ T_ ## name = (__u16)(attr_nr | ((attr_flag) & DRBD_GENLA_F_MANDATORY)),
+
+#undef __array
+#define __array(attr_nr, attr_flag, name, nla_type, type, \
+ maxlen, __get, __put, __is_signed) \
+ T_ ## name = (__u16)(attr_nr | ((attr_flag) & DRBD_GENLA_F_MANDATORY)),
+
+#include GENL_MAGIC_INCLUDE_FILE
+
+/* }}}1
+ * Magic: compile time assert unique numbers for operations
+ * Magic: -"- unique numbers for top level attributes
+ * Magic: -"- unique numbers for nested attributes
+ * {{{2
+ */
+
+#undef GENL_struct
+#define GENL_struct(tag_name, tag_number, s_name, s_fields)
+
+#undef GENL_op
+#define GENL_op(op_name, op_num, handler, attr_list) \
+ case op_name:
+
+#undef GENL_notification
+#define GENL_notification(op_name, op_num, mcast_group, tla_list) \
+ case op_name:
+
+static inline void ct_assert_unique_operations(void)
+{
+ switch (0) {
+#include GENL_MAGIC_INCLUDE_FILE
+ ;
+ }
+}
+
+#undef GENL_op
+#define GENL_op(op_name, op_num, handler, attr_list)
+
+#undef GENL_notification
+#define GENL_notification(op_name, op_num, mcast_group, tla_list)
+
+#undef GENL_struct
+#define GENL_struct(tag_name, tag_number, s_name, s_fields) \
+ case tag_number:
+
+static inline void ct_assert_unique_top_level_attributes(void)
+{
+ switch (0) {
+#include GENL_MAGIC_INCLUDE_FILE
+ ;
+ }
+}
+
+#undef GENL_struct
+#define GENL_struct(tag_name, tag_number, s_name, s_fields) \
+static inline void ct_assert_unique_ ## s_name ## _attributes(void) \
+{ \
+ switch (0) { \
+ s_fields \
+ ; \
+ } \
+}
+
+#undef __field
+#define __field(attr_nr, attr_flag, name, nla_type, type, __get, __put, \
+ __is_signed) \
+ case attr_nr:
+
+#undef __array
+#define __array(attr_nr, attr_flag, name, nla_type, type, maxlen, \
+ __get, __put, __is_signed) \
+ case attr_nr:
+
+#include GENL_MAGIC_INCLUDE_FILE
+
+/* }}}1
+ * Magic: declare structs
+ * struct <name> {
+ * fields
+ * };
+ * {{{2
+ */
+
+#undef GENL_struct
+#define GENL_struct(tag_name, tag_number, s_name, s_fields) \
+struct s_name { s_fields };
+
+#undef __field
+#define __field(attr_nr, attr_flag, name, nla_type, type, __get, __put, \
+ __is_signed) \
+ type name;
+
+#undef __array
+#define __array(attr_nr, attr_flag, name, nla_type, type, maxlen, \
+ __get, __put, __is_signed) \
+ type name[maxlen]; \
+ __u32 name ## _len;
+
+#include GENL_MAGIC_INCLUDE_FILE
+
+#undef GENL_struct
+#define GENL_struct(tag_name, tag_number, s_name, s_fields) \
+enum { \
+ s_fields \
+};
+
+#undef __field
+#define __field(attr_nr, attr_flag, name, nla_type, type, __get, __put, \
+ is_signed) \
+ F_ ## name ## _IS_SIGNED = is_signed,
+
+#undef __array
+#define __array(attr_nr, attr_flag, name, nla_type, type, maxlen, \
+ __get, __put, is_signed) \
+ F_ ## name ## _IS_SIGNED = is_signed,
+
+#include GENL_MAGIC_INCLUDE_FILE
+
+/* }}}1 */
+#endif /* GENL_MAGIC_STRUCT_H */
+/* vim: set foldmethod=marker nofoldenable : */
diff --git a/include/linux/idr.h b/include/linux/idr.h
index 87259a44c251..de7e190f1af4 100644
--- a/include/linux/idr.h
+++ b/include/linux/idr.h
@@ -152,4 +152,15 @@ void ida_simple_remove(struct ida *ida, unsigned int id);
void __init idr_init_cache(void);
+/**
+ * idr_for_each_entry - iterate over an idr's elements of a given type
+ * @idp: idr handle
+ * @entry: the type * to use as cursor
+ * @id: id entry's key
+ */
+#define idr_for_each_entry(idp, entry, id) \
+ for (id = 0, entry = (typeof(entry))idr_get_next((idp), &(id)); \
+ entry != NULL; \
+ ++id, entry = (typeof(entry))idr_get_next((idp), &(id)))
+
#endif /* __IDR_H__ */
diff --git a/include/linux/ipc_namespace.h b/include/linux/ipc_namespace.h
index 5499c92a9153..fe771978e877 100644
--- a/include/linux/ipc_namespace.h
+++ b/include/linux/ipc_namespace.h
@@ -67,6 +67,8 @@ struct ipc_namespace {
/* user_ns which owns the ipc ns */
struct user_namespace *user_ns;
+
+ unsigned int proc_inum;
};
extern struct ipc_namespace init_ipc_ns;
@@ -133,7 +135,8 @@ static inline int mq_init_ns(struct ipc_namespace *ns) { return 0; }
#if defined(CONFIG_IPC_NS)
extern struct ipc_namespace *copy_ipcs(unsigned long flags,
- struct task_struct *tsk);
+ struct user_namespace *user_ns, struct ipc_namespace *ns);
+
static inline struct ipc_namespace *get_ipc_ns(struct ipc_namespace *ns)
{
if (ns)
@@ -144,12 +147,12 @@ static inline struct ipc_namespace *get_ipc_ns(struct ipc_namespace *ns)
extern void put_ipc_ns(struct ipc_namespace *ns);
#else
static inline struct ipc_namespace *copy_ipcs(unsigned long flags,
- struct task_struct *tsk)
+ struct user_namespace *user_ns, struct ipc_namespace *ns)
{
if (flags & CLONE_NEWIPC)
return ERR_PTR(-EINVAL);
- return tsk->nsproxy->ipc_ns;
+ return ns;
}
static inline struct ipc_namespace *get_ipc_ns(struct ipc_namespace *ns)
diff --git a/include/linux/loop.h b/include/linux/loop.h
index 6492181bcb1d..460b60fa7adf 100644
--- a/include/linux/loop.h
+++ b/include/linux/loop.h
@@ -53,10 +53,13 @@ struct loop_device {
spinlock_t lo_lock;
struct bio_list lo_bio_list;
+ unsigned int lo_bio_count;
int lo_state;
struct mutex lo_ctl_mutex;
struct task_struct *lo_thread;
wait_queue_head_t lo_event;
+ /* wait queue for incoming requests */
+ wait_queue_head_t lo_req_wait;
struct request_queue *lo_queue;
struct gendisk *lo_disk;
diff --git a/include/linux/lru_cache.h b/include/linux/lru_cache.h
index cafc7f99e124..4019013c6593 100644
--- a/include/linux/lru_cache.h
+++ b/include/linux/lru_cache.h
@@ -166,9 +166,11 @@ struct lc_element {
/* if we want to track a larger set of objects,
* it needs to become arch independend u64 */
unsigned lc_number;
-
/* special label when on free list */
#define LC_FREE (~0U)
+
+ /* for pending changes */
+ unsigned lc_new_number;
};
struct lru_cache {
@@ -176,6 +178,7 @@ struct lru_cache {
struct list_head lru;
struct list_head free;
struct list_head in_use;
+ struct list_head to_be_changed;
/* the pre-created kmem cache to allocate the objects from */
struct kmem_cache *lc_cache;
@@ -186,7 +189,7 @@ struct lru_cache {
size_t element_off;
/* number of elements (indices) */
- unsigned int nr_elements;
+ unsigned int nr_elements;
/* Arbitrary limit on maximum tracked objects. Practical limit is much
* lower due to allocation failures, probably. For typical use cases,
* nr_elements should be a few thousand at most.
@@ -194,18 +197,19 @@ struct lru_cache {
* 8 high bits of .lc_index to be overloaded with flags in the future. */
#define LC_MAX_ACTIVE (1<<24)
+ /* allow to accumulate a few (index:label) changes,
+ * but no more than max_pending_changes */
+ unsigned int max_pending_changes;
+ /* number of elements currently on to_be_changed list */
+ unsigned int pending_changes;
+
/* statistics */
- unsigned used; /* number of lelements currently on in_use list */
- unsigned long hits, misses, starving, dirty, changed;
+ unsigned used; /* number of elements currently on in_use list */
+ unsigned long hits, misses, starving, locked, changed;
/* see below: flag-bits for lru_cache */
unsigned long flags;
- /* when changing the label of an index element */
- unsigned int new_number;
-
- /* for paranoia when changing the label of an index element */
- struct lc_element *changing_element;
void *lc_private;
const char *name;
@@ -221,10 +225,15 @@ enum {
/* debugging aid, to catch concurrent access early.
* user needs to guarantee exclusive access by proper locking! */
__LC_PARANOIA,
- /* if we need to change the set, but currently there is a changing
- * transaction pending, we are "dirty", and must deferr further
- * changing requests */
+
+ /* annotate that the set is "dirty", possibly accumulating further
+ * changes, until a transaction is finally triggered */
__LC_DIRTY,
+
+ /* Locked, no further changes allowed.
+ * Also used to serialize changing transactions. */
+ __LC_LOCKED,
+
/* if we need to change the set, but currently there is no free nor
* unused element available, we are "starving", and must not give out
* further references, to guarantee that eventually some refcnt will
@@ -236,9 +245,11 @@ enum {
};
#define LC_PARANOIA (1<<__LC_PARANOIA)
#define LC_DIRTY (1<<__LC_DIRTY)
+#define LC_LOCKED (1<<__LC_LOCKED)
#define LC_STARVING (1<<__LC_STARVING)
extern struct lru_cache *lc_create(const char *name, struct kmem_cache *cache,
+ unsigned max_pending_changes,
unsigned e_count, size_t e_size, size_t e_off);
extern void lc_reset(struct lru_cache *lc);
extern void lc_destroy(struct lru_cache *lc);
@@ -249,7 +260,7 @@ extern struct lc_element *lc_try_get(struct lru_cache *lc, unsigned int enr);
extern struct lc_element *lc_find(struct lru_cache *lc, unsigned int enr);
extern struct lc_element *lc_get(struct lru_cache *lc, unsigned int enr);
extern unsigned int lc_put(struct lru_cache *lc, struct lc_element *e);
-extern void lc_changed(struct lru_cache *lc, struct lc_element *e);
+extern void lc_committed(struct lru_cache *lc);
struct seq_file;
extern size_t lc_seq_printf_stats(struct seq_file *seq, struct lru_cache *lc);
@@ -258,32 +269,40 @@ extern void lc_seq_dump_details(struct seq_file *seq, struct lru_cache *lc, char
void (*detail) (struct seq_file *, struct lc_element *));
/**
- * lc_try_lock - can be used to stop lc_get() from changing the tracked set
+ * lc_try_lock_for_transaction - can be used to stop lc_get() from changing the tracked set
* @lc: the lru cache to operate on
*
- * Note that the reference counts and order on the active and lru lists may
- * still change. Returns true if we acquired the lock.
+ * Allows (expects) the set to be "dirty". Note that the reference counts and
+ * order on the active and lru lists may still change. Used to serialize
+ * changing transactions. Returns true if we aquired the lock.
*/
-static inline int lc_try_lock(struct lru_cache *lc)
+static inline int lc_try_lock_for_transaction(struct lru_cache *lc)
{
- return !test_and_set_bit(__LC_DIRTY, &lc->flags);
+ return !test_and_set_bit(__LC_LOCKED, &lc->flags);
}
/**
+ * lc_try_lock - variant to stop lc_get() from changing the tracked set
+ * @lc: the lru cache to operate on
+ *
+ * Note that the reference counts and order on the active and lru lists may
+ * still change. Only works on a "clean" set. Returns true if we aquired the
+ * lock, which means there are no pending changes, and any further attempt to
+ * change the set will not succeed until the next lc_unlock().
+ */
+extern int lc_try_lock(struct lru_cache *lc);
+
+/**
* lc_unlock - unlock @lc, allow lc_get() to change the set again
* @lc: the lru cache to operate on
*/
static inline void lc_unlock(struct lru_cache *lc)
{
clear_bit(__LC_DIRTY, &lc->flags);
- smp_mb__after_clear_bit();
+ clear_bit_unlock(__LC_LOCKED, &lc->flags);
}
-static inline int lc_is_used(struct lru_cache *lc, unsigned int enr)
-{
- struct lc_element *e = lc_find(lc, enr);
- return e && e->refcnt;
-}
+extern bool lc_is_used(struct lru_cache *lc, unsigned int enr);
#define lc_entry(ptr, type, member) \
container_of(ptr, type, member)
diff --git a/include/linux/mnt_namespace.h b/include/linux/mnt_namespace.h
index 5a8e3903d770..12b2ab510323 100644
--- a/include/linux/mnt_namespace.h
+++ b/include/linux/mnt_namespace.h
@@ -4,9 +4,10 @@
struct mnt_namespace;
struct fs_struct;
+struct user_namespace;
extern struct mnt_namespace *copy_mnt_ns(unsigned long, struct mnt_namespace *,
- struct fs_struct *);
+ struct user_namespace *, struct fs_struct *);
extern void put_mnt_ns(struct mnt_namespace *ns);
extern const struct file_operations proc_mounts_operations;
diff --git a/include/linux/nsproxy.h b/include/linux/nsproxy.h
index cc37a55ad004..10e5947491c7 100644
--- a/include/linux/nsproxy.h
+++ b/include/linux/nsproxy.h
@@ -67,7 +67,7 @@ void exit_task_namespaces(struct task_struct *tsk);
void switch_task_namespaces(struct task_struct *tsk, struct nsproxy *new);
void free_nsproxy(struct nsproxy *ns);
int unshare_nsproxy_namespaces(unsigned long, struct nsproxy **,
- struct fs_struct *);
+ struct cred *, struct fs_struct *);
int __init nsproxy_cache_init(void);
static inline void put_nsproxy(struct nsproxy *ns)
diff --git a/include/linux/pid_namespace.h b/include/linux/pid_namespace.h
index 65e3e87eacc5..bf285999273a 100644
--- a/include/linux/pid_namespace.h
+++ b/include/linux/pid_namespace.h
@@ -21,6 +21,7 @@ struct pid_namespace {
struct kref kref;
struct pidmap pidmap[PIDMAP_ENTRIES];
int last_pid;
+ int nr_hashed;
struct task_struct *child_reaper;
struct kmem_cache *pid_cachep;
unsigned int level;
@@ -31,9 +32,12 @@ struct pid_namespace {
#ifdef CONFIG_BSD_PROCESS_ACCT
struct bsd_acct_struct *bacct;
#endif
+ struct user_namespace *user_ns;
+ struct work_struct proc_work;
kgid_t pid_gid;
int hide_pid;
int reboot; /* group exit code if this pidns was rebooted */
+ unsigned int proc_inum;
};
extern struct pid_namespace init_pid_ns;
@@ -46,7 +50,8 @@ static inline struct pid_namespace *get_pid_ns(struct pid_namespace *ns)
return ns;
}
-extern struct pid_namespace *copy_pid_ns(unsigned long flags, struct pid_namespace *ns);
+extern struct pid_namespace *copy_pid_ns(unsigned long flags,
+ struct user_namespace *user_ns, struct pid_namespace *ns);
extern void zap_pid_ns_processes(struct pid_namespace *pid_ns);
extern int reboot_pid_ns(struct pid_namespace *pid_ns, int cmd);
extern void put_pid_ns(struct pid_namespace *ns);
@@ -59,8 +64,8 @@ static inline struct pid_namespace *get_pid_ns(struct pid_namespace *ns)
return ns;
}
-static inline struct pid_namespace *
-copy_pid_ns(unsigned long flags, struct pid_namespace *ns)
+static inline struct pid_namespace *copy_pid_ns(unsigned long flags,
+ struct user_namespace *user_ns, struct pid_namespace *ns)
{
if (flags & CLONE_NEWPID)
ns = ERR_PTR(-EINVAL);
diff --git a/include/linux/proc_fs.h b/include/linux/proc_fs.h
index b4f70f0a9a48..32676b35d2f5 100644
--- a/include/linux/proc_fs.h
+++ b/include/linux/proc_fs.h
@@ -28,7 +28,11 @@ struct mm_struct;
*/
enum {
- PROC_ROOT_INO = 1,
+ PROC_ROOT_INO = 1,
+ PROC_IPC_INIT_INO = 0xEFFFFFFFU,
+ PROC_UTS_INIT_INO = 0xEFFFFFFEU,
+ PROC_USER_INIT_INO = 0xEFFFFFFDU,
+ PROC_PID_INIT_INO = 0xEFFFFFFCU,
};
/*
@@ -174,7 +178,10 @@ extern struct proc_dir_entry *proc_net_mkdir(struct net *net, const char *name,
struct proc_dir_entry *parent);
extern struct file *proc_ns_fget(int fd);
+extern bool proc_ns_inode(struct inode *inode);
+extern int proc_alloc_inum(unsigned int *pino);
+extern void proc_free_inum(unsigned int inum);
#else
#define proc_net_fops_create(net, name, mode, fops) ({ (void)(mode), NULL; })
@@ -229,6 +236,19 @@ static inline struct file *proc_ns_fget(int fd)
return ERR_PTR(-EINVAL);
}
+static inline bool proc_ns_inode(struct inode *inode)
+{
+ return false;
+}
+
+static inline int proc_alloc_inum(unsigned int *inum)
+{
+ *inum = 1;
+ return 0;
+}
+static inline void proc_free_inum(unsigned int inum)
+{
+}
#endif /* CONFIG_PROC_FS */
#if !defined(CONFIG_PROC_KCORE)
@@ -247,10 +267,14 @@ struct proc_ns_operations {
void *(*get)(struct task_struct *task);
void (*put)(void *ns);
int (*install)(struct nsproxy *nsproxy, void *ns);
+ unsigned int (*inum)(void *ns);
};
extern const struct proc_ns_operations netns_operations;
extern const struct proc_ns_operations utsns_operations;
extern const struct proc_ns_operations ipcns_operations;
+extern const struct proc_ns_operations pidns_operations;
+extern const struct proc_ns_operations userns_operations;
+extern const struct proc_ns_operations mntns_operations;
union proc_op {
int (*proc_get_link)(struct dentry *, struct path *);
diff --git a/include/linux/user_namespace.h b/include/linux/user_namespace.h
index 95142cae446a..b9bd2e6c73cc 100644
--- a/include/linux/user_namespace.h
+++ b/include/linux/user_namespace.h
@@ -25,6 +25,7 @@ struct user_namespace {
struct user_namespace *parent;
kuid_t owner;
kgid_t group;
+ unsigned int proc_inum;
};
extern struct user_namespace init_user_ns;
@@ -39,6 +40,7 @@ static inline struct user_namespace *get_user_ns(struct user_namespace *ns)
}
extern int create_user_ns(struct cred *new);
+extern int unshare_userns(unsigned long unshare_flags, struct cred **new_cred);
extern void free_user_ns(struct kref *kref);
static inline void put_user_ns(struct user_namespace *ns)
@@ -66,6 +68,14 @@ static inline int create_user_ns(struct cred *new)
return -EINVAL;
}
+static inline int unshare_userns(unsigned long unshare_flags,
+ struct cred **new_cred)
+{
+ if (unshare_flags & CLONE_NEWUSER)
+ return -EINVAL;
+ return 0;
+}
+
static inline void put_user_ns(struct user_namespace *ns)
{
}
diff --git a/include/linux/utsname.h b/include/linux/utsname.h
index 2b345206722a..239e27733d6c 100644
--- a/include/linux/utsname.h
+++ b/include/linux/utsname.h
@@ -23,6 +23,7 @@ struct uts_namespace {
struct kref kref;
struct new_utsname name;
struct user_namespace *user_ns;
+ unsigned int proc_inum;
};
extern struct uts_namespace init_uts_ns;
@@ -33,7 +34,7 @@ static inline void get_uts_ns(struct uts_namespace *ns)
}
extern struct uts_namespace *copy_utsname(unsigned long flags,
- struct task_struct *tsk);
+ struct user_namespace *user_ns, struct uts_namespace *old_ns);
extern void free_uts_ns(struct kref *kref);
static inline void put_uts_ns(struct uts_namespace *ns)
@@ -50,12 +51,12 @@ static inline void put_uts_ns(struct uts_namespace *ns)
}
static inline struct uts_namespace *copy_utsname(unsigned long flags,
- struct task_struct *tsk)
+ struct user_namespace *user_ns, struct uts_namespace *old_ns)
{
if (flags & CLONE_NEWUTS)
return ERR_PTR(-EINVAL);
- return tsk->nsproxy->uts_ns;
+ return old_ns;
}
#endif
diff --git a/include/linux/wait.h b/include/linux/wait.h
index 168dfe122dd3..7cb64d4b499d 100644
--- a/include/linux/wait.h
+++ b/include/linux/wait.h
@@ -550,6 +550,170 @@ do { \
__ret; \
})
+
+#define __wait_event_lock_irq(wq, condition, lock, cmd) \
+do { \
+ DEFINE_WAIT(__wait); \
+ \
+ for (;;) { \
+ prepare_to_wait(&wq, &__wait, TASK_UNINTERRUPTIBLE); \
+ if (condition) \
+ break; \
+ spin_unlock_irq(&lock); \
+ cmd; \
+ schedule(); \
+ spin_lock_irq(&lock); \
+ } \
+ finish_wait(&wq, &__wait); \
+} while (0)
+
+/**
+ * wait_event_lock_irq_cmd - sleep until a condition gets true. The
+ * condition is checked under the lock. This
+ * is expected to be called with the lock
+ * taken.
+ * @wq: the waitqueue to wait on
+ * @condition: a C expression for the event to wait for
+ * @lock: a locked spinlock_t, which will be released before cmd
+ * and schedule() and reacquired afterwards.
+ * @cmd: a command which is invoked outside the critical section before
+ * sleep
+ *
+ * The process is put to sleep (TASK_UNINTERRUPTIBLE) until the
+ * @condition evaluates to true. The @condition is checked each time
+ * the waitqueue @wq is woken up.
+ *
+ * wake_up() has to be called after changing any variable that could
+ * change the result of the wait condition.
+ *
+ * This is supposed to be called while holding the lock. The lock is
+ * dropped before invoking the cmd and going to sleep and is reacquired
+ * afterwards.
+ */
+#define wait_event_lock_irq_cmd(wq, condition, lock, cmd) \
+do { \
+ if (condition) \
+ break; \
+ __wait_event_lock_irq(wq, condition, lock, cmd); \
+} while (0)
+
+/**
+ * wait_event_lock_irq - sleep until a condition gets true. The
+ * condition is checked under the lock. This
+ * is expected to be called with the lock
+ * taken.
+ * @wq: the waitqueue to wait on
+ * @condition: a C expression for the event to wait for
+ * @lock: a locked spinlock_t, which will be released before schedule()
+ * and reacquired afterwards.
+ *
+ * The process is put to sleep (TASK_UNINTERRUPTIBLE) until the
+ * @condition evaluates to true. The @condition is checked each time
+ * the waitqueue @wq is woken up.
+ *
+ * wake_up() has to be called after changing any variable that could
+ * change the result of the wait condition.
+ *
+ * This is supposed to be called while holding the lock. The lock is
+ * dropped before going to sleep and is reacquired afterwards.
+ */
+#define wait_event_lock_irq(wq, condition, lock) \
+do { \
+ if (condition) \
+ break; \
+ __wait_event_lock_irq(wq, condition, lock, ); \
+} while (0)
+
+
+#define __wait_event_interruptible_lock_irq(wq, condition, \
+ lock, ret, cmd) \
+do { \
+ DEFINE_WAIT(__wait); \
+ \
+ for (;;) { \
+ prepare_to_wait(&wq, &__wait, TASK_INTERRUPTIBLE); \
+ if (condition) \
+ break; \
+ if (signal_pending(current)) { \
+ ret = -ERESTARTSYS; \
+ break; \
+ } \
+ spin_unlock_irq(&lock); \
+ cmd; \
+ schedule(); \
+ spin_lock_irq(&lock); \
+ } \
+ finish_wait(&wq, &__wait); \
+} while (0)
+
+/**
+ * wait_event_interruptible_lock_irq_cmd - sleep until a condition gets true.
+ * The condition is checked under the lock. This is expected to
+ * be called with the lock taken.
+ * @wq: the waitqueue to wait on
+ * @condition: a C expression for the event to wait for
+ * @lock: a locked spinlock_t, which will be released before cmd and
+ * schedule() and reacquired afterwards.
+ * @cmd: a command which is invoked outside the critical section before
+ * sleep
+ *
+ * The process is put to sleep (TASK_INTERRUPTIBLE) until the
+ * @condition evaluates to true or a signal is received. The @condition is
+ * checked each time the waitqueue @wq is woken up.
+ *
+ * wake_up() has to be called after changing any variable that could
+ * change the result of the wait condition.
+ *
+ * This is supposed to be called while holding the lock. The lock is
+ * dropped before invoking the cmd and going to sleep and is reacquired
+ * afterwards.
+ *
+ * The macro will return -ERESTARTSYS if it was interrupted by a signal
+ * and 0 if @condition evaluated to true.
+ */
+#define wait_event_interruptible_lock_irq_cmd(wq, condition, lock, cmd) \
+({ \
+ int __ret = 0; \
+ \
+ if (!(condition)) \
+ __wait_event_interruptible_lock_irq(wq, condition, \
+ lock, __ret, cmd); \
+ __ret; \
+})
+
+/**
+ * wait_event_interruptible_lock_irq - sleep until a condition gets true.
+ * The condition is checked under the lock. This is expected
+ * to be called with the lock taken.
+ * @wq: the waitqueue to wait on
+ * @condition: a C expression for the event to wait for
+ * @lock: a locked spinlock_t, which will be released before schedule()
+ * and reacquired afterwards.
+ *
+ * The process is put to sleep (TASK_INTERRUPTIBLE) until the
+ * @condition evaluates to true or signal is received. The @condition is
+ * checked each time the waitqueue @wq is woken up.
+ *
+ * wake_up() has to be called after changing any variable that could
+ * change the result of the wait condition.
+ *
+ * This is supposed to be called while holding the lock. The lock is
+ * dropped before going to sleep and is reacquired afterwards.
+ *
+ * The macro will return -ERESTARTSYS if it was interrupted by a signal
+ * and 0 if @condition evaluated to true.
+ */
+#define wait_event_interruptible_lock_irq(wq, condition, lock) \
+({ \
+ int __ret = 0; \
+ \
+ if (!(condition)) \
+ __wait_event_interruptible_lock_irq(wq, condition, \
+ lock, __ret, ); \
+ __ret; \
+})
+
+
/*
* These are the old interfaces to sleep waiting for an event.
* They are racy. DO NOT use them, use the wait_event* interfaces above.
diff --git a/include/net/net_namespace.h b/include/net/net_namespace.h
index c5a43f56b796..de644bcd8613 100644
--- a/include/net/net_namespace.h
+++ b/include/net/net_namespace.h
@@ -56,6 +56,8 @@ struct net {
struct user_namespace *user_ns; /* Owning user namespace */
+ unsigned int proc_inum;
+
struct proc_dir_entry *proc_net;
struct proc_dir_entry *proc_net_stat;