From 4677efc486e1872f62d4632c50f7183f82296fa6 Mon Sep 17 00:00:00 2001 From: Dmytro Linkin Date: Wed, 2 Jun 2021 15:17:19 +0300 Subject: devlink: Introduce rate object Allow registering rate object for devlink ports with dedicated devlink_rate_leaf_{create|destroy}() API. Implement new netlink DEVLINK_CMD_RATE_GET command that is used to retrieve rate object info. Add new DEVLINK_CMD_RATE_{NEW|DEL} commands that are used for notifications when creating/deleting leaf rate object. Rate API is intended to be used for rate limiting of individual devlink ports (leafs) and their aggregates (nodes). Example: $ devlink port show pci/0000:03:00.0/0 pci/0000:03:00.0/1 $ devlink port function rate show pci/0000:03:00.0/0: type leaf pci/0000:03:00.0/1: type leaf Co-developed-by: Vlad Buslov Signed-off-by: Vlad Buslov Signed-off-by: Dmytro Linkin Reviewed-by: Jiri Pirko Signed-off-by: David S. Miller --- include/uapi/linux/devlink.h | 11 +++++++++++ 1 file changed, 11 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/devlink.h b/include/uapi/linux/devlink.h index f6008b2fa60f..0c27b45c47db 100644 --- a/include/uapi/linux/devlink.h +++ b/include/uapi/linux/devlink.h @@ -126,6 +126,11 @@ enum devlink_command { DEVLINK_CMD_HEALTH_REPORTER_TEST, + DEVLINK_CMD_RATE_GET, /* can dump */ + DEVLINK_CMD_RATE_SET, + DEVLINK_CMD_RATE_NEW, + DEVLINK_CMD_RATE_DEL, + /* add new commands above here */ __DEVLINK_CMD_MAX, DEVLINK_CMD_MAX = __DEVLINK_CMD_MAX - 1 @@ -206,6 +211,10 @@ enum devlink_port_flavour { */ }; +enum devlink_rate_type { + DEVLINK_RATE_TYPE_LEAF, +}; + enum devlink_param_cmode { DEVLINK_PARAM_CMODE_RUNTIME, DEVLINK_PARAM_CMODE_DRIVERINIT, @@ -534,6 +543,8 @@ enum devlink_attr { DEVLINK_ATTR_RELOAD_ACTION_STATS, /* nested */ DEVLINK_ATTR_PORT_PCI_SF_NUMBER, /* u32 */ + + DEVLINK_ATTR_RATE_TYPE, /* u16 */ /* add new attributes above here, update the policy in devlink.c */ __DEVLINK_ATTR_MAX, -- cgit v1.3-8-gc7d7 From 1897db2ec3109eb1dd07b357c95c5e03d54e41b9 Mon Sep 17 00:00:00 2001 From: Dmytro Linkin Date: Wed, 2 Jun 2021 15:17:22 +0300 Subject: devlink: Allow setting tx rate for devlink rate leaf objects Implement support for DEVLINK_CMD_RATE_SET command with new attributes DEVLINK_ATTR_RATE_TX_{SHARE|MAX} that are used to set devlink rate shared/max tx rate values. Extend devlink ops with new callbacks rate_leaf_tx_{share|max}_set() to allow supporting drivers to implement rate control through devlink. New attributes are optional. Driver implementations are allowed to support either or both of them. Shared rate example: $ devlink port function rate set netdevsim/netdevsim10/0 tx_share 10mbit $ devlink port function rate show netdevsim/netdevsim10/0 netdevsim/netdevsim10/0: type leaf tx_share 10mbit Max rate example: $ devlink port function rate set netdevsim/netdevsim10/0 tx_max 100mbit $ devlink port function rate show netdevsim/netdevsim10/0 netdevsim/netdevsim10/0: type leaf tx_max 100mbit Co-developed-by: Vlad Buslov Signed-off-by: Vlad Buslov Signed-off-by: Dmytro Linkin Reviewed-by: Jiri Pirko Signed-off-by: David S. Miller --- include/net/devlink.h | 10 ++++++ include/uapi/linux/devlink.h | 2 ++ net/core/devlink.c | 86 ++++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 98 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/net/devlink.h b/include/net/devlink.h index 2f5954d96c3e..46d553545f83 100644 --- a/include/net/devlink.h +++ b/include/net/devlink.h @@ -139,6 +139,8 @@ struct devlink_rate { enum devlink_rate_type type; struct devlink *devlink; void *priv; + u64 tx_share; + u64 tx_max; struct devlink_port *devlink_port; }; @@ -1465,6 +1467,14 @@ struct devlink_ops { struct devlink_port *port, enum devlink_port_fn_state state, struct netlink_ext_ack *extack); + + /** + * Rate control callbacks. + */ + int (*rate_leaf_tx_share_set)(struct devlink_rate *devlink_rate, void *priv, + u64 tx_share, struct netlink_ext_ack *extack); + int (*rate_leaf_tx_max_set)(struct devlink_rate *devlink_rate, void *priv, + u64 tx_max, struct netlink_ext_ack *extack); }; static inline void *devlink_priv(struct devlink *devlink) diff --git a/include/uapi/linux/devlink.h b/include/uapi/linux/devlink.h index 0c27b45c47db..ae94cd2a1078 100644 --- a/include/uapi/linux/devlink.h +++ b/include/uapi/linux/devlink.h @@ -545,6 +545,8 @@ enum devlink_attr { DEVLINK_ATTR_PORT_PCI_SF_NUMBER, /* u32 */ DEVLINK_ATTR_RATE_TYPE, /* u16 */ + DEVLINK_ATTR_RATE_TX_SHARE, /* u64 */ + DEVLINK_ATTR_RATE_TX_MAX, /* u64 */ /* add new attributes above here, update the policy in devlink.c */ __DEVLINK_ATTR_MAX, diff --git a/net/core/devlink.c b/net/core/devlink.c index 3b785f51156f..37839fd5ca73 100644 --- a/net/core/devlink.c +++ b/net/core/devlink.c @@ -803,6 +803,14 @@ static int devlink_nl_rate_fill(struct sk_buff *msg, goto nla_put_failure; } + if (nla_put_u64_64bit(msg, DEVLINK_ATTR_RATE_TX_SHARE, + devlink_rate->tx_share, DEVLINK_ATTR_PAD)) + goto nla_put_failure; + + if (nla_put_u64_64bit(msg, DEVLINK_ATTR_RATE_TX_MAX, + devlink_rate->tx_max, DEVLINK_ATTR_PAD)) + goto nla_put_failure; + genlmsg_end(msg, hdr); return 0; @@ -1495,6 +1503,76 @@ static int devlink_nl_cmd_port_del_doit(struct sk_buff *skb, return devlink->ops->port_del(devlink, port_index, extack); } +static int devlink_nl_rate_set(struct devlink_rate *devlink_rate, + const struct devlink_ops *ops, + struct genl_info *info) +{ + struct nlattr **attrs = info->attrs; + u64 rate; + int err; + + if (attrs[DEVLINK_ATTR_RATE_TX_SHARE]) { + rate = nla_get_u64(attrs[DEVLINK_ATTR_RATE_TX_SHARE]); + err = ops->rate_leaf_tx_share_set(devlink_rate, devlink_rate->priv, + rate, info->extack); + if (err) + return err; + devlink_rate->tx_share = rate; + } + + if (attrs[DEVLINK_ATTR_RATE_TX_MAX]) { + rate = nla_get_u64(attrs[DEVLINK_ATTR_RATE_TX_MAX]); + err = ops->rate_leaf_tx_max_set(devlink_rate, devlink_rate->priv, + rate, info->extack); + if (err) + return err; + devlink_rate->tx_max = rate; + } + + return 0; +} + +static bool devlink_rate_set_ops_supported(const struct devlink_ops *ops, + struct genl_info *info, + enum devlink_rate_type type) +{ + struct nlattr **attrs = info->attrs; + + if (type == DEVLINK_RATE_TYPE_LEAF) { + if (attrs[DEVLINK_ATTR_RATE_TX_SHARE] && !ops->rate_leaf_tx_share_set) { + NL_SET_ERR_MSG_MOD(info->extack, "TX share set isn't supported for the leafs"); + return false; + } + if (attrs[DEVLINK_ATTR_RATE_TX_MAX] && !ops->rate_leaf_tx_max_set) { + NL_SET_ERR_MSG_MOD(info->extack, "TX max set isn't supported for the leafs"); + return false; + } + } else { + WARN_ON("Unknown type of rate object"); + return false; + } + + return true; +} + +static int devlink_nl_cmd_rate_set_doit(struct sk_buff *skb, + struct genl_info *info) +{ + struct devlink_rate *devlink_rate = info->user_ptr[1]; + struct devlink *devlink = devlink_rate->devlink; + const struct devlink_ops *ops = devlink->ops; + int err; + + if (!ops || !devlink_rate_set_ops_supported(ops, info, devlink_rate->type)) + return -EOPNOTSUPP; + + err = devlink_nl_rate_set(devlink_rate, ops, info); + + if (!err) + devlink_rate_notify(devlink_rate, DEVLINK_CMD_RATE_NEW); + return err; +} + static int devlink_nl_sb_fill(struct sk_buff *msg, struct devlink *devlink, struct devlink_sb *devlink_sb, enum devlink_command cmd, u32 portid, @@ -7958,6 +8036,8 @@ static const struct nla_policy devlink_nl_policy[DEVLINK_ATTR_MAX + 1] = { [DEVLINK_ATTR_PORT_PCI_SF_NUMBER] = { .type = NLA_U32 }, [DEVLINK_ATTR_PORT_CONTROLLER_NUMBER] = { .type = NLA_U32 }, [DEVLINK_ATTR_RATE_TYPE] = { .type = NLA_U16 }, + [DEVLINK_ATTR_RATE_TX_SHARE] = { .type = NLA_U64 }, + [DEVLINK_ATTR_RATE_TX_MAX] = { .type = NLA_U64 }, }; static const struct genl_small_ops devlink_nl_ops[] = { @@ -7990,6 +8070,12 @@ static const struct genl_small_ops devlink_nl_ops[] = { .internal_flags = DEVLINK_NL_FLAG_NEED_RATE, /* can be retrieved by unprivileged users */ }, + { + .cmd = DEVLINK_CMD_RATE_SET, + .doit = devlink_nl_cmd_rate_set_doit, + .flags = GENL_ADMIN_PERM, + .internal_flags = DEVLINK_NL_FLAG_NEED_RATE, + }, { .cmd = DEVLINK_CMD_PORT_SPLIT, .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, -- cgit v1.3-8-gc7d7 From a8ecb93ef03de4c59fb6289f99bc9616a852c917 Mon Sep 17 00:00:00 2001 From: Dmytro Linkin Date: Wed, 2 Jun 2021 15:17:25 +0300 Subject: devlink: Introduce rate nodes Implement support for DEVLINK_CMD_RATE_{NEW|DEL} commands that are used to create and delete devlink rate nodes. Add new attribute DEVLINK_ATTR_RATE_NODE_NAME that specify node name string. The node name is an alphanumeric identifier. No valid node name can be a devlink port index, eg. decimal number. Extend devlink ops with new callbacks rate_node_{new|del}() and rate_node_tx_{share|max}_set() to allow supporting drivers to implement ports rate grouping and setting tx rate of rate nodes through devlink. Expose devlink_rate_nodes_destroy() function to allow vendor driver do proper cleanup of internally allocated resources for the nodes if the driver goes down or due to any other reasons which requires nodes to be destroyed. Disallow moving device from switchdev to legacy mode if any node exists on that device. User must explicitly delete nodes before switching mode. Example: $ devlink port function rate add netdevsim/netdevsim10/group1 $ devlink port function rate set netdevsim/netdevsim10/group1 \ tx_share 10mbit tx_max 100mbit Add + set command can be combined: $ devlink port function rate add netdevsim/netdevsim10/group1 \ tx_share 10mbit tx_max 100mbit $ devlink port function rate show netdevsim/netdevsim10/group1 netdevsim/netdevsim10/group1: type node tx_share 10mbit tx_max 100mbit $ devlink port function rate del netdevsim/netdevsim10/group1 Co-developed-by: Vlad Buslov Signed-off-by: Vlad Buslov Signed-off-by: Dmytro Linkin Reviewed-by: Jiri Pirko Signed-off-by: David S. Miller --- include/net/devlink.h | 14 ++- include/uapi/linux/devlink.h | 3 + net/core/devlink.c | 238 +++++++++++++++++++++++++++++++++++++++++-- 3 files changed, 247 insertions(+), 8 deletions(-) (limited to 'include/uapi/linux') diff --git a/include/net/devlink.h b/include/net/devlink.h index 46d553545f83..13162b579124 100644 --- a/include/net/devlink.h +++ b/include/net/devlink.h @@ -142,7 +142,10 @@ struct devlink_rate { u64 tx_share; u64 tx_max; - struct devlink_port *devlink_port; + union { + struct devlink_port *devlink_port; + char *name; + }; }; struct devlink_port { @@ -1475,6 +1478,14 @@ struct devlink_ops { u64 tx_share, struct netlink_ext_ack *extack); int (*rate_leaf_tx_max_set)(struct devlink_rate *devlink_rate, void *priv, u64 tx_max, struct netlink_ext_ack *extack); + int (*rate_node_tx_share_set)(struct devlink_rate *devlink_rate, void *priv, + u64 tx_share, struct netlink_ext_ack *extack); + int (*rate_node_tx_max_set)(struct devlink_rate *devlink_rate, void *priv, + u64 tx_max, struct netlink_ext_ack *extack); + int (*rate_node_new)(struct devlink_rate *rate_node, void **priv, + struct netlink_ext_ack *extack); + int (*rate_node_del)(struct devlink_rate *rate_node, void *priv, + struct netlink_ext_ack *extack); }; static inline void *devlink_priv(struct devlink *devlink) @@ -1536,6 +1547,7 @@ void devlink_port_attrs_pci_sf_set(struct devlink_port *devlink_port, bool external); int devlink_rate_leaf_create(struct devlink_port *port, void *priv); void devlink_rate_leaf_destroy(struct devlink_port *devlink_port); +void devlink_rate_nodes_destroy(struct devlink *devlink); int devlink_sb_register(struct devlink *devlink, unsigned int sb_index, u32 size, u16 ingress_pools_count, u16 egress_pools_count, u16 ingress_tc_count, diff --git a/include/uapi/linux/devlink.h b/include/uapi/linux/devlink.h index ae94cd2a1078..7e15853b77fe 100644 --- a/include/uapi/linux/devlink.h +++ b/include/uapi/linux/devlink.h @@ -213,6 +213,7 @@ enum devlink_port_flavour { enum devlink_rate_type { DEVLINK_RATE_TYPE_LEAF, + DEVLINK_RATE_TYPE_NODE, }; enum devlink_param_cmode { @@ -547,6 +548,8 @@ enum devlink_attr { DEVLINK_ATTR_RATE_TYPE, /* u16 */ DEVLINK_ATTR_RATE_TX_SHARE, /* u64 */ DEVLINK_ATTR_RATE_TX_MAX, /* u64 */ + DEVLINK_ATTR_RATE_NODE_NAME, /* string */ + /* add new attributes above here, update the policy in devlink.c */ __DEVLINK_ATTR_MAX, diff --git a/net/core/devlink.c b/net/core/devlink.c index 37839fd5ca73..589d750b70e4 100644 --- a/net/core/devlink.c +++ b/net/core/devlink.c @@ -196,6 +196,12 @@ devlink_rate_is_leaf(struct devlink_rate *devlink_rate) return devlink_rate->type == DEVLINK_RATE_TYPE_LEAF; } +static inline bool +devlink_rate_is_node(struct devlink_rate *devlink_rate) +{ + return devlink_rate->type == DEVLINK_RATE_TYPE_NODE; +} + static struct devlink_rate * devlink_rate_leaf_get_from_info(struct devlink *devlink, struct genl_info *info) { @@ -209,6 +215,55 @@ devlink_rate_leaf_get_from_info(struct devlink *devlink, struct genl_info *info) return devlink_rate ?: ERR_PTR(-ENODEV); } +static struct devlink_rate * +devlink_rate_node_get_by_name(struct devlink *devlink, const char *node_name) +{ + static struct devlink_rate *devlink_rate; + + list_for_each_entry(devlink_rate, &devlink->rate_list, list) { + if (devlink_rate_is_node(devlink_rate) && + !strcmp(node_name, devlink_rate->name)) + return devlink_rate; + } + return ERR_PTR(-ENODEV); +} + +static struct devlink_rate * +devlink_rate_node_get_from_attrs(struct devlink *devlink, struct nlattr **attrs) +{ + const char *rate_node_name; + size_t len; + + if (!attrs[DEVLINK_ATTR_RATE_NODE_NAME]) + return ERR_PTR(-EINVAL); + rate_node_name = nla_data(attrs[DEVLINK_ATTR_RATE_NODE_NAME]); + len = strlen(rate_node_name); + /* Name cannot be empty or decimal number */ + if (!len || strspn(rate_node_name, "0123456789") == len) + return ERR_PTR(-EINVAL); + + return devlink_rate_node_get_by_name(devlink, rate_node_name); +} + +static struct devlink_rate * +devlink_rate_node_get_from_info(struct devlink *devlink, struct genl_info *info) +{ + return devlink_rate_node_get_from_attrs(devlink, info->attrs); +} + +static struct devlink_rate * +devlink_rate_get_from_info(struct devlink *devlink, struct genl_info *info) +{ + struct nlattr **attrs = info->attrs; + + if (attrs[DEVLINK_ATTR_PORT_INDEX]) + return devlink_rate_leaf_get_from_info(devlink, info); + else if (attrs[DEVLINK_ATTR_RATE_NODE_NAME]) + return devlink_rate_node_get_from_info(devlink, info); + else + return ERR_PTR(-EINVAL); +} + struct devlink_sb { struct list_head list; unsigned int index; @@ -428,12 +483,13 @@ devlink_region_snapshot_get_by_id(struct devlink_region *region, u32 id) #define DEVLINK_NL_FLAG_NEED_PORT BIT(0) #define DEVLINK_NL_FLAG_NEED_DEVLINK_OR_PORT BIT(1) #define DEVLINK_NL_FLAG_NEED_RATE BIT(2) +#define DEVLINK_NL_FLAG_NEED_RATE_NODE BIT(3) /* The per devlink instance lock is taken by default in the pre-doit * operation, yet several commands do not require this. The global * devlink lock is taken and protects from disruption by user-calls. */ -#define DEVLINK_NL_FLAG_NO_LOCK BIT(3) +#define DEVLINK_NL_FLAG_NO_LOCK BIT(4) static int devlink_nl_pre_doit(const struct genl_ops *ops, struct sk_buff *skb, struct genl_info *info) @@ -465,12 +521,21 @@ static int devlink_nl_pre_doit(const struct genl_ops *ops, } else if (ops->internal_flags & DEVLINK_NL_FLAG_NEED_RATE) { struct devlink_rate *devlink_rate; - devlink_rate = devlink_rate_leaf_get_from_info(devlink, info); + devlink_rate = devlink_rate_get_from_info(devlink, info); if (IS_ERR(devlink_rate)) { err = PTR_ERR(devlink_rate); goto unlock; } info->user_ptr[1] = devlink_rate; + } else if (ops->internal_flags & DEVLINK_NL_FLAG_NEED_RATE_NODE) { + struct devlink_rate *rate_node; + + rate_node = devlink_rate_node_get_from_info(devlink, info); + if (IS_ERR(rate_node)) { + err = PTR_ERR(rate_node); + goto unlock; + } + info->user_ptr[1] = rate_node; } return 0; @@ -801,6 +866,10 @@ static int devlink_nl_rate_fill(struct sk_buff *msg, if (nla_put_u32(msg, DEVLINK_ATTR_PORT_INDEX, devlink_rate->devlink_port->index)) goto nla_put_failure; + } else if (devlink_rate_is_node(devlink_rate)) { + if (nla_put_string(msg, DEVLINK_ATTR_RATE_NODE_NAME, + devlink_rate->name)) + goto nla_put_failure; } if (nla_put_u64_64bit(msg, DEVLINK_ATTR_RATE_TX_SHARE, @@ -1508,13 +1577,17 @@ static int devlink_nl_rate_set(struct devlink_rate *devlink_rate, struct genl_info *info) { struct nlattr **attrs = info->attrs; + int err = -EOPNOTSUPP; u64 rate; - int err; if (attrs[DEVLINK_ATTR_RATE_TX_SHARE]) { rate = nla_get_u64(attrs[DEVLINK_ATTR_RATE_TX_SHARE]); - err = ops->rate_leaf_tx_share_set(devlink_rate, devlink_rate->priv, - rate, info->extack); + if (devlink_rate_is_leaf(devlink_rate)) + err = ops->rate_leaf_tx_share_set(devlink_rate, devlink_rate->priv, + rate, info->extack); + else if (devlink_rate_is_node(devlink_rate)) + err = ops->rate_node_tx_share_set(devlink_rate, devlink_rate->priv, + rate, info->extack); if (err) return err; devlink_rate->tx_share = rate; @@ -1522,8 +1595,12 @@ static int devlink_nl_rate_set(struct devlink_rate *devlink_rate, if (attrs[DEVLINK_ATTR_RATE_TX_MAX]) { rate = nla_get_u64(attrs[DEVLINK_ATTR_RATE_TX_MAX]); - err = ops->rate_leaf_tx_max_set(devlink_rate, devlink_rate->priv, - rate, info->extack); + if (devlink_rate_is_leaf(devlink_rate)) + err = ops->rate_leaf_tx_max_set(devlink_rate, devlink_rate->priv, + rate, info->extack); + else if (devlink_rate_is_node(devlink_rate)) + err = ops->rate_node_tx_max_set(devlink_rate, devlink_rate->priv, + rate, info->extack); if (err) return err; devlink_rate->tx_max = rate; @@ -1547,6 +1624,15 @@ static bool devlink_rate_set_ops_supported(const struct devlink_ops *ops, NL_SET_ERR_MSG_MOD(info->extack, "TX max set isn't supported for the leafs"); return false; } + } else if (type == DEVLINK_RATE_TYPE_NODE) { + if (attrs[DEVLINK_ATTR_RATE_TX_SHARE] && !ops->rate_node_tx_share_set) { + NL_SET_ERR_MSG_MOD(info->extack, "TX share set isn't supported for the nodes"); + return false; + } + if (attrs[DEVLINK_ATTR_RATE_TX_MAX] && !ops->rate_node_tx_max_set) { + NL_SET_ERR_MSG_MOD(info->extack, "TX max set isn't supported for the nodes"); + return false; + } } else { WARN_ON("Unknown type of rate object"); return false; @@ -1573,6 +1659,78 @@ static int devlink_nl_cmd_rate_set_doit(struct sk_buff *skb, return err; } +static int devlink_nl_cmd_rate_new_doit(struct sk_buff *skb, + struct genl_info *info) +{ + struct devlink *devlink = info->user_ptr[0]; + struct devlink_rate *rate_node; + const struct devlink_ops *ops; + int err; + + ops = devlink->ops; + if (!ops || !ops->rate_node_new || !ops->rate_node_del) { + NL_SET_ERR_MSG_MOD(info->extack, "Rate nodes aren't supported"); + return -EOPNOTSUPP; + } + + if (!devlink_rate_set_ops_supported(ops, info, DEVLINK_RATE_TYPE_NODE)) + return -EOPNOTSUPP; + + rate_node = devlink_rate_node_get_from_attrs(devlink, info->attrs); + if (!IS_ERR(rate_node)) + return -EEXIST; + else if (rate_node == ERR_PTR(-EINVAL)) + return -EINVAL; + + rate_node = kzalloc(sizeof(*rate_node), GFP_KERNEL); + if (!rate_node) + return -ENOMEM; + + rate_node->devlink = devlink; + rate_node->type = DEVLINK_RATE_TYPE_NODE; + rate_node->name = nla_strdup(info->attrs[DEVLINK_ATTR_RATE_NODE_NAME], GFP_KERNEL); + if (!rate_node->name) { + err = -ENOMEM; + goto err_strdup; + } + + err = ops->rate_node_new(rate_node, &rate_node->priv, info->extack); + if (err) + goto err_node_new; + + err = devlink_nl_rate_set(rate_node, ops, info); + if (err) + goto err_rate_set; + + list_add(&rate_node->list, &devlink->rate_list); + devlink_rate_notify(rate_node, DEVLINK_CMD_RATE_NEW); + return 0; + +err_rate_set: + ops->rate_node_del(rate_node, rate_node->priv, info->extack); +err_node_new: + kfree(rate_node->name); +err_strdup: + kfree(rate_node); + return err; +} + +static int devlink_nl_cmd_rate_del_doit(struct sk_buff *skb, + struct genl_info *info) +{ + struct devlink_rate *rate_node = info->user_ptr[1]; + struct devlink *devlink = rate_node->devlink; + const struct devlink_ops *ops = devlink->ops; + int err; + + devlink_rate_notify(rate_node, DEVLINK_CMD_RATE_DEL); + err = ops->rate_node_del(rate_node, rate_node->priv, info->extack); + list_del(&rate_node->list); + kfree(rate_node->name); + kfree(rate_node); + return err; +} + static int devlink_nl_sb_fill(struct sk_buff *msg, struct devlink *devlink, struct devlink_sb *devlink_sb, enum devlink_command cmd, u32 portid, @@ -2441,6 +2599,30 @@ static int devlink_nl_cmd_eswitch_get_doit(struct sk_buff *skb, return genlmsg_reply(msg, info); } +static int devlink_rate_nodes_check(struct devlink *devlink, u16 mode, + struct netlink_ext_ack *extack) +{ + struct devlink_rate *devlink_rate; + u16 old_mode; + int err; + + if (!devlink->ops->eswitch_mode_get) + return -EOPNOTSUPP; + err = devlink->ops->eswitch_mode_get(devlink, &old_mode); + if (err) + return err; + + if (old_mode == mode) + return 0; + + list_for_each_entry(devlink_rate, &devlink->rate_list, list) + if (devlink_rate_is_node(devlink_rate)) { + NL_SET_ERR_MSG_MOD(extack, "Rate node(s) exists."); + return -EBUSY; + } + return 0; +} + static int devlink_nl_cmd_eswitch_set_doit(struct sk_buff *skb, struct genl_info *info) { @@ -2455,6 +2637,9 @@ static int devlink_nl_cmd_eswitch_set_doit(struct sk_buff *skb, if (!ops->eswitch_mode_set) return -EOPNOTSUPP; mode = nla_get_u16(info->attrs[DEVLINK_ATTR_ESWITCH_MODE]); + err = devlink_rate_nodes_check(devlink, mode, info->extack); + if (err) + return err; err = ops->eswitch_mode_set(devlink, mode, info->extack); if (err) return err; @@ -8038,6 +8223,7 @@ static const struct nla_policy devlink_nl_policy[DEVLINK_ATTR_MAX + 1] = { [DEVLINK_ATTR_RATE_TYPE] = { .type = NLA_U16 }, [DEVLINK_ATTR_RATE_TX_SHARE] = { .type = NLA_U64 }, [DEVLINK_ATTR_RATE_TX_MAX] = { .type = NLA_U64 }, + [DEVLINK_ATTR_RATE_NODE_NAME] = { .type = NLA_NUL_STRING }, }; static const struct genl_small_ops devlink_nl_ops[] = { @@ -8076,6 +8262,17 @@ static const struct genl_small_ops devlink_nl_ops[] = { .flags = GENL_ADMIN_PERM, .internal_flags = DEVLINK_NL_FLAG_NEED_RATE, }, + { + .cmd = DEVLINK_CMD_RATE_NEW, + .doit = devlink_nl_cmd_rate_new_doit, + .flags = GENL_ADMIN_PERM, + }, + { + .cmd = DEVLINK_CMD_RATE_DEL, + .doit = devlink_nl_cmd_rate_del_doit, + .flags = GENL_ADMIN_PERM, + .internal_flags = DEVLINK_NL_FLAG_NEED_RATE_NODE, + }, { .cmd = DEVLINK_CMD_PORT_SPLIT, .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, @@ -8933,6 +9130,33 @@ void devlink_rate_leaf_destroy(struct devlink_port *devlink_port) } EXPORT_SYMBOL_GPL(devlink_rate_leaf_destroy); +/** + * devlink_rate_nodes_destroy - destroy all devlink rate nodes on device + * + * @devlink: devlink instance + * + * Destroy all rate nodes on specified device + * + * Context: Takes and release devlink->lock . + */ +void devlink_rate_nodes_destroy(struct devlink *devlink) +{ + static struct devlink_rate *devlink_rate, *tmp; + const struct devlink_ops *ops = devlink->ops; + + mutex_lock(&devlink->lock); + list_for_each_entry_safe(devlink_rate, tmp, &devlink->rate_list, list) { + if (devlink_rate_is_node(devlink_rate)) { + ops->rate_node_del(devlink_rate, devlink_rate->priv, NULL); + list_del(&devlink_rate->list); + kfree(devlink_rate->name); + kfree(devlink_rate); + } + } + mutex_unlock(&devlink->lock); +} +EXPORT_SYMBOL_GPL(devlink_rate_nodes_destroy); + static int __devlink_port_phys_port_name_get(struct devlink_port *devlink_port, char *name, size_t len) { -- cgit v1.3-8-gc7d7 From d7555984507822458b32a6405881038241d140be Mon Sep 17 00:00:00 2001 From: Dmytro Linkin Date: Wed, 2 Jun 2021 15:17:28 +0300 Subject: devlink: Allow setting parent node of rate objects Refactor DEVLINK_CMD_RATE_{GET|SET} command handlers to support setting a node as a parent for another rate object (leaf or node) by means of new attribute DEVLINK_ATTR_RATE_PARENT_NODE_NAME. Extend devlink ops with new callbacks rate_{leaf|node}_parent_set() to set node as a parent for rate object to allow supporting drivers to implement rate grouping through devlink. Driver implementations are allowed to support leafs or node children only. Invoking callback with NULL as parent should be threated by the driver as unset parent action. Extend rate object struct with reference counter to disallow deleting a node with any child pointing to it. User should unset parent for the child explicitly. Example: $ devlink port function rate add netdevsim/netdevsim10/group1 $ devlink port function rate add netdevsim/netdevsim10/group2 $ devlink port function rate set netdevsim/netdevsim10/group1 parent group2 $ devlink port function rate show netdevsim/netdevsim10/group1 netdevsim/netdevsim10/group1: type node parent group2 $ devlink port function rate set netdevsim/netdevsim10/group1 noparent Co-developed-by: Vlad Buslov Signed-off-by: Vlad Buslov Signed-off-by: Dmytro Linkin Reviewed-by: Jiri Pirko Signed-off-by: David S. Miller --- include/net/devlink.h | 14 ++++- include/uapi/linux/devlink.h | 1 + net/core/devlink.c | 125 ++++++++++++++++++++++++++++++++++++++++++- 3 files changed, 137 insertions(+), 3 deletions(-) (limited to 'include/uapi/linux') diff --git a/include/net/devlink.h b/include/net/devlink.h index 13162b579124..eb045f1b5d1d 100644 --- a/include/net/devlink.h +++ b/include/net/devlink.h @@ -142,9 +142,13 @@ struct devlink_rate { u64 tx_share; u64 tx_max; + struct devlink_rate *parent; union { struct devlink_port *devlink_port; - char *name; + struct { + char *name; + refcount_t refcnt; + }; }; }; @@ -1486,6 +1490,14 @@ struct devlink_ops { struct netlink_ext_ack *extack); int (*rate_node_del)(struct devlink_rate *rate_node, void *priv, struct netlink_ext_ack *extack); + int (*rate_leaf_parent_set)(struct devlink_rate *child, + struct devlink_rate *parent, + void *priv_child, void *priv_parent, + struct netlink_ext_ack *extack); + int (*rate_node_parent_set)(struct devlink_rate *child, + struct devlink_rate *parent, + void *priv_child, void *priv_parent, + struct netlink_ext_ack *extack); }; static inline void *devlink_priv(struct devlink *devlink) diff --git a/include/uapi/linux/devlink.h b/include/uapi/linux/devlink.h index 7e15853b77fe..32f53a0069d6 100644 --- a/include/uapi/linux/devlink.h +++ b/include/uapi/linux/devlink.h @@ -549,6 +549,7 @@ enum devlink_attr { DEVLINK_ATTR_RATE_TX_SHARE, /* u64 */ DEVLINK_ATTR_RATE_TX_MAX, /* u64 */ DEVLINK_ATTR_RATE_NODE_NAME, /* string */ + DEVLINK_ATTR_RATE_PARENT_NODE_NAME, /* string */ /* add new attributes above here, update the policy in devlink.c */ diff --git a/net/core/devlink.c b/net/core/devlink.c index 589d750b70e4..464f56408247 100644 --- a/net/core/devlink.c +++ b/net/core/devlink.c @@ -880,6 +880,11 @@ static int devlink_nl_rate_fill(struct sk_buff *msg, devlink_rate->tx_max, DEVLINK_ATTR_PAD)) goto nla_put_failure; + if (devlink_rate->parent) + if (nla_put_string(msg, DEVLINK_ATTR_RATE_PARENT_NODE_NAME, + devlink_rate->parent->name)) + goto nla_put_failure; + genlmsg_end(msg, hdr); return 0; @@ -1152,6 +1157,18 @@ static int devlink_nl_cmd_rate_get_doit(struct sk_buff *skb, return genlmsg_reply(msg, info); } +static bool +devlink_rate_is_parent_node(struct devlink_rate *devlink_rate, + struct devlink_rate *parent) +{ + while (parent) { + if (parent == devlink_rate) + return true; + parent = parent->parent; + } + return false; +} + static int devlink_nl_cmd_get_doit(struct sk_buff *skb, struct genl_info *info) { struct devlink *devlink = info->user_ptr[0]; @@ -1572,11 +1589,75 @@ static int devlink_nl_cmd_port_del_doit(struct sk_buff *skb, return devlink->ops->port_del(devlink, port_index, extack); } +static int +devlink_nl_rate_parent_node_set(struct devlink_rate *devlink_rate, + struct genl_info *info, + struct nlattr *nla_parent) +{ + struct devlink *devlink = devlink_rate->devlink; + const char *parent_name = nla_data(nla_parent); + const struct devlink_ops *ops = devlink->ops; + size_t len = strlen(parent_name); + struct devlink_rate *parent; + int err = -EOPNOTSUPP; + + parent = devlink_rate->parent; + if (parent && len) { + NL_SET_ERR_MSG_MOD(info->extack, "Rate object already has parent."); + return -EBUSY; + } else if (parent && !len) { + if (devlink_rate_is_leaf(devlink_rate)) + err = ops->rate_leaf_parent_set(devlink_rate, NULL, + devlink_rate->priv, NULL, + info->extack); + else if (devlink_rate_is_node(devlink_rate)) + err = ops->rate_node_parent_set(devlink_rate, NULL, + devlink_rate->priv, NULL, + info->extack); + if (err) + return err; + + refcount_dec(&parent->refcnt); + devlink_rate->parent = NULL; + } else if (!parent && len) { + parent = devlink_rate_node_get_by_name(devlink, parent_name); + if (IS_ERR(parent)) + return -ENODEV; + + if (parent == devlink_rate) { + NL_SET_ERR_MSG_MOD(info->extack, "Parent to self is not allowed"); + return -EINVAL; + } + + if (devlink_rate_is_node(devlink_rate) && + devlink_rate_is_parent_node(devlink_rate, parent->parent)) { + NL_SET_ERR_MSG_MOD(info->extack, "Node is already a parent of parent node."); + return -EEXIST; + } + + if (devlink_rate_is_leaf(devlink_rate)) + err = ops->rate_leaf_parent_set(devlink_rate, parent, + devlink_rate->priv, parent->priv, + info->extack); + else if (devlink_rate_is_node(devlink_rate)) + err = ops->rate_node_parent_set(devlink_rate, parent, + devlink_rate->priv, parent->priv, + info->extack); + if (err) + return err; + + refcount_inc(&parent->refcnt); + devlink_rate->parent = parent; + } + + return 0; +} + static int devlink_nl_rate_set(struct devlink_rate *devlink_rate, const struct devlink_ops *ops, struct genl_info *info) { - struct nlattr **attrs = info->attrs; + struct nlattr *nla_parent, **attrs = info->attrs; int err = -EOPNOTSUPP; u64 rate; @@ -1606,6 +1687,14 @@ static int devlink_nl_rate_set(struct devlink_rate *devlink_rate, devlink_rate->tx_max = rate; } + nla_parent = attrs[DEVLINK_ATTR_RATE_PARENT_NODE_NAME]; + if (nla_parent) { + err = devlink_nl_rate_parent_node_set(devlink_rate, info, + nla_parent); + if (err) + return err; + } + return 0; } @@ -1624,6 +1713,11 @@ static bool devlink_rate_set_ops_supported(const struct devlink_ops *ops, NL_SET_ERR_MSG_MOD(info->extack, "TX max set isn't supported for the leafs"); return false; } + if (attrs[DEVLINK_ATTR_RATE_PARENT_NODE_NAME] && + !ops->rate_leaf_parent_set) { + NL_SET_ERR_MSG_MOD(info->extack, "Parent set isn't supported for the leafs"); + return false; + } } else if (type == DEVLINK_RATE_TYPE_NODE) { if (attrs[DEVLINK_ATTR_RATE_TX_SHARE] && !ops->rate_node_tx_share_set) { NL_SET_ERR_MSG_MOD(info->extack, "TX share set isn't supported for the nodes"); @@ -1633,6 +1727,11 @@ static bool devlink_rate_set_ops_supported(const struct devlink_ops *ops, NL_SET_ERR_MSG_MOD(info->extack, "TX max set isn't supported for the nodes"); return false; } + if (attrs[DEVLINK_ATTR_RATE_PARENT_NODE_NAME] && + !ops->rate_node_parent_set) { + NL_SET_ERR_MSG_MOD(info->extack, "Parent set isn't supported for the nodes"); + return false; + } } else { WARN_ON("Unknown type of rate object"); return false; @@ -1702,6 +1801,7 @@ static int devlink_nl_cmd_rate_new_doit(struct sk_buff *skb, if (err) goto err_rate_set; + refcount_set(&rate_node->refcnt, 1); list_add(&rate_node->list, &devlink->rate_list); devlink_rate_notify(rate_node, DEVLINK_CMD_RATE_NEW); return 0; @@ -1723,8 +1823,15 @@ static int devlink_nl_cmd_rate_del_doit(struct sk_buff *skb, const struct devlink_ops *ops = devlink->ops; int err; + if (refcount_read(&rate_node->refcnt) > 1) { + NL_SET_ERR_MSG_MOD(info->extack, "Node has children. Cannot delete node."); + return -EBUSY; + } + devlink_rate_notify(rate_node, DEVLINK_CMD_RATE_DEL); err = ops->rate_node_del(rate_node, rate_node->priv, info->extack); + if (rate_node->parent) + refcount_dec(&rate_node->parent->refcnt); list_del(&rate_node->list); kfree(rate_node->name); kfree(rate_node); @@ -8224,6 +8331,7 @@ static const struct nla_policy devlink_nl_policy[DEVLINK_ATTR_MAX + 1] = { [DEVLINK_ATTR_RATE_TX_SHARE] = { .type = NLA_U64 }, [DEVLINK_ATTR_RATE_TX_MAX] = { .type = NLA_U64 }, [DEVLINK_ATTR_RATE_NODE_NAME] = { .type = NLA_NUL_STRING }, + [DEVLINK_ATTR_RATE_PARENT_NODE_NAME] = { .type = NLA_NUL_STRING }, }; static const struct genl_small_ops devlink_nl_ops[] = { @@ -9135,7 +9243,8 @@ EXPORT_SYMBOL_GPL(devlink_rate_leaf_destroy); * * @devlink: devlink instance * - * Destroy all rate nodes on specified device + * Unset parent for all rate objects and destroy all rate nodes + * on specified device. * * Context: Takes and release devlink->lock . */ @@ -9145,6 +9254,18 @@ void devlink_rate_nodes_destroy(struct devlink *devlink) const struct devlink_ops *ops = devlink->ops; mutex_lock(&devlink->lock); + list_for_each_entry(devlink_rate, &devlink->rate_list, list) { + if (!devlink_rate->parent) + continue; + + refcount_dec(&devlink_rate->parent->refcnt); + if (devlink_rate_is_leaf(devlink_rate)) + ops->rate_leaf_parent_set(devlink_rate, NULL, devlink_rate->priv, + NULL, NULL); + else if (devlink_rate_is_node(devlink_rate)) + ops->rate_node_parent_set(devlink_rate, NULL, devlink_rate->priv, + NULL, NULL); + } list_for_each_entry_safe(devlink_rate, tmp, &devlink->rate_list, list) { if (devlink_rate_is_node(devlink_rate)) { ops->rate_node_del(devlink_rate, devlink_rate->priv, NULL); -- cgit v1.3-8-gc7d7