author		Linus Torvalds <torvalds@linux-foundation.org>	2025-03-29 10:45:20 -0700
committer	Linus Torvalds <torvalds@linux-foundation.org>	2025-03-29 10:45:20 -0700
commit		0ccff074d6aa45835ccb7c0e4a995a32e4c90b5a (patch)
tree		36e18383c05798b2a253b5b29d5f68688656da74 /drivers
parent		Merge tag 'v6.15-p1' of git://git.kernel.org/pub/scm/linux/kernel/git/herbert/crypto-2.6 (diff)
parent		pds_fwctl: add Documentation entries (diff)
Merge tag 'for-linus-fwctl' of git://git.kernel.org/pub/scm/linux/kernel/git/rdma/rdma
Pull fwctl subsystem from Jason Gunthorpe:
 "fwctl is a new subsystem intended to bring some common rules and order
  to the growing pattern of exposing a secure FW interface directly to
  userspace. Unlike existing places like RDMA/DRM/VFIO/uacce that are
  exposing a device for datapath operations, fwctl is focused on
  debugging, configuration and provisioning of the device. It will not
  have the necessary features like interrupt delivery to support a
  datapath.

  This concept is similar to the long-standing practice in the "HW"
  RAID space of having a device-specific misc device to manage the RAID
  controller FW. fwctl generalizes this notion of a companion debug and
  management interface that goes along with a dataplane implemented in
  an appropriate subsystem.

  There have been three LWN articles written discussing various aspects
  of this:

    https://lwn.net/Articles/955001/
    https://lwn.net/Articles/969383/
    https://lwn.net/Articles/990802/

  This includes three drivers to launch the subsystem:

   - CXL provides a vendor scheme for executing commands and a way to
     learn the 'command effects' (ie the security properties) of such
     commands. The fwctl driver allows access to these mechanisms
     within the fwctl security model.

   - mlx5 is a family of networking products; the driver supports all
     current Mellanox HW still receiving FW feature updates. This
     includes RDMA multiprotocol NICs like ConnectX and the BlueField
     family of SmartNICs.

   - The AMD/Pensando Distributed Services Card is a multi-protocol
     SmartNIC with a multi-PCI-function design. fwctl works on the
     management PCI function following a 'command effects' model
     similar to CXL."

* tag 'for-linus-fwctl' of git://git.kernel.org/pub/scm/linux/kernel/git/rdma/rdma: (30 commits)
  pds_fwctl: add Documentation entries
  pds_fwctl: add rpc and query support
  pds_fwctl: initial driver framework
  pds_core: add new fwctl auxiliary_device
  pds_core: specify auxiliary_device to be created
  pds_core: make pdsc_auxbus_dev_del() void
  cxl: Fixup kdoc issues for include/cxl/features.h
  fwctl/cxl: Add documentation to FWCTL CXL
  cxl/test: Add Set Feature support to cxl_test
  cxl/test: Add Get Feature support to cxl_test
  cxl: Add support to handle user feature commands for set feature
  cxl: Add support to handle user feature commands for get feature
  cxl: Add support for fwctl RPC command to enable CXL feature commands
  cxl: Move cxl feature command structs to user header
  cxl: Add FWCTL support to CXL
  mlx5: Create an auxiliary device for fwctl_mlx5
  fwctl/mlx5: Support for communicating with mlx5 fw
  fwctl: Add documentation
  fwctl: FWCTL_RPC to execute a Remote Procedure Call to device firmware
  taint: Add TAINT_FWCTL
  ...
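For a concrete feel of the uAPI this merge adds: each registered device shows
up as a character device under /dev/fwctl/, and userspace drives it with two
ioctls, FWCTL_INFO and FWCTL_RPC (see drivers/fwctl/main.c below). A minimal
sketch, assuming the struct layout implied by the fwctl_cmd_info() handler and
the installed uapi header path; illustrative, not a definitive example:

    /* Query the device type of a fwctl device -- illustrative only. */
    #include <fcntl.h>
    #include <stdio.h>
    #include <string.h>
    #include <sys/ioctl.h>
    #include <fwctl/fwctl.h>	/* assumed path after 'make headers_install' */

    int main(void)
    {
    	struct fwctl_info info;
    	int fd = open("/dev/fwctl/fwctl0", O_RDWR);

    	if (fd < 0)
    		return 1;
    	memset(&info, 0, sizeof(info));	/* flags must be zero per fwctl_cmd_info() */
    	info.size = sizeof(info);	/* first __u32 is the struct size the kernel reads */
    	info.device_data_len = 0;	/* skip the driver-specific info blob */
    	if (ioctl(fd, FWCTL_INFO, &info))
    		return 1;
    	printf("fwctl device type: %u\n", info.out_device_type);
    	return 0;
    }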
Diffstat (limited to 'drivers')
-rw-r--r--  drivers/Kconfig                                |   2
-rw-r--r--  drivers/Makefile                               |   1
-rw-r--r--  drivers/cxl/Kconfig                            |  12
-rw-r--r--  drivers/cxl/core/Makefile                      |   1
-rw-r--r--  drivers/cxl/core/core.h                        |  17
-rw-r--r--  drivers/cxl/core/features.c                    | 708
-rw-r--r--  drivers/cxl/core/mbox.c                        | 124
-rw-r--r--  drivers/cxl/core/memdev.c                      |  22
-rw-r--r--  drivers/cxl/cxlmem.h                           |  47
-rw-r--r--  drivers/cxl/pci.c                              |   8
-rw-r--r--  drivers/fwctl/Kconfig                          |  33
-rw-r--r--  drivers/fwctl/Makefile                         |   6
-rw-r--r--  drivers/fwctl/main.c                           | 421
-rw-r--r--  drivers/fwctl/mlx5/Makefile                    |   4
-rw-r--r--  drivers/fwctl/mlx5/main.c                      | 411
-rw-r--r--  drivers/fwctl/pds/Makefile                     |   4
-rw-r--r--  drivers/fwctl/pds/main.c                       | 536
-rw-r--r--  drivers/net/ethernet/amd/pds_core/auxbus.c     |  44
-rw-r--r--  drivers/net/ethernet/amd/pds_core/core.c       |   7
-rw-r--r--  drivers/net/ethernet/amd/pds_core/core.h       |   8
-rw-r--r--  drivers/net/ethernet/amd/pds_core/devlink.c    |   7
-rw-r--r--  drivers/net/ethernet/amd/pds_core/main.c       |  25
-rw-r--r--  drivers/net/ethernet/mellanox/mlx5/core/dev.c  |   9
23 files changed, 2327 insertions, 130 deletions
diff --git a/drivers/Kconfig b/drivers/Kconfig
index 7bdad836fc62..7c556c5ac4fd 100644
--- a/drivers/Kconfig
+++ b/drivers/Kconfig
@@ -21,6 +21,8 @@ source "drivers/connector/Kconfig"
source "drivers/firmware/Kconfig"
+source "drivers/fwctl/Kconfig"
+
source "drivers/gnss/Kconfig"
source "drivers/mtd/Kconfig"
diff --git a/drivers/Makefile b/drivers/Makefile
index 45d1c3e630f7..b5749cf67044 100644
--- a/drivers/Makefile
+++ b/drivers/Makefile
@@ -135,6 +135,7 @@ obj-y += ufs/
obj-$(CONFIG_MEMSTICK) += memstick/
obj-$(CONFIG_INFINIBAND) += infiniband/
obj-y += firmware/
+obj-$(CONFIG_FWCTL) += fwctl/
obj-$(CONFIG_CRYPTO) += crypto/
obj-$(CONFIG_SUPERH) += sh/
obj-y += clocksource/
diff --git a/drivers/cxl/Kconfig b/drivers/cxl/Kconfig
index 876469e23f7a..8ac1e9d70eeb 100644
--- a/drivers/cxl/Kconfig
+++ b/drivers/cxl/Kconfig
@@ -7,6 +7,7 @@ menuconfig CXL_BUS
select PCI_DOE
select FIRMWARE_TABLE
select NUMA_KEEP_MEMINFO if NUMA_MEMBLKS
+ select FWCTL if CXL_FEATURES
help
CXL is a bus that is electrically compatible with PCI Express, but
layers three protocols on that signalling (CXL.io, CXL.cache, and
@@ -102,6 +103,17 @@ config CXL_MEM
If unsure say 'm'.
+config CXL_FEATURES
+ bool "CXL: Features"
+ depends on CXL_PCI
+ help
+ Enable support for CXL Features. A CXL device that includes a mailbox
+ supports commands that allow listing, getting, and setting of
+ optionally defined features such as memory sparing or post package
+ sparing. Vendors may define custom features for the device.
+
+ If unsure say 'n'
+
config CXL_PORT
default CXL_BUS
tristate
diff --git a/drivers/cxl/core/Makefile b/drivers/cxl/core/Makefile
index 9259bcc6773c..b0bfbd9eac9b 100644
--- a/drivers/cxl/core/Makefile
+++ b/drivers/cxl/core/Makefile
@@ -16,3 +16,4 @@ cxl_core-y += pmu.o
cxl_core-y += cdat.o
cxl_core-$(CONFIG_TRACING) += trace.o
cxl_core-$(CONFIG_CXL_REGION) += region.o
+cxl_core-$(CONFIG_CXL_FEATURES) += features.o
diff --git a/drivers/cxl/core/core.h b/drivers/cxl/core/core.h
index 800466f96a68..17e99a25c29a 100644
--- a/drivers/cxl/core/core.h
+++ b/drivers/cxl/core/core.h
@@ -4,6 +4,8 @@
#ifndef __CXL_CORE_H__
#define __CXL_CORE_H__
+#include <cxl/mailbox.h>
+
extern const struct device_type cxl_nvdimm_bridge_type;
extern const struct device_type cxl_nvdimm_type;
extern const struct device_type cxl_pmu_type;
@@ -65,9 +67,9 @@ static inline void cxl_region_exit(void)
struct cxl_send_command;
struct cxl_mem_query_commands;
-int cxl_query_cmd(struct cxl_memdev *cxlmd,
+int cxl_query_cmd(struct cxl_mailbox *cxl_mbox,
struct cxl_mem_query_commands __user *q);
-int cxl_send_cmd(struct cxl_memdev *cxlmd, struct cxl_send_command __user *s);
+int cxl_send_cmd(struct cxl_mailbox *cxl_mbox, struct cxl_send_command __user *s);
void __iomem *devm_cxl_iomap_block(struct device *dev, resource_size_t addr,
resource_size_t length);
@@ -115,4 +117,15 @@ bool cxl_need_node_perf_attrs_update(int nid);
int cxl_port_get_switch_dport_bandwidth(struct cxl_port *port,
struct access_coordinate *c);
+#ifdef CONFIG_CXL_FEATURES
+size_t cxl_get_feature(struct cxl_mailbox *cxl_mbox, const uuid_t *feat_uuid,
+ enum cxl_get_feat_selection selection,
+ void *feat_out, size_t feat_out_size, u16 offset,
+ u16 *return_code);
+int cxl_set_feature(struct cxl_mailbox *cxl_mbox, const uuid_t *feat_uuid,
+ u8 feat_version, const void *feat_data,
+ size_t feat_data_size, u32 feat_flag, u16 offset,
+ u16 *return_code);
+#endif
+
#endif /* __CXL_CORE_H__ */
diff --git a/drivers/cxl/core/features.c b/drivers/cxl/core/features.c
new file mode 100644
index 000000000000..f4daefe3180e
--- /dev/null
+++ b/drivers/cxl/core/features.c
@@ -0,0 +1,708 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/* Copyright(c) 2024-2025 Intel Corporation. All rights reserved. */
+#include <linux/fwctl.h>
+#include <linux/device.h>
+#include <cxl/mailbox.h>
+#include <cxl/features.h>
+#include <uapi/fwctl/cxl.h>
+#include "cxl.h"
+#include "core.h"
+#include "cxlmem.h"
+
+/* All the features below are exclusive to the kernel */
+static const uuid_t cxl_exclusive_feats[] = {
+ CXL_FEAT_PATROL_SCRUB_UUID,
+ CXL_FEAT_ECS_UUID,
+ CXL_FEAT_SPPR_UUID,
+ CXL_FEAT_HPPR_UUID,
+ CXL_FEAT_CACHELINE_SPARING_UUID,
+ CXL_FEAT_ROW_SPARING_UUID,
+ CXL_FEAT_BANK_SPARING_UUID,
+ CXL_FEAT_RANK_SPARING_UUID,
+};
+
+static bool is_cxl_feature_exclusive_by_uuid(const uuid_t *uuid)
+{
+ for (int i = 0; i < ARRAY_SIZE(cxl_exclusive_feats); i++) {
+ if (uuid_equal(uuid, &cxl_exclusive_feats[i]))
+ return true;
+ }
+
+ return false;
+}
+
+static bool is_cxl_feature_exclusive(struct cxl_feat_entry *entry)
+{
+ return is_cxl_feature_exclusive_by_uuid(&entry->uuid);
+}
+
+inline struct cxl_features_state *to_cxlfs(struct cxl_dev_state *cxlds)
+{
+ return cxlds->cxlfs;
+}
+EXPORT_SYMBOL_NS_GPL(to_cxlfs, "CXL");
+
+static int cxl_get_supported_features_count(struct cxl_mailbox *cxl_mbox)
+{
+ struct cxl_mbox_get_sup_feats_out mbox_out;
+ struct cxl_mbox_get_sup_feats_in mbox_in;
+ struct cxl_mbox_cmd mbox_cmd;
+ int rc;
+
+ memset(&mbox_in, 0, sizeof(mbox_in));
+ mbox_in.count = cpu_to_le32(sizeof(mbox_out));
+ memset(&mbox_out, 0, sizeof(mbox_out));
+ mbox_cmd = (struct cxl_mbox_cmd) {
+ .opcode = CXL_MBOX_OP_GET_SUPPORTED_FEATURES,
+ .size_in = sizeof(mbox_in),
+ .payload_in = &mbox_in,
+ .size_out = sizeof(mbox_out),
+ .payload_out = &mbox_out,
+ .min_out = sizeof(mbox_out),
+ };
+ rc = cxl_internal_send_cmd(cxl_mbox, &mbox_cmd);
+ if (rc < 0)
+ return rc;
+
+ return le16_to_cpu(mbox_out.supported_feats);
+}
+
+static struct cxl_feat_entries *
+get_supported_features(struct cxl_features_state *cxlfs)
+{
+ int remain_feats, max_size, max_feats, start, rc, hdr_size;
+ struct cxl_mailbox *cxl_mbox = &cxlfs->cxlds->cxl_mbox;
+ int feat_size = sizeof(struct cxl_feat_entry);
+ struct cxl_mbox_get_sup_feats_in mbox_in;
+ struct cxl_feat_entry *entry;
+ struct cxl_mbox_cmd mbox_cmd;
+ int user_feats = 0;
+ int count;
+
+ count = cxl_get_supported_features_count(cxl_mbox);
+ if (count <= 0)
+ return NULL;
+
+ struct cxl_feat_entries *entries __free(kvfree) =
+ kvmalloc(struct_size(entries, ent, count), GFP_KERNEL);
+ if (!entries)
+ return NULL;
+
+ struct cxl_mbox_get_sup_feats_out *mbox_out __free(kvfree) =
+ kvmalloc(cxl_mbox->payload_size, GFP_KERNEL);
+ if (!mbox_out)
+ return NULL;
+
+ hdr_size = struct_size(mbox_out, ents, 0);
+ max_size = cxl_mbox->payload_size - hdr_size;
+ /* max feat entries that can fit in mailbox max payload size */
+ max_feats = max_size / feat_size;
+ entry = entries->ent;
+
+ start = 0;
+ remain_feats = count;
+ do {
+ int retrieved, alloc_size, copy_feats;
+ int num_entries;
+
+ if (remain_feats > max_feats) {
+ alloc_size = struct_size(mbox_out, ents, max_feats);
+ remain_feats = remain_feats - max_feats;
+ copy_feats = max_feats;
+ } else {
+ alloc_size = struct_size(mbox_out, ents, remain_feats);
+ copy_feats = remain_feats;
+ remain_feats = 0;
+ }
+
+ memset(&mbox_in, 0, sizeof(mbox_in));
+ mbox_in.count = cpu_to_le32(alloc_size);
+ mbox_in.start_idx = cpu_to_le16(start);
+ memset(mbox_out, 0, alloc_size);
+ mbox_cmd = (struct cxl_mbox_cmd) {
+ .opcode = CXL_MBOX_OP_GET_SUPPORTED_FEATURES,
+ .size_in = sizeof(mbox_in),
+ .payload_in = &mbox_in,
+ .size_out = alloc_size,
+ .payload_out = mbox_out,
+ .min_out = hdr_size,
+ };
+ rc = cxl_internal_send_cmd(cxl_mbox, &mbox_cmd);
+ if (rc < 0)
+ return NULL;
+
+ if (mbox_cmd.size_out <= hdr_size)
+ return NULL;
+
+ /*
+ * Make sure retrieved out buffer is multiple of feature
+ * entries.
+ */
+ retrieved = mbox_cmd.size_out - hdr_size;
+ if (retrieved % feat_size)
+ return NULL;
+
+ num_entries = le16_to_cpu(mbox_out->num_entries);
+ /*
+ * If the reported output entries * defined entry size !=
+ * retrieved output bytes, then the output package is incorrect.
+ */
+ if (num_entries * feat_size != retrieved)
+ return NULL;
+
+ memcpy(entry, mbox_out->ents, retrieved);
+ for (int i = 0; i < num_entries; i++) {
+ if (!is_cxl_feature_exclusive(entry + i))
+ user_feats++;
+ }
+ entry += num_entries;
+ /*
+ * If the number of output entries is less than expected, add the
+ * remaining entries to the next batch.
+ */
+ remain_feats += copy_feats - num_entries;
+ start += num_entries;
+ } while (remain_feats);
+
+ entries->num_features = count;
+ entries->num_user_features = user_feats;
+
+ return no_free_ptr(entries);
+}
+
+static void free_cxlfs(void *_cxlfs)
+{
+ struct cxl_features_state *cxlfs = _cxlfs;
+ struct cxl_dev_state *cxlds = cxlfs->cxlds;
+
+ cxlds->cxlfs = NULL;
+ kvfree(cxlfs->entries);
+ kfree(cxlfs);
+}
+
+/**
+ * devm_cxl_setup_features() - Allocate and initialize features context
+ * @cxlds: CXL device context
+ *
+ * Return 0 on success or -errno on failure.
+ */
+int devm_cxl_setup_features(struct cxl_dev_state *cxlds)
+{
+ struct cxl_mailbox *cxl_mbox = &cxlds->cxl_mbox;
+
+ if (cxl_mbox->feat_cap < CXL_FEATURES_RO)
+ return -ENODEV;
+
+ struct cxl_features_state *cxlfs __free(kfree) =
+ kzalloc(sizeof(*cxlfs), GFP_KERNEL);
+ if (!cxlfs)
+ return -ENOMEM;
+
+ cxlfs->cxlds = cxlds;
+
+ cxlfs->entries = get_supported_features(cxlfs);
+ if (!cxlfs->entries)
+ return -ENOMEM;
+
+ cxlds->cxlfs = cxlfs;
+
+ return devm_add_action_or_reset(cxlds->dev, free_cxlfs, no_free_ptr(cxlfs));
+}
+EXPORT_SYMBOL_NS_GPL(devm_cxl_setup_features, "CXL");
+
+size_t cxl_get_feature(struct cxl_mailbox *cxl_mbox, const uuid_t *feat_uuid,
+ enum cxl_get_feat_selection selection,
+ void *feat_out, size_t feat_out_size, u16 offset,
+ u16 *return_code)
+{
+ size_t data_to_rd_size, size_out;
+ struct cxl_mbox_get_feat_in pi;
+ struct cxl_mbox_cmd mbox_cmd;
+ size_t data_rcvd_size = 0;
+ int rc;
+
+ if (return_code)
+ *return_code = CXL_MBOX_CMD_RC_INPUT;
+
+ if (!feat_out || !feat_out_size)
+ return 0;
+
+ size_out = min(feat_out_size, cxl_mbox->payload_size);
+ uuid_copy(&pi.uuid, feat_uuid);
+ pi.selection = selection;
+ do {
+ data_to_rd_size = min(feat_out_size - data_rcvd_size,
+ cxl_mbox->payload_size);
+ pi.offset = cpu_to_le16(offset + data_rcvd_size);
+ pi.count = cpu_to_le16(data_to_rd_size);
+
+ mbox_cmd = (struct cxl_mbox_cmd) {
+ .opcode = CXL_MBOX_OP_GET_FEATURE,
+ .size_in = sizeof(pi),
+ .payload_in = &pi,
+ .size_out = size_out,
+ .payload_out = feat_out + data_rcvd_size,
+ .min_out = data_to_rd_size,
+ };
+ rc = cxl_internal_send_cmd(cxl_mbox, &mbox_cmd);
+ if (rc < 0 || !mbox_cmd.size_out) {
+ if (return_code)
+ *return_code = mbox_cmd.return_code;
+ return 0;
+ }
+ data_rcvd_size += mbox_cmd.size_out;
+ } while (data_rcvd_size < feat_out_size);
+
+ if (return_code)
+ *return_code = CXL_MBOX_CMD_RC_SUCCESS;
+
+ return data_rcvd_size;
+}
+
+/*
+ * FEAT_DATA_MIN_PAYLOAD_SIZE - minimum extra number of bytes that should be
+ * available in the mailbox for storing the actual feature data so that
+ * the feature data transfer would work as expected.
+ */
+#define FEAT_DATA_MIN_PAYLOAD_SIZE 10
+int cxl_set_feature(struct cxl_mailbox *cxl_mbox,
+ const uuid_t *feat_uuid, u8 feat_version,
+ const void *feat_data, size_t feat_data_size,
+ u32 feat_flag, u16 offset, u16 *return_code)
+{
+ size_t data_in_size, data_sent_size = 0;
+ struct cxl_mbox_cmd mbox_cmd;
+ size_t hdr_size;
+
+ if (return_code)
+ *return_code = CXL_MBOX_CMD_RC_INPUT;
+
+ struct cxl_mbox_set_feat_in *pi __free(kfree) =
+ kzalloc(cxl_mbox->payload_size, GFP_KERNEL);
+ if (!pi)
+ return -ENOMEM;
+
+ uuid_copy(&pi->uuid, feat_uuid);
+ pi->version = feat_version;
+ feat_flag &= ~CXL_SET_FEAT_FLAG_DATA_TRANSFER_MASK;
+ feat_flag |= CXL_SET_FEAT_FLAG_DATA_SAVED_ACROSS_RESET;
+ hdr_size = sizeof(pi->hdr);
+ /*
+ * Check minimum mbox payload size is available for
+ * the feature data transfer.
+ */
+ if (hdr_size + FEAT_DATA_MIN_PAYLOAD_SIZE > cxl_mbox->payload_size)
+ return -ENOMEM;
+
+ if (hdr_size + feat_data_size <= cxl_mbox->payload_size) {
+ pi->flags = cpu_to_le32(feat_flag |
+ CXL_SET_FEAT_FLAG_FULL_DATA_TRANSFER);
+ data_in_size = feat_data_size;
+ } else {
+ pi->flags = cpu_to_le32(feat_flag |
+ CXL_SET_FEAT_FLAG_INITIATE_DATA_TRANSFER);
+ data_in_size = cxl_mbox->payload_size - hdr_size;
+ }
+
+ do {
+ int rc;
+
+ pi->offset = cpu_to_le16(offset + data_sent_size);
+ memcpy(pi->feat_data, feat_data + data_sent_size, data_in_size);
+ mbox_cmd = (struct cxl_mbox_cmd) {
+ .opcode = CXL_MBOX_OP_SET_FEATURE,
+ .size_in = hdr_size + data_in_size,
+ .payload_in = pi,
+ };
+ rc = cxl_internal_send_cmd(cxl_mbox, &mbox_cmd);
+ if (rc < 0) {
+ if (return_code)
+ *return_code = mbox_cmd.return_code;
+ return rc;
+ }
+
+ data_sent_size += data_in_size;
+ if (data_sent_size >= feat_data_size) {
+ if (return_code)
+ *return_code = CXL_MBOX_CMD_RC_SUCCESS;
+ return 0;
+ }
+
+ if ((feat_data_size - data_sent_size) <= (cxl_mbox->payload_size - hdr_size)) {
+ data_in_size = feat_data_size - data_sent_size;
+ pi->flags = cpu_to_le32(feat_flag |
+ CXL_SET_FEAT_FLAG_FINISH_DATA_TRANSFER);
+ } else {
+ pi->flags = cpu_to_le32(feat_flag |
+ CXL_SET_FEAT_FLAG_CONTINUE_DATA_TRANSFER);
+ }
+ } while (true);
+}
+
+/* FWCTL support */
+
+static inline struct cxl_memdev *fwctl_to_memdev(struct fwctl_device *fwctl_dev)
+{
+ return to_cxl_memdev(fwctl_dev->dev.parent);
+}
+
+static int cxlctl_open_uctx(struct fwctl_uctx *uctx)
+{
+ return 0;
+}
+
+static void cxlctl_close_uctx(struct fwctl_uctx *uctx)
+{
+}
+
+static struct cxl_feat_entry *
+get_support_feature_info(struct cxl_features_state *cxlfs,
+ const struct fwctl_rpc_cxl *rpc_in)
+{
+ struct cxl_feat_entry *feat;
+ const uuid_t *uuid;
+
+ if (rpc_in->op_size < sizeof(uuid))
+ return ERR_PTR(-EINVAL);
+
+ uuid = &rpc_in->set_feat_in.uuid;
+
+ for (int i = 0; i < cxlfs->entries->num_features; i++) {
+ feat = &cxlfs->entries->ent[i];
+ if (uuid_equal(uuid, &feat->uuid))
+ return feat;
+ }
+
+ return ERR_PTR(-EINVAL);
+}
+
+static void *cxlctl_get_supported_features(struct cxl_features_state *cxlfs,
+ const struct fwctl_rpc_cxl *rpc_in,
+ size_t *out_len)
+{
+ const struct cxl_mbox_get_sup_feats_in *feat_in;
+ struct cxl_mbox_get_sup_feats_out *feat_out;
+ struct cxl_feat_entry *pos;
+ size_t out_size;
+ int requested;
+ u32 count;
+ u16 start;
+ int i;
+
+ if (rpc_in->op_size != sizeof(*feat_in))
+ return ERR_PTR(-EINVAL);
+
+ feat_in = &rpc_in->get_sup_feats_in;
+ count = le32_to_cpu(feat_in->count);
+ start = le16_to_cpu(feat_in->start_idx);
+ requested = count / sizeof(*pos);
+
+ /*
+ * Make sure that the total requested number of entries is not greater
+ * than the total number of supported features allowed for userspace.
+ */
+ if (start >= cxlfs->entries->num_features)
+ return ERR_PTR(-EINVAL);
+
+ requested = min_t(int, requested, cxlfs->entries->num_features - start);
+
+ out_size = sizeof(struct fwctl_rpc_cxl_out) +
+ struct_size(feat_out, ents, requested);
+
+ struct fwctl_rpc_cxl_out *rpc_out __free(kvfree) =
+ kvzalloc(out_size, GFP_KERNEL);
+ if (!rpc_out)
+ return ERR_PTR(-ENOMEM);
+
+ rpc_out->size = struct_size(feat_out, ents, requested);
+ feat_out = &rpc_out->get_sup_feats_out;
+ if (requested == 0) {
+ feat_out->num_entries = cpu_to_le16(requested);
+ feat_out->supported_feats =
+ cpu_to_le16(cxlfs->entries->num_features);
+ rpc_out->retval = CXL_MBOX_CMD_RC_SUCCESS;
+ *out_len = out_size;
+ return no_free_ptr(rpc_out);
+ }
+
+ for (i = start, pos = &feat_out->ents[0];
+ i < cxlfs->entries->num_features; i++, pos++) {
+ if (i - start == requested)
+ break;
+
+ memcpy(pos, &cxlfs->entries->ent[i], sizeof(*pos));
+ /*
+ * If the feature is exclusive, set the set_feat_size to 0 to
+ * indicate that the feature is not changeable.
+ */
+ if (is_cxl_feature_exclusive(pos)) {
+ u32 flags;
+
+ pos->set_feat_size = 0;
+ flags = le32_to_cpu(pos->flags);
+ flags &= ~CXL_FEATURE_F_CHANGEABLE;
+ pos->flags = cpu_to_le32(flags);
+ }
+ }
+
+ feat_out->num_entries = cpu_to_le16(requested);
+ feat_out->supported_feats = cpu_to_le16(cxlfs->entries->num_features);
+ rpc_out->retval = CXL_MBOX_CMD_RC_SUCCESS;
+ *out_len = out_size;
+
+ return no_free_ptr(rpc_out);
+}
+
+static void *cxlctl_get_feature(struct cxl_features_state *cxlfs,
+ const struct fwctl_rpc_cxl *rpc_in,
+ size_t *out_len)
+{
+ struct cxl_mailbox *cxl_mbox = &cxlfs->cxlds->cxl_mbox;
+ const struct cxl_mbox_get_feat_in *feat_in;
+ u16 offset, count, return_code;
+ size_t out_size = *out_len;
+
+ if (rpc_in->op_size != sizeof(*feat_in))
+ return ERR_PTR(-EINVAL);
+
+ feat_in = &rpc_in->get_feat_in;
+ offset = le16_to_cpu(feat_in->offset);
+ count = le16_to_cpu(feat_in->count);
+
+ if (!count)
+ return ERR_PTR(-EINVAL);
+
+ struct fwctl_rpc_cxl_out *rpc_out __free(kvfree) =
+ kvzalloc(out_size, GFP_KERNEL);
+ if (!rpc_out)
+ return ERR_PTR(-ENOMEM);
+
+ out_size = cxl_get_feature(cxl_mbox, &feat_in->uuid,
+ feat_in->selection, rpc_out->payload,
+ count, offset, &return_code);
+ *out_len = sizeof(struct fwctl_rpc_cxl_out);
+ if (!out_size) {
+ rpc_out->size = 0;
+ rpc_out->retval = return_code;
+ return no_free_ptr(rpc_out);
+ }
+
+ rpc_out->size = out_size;
+ rpc_out->retval = CXL_MBOX_CMD_RC_SUCCESS;
+ *out_len += out_size;
+
+ return no_free_ptr(rpc_out);
+}
+
+static void *cxlctl_set_feature(struct cxl_features_state *cxlfs,
+ const struct fwctl_rpc_cxl *rpc_in,
+ size_t *out_len)
+{
+ struct cxl_mailbox *cxl_mbox = &cxlfs->cxlds->cxl_mbox;
+ const struct cxl_mbox_set_feat_in *feat_in;
+ size_t out_size, data_size;
+ u16 offset, return_code;
+ u32 flags;
+ int rc;
+
+ if (rpc_in->op_size <= sizeof(feat_in->hdr))
+ return ERR_PTR(-EINVAL);
+
+ feat_in = &rpc_in->set_feat_in;
+
+ if (is_cxl_feature_exclusive_by_uuid(&feat_in->uuid))
+ return ERR_PTR(-EPERM);
+
+ offset = le16_to_cpu(feat_in->offset);
+ flags = le32_to_cpu(feat_in->flags);
+ out_size = *out_len;
+
+ struct fwctl_rpc_cxl_out *rpc_out __free(kvfree) =
+ kvzalloc(out_size, GFP_KERNEL);
+ if (!rpc_out)
+ return ERR_PTR(-ENOMEM);
+
+ rpc_out->size = 0;
+
+ data_size = rpc_in->op_size - sizeof(feat_in->hdr);
+ rc = cxl_set_feature(cxl_mbox, &feat_in->uuid,
+ feat_in->version, feat_in->feat_data,
+ data_size, flags, offset, &return_code);
+ if (rc) {
+ rpc_out->retval = return_code;
+ return no_free_ptr(rpc_out);
+ }
+
+ rpc_out->retval = CXL_MBOX_CMD_RC_SUCCESS;
+ *out_len = sizeof(*rpc_out);
+
+ return no_free_ptr(rpc_out);
+}
+
+static bool cxlctl_validate_set_features(struct cxl_features_state *cxlfs,
+ const struct fwctl_rpc_cxl *rpc_in,
+ enum fwctl_rpc_scope scope)
+{
+ u16 effects, imm_mask, reset_mask;
+ struct cxl_feat_entry *feat;
+ u32 flags;
+
+ feat = get_support_feature_info(cxlfs, rpc_in);
+ if (IS_ERR(feat))
+ return false;
+
+ /* Ensure that the attribute is changeable */
+ flags = le32_to_cpu(feat->flags);
+ if (!(flags & CXL_FEATURE_F_CHANGEABLE))
+ return false;
+
+ effects = le16_to_cpu(feat->effects);
+
+ /*
+ * Reject if reserved bits are set, since the effects are not
+ * comprehended by the driver.
+ */
+ if (effects & CXL_CMD_EFFECTS_RESERVED) {
+ dev_warn_once(cxlfs->cxlds->dev,
+ "Reserved bits set in the Feature effects field!\n");
+ return false;
+ }
+
+ /* Currently no user background command support */
+ if (effects & CXL_CMD_BACKGROUND)
+ return false;
+
+ /* Effects cause immediate change, highest security scope is needed */
+ imm_mask = CXL_CMD_CONFIG_CHANGE_IMMEDIATE |
+ CXL_CMD_DATA_CHANGE_IMMEDIATE |
+ CXL_CMD_POLICY_CHANGE_IMMEDIATE |
+ CXL_CMD_LOG_CHANGE_IMMEDIATE;
+
+ reset_mask = CXL_CMD_CONFIG_CHANGE_COLD_RESET |
+ CXL_CMD_CONFIG_CHANGE_CONV_RESET |
+ CXL_CMD_CONFIG_CHANGE_CXL_RESET;
+
+ /* If no immediate or reset effect set, the hardware has a bug */
+ if (!(effects & imm_mask) && !(effects & reset_mask))
+ return false;
+
+ /*
+ * If the Feature setting causes immediate configuration change
+ * then we need the full write permission policy.
+ */
+ if (effects & imm_mask && scope >= FWCTL_RPC_DEBUG_WRITE_FULL)
+ return true;
+
+ /*
+ * If the Feature setting only causes configuration change
+ * after a reset, then the lesser level of write permission
+ * policy is ok.
+ */
+ if (!(effects & imm_mask) && scope >= FWCTL_RPC_DEBUG_WRITE)
+ return true;
+
+ return false;
+}
+
+static bool cxlctl_validate_hw_command(struct cxl_features_state *cxlfs,
+ const struct fwctl_rpc_cxl *rpc_in,
+ enum fwctl_rpc_scope scope,
+ u16 opcode)
+{
+ struct cxl_mailbox *cxl_mbox = &cxlfs->cxlds->cxl_mbox;
+
+ switch (opcode) {
+ case CXL_MBOX_OP_GET_SUPPORTED_FEATURES:
+ case CXL_MBOX_OP_GET_FEATURE:
+ if (cxl_mbox->feat_cap < CXL_FEATURES_RO)
+ return false;
+ if (scope >= FWCTL_RPC_CONFIGURATION)
+ return true;
+ return false;
+ case CXL_MBOX_OP_SET_FEATURE:
+ if (cxl_mbox->feat_cap < CXL_FEATURES_RW)
+ return false;
+ return cxlctl_validate_set_features(cxlfs, rpc_in, scope);
+ default:
+ return false;
+ }
+}
+
+static void *cxlctl_handle_commands(struct cxl_features_state *cxlfs,
+ const struct fwctl_rpc_cxl *rpc_in,
+ size_t *out_len, u16 opcode)
+{
+ switch (opcode) {
+ case CXL_MBOX_OP_GET_SUPPORTED_FEATURES:
+ return cxlctl_get_supported_features(cxlfs, rpc_in, out_len);
+ case CXL_MBOX_OP_GET_FEATURE:
+ return cxlctl_get_feature(cxlfs, rpc_in, out_len);
+ case CXL_MBOX_OP_SET_FEATURE:
+ return cxlctl_set_feature(cxlfs, rpc_in, out_len);
+ default:
+ return ERR_PTR(-EOPNOTSUPP);
+ }
+}
+
+static void *cxlctl_fw_rpc(struct fwctl_uctx *uctx, enum fwctl_rpc_scope scope,
+ void *in, size_t in_len, size_t *out_len)
+{
+ struct fwctl_device *fwctl_dev = uctx->fwctl;
+ struct cxl_memdev *cxlmd = fwctl_to_memdev(fwctl_dev);
+ struct cxl_features_state *cxlfs = to_cxlfs(cxlmd->cxlds);
+ const struct fwctl_rpc_cxl *rpc_in = in;
+ u16 opcode = rpc_in->opcode;
+
+ if (!cxlctl_validate_hw_command(cxlfs, rpc_in, scope, opcode))
+ return ERR_PTR(-EINVAL);
+
+ return cxlctl_handle_commands(cxlfs, rpc_in, out_len, opcode);
+}
+
+static const struct fwctl_ops cxlctl_ops = {
+ .device_type = FWCTL_DEVICE_TYPE_CXL,
+ .uctx_size = sizeof(struct fwctl_uctx),
+ .open_uctx = cxlctl_open_uctx,
+ .close_uctx = cxlctl_close_uctx,
+ .fw_rpc = cxlctl_fw_rpc,
+};
+
+DEFINE_FREE(free_fwctl_dev, struct fwctl_device *, if (_T) fwctl_put(_T))
+
+static void free_memdev_fwctl(void *_fwctl_dev)
+{
+ struct fwctl_device *fwctl_dev = _fwctl_dev;
+
+ fwctl_unregister(fwctl_dev);
+ fwctl_put(fwctl_dev);
+}
+
+int devm_cxl_setup_fwctl(struct cxl_memdev *cxlmd)
+{
+ struct cxl_dev_state *cxlds = cxlmd->cxlds;
+ struct cxl_features_state *cxlfs;
+ int rc;
+
+ cxlfs = to_cxlfs(cxlds);
+ if (!cxlfs)
+ return -ENODEV;
+
+ /* No need to setup FWCTL if there are no user allowed features found */
+ if (!cxlfs->entries->num_user_features)
+ return -ENODEV;
+
+ struct fwctl_device *fwctl_dev __free(free_fwctl_dev) =
+ _fwctl_alloc_device(&cxlmd->dev, &cxlctl_ops, sizeof(*fwctl_dev));
+ if (!fwctl_dev)
+ return -ENOMEM;
+
+ rc = fwctl_register(fwctl_dev);
+ if (rc)
+ return rc;
+
+ return devm_add_action_or_reset(&cxlmd->dev, free_memdev_fwctl,
+ no_free_ptr(fwctl_dev));
+}
+EXPORT_SYMBOL_NS_GPL(devm_cxl_setup_fwctl, "CXL");
+
+MODULE_IMPORT_NS("FWCTL");
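
To make the dispatch above concrete: cxlctl_fw_rpc() validates the opcode and
scope, then hands the embedded mailbox command to the mailbox core. A hedged
userspace sketch of a 'Get Supported Features' query, continuing the
FWCTL_INFO sketch earlier (fd already open); the fwctl_rpc_cxl and
fwctl_rpc_cxl_out layouts are assumed from include/uapi/fwctl/cxl.h, which the
handlers above dereference but this hunk does not show, and buffer sizing is
illustrative:

    /* count == 0 asks only for the total supported_feats, per
     * cxlctl_get_supported_features() above -- illustrative sketch */
    struct fwctl_rpc_cxl rpc_in = {
    	.opcode = 0x0500,	/* CXL_MBOX_OP_GET_SUPPORTED_FEATURES */
    	.op_size = sizeof(rpc_in.get_sup_feats_in),	/* count/start_idx zeroed */
    };
    struct fwctl_rpc_cxl_out *rpc_out = calloc(1, 4096);
    struct fwctl_rpc rpc = {
    	.size = sizeof(rpc),
    	.scope = FWCTL_RPC_CONFIGURATION,	/* read-only query, lowest scope */
    	.in = (__u64)(uintptr_t)&rpc_in,
    	.in_len = sizeof(rpc_in),
    	.out = (__u64)(uintptr_t)rpc_out,
    	.out_len = 4096,
    };

    if (!ioctl(fd, FWCTL_RPC, &rpc) && rpc_out->retval == 0)	/* 0 == RC_SUCCESS */
    	printf("%u features supported\n",
    	       le16toh(rpc_out->get_sup_feats_out.supported_feats));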
diff --git a/drivers/cxl/core/mbox.c b/drivers/cxl/core/mbox.c
index 548564c770c0..78c5346e3e89 100644
--- a/drivers/cxl/core/mbox.c
+++ b/drivers/cxl/core/mbox.c
@@ -349,40 +349,39 @@ static bool cxl_payload_from_user_allowed(u16 opcode, void *payload_in)
return true;
}
-static int cxl_mbox_cmd_ctor(struct cxl_mbox_cmd *mbox,
- struct cxl_memdev_state *mds, u16 opcode,
+static int cxl_mbox_cmd_ctor(struct cxl_mbox_cmd *mbox_cmd,
+ struct cxl_mailbox *cxl_mbox, u16 opcode,
size_t in_size, size_t out_size, u64 in_payload)
{
- struct cxl_mailbox *cxl_mbox = &mds->cxlds.cxl_mbox;
- *mbox = (struct cxl_mbox_cmd) {
+ *mbox_cmd = (struct cxl_mbox_cmd) {
.opcode = opcode,
.size_in = in_size,
};
if (in_size) {
- mbox->payload_in = vmemdup_user(u64_to_user_ptr(in_payload),
- in_size);
- if (IS_ERR(mbox->payload_in))
- return PTR_ERR(mbox->payload_in);
+ mbox_cmd->payload_in = vmemdup_user(u64_to_user_ptr(in_payload),
+ in_size);
+ if (IS_ERR(mbox_cmd->payload_in))
+ return PTR_ERR(mbox_cmd->payload_in);
- if (!cxl_payload_from_user_allowed(opcode, mbox->payload_in)) {
- dev_dbg(mds->cxlds.dev, "%s: input payload not allowed\n",
+ if (!cxl_payload_from_user_allowed(opcode, mbox_cmd->payload_in)) {
+ dev_dbg(cxl_mbox->host, "%s: input payload not allowed\n",
cxl_mem_opcode_to_name(opcode));
- kvfree(mbox->payload_in);
+ kvfree(mbox_cmd->payload_in);
return -EBUSY;
}
}
/* Prepare to handle a full payload for variable sized output */
if (out_size == CXL_VARIABLE_PAYLOAD)
- mbox->size_out = cxl_mbox->payload_size;
+ mbox_cmd->size_out = cxl_mbox->payload_size;
else
- mbox->size_out = out_size;
+ mbox_cmd->size_out = out_size;
- if (mbox->size_out) {
- mbox->payload_out = kvzalloc(mbox->size_out, GFP_KERNEL);
- if (!mbox->payload_out) {
- kvfree(mbox->payload_in);
+ if (mbox_cmd->size_out) {
+ mbox_cmd->payload_out = kvzalloc(mbox_cmd->size_out, GFP_KERNEL);
+ if (!mbox_cmd->payload_out) {
+ kvfree(mbox_cmd->payload_in);
return -ENOMEM;
}
}
@@ -397,10 +396,8 @@ static void cxl_mbox_cmd_dtor(struct cxl_mbox_cmd *mbox)
static int cxl_to_mem_cmd_raw(struct cxl_mem_command *mem_cmd,
const struct cxl_send_command *send_cmd,
- struct cxl_memdev_state *mds)
+ struct cxl_mailbox *cxl_mbox)
{
- struct cxl_mailbox *cxl_mbox = &mds->cxlds.cxl_mbox;
-
if (send_cmd->raw.rsvd)
return -EINVAL;
@@ -415,7 +412,7 @@ static int cxl_to_mem_cmd_raw(struct cxl_mem_command *mem_cmd,
if (!cxl_mem_raw_command_allowed(send_cmd->raw.opcode))
return -EPERM;
- dev_WARN_ONCE(mds->cxlds.dev, true, "raw command path used\n");
+ dev_WARN_ONCE(cxl_mbox->host, true, "raw command path used\n");
*mem_cmd = (struct cxl_mem_command) {
.info = {
@@ -431,7 +428,7 @@ static int cxl_to_mem_cmd_raw(struct cxl_mem_command *mem_cmd,
static int cxl_to_mem_cmd(struct cxl_mem_command *mem_cmd,
const struct cxl_send_command *send_cmd,
- struct cxl_memdev_state *mds)
+ struct cxl_mailbox *cxl_mbox)
{
struct cxl_mem_command *c = &cxl_mem_commands[send_cmd->id];
const struct cxl_command_info *info = &c->info;
@@ -446,11 +443,11 @@ static int cxl_to_mem_cmd(struct cxl_mem_command *mem_cmd,
return -EINVAL;
/* Check that the command is enabled for hardware */
- if (!test_bit(info->id, mds->enabled_cmds))
+ if (!test_bit(info->id, cxl_mbox->enabled_cmds))
return -ENOTTY;
/* Check that the command is not claimed for exclusive kernel use */
- if (test_bit(info->id, mds->exclusive_cmds))
+ if (test_bit(info->id, cxl_mbox->exclusive_cmds))
return -EBUSY;
/* Check the input buffer is the expected size */
@@ -479,7 +476,7 @@ static int cxl_to_mem_cmd(struct cxl_mem_command *mem_cmd,
/**
* cxl_validate_cmd_from_user() - Check fields for CXL_MEM_SEND_COMMAND.
* @mbox_cmd: Sanitized and populated &struct cxl_mbox_cmd.
- * @mds: The driver data for the operation
+ * @cxl_mbox: CXL mailbox context
* @send_cmd: &struct cxl_send_command copied in from userspace.
*
* Return:
@@ -494,10 +491,9 @@ static int cxl_to_mem_cmd(struct cxl_mem_command *mem_cmd,
* safe to send to the hardware.
*/
static int cxl_validate_cmd_from_user(struct cxl_mbox_cmd *mbox_cmd,
- struct cxl_memdev_state *mds,
+ struct cxl_mailbox *cxl_mbox,
const struct cxl_send_command *send_cmd)
{
- struct cxl_mailbox *cxl_mbox = &mds->cxlds.cxl_mbox;
struct cxl_mem_command mem_cmd;
int rc;
@@ -514,24 +510,23 @@ static int cxl_validate_cmd_from_user(struct cxl_mbox_cmd *mbox_cmd,
/* Sanitize and construct a cxl_mem_command */
if (send_cmd->id == CXL_MEM_COMMAND_ID_RAW)
- rc = cxl_to_mem_cmd_raw(&mem_cmd, send_cmd, mds);
+ rc = cxl_to_mem_cmd_raw(&mem_cmd, send_cmd, cxl_mbox);
else
- rc = cxl_to_mem_cmd(&mem_cmd, send_cmd, mds);
+ rc = cxl_to_mem_cmd(&mem_cmd, send_cmd, cxl_mbox);
if (rc)
return rc;
/* Sanitize and construct a cxl_mbox_cmd */
- return cxl_mbox_cmd_ctor(mbox_cmd, mds, mem_cmd.opcode,
+ return cxl_mbox_cmd_ctor(mbox_cmd, cxl_mbox, mem_cmd.opcode,
mem_cmd.info.size_in, mem_cmd.info.size_out,
send_cmd->in.payload);
}
-int cxl_query_cmd(struct cxl_memdev *cxlmd,
+int cxl_query_cmd(struct cxl_mailbox *cxl_mbox,
struct cxl_mem_query_commands __user *q)
{
- struct cxl_memdev_state *mds = to_cxl_memdev_state(cxlmd->cxlds);
- struct device *dev = &cxlmd->dev;
+ struct device *dev = cxl_mbox->host;
struct cxl_mem_command *cmd;
u32 n_commands;
int j = 0;
@@ -552,9 +547,9 @@ int cxl_query_cmd(struct cxl_memdev *cxlmd,
cxl_for_each_cmd(cmd) {
struct cxl_command_info info = cmd->info;
- if (test_bit(info.id, mds->enabled_cmds))
+ if (test_bit(info.id, cxl_mbox->enabled_cmds))
info.flags |= CXL_MEM_COMMAND_FLAG_ENABLED;
- if (test_bit(info.id, mds->exclusive_cmds))
+ if (test_bit(info.id, cxl_mbox->exclusive_cmds))
info.flags |= CXL_MEM_COMMAND_FLAG_EXCLUSIVE;
if (copy_to_user(&q->commands[j++], &info, sizeof(info)))
@@ -569,7 +564,7 @@ int cxl_query_cmd(struct cxl_memdev *cxlmd,
/**
* handle_mailbox_cmd_from_user() - Dispatch a mailbox command for userspace.
- * @mds: The driver data for the operation
+ * @cxl_mbox: The mailbox context for the operation.
* @mbox_cmd: The validated mailbox command.
* @out_payload: Pointer to userspace's output payload.
* @size_out: (Input) Max payload size to copy out.
@@ -590,13 +585,12 @@ int cxl_query_cmd(struct cxl_memdev *cxlmd,
*
* See cxl_send_cmd().
*/
-static int handle_mailbox_cmd_from_user(struct cxl_memdev_state *mds,
+static int handle_mailbox_cmd_from_user(struct cxl_mailbox *cxl_mbox,
struct cxl_mbox_cmd *mbox_cmd,
u64 out_payload, s32 *size_out,
u32 *retval)
{
- struct cxl_mailbox *cxl_mbox = &mds->cxlds.cxl_mbox;
- struct device *dev = mds->cxlds.dev;
+ struct device *dev = cxl_mbox->host;
int rc;
dev_dbg(dev,
@@ -633,10 +627,9 @@ out:
return rc;
}
-int cxl_send_cmd(struct cxl_memdev *cxlmd, struct cxl_send_command __user *s)
+int cxl_send_cmd(struct cxl_mailbox *cxl_mbox, struct cxl_send_command __user *s)
{
- struct cxl_memdev_state *mds = to_cxl_memdev_state(cxlmd->cxlds);
- struct device *dev = &cxlmd->dev;
+ struct device *dev = cxl_mbox->host;
struct cxl_send_command send;
struct cxl_mbox_cmd mbox_cmd;
int rc;
@@ -646,11 +639,11 @@ int cxl_send_cmd(struct cxl_memdev *cxlmd, struct cxl_send_command __user *s)
if (copy_from_user(&send, s, sizeof(send)))
return -EFAULT;
- rc = cxl_validate_cmd_from_user(&mbox_cmd, mds, &send);
+ rc = cxl_validate_cmd_from_user(&mbox_cmd, cxl_mbox, &send);
if (rc)
return rc;
- rc = handle_mailbox_cmd_from_user(mds, &mbox_cmd, send.out.payload,
+ rc = handle_mailbox_cmd_from_user(cxl_mbox, &mbox_cmd, send.out.payload,
&send.out.size, &send.retval);
if (rc)
return rc;
@@ -713,6 +706,35 @@ static int cxl_xfer_log(struct cxl_memdev_state *mds, uuid_t *uuid,
return 0;
}
+static int check_features_opcodes(u16 opcode, int *ro_cmds, int *wr_cmds)
+{
+ switch (opcode) {
+ case CXL_MBOX_OP_GET_SUPPORTED_FEATURES:
+ case CXL_MBOX_OP_GET_FEATURE:
+ (*ro_cmds)++;
+ return 1;
+ case CXL_MBOX_OP_SET_FEATURE:
+ (*wr_cmds)++;
+ return 1;
+ default:
+ return 0;
+ }
+}
+
+/* 'Get Supported Features' and 'Get Feature' */
+#define MAX_FEATURES_READ_CMDS 2
+static void set_features_cap(struct cxl_mailbox *cxl_mbox,
+ int ro_cmds, int wr_cmds)
+{
+ /* Setting up Features capability while walking the CEL */
+ if (ro_cmds == MAX_FEATURES_READ_CMDS) {
+ if (wr_cmds)
+ cxl_mbox->feat_cap = CXL_FEATURES_RW;
+ else
+ cxl_mbox->feat_cap = CXL_FEATURES_RO;
+ }
+}
+
/**
* cxl_walk_cel() - Walk through the Command Effects Log.
* @mds: The driver data for the operation
@@ -724,10 +746,11 @@ static int cxl_xfer_log(struct cxl_memdev_state *mds, uuid_t *uuid,
*/
static void cxl_walk_cel(struct cxl_memdev_state *mds, size_t size, u8 *cel)
{
+ struct cxl_mailbox *cxl_mbox = &mds->cxlds.cxl_mbox;
struct cxl_cel_entry *cel_entry;
const int cel_entries = size / sizeof(*cel_entry);
struct device *dev = mds->cxlds.dev;
- int i;
+ int i, ro_cmds = 0, wr_cmds = 0;
cel_entry = (struct cxl_cel_entry *) cel;
@@ -737,10 +760,13 @@ static void cxl_walk_cel(struct cxl_memdev_state *mds, size_t size, u8 *cel)
int enabled = 0;
if (cmd) {
- set_bit(cmd->info.id, mds->enabled_cmds);
+ set_bit(cmd->info.id, cxl_mbox->enabled_cmds);
enabled++;
}
+ enabled += check_features_opcodes(opcode, &ro_cmds,
+ &wr_cmds);
+
if (cxl_is_poison_command(opcode)) {
cxl_set_poison_cmd_enabled(&mds->poison, opcode);
enabled++;
@@ -754,6 +780,8 @@ static void cxl_walk_cel(struct cxl_memdev_state *mds, size_t size, u8 *cel)
dev_dbg(dev, "Opcode 0x%04x %s\n", opcode,
enabled ? "enabled" : "unsupported by driver");
}
+
+ set_features_cap(cxl_mbox, ro_cmds, wr_cmds);
}
static struct cxl_mbox_get_supported_logs *cxl_get_gsl(struct cxl_memdev_state *mds)
@@ -807,6 +835,7 @@ static const uuid_t log_uuid[] = {
*/
int cxl_enumerate_cmds(struct cxl_memdev_state *mds)
{
+ struct cxl_mailbox *cxl_mbox = &mds->cxlds.cxl_mbox;
struct cxl_mbox_get_supported_logs *gsl;
struct device *dev = mds->cxlds.dev;
struct cxl_mem_command *cmd;
@@ -845,7 +874,7 @@ int cxl_enumerate_cmds(struct cxl_memdev_state *mds)
/* In case CEL was bogus, enable some default commands. */
cxl_for_each_cmd(cmd)
if (cmd->flags & CXL_CMD_FLAG_FORCE_ENABLE)
- set_bit(cmd->info.id, mds->enabled_cmds);
+ set_bit(cmd->info.id, cxl_mbox->enabled_cmds);
/* Found the required CEL */
rc = 0;
@@ -1448,6 +1477,7 @@ struct cxl_memdev_state *cxl_memdev_state_create(struct device *dev)
mutex_init(&mds->event.log_lock);
mds->cxlds.dev = dev;
mds->cxlds.reg_map.host = dev;
+ mds->cxlds.cxl_mbox.host = dev;
mds->cxlds.reg_map.resource = CXL_RESOURCE_NONE;
mds->cxlds.type = CXL_DEVTYPE_CLASSMEM;
mds->ram_perf.qos_class = CXL_QOS_CLASS_INVALID;
diff --git a/drivers/cxl/core/memdev.c b/drivers/cxl/core/memdev.c
index ae3dfcbe8938..2e2e035abdaa 100644
--- a/drivers/cxl/core/memdev.c
+++ b/drivers/cxl/core/memdev.c
@@ -564,9 +564,11 @@ EXPORT_SYMBOL_NS_GPL(is_cxl_memdev, "CXL");
void set_exclusive_cxl_commands(struct cxl_memdev_state *mds,
unsigned long *cmds)
{
+ struct cxl_mailbox *cxl_mbox = &mds->cxlds.cxl_mbox;
+
down_write(&cxl_memdev_rwsem);
- bitmap_or(mds->exclusive_cmds, mds->exclusive_cmds, cmds,
- CXL_MEM_COMMAND_ID_MAX);
+ bitmap_or(cxl_mbox->exclusive_cmds, cxl_mbox->exclusive_cmds,
+ cmds, CXL_MEM_COMMAND_ID_MAX);
up_write(&cxl_memdev_rwsem);
}
EXPORT_SYMBOL_NS_GPL(set_exclusive_cxl_commands, "CXL");
@@ -579,9 +581,11 @@ EXPORT_SYMBOL_NS_GPL(set_exclusive_cxl_commands, "CXL");
void clear_exclusive_cxl_commands(struct cxl_memdev_state *mds,
unsigned long *cmds)
{
+ struct cxl_mailbox *cxl_mbox = &mds->cxlds.cxl_mbox;
+
down_write(&cxl_memdev_rwsem);
- bitmap_andnot(mds->exclusive_cmds, mds->exclusive_cmds, cmds,
- CXL_MEM_COMMAND_ID_MAX);
+ bitmap_andnot(cxl_mbox->exclusive_cmds, cxl_mbox->exclusive_cmds,
+ cmds, CXL_MEM_COMMAND_ID_MAX);
up_write(&cxl_memdev_rwsem);
}
EXPORT_SYMBOL_NS_GPL(clear_exclusive_cxl_commands, "CXL");
@@ -656,11 +660,14 @@ err:
static long __cxl_memdev_ioctl(struct cxl_memdev *cxlmd, unsigned int cmd,
unsigned long arg)
{
+ struct cxl_memdev_state *mds = to_cxl_memdev_state(cxlmd->cxlds);
+ struct cxl_mailbox *cxl_mbox = &mds->cxlds.cxl_mbox;
+
switch (cmd) {
case CXL_MEM_QUERY_COMMANDS:
- return cxl_query_cmd(cxlmd, (void __user *)arg);
+ return cxl_query_cmd(cxl_mbox, (void __user *)arg);
case CXL_MEM_SEND_COMMAND:
- return cxl_send_cmd(cxlmd, (void __user *)arg);
+ return cxl_send_cmd(cxl_mbox, (void __user *)arg);
default:
return -ENOTTY;
}
@@ -994,10 +1001,11 @@ static void cxl_remove_fw_upload(void *fwl)
int devm_cxl_setup_fw_upload(struct device *host, struct cxl_memdev_state *mds)
{
struct cxl_dev_state *cxlds = &mds->cxlds;
+ struct cxl_mailbox *cxl_mbox = &cxlds->cxl_mbox;
struct device *dev = &cxlds->cxlmd->dev;
struct fw_upload *fwl;
- if (!test_bit(CXL_MEM_COMMAND_ID_GET_FW_INFO, mds->enabled_cmds))
+ if (!test_bit(CXL_MEM_COMMAND_ID_GET_FW_INFO, cxl_mbox->enabled_cmds))
return 0;
fwl = firmware_upload_register(THIS_MODULE, dev, dev_name(dev),
diff --git a/drivers/cxl/cxlmem.h b/drivers/cxl/cxlmem.h
index 2a25d1957ddb..dd2b7060d501 100644
--- a/drivers/cxl/cxlmem.h
+++ b/drivers/cxl/cxlmem.h
@@ -106,42 +106,6 @@ static inline struct cxl_ep *cxl_ep_load(struct cxl_port *port,
return xa_load(&port->endpoints, (unsigned long)&cxlmd->dev);
}
-/**
- * struct cxl_mbox_cmd - A command to be submitted to hardware.
- * @opcode: (input) The command set and command submitted to hardware.
- * @payload_in: (input) Pointer to the input payload.
- * @payload_out: (output) Pointer to the output payload. Must be allocated by
- * the caller.
- * @size_in: (input) Number of bytes to load from @payload_in.
- * @size_out: (input) Max number of bytes loaded into @payload_out.
- * (output) Number of bytes generated by the device. For fixed size
- * outputs commands this is always expected to be deterministic. For
- * variable sized output commands, it tells the exact number of bytes
- * written.
- * @min_out: (input) internal command output payload size validation
- * @poll_count: (input) Number of timeouts to attempt.
- * @poll_interval_ms: (input) Time between mailbox background command polling
- * interval timeouts.
- * @return_code: (output) Error code returned from hardware.
- *
- * This is the primary mechanism used to send commands to the hardware.
- * All the fields except @payload_* correspond exactly to the fields described in
- * Command Register section of the CXL 2.0 8.2.8.4.5. @payload_in and
- * @payload_out are written to, and read from the Command Payload Registers
- * defined in CXL 2.0 8.2.8.4.8.
- */
-struct cxl_mbox_cmd {
- u16 opcode;
- void *payload_in;
- void *payload_out;
- size_t size_in;
- size_t size_out;
- size_t min_out;
- int poll_count;
- int poll_interval_ms;
- u16 return_code;
-};
-
/*
* Per CXL 3.0 Section 8.2.8.4.5.1
*/
@@ -428,6 +392,7 @@ struct cxl_dpa_perf {
* @serial: PCIe Device Serial Number
* @type: Generic Memory Class device or Vendor Specific Memory device
* @cxl_mbox: CXL mailbox context
+ * @cxlfs: CXL features context
*/
struct cxl_dev_state {
struct device *dev;
@@ -443,6 +408,9 @@ struct cxl_dev_state {
u64 serial;
enum cxl_devtype type;
struct cxl_mailbox cxl_mbox;
+#ifdef CONFIG_CXL_FEATURES
+ struct cxl_features_state *cxlfs;
+#endif
};
static inline struct cxl_dev_state *mbox_to_cxlds(struct cxl_mailbox *cxl_mbox)
@@ -461,8 +429,6 @@ static inline struct cxl_dev_state *mbox_to_cxlds(struct cxl_mailbox *cxl_mbox)
* @lsa_size: Size of Label Storage Area
* (CXL 2.0 8.2.9.5.1.1 Identify Memory Device)
* @firmware_version: Firmware version for the memory device.
- * @enabled_cmds: Hardware commands found enabled in CEL.
- * @exclusive_cmds: Commands that are kernel-internal only
* @total_bytes: sum of all possible capacities
* @volatile_only_bytes: hard volatile capacity
* @persistent_only_bytes: hard persistent capacity
@@ -485,8 +451,6 @@ struct cxl_memdev_state {
struct cxl_dev_state cxlds;
size_t lsa_size;
char firmware_version[0x10];
- DECLARE_BITMAP(enabled_cmds, CXL_MEM_COMMAND_ID_MAX);
- DECLARE_BITMAP(exclusive_cmds, CXL_MEM_COMMAND_ID_MAX);
u64 total_bytes;
u64 volatile_only_bytes;
u64 persistent_only_bytes;
@@ -530,6 +494,9 @@ enum cxl_opcode {
CXL_MBOX_OP_GET_LOG_CAPS = 0x0402,
CXL_MBOX_OP_CLEAR_LOG = 0x0403,
CXL_MBOX_OP_GET_SUP_LOG_SUBLIST = 0x0405,
+ CXL_MBOX_OP_GET_SUPPORTED_FEATURES = 0x0500,
+ CXL_MBOX_OP_GET_FEATURE = 0x0501,
+ CXL_MBOX_OP_SET_FEATURE = 0x0502,
CXL_MBOX_OP_IDENTIFY = 0x4000,
CXL_MBOX_OP_GET_PARTITION_INFO = 0x4100,
CXL_MBOX_OP_SET_PARTITION_INFO = 0x4101,
diff --git a/drivers/cxl/pci.c b/drivers/cxl/pci.c
index a96e54c6259e..993fa60fe453 100644
--- a/drivers/cxl/pci.c
+++ b/drivers/cxl/pci.c
@@ -997,6 +997,10 @@ static int cxl_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id)
if (rc)
return rc;
+ rc = devm_cxl_setup_features(cxlds);
+ if (rc)
+ dev_dbg(&pdev->dev, "No CXL Features discovered\n");
+
cxlmd = devm_cxl_add_memdev(&pdev->dev, cxlds);
if (IS_ERR(cxlmd))
return PTR_ERR(cxlmd);
@@ -1009,6 +1013,10 @@ static int cxl_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id)
if (rc)
return rc;
+ rc = devm_cxl_setup_fwctl(cxlmd);
+ if (rc)
+ dev_dbg(&pdev->dev, "No CXL FWCTL setup\n");
+
pmu_count = cxl_count_regblock(pdev, CXL_REGLOC_RBI_PMU);
if (pmu_count < 0)
return pmu_count;
diff --git a/drivers/fwctl/Kconfig b/drivers/fwctl/Kconfig
new file mode 100644
index 000000000000..b5583b12a011
--- /dev/null
+++ b/drivers/fwctl/Kconfig
@@ -0,0 +1,33 @@
+# SPDX-License-Identifier: GPL-2.0-only
+menuconfig FWCTL
+ tristate "fwctl device firmware access framework"
+ help
+ fwctl provides a userspace API for restricted access to communicate
+ with on-device firmware. The communication channel is intended to
+ support a wide range of lockdown compatible device behaviors including
+ manipulating device FLASH, debugging, and other activities that don't
+ fit neatly into an existing subsystem.
+
+if FWCTL
+config FWCTL_MLX5
+ tristate "mlx5 ConnectX control fwctl driver"
+ depends on MLX5_CORE
+ help
+ MLX5 provides an interface for the user process to access the debug and
+ configuration registers of the ConnectX hardware family
+ (NICs, PCI switches and SmartNIC SoCs).
+ This will allow configuration and debug tools to work out of the box on a
+ mainstream kernel.
+
+ If you don't know what to do here, say N.
+
+config FWCTL_PDS
+ tristate "AMD/Pensando pds fwctl driver"
+ depends on PDS_CORE
+ help
+ The pds_fwctl driver provides an fwctl interface for a user process
+ to access the debug and configuration information of the AMD/Pensando
+ DSC hardware family.
+
+ If you don't know what to do here, say N.
+endif
diff --git a/drivers/fwctl/Makefile b/drivers/fwctl/Makefile
new file mode 100644
index 000000000000..c093b5f661d6
--- /dev/null
+++ b/drivers/fwctl/Makefile
@@ -0,0 +1,6 @@
+# SPDX-License-Identifier: GPL-2.0
+obj-$(CONFIG_FWCTL) += fwctl.o
+obj-$(CONFIG_FWCTL_MLX5) += mlx5/
+obj-$(CONFIG_FWCTL_PDS) += pds/
+
+fwctl-y += main.o
diff --git a/drivers/fwctl/main.c b/drivers/fwctl/main.c
new file mode 100644
index 000000000000..cb1ac9c40239
--- /dev/null
+++ b/drivers/fwctl/main.c
@@ -0,0 +1,421 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright (c) 2024-2025, NVIDIA CORPORATION & AFFILIATES
+ */
+#define pr_fmt(fmt) "fwctl: " fmt
+#include <linux/fwctl.h>
+
+#include <linux/container_of.h>
+#include <linux/fs.h>
+#include <linux/module.h>
+#include <linux/sizes.h>
+#include <linux/slab.h>
+
+#include <uapi/fwctl/fwctl.h>
+
+enum {
+ FWCTL_MAX_DEVICES = 4096,
+ MAX_RPC_LEN = SZ_2M,
+};
+static_assert(FWCTL_MAX_DEVICES < (1U << MINORBITS));
+
+static dev_t fwctl_dev;
+static DEFINE_IDA(fwctl_ida);
+static unsigned long fwctl_tainted;
+
+struct fwctl_ucmd {
+ struct fwctl_uctx *uctx;
+ void __user *ubuffer;
+ void *cmd;
+ u32 user_size;
+};
+
+static int ucmd_respond(struct fwctl_ucmd *ucmd, size_t cmd_len)
+{
+ if (copy_to_user(ucmd->ubuffer, ucmd->cmd,
+ min_t(size_t, ucmd->user_size, cmd_len)))
+ return -EFAULT;
+ return 0;
+}
+
+static int copy_to_user_zero_pad(void __user *to, const void *from,
+ size_t from_len, size_t user_len)
+{
+ size_t copy_len;
+
+ copy_len = min(from_len, user_len);
+ if (copy_to_user(to, from, copy_len))
+ return -EFAULT;
+ if (copy_len < user_len) {
+ if (clear_user(to + copy_len, user_len - copy_len))
+ return -EFAULT;
+ }
+ return 0;
+}
+
+static int fwctl_cmd_info(struct fwctl_ucmd *ucmd)
+{
+ struct fwctl_device *fwctl = ucmd->uctx->fwctl;
+ struct fwctl_info *cmd = ucmd->cmd;
+ size_t driver_info_len = 0;
+
+ if (cmd->flags)
+ return -EOPNOTSUPP;
+
+ if (!fwctl->ops->info && cmd->device_data_len) {
+ if (clear_user(u64_to_user_ptr(cmd->out_device_data),
+ cmd->device_data_len))
+ return -EFAULT;
+ } else if (cmd->device_data_len) {
+ void *driver_info __free(kfree) =
+ fwctl->ops->info(ucmd->uctx, &driver_info_len);
+ if (IS_ERR(driver_info))
+ return PTR_ERR(driver_info);
+
+ if (copy_to_user_zero_pad(u64_to_user_ptr(cmd->out_device_data),
+ driver_info, driver_info_len,
+ cmd->device_data_len))
+ return -EFAULT;
+ }
+
+ cmd->out_device_type = fwctl->ops->device_type;
+ cmd->device_data_len = driver_info_len;
+ return ucmd_respond(ucmd, sizeof(*cmd));
+}
+
+static int fwctl_cmd_rpc(struct fwctl_ucmd *ucmd)
+{
+ struct fwctl_device *fwctl = ucmd->uctx->fwctl;
+ struct fwctl_rpc *cmd = ucmd->cmd;
+ size_t out_len;
+
+ if (cmd->in_len > MAX_RPC_LEN || cmd->out_len > MAX_RPC_LEN)
+ return -EMSGSIZE;
+
+ switch (cmd->scope) {
+ case FWCTL_RPC_CONFIGURATION:
+ case FWCTL_RPC_DEBUG_READ_ONLY:
+ break;
+
+ case FWCTL_RPC_DEBUG_WRITE_FULL:
+ if (!capable(CAP_SYS_RAWIO))
+ return -EPERM;
+ fallthrough;
+ case FWCTL_RPC_DEBUG_WRITE:
+ if (!test_and_set_bit(0, &fwctl_tainted)) {
+ dev_warn(
+ &fwctl->dev,
+ "%s(%d): has requested full access to the physical device device",
+ current->comm, task_pid_nr(current));
+ add_taint(TAINT_FWCTL, LOCKDEP_STILL_OK);
+ }
+ break;
+ default:
+ return -EOPNOTSUPP;
+ }
+
+ void *inbuf __free(kvfree) = kvzalloc(cmd->in_len, GFP_KERNEL_ACCOUNT);
+ if (!inbuf)
+ return -ENOMEM;
+ if (copy_from_user(inbuf, u64_to_user_ptr(cmd->in), cmd->in_len))
+ return -EFAULT;
+
+ out_len = cmd->out_len;
+ void *outbuf __free(kvfree) = fwctl->ops->fw_rpc(
+ ucmd->uctx, cmd->scope, inbuf, cmd->in_len, &out_len);
+ if (IS_ERR(outbuf))
+ return PTR_ERR(outbuf);
+ if (outbuf == inbuf) {
+ /* The driver can re-use inbuf as outbuf */
+ inbuf = NULL;
+ }
+
+ if (copy_to_user(u64_to_user_ptr(cmd->out), outbuf,
+ min(cmd->out_len, out_len)))
+ return -EFAULT;
+
+ cmd->out_len = out_len;
+ return ucmd_respond(ucmd, sizeof(*cmd));
+}
+
+/* On stack memory for the ioctl structs */
+union fwctl_ucmd_buffer {
+ struct fwctl_info info;
+ struct fwctl_rpc rpc;
+};
+
+struct fwctl_ioctl_op {
+ unsigned int size;
+ unsigned int min_size;
+ unsigned int ioctl_num;
+ int (*execute)(struct fwctl_ucmd *ucmd);
+};
+
+#define IOCTL_OP(_ioctl, _fn, _struct, _last) \
+ [_IOC_NR(_ioctl) - FWCTL_CMD_BASE] = { \
+ .size = sizeof(_struct) + \
+ BUILD_BUG_ON_ZERO(sizeof(union fwctl_ucmd_buffer) < \
+ sizeof(_struct)), \
+ .min_size = offsetofend(_struct, _last), \
+ .ioctl_num = _ioctl, \
+ .execute = _fn, \
+ }
+static const struct fwctl_ioctl_op fwctl_ioctl_ops[] = {
+ IOCTL_OP(FWCTL_INFO, fwctl_cmd_info, struct fwctl_info, out_device_data),
+ IOCTL_OP(FWCTL_RPC, fwctl_cmd_rpc, struct fwctl_rpc, out),
+};
+
+static long fwctl_fops_ioctl(struct file *filp, unsigned int cmd,
+ unsigned long arg)
+{
+ struct fwctl_uctx *uctx = filp->private_data;
+ const struct fwctl_ioctl_op *op;
+ struct fwctl_ucmd ucmd = {};
+ union fwctl_ucmd_buffer buf;
+ unsigned int nr;
+ int ret;
+
+ nr = _IOC_NR(cmd);
+ if ((nr - FWCTL_CMD_BASE) >= ARRAY_SIZE(fwctl_ioctl_ops))
+ return -ENOIOCTLCMD;
+
+ op = &fwctl_ioctl_ops[nr - FWCTL_CMD_BASE];
+ if (op->ioctl_num != cmd)
+ return -ENOIOCTLCMD;
+
+ ucmd.uctx = uctx;
+ ucmd.cmd = &buf;
+ ucmd.ubuffer = (void __user *)arg;
+ ret = get_user(ucmd.user_size, (u32 __user *)ucmd.ubuffer);
+ if (ret)
+ return ret;
+
+ if (ucmd.user_size < op->min_size)
+ return -EINVAL;
+
+ ret = copy_struct_from_user(ucmd.cmd, op->size, ucmd.ubuffer,
+ ucmd.user_size);
+ if (ret)
+ return ret;
+
+ guard(rwsem_read)(&uctx->fwctl->registration_lock);
+ if (!uctx->fwctl->ops)
+ return -ENODEV;
+ return op->execute(&ucmd);
+}
+
+static int fwctl_fops_open(struct inode *inode, struct file *filp)
+{
+ struct fwctl_device *fwctl =
+ container_of(inode->i_cdev, struct fwctl_device, cdev);
+ int ret;
+
+ guard(rwsem_read)(&fwctl->registration_lock);
+ if (!fwctl->ops)
+ return -ENODEV;
+
+ struct fwctl_uctx *uctx __free(kfree) =
+ kzalloc(fwctl->ops->uctx_size, GFP_KERNEL_ACCOUNT);
+ if (!uctx)
+ return -ENOMEM;
+
+ uctx->fwctl = fwctl;
+ ret = fwctl->ops->open_uctx(uctx);
+ if (ret)
+ return ret;
+
+ scoped_guard(mutex, &fwctl->uctx_list_lock) {
+ list_add_tail(&uctx->uctx_list_entry, &fwctl->uctx_list);
+ }
+
+ get_device(&fwctl->dev);
+ filp->private_data = no_free_ptr(uctx);
+ return 0;
+}
+
+static void fwctl_destroy_uctx(struct fwctl_uctx *uctx)
+{
+ lockdep_assert_held(&uctx->fwctl->uctx_list_lock);
+ list_del(&uctx->uctx_list_entry);
+ uctx->fwctl->ops->close_uctx(uctx);
+}
+
+static int fwctl_fops_release(struct inode *inode, struct file *filp)
+{
+ struct fwctl_uctx *uctx = filp->private_data;
+ struct fwctl_device *fwctl = uctx->fwctl;
+
+ scoped_guard(rwsem_read, &fwctl->registration_lock) {
+ /*
+ * NULL ops means fwctl_unregister() has already removed the
+ * driver and destroyed the uctx.
+ */
+ if (fwctl->ops) {
+ guard(mutex)(&fwctl->uctx_list_lock);
+ fwctl_destroy_uctx(uctx);
+ }
+ }
+
+ kfree(uctx);
+ fwctl_put(fwctl);
+ return 0;
+}
+
+static const struct file_operations fwctl_fops = {
+ .owner = THIS_MODULE,
+ .open = fwctl_fops_open,
+ .release = fwctl_fops_release,
+ .unlocked_ioctl = fwctl_fops_ioctl,
+};
+
+static void fwctl_device_release(struct device *device)
+{
+ struct fwctl_device *fwctl =
+ container_of(device, struct fwctl_device, dev);
+
+ ida_free(&fwctl_ida, fwctl->dev.devt - fwctl_dev);
+ mutex_destroy(&fwctl->uctx_list_lock);
+ kfree(fwctl);
+}
+
+static char *fwctl_devnode(const struct device *dev, umode_t *mode)
+{
+ return kasprintf(GFP_KERNEL, "fwctl/%s", dev_name(dev));
+}
+
+static struct class fwctl_class = {
+ .name = "fwctl",
+ .dev_release = fwctl_device_release,
+ .devnode = fwctl_devnode,
+};
+
+static struct fwctl_device *
+_alloc_device(struct device *parent, const struct fwctl_ops *ops, size_t size)
+{
+ struct fwctl_device *fwctl __free(kfree) = kzalloc(size, GFP_KERNEL);
+ int devnum;
+
+ if (!fwctl)
+ return NULL;
+
+ devnum = ida_alloc_max(&fwctl_ida, FWCTL_MAX_DEVICES - 1, GFP_KERNEL);
+ if (devnum < 0)
+ return NULL;
+
+ fwctl->dev.devt = fwctl_dev + devnum;
+ fwctl->dev.class = &fwctl_class;
+ fwctl->dev.parent = parent;
+
+ init_rwsem(&fwctl->registration_lock);
+ mutex_init(&fwctl->uctx_list_lock);
+ INIT_LIST_HEAD(&fwctl->uctx_list);
+
+ device_initialize(&fwctl->dev);
+ return_ptr(fwctl);
+}
+
+/* Drivers use the fwctl_alloc_device() wrapper */
+struct fwctl_device *_fwctl_alloc_device(struct device *parent,
+ const struct fwctl_ops *ops,
+ size_t size)
+{
+ struct fwctl_device *fwctl __free(fwctl) =
+ _alloc_device(parent, ops, size);
+
+ if (!fwctl)
+ return NULL;
+
+ cdev_init(&fwctl->cdev, &fwctl_fops);
+ /*
+ * The driver module is protected by fwctl_register/unregister(),
+ * unregister won't complete until we are done with the driver's module.
+ */
+ fwctl->cdev.owner = THIS_MODULE;
+
+ if (dev_set_name(&fwctl->dev, "fwctl%d", fwctl->dev.devt - fwctl_dev))
+ return NULL;
+
+ fwctl->ops = ops;
+ return_ptr(fwctl);
+}
+EXPORT_SYMBOL_NS_GPL(_fwctl_alloc_device, "FWCTL");
+
+/**
+ * fwctl_register - Register a new device to the subsystem
+ * @fwctl: Previously allocated fwctl_device
+ *
+ * On return the device is visible through sysfs and /dev, driver ops may be
+ * called.
+ */
+int fwctl_register(struct fwctl_device *fwctl)
+{
+ return cdev_device_add(&fwctl->cdev, &fwctl->dev);
+}
+EXPORT_SYMBOL_NS_GPL(fwctl_register, "FWCTL");
+
+/**
+ * fwctl_unregister - Unregister a device from the subsystem
+ * @fwctl: Previously allocated and registered fwctl_device
+ *
+ * Undoes fwctl_register(). On return no driver ops will be called. The
+ * caller must still call fwctl_put() to free the fwctl.
+ *
+ * Unregister will return even if userspace still has file descriptors open.
+ * This will call ops->close_uctx() on any open FDs and after return no driver
+ * op will be called. The FDs remain open but all fops will return -ENODEV.
+ *
+ * The design of fwctl allows this sort of disassociation of the driver from the
+ * subsystem primarily by keeping memory allocations owned by the core subsystem.
+ * The fwctl_device and fwctl_uctx can both be freed without requiring a driver
+ * callback. This allows the module to remain unlocked while FDs are open.
+ */
+void fwctl_unregister(struct fwctl_device *fwctl)
+{
+ struct fwctl_uctx *uctx;
+
+ cdev_device_del(&fwctl->cdev, &fwctl->dev);
+
+ /* Disable and free the driver's resources for any still open FDs. */
+ guard(rwsem_write)(&fwctl->registration_lock);
+ guard(mutex)(&fwctl->uctx_list_lock);
+ while ((uctx = list_first_entry_or_null(&fwctl->uctx_list,
+ struct fwctl_uctx,
+ uctx_list_entry)))
+ fwctl_destroy_uctx(uctx);
+
+ /*
+ * The driver module may unload after this returns, the op pointer will
+ * not be valid.
+ */
+ fwctl->ops = NULL;
+}
+EXPORT_SYMBOL_NS_GPL(fwctl_unregister, "FWCTL");
+
+static int __init fwctl_init(void)
+{
+ int ret;
+
+ ret = alloc_chrdev_region(&fwctl_dev, 0, FWCTL_MAX_DEVICES, "fwctl");
+ if (ret)
+ return ret;
+
+ ret = class_register(&fwctl_class);
+ if (ret)
+ goto err_chrdev;
+ return 0;
+
+err_chrdev:
+ unregister_chrdev_region(fwctl_dev, FWCTL_MAX_DEVICES);
+ return ret;
+}
+
+static void __exit fwctl_exit(void)
+{
+ class_unregister(&fwctl_class);
+ unregister_chrdev_region(fwctl_dev, FWCTL_MAX_DEVICES);
+}
+
+module_init(fwctl_init);
+module_exit(fwctl_exit);
+MODULE_DESCRIPTION("fwctl device firmware access framework");
+MODULE_LICENSE("GPL");
diff --git a/drivers/fwctl/mlx5/Makefile b/drivers/fwctl/mlx5/Makefile
new file mode 100644
index 000000000000..139a23e3c7c5
--- /dev/null
+++ b/drivers/fwctl/mlx5/Makefile
@@ -0,0 +1,4 @@
+# SPDX-License-Identifier: GPL-2.0
+obj-$(CONFIG_FWCTL_MLX5) += mlx5_fwctl.o
+
+mlx5_fwctl-y += main.o
diff --git a/drivers/fwctl/mlx5/main.c b/drivers/fwctl/mlx5/main.c
new file mode 100644
index 000000000000..f93aa0cecdb9
--- /dev/null
+++ b/drivers/fwctl/mlx5/main.c
@@ -0,0 +1,410 @@
+// SPDX-License-Identifier: BSD-3-Clause OR GPL-2.0
+/*
+ * Copyright (c) 2024-2025, NVIDIA CORPORATION & AFFILIATES
+ */
+#include <linux/fwctl.h>
+#include <linux/auxiliary_bus.h>
+#include <linux/mlx5/device.h>
+#include <linux/mlx5/driver.h>
+#include <uapi/fwctl/mlx5.h>
+
+#define mlx5ctl_err(mcdev, format, ...) \
+ dev_err(&mcdev->fwctl.dev, format, ##__VA_ARGS__)
+
+#define mlx5ctl_dbg(mcdev, format, ...) \
+ dev_dbg(&mcdev->fwctl.dev, "PID %u: " format, current->pid, \
+ ##__VA_ARGS__)
+
+struct mlx5ctl_uctx {
+ struct fwctl_uctx uctx;
+ u32 uctx_caps;
+ u32 uctx_uid;
+};
+
+struct mlx5ctl_dev {
+ struct fwctl_device fwctl;
+ struct mlx5_core_dev *mdev;
+};
+DEFINE_FREE(mlx5ctl, struct mlx5ctl_dev *, if (_T) fwctl_put(&_T->fwctl));
+
+struct mlx5_ifc_mbox_in_hdr_bits {
+ u8 opcode[0x10];
+ u8 uid[0x10];
+
+ u8 reserved_at_20[0x10];
+ u8 op_mod[0x10];
+
+ u8 reserved_at_40[0x40];
+};
+
+struct mlx5_ifc_mbox_out_hdr_bits {
+ u8 status[0x8];
+ u8 reserved_at_8[0x18];
+
+ u8 syndrome[0x20];
+
+ u8 reserved_at_40[0x40];
+};
+
+enum {
+ MLX5_UCTX_OBJECT_CAP_TOOLS_RESOURCES = 0x4,
+};
+
+enum {
+ MLX5_CMD_OP_QUERY_DRIVER_VERSION = 0x10c,
+ MLX5_CMD_OP_QUERY_OTHER_HCA_CAP = 0x10e,
+ MLX5_CMD_OP_QUERY_RDB = 0x512,
+ MLX5_CMD_OP_QUERY_PSV = 0x602,
+ MLX5_CMD_OP_QUERY_DC_CNAK_TRACE = 0x716,
+ MLX5_CMD_OP_QUERY_NVMF_BACKEND_CONTROLLER = 0x722,
+ MLX5_CMD_OP_QUERY_NVMF_NAMESPACE_CONTEXT = 0x728,
+ MLX5_CMD_OP_QUERY_BURST_SIZE = 0x813,
+ MLX5_CMD_OP_QUERY_DIAGNOSTIC_PARAMS = 0x819,
+ MLX5_CMD_OP_SET_DIAGNOSTIC_PARAMS = 0x820,
+ MLX5_CMD_OP_QUERY_DIAGNOSTIC_COUNTERS = 0x821,
+ MLX5_CMD_OP_QUERY_DELAY_DROP_PARAMS = 0x911,
+ MLX5_CMD_OP_QUERY_AFU = 0x971,
+ MLX5_CMD_OP_QUERY_CAPI_PEC = 0x981,
+ MLX5_CMD_OP_QUERY_UCTX = 0xa05,
+ MLX5_CMD_OP_QUERY_UMEM = 0xa09,
+ MLX5_CMD_OP_QUERY_NVMF_CC_RESPONSE = 0xb02,
+ MLX5_CMD_OP_QUERY_EMULATED_FUNCTIONS_INFO = 0xb03,
+ MLX5_CMD_OP_QUERY_REGEXP_PARAMS = 0xb05,
+ MLX5_CMD_OP_QUERY_REGEXP_REGISTER = 0xb07,
+ MLX5_CMD_OP_USER_QUERY_XRQ_DC_PARAMS_ENTRY = 0xb08,
+ MLX5_CMD_OP_USER_QUERY_XRQ_ERROR_PARAMS = 0xb0a,
+ MLX5_CMD_OP_ACCESS_REGISTER_USER = 0xb0c,
+ MLX5_CMD_OP_QUERY_EMULATION_DEVICE_EQ_MSIX_MAPPING = 0xb0f,
+ MLX5_CMD_OP_QUERY_MATCH_SAMPLE_INFO = 0xb13,
+ MLX5_CMD_OP_QUERY_CRYPTO_STATE = 0xb14,
+ MLX5_CMD_OP_QUERY_VUID = 0xb22,
+ MLX5_CMD_OP_QUERY_DPA_PARTITION = 0xb28,
+ MLX5_CMD_OP_QUERY_DPA_PARTITIONS = 0xb2a,
+ MLX5_CMD_OP_POSTPONE_CONNECTED_QP_TIMEOUT = 0xb2e,
+ MLX5_CMD_OP_QUERY_EMULATED_RESOURCES_INFO = 0xb2f,
+ MLX5_CMD_OP_QUERY_RSV_RESOURCES = 0x8000,
+ MLX5_CMD_OP_QUERY_MTT = 0x8001,
+ MLX5_CMD_OP_QUERY_SCHED_QUEUE = 0x8006,
+};
+
+static int mlx5ctl_alloc_uid(struct mlx5ctl_dev *mcdev, u32 cap)
+{
+ u32 out[MLX5_ST_SZ_DW(create_uctx_out)] = {};
+ u32 in[MLX5_ST_SZ_DW(create_uctx_in)] = {};
+ void *uctx;
+ int ret;
+ u16 uid;
+
+ uctx = MLX5_ADDR_OF(create_uctx_in, in, uctx);
+
+ mlx5ctl_dbg(mcdev, "%s: caps 0x%x\n", __func__, cap);
+ MLX5_SET(create_uctx_in, in, opcode, MLX5_CMD_OP_CREATE_UCTX);
+ MLX5_SET(uctx, uctx, cap, cap);
+
+ ret = mlx5_cmd_exec(mcdev->mdev, in, sizeof(in), out, sizeof(out));
+ if (ret)
+ return ret;
+
+ uid = MLX5_GET(create_uctx_out, out, uid);
+ mlx5ctl_dbg(mcdev, "allocated uid %u with caps 0x%x\n", uid, cap);
+ return uid;
+}
+
+static void mlx5ctl_release_uid(struct mlx5ctl_dev *mcdev, u16 uid)
+{
+ u32 in[MLX5_ST_SZ_DW(destroy_uctx_in)] = {};
+ struct mlx5_core_dev *mdev = mcdev->mdev;
+ int ret;
+
+ MLX5_SET(destroy_uctx_in, in, opcode, MLX5_CMD_OP_DESTROY_UCTX);
+ MLX5_SET(destroy_uctx_in, in, uid, uid);
+
+ ret = mlx5_cmd_exec_in(mdev, destroy_uctx, in);
+ mlx5ctl_dbg(mcdev, "released uid %u %pe\n", uid, ERR_PTR(ret));
+}
+
+static int mlx5ctl_open_uctx(struct fwctl_uctx *uctx)
+{
+ struct mlx5ctl_uctx *mfd =
+ container_of(uctx, struct mlx5ctl_uctx, uctx);
+ struct mlx5ctl_dev *mcdev =
+ container_of(uctx->fwctl, struct mlx5ctl_dev, fwctl);
+ int uid;
+
+ /*
+ * Newer FW supports the TOOLS_RESOURCES uid security label, which
+ * allows commands that manipulate the global device state. Otherwise
+ * only the basic existing RDMA devx privileges are allowed.
+ */
+ if (MLX5_CAP_GEN(mcdev->mdev, uctx_cap) &
+ MLX5_UCTX_OBJECT_CAP_TOOLS_RESOURCES)
+ mfd->uctx_caps |= MLX5_UCTX_OBJECT_CAP_TOOLS_RESOURCES;
+
+ uid = mlx5ctl_alloc_uid(mcdev, mfd->uctx_caps);
+ if (uid < 0)
+ return uid;
+
+ mfd->uctx_uid = uid;
+ return 0;
+}
+
+static void mlx5ctl_close_uctx(struct fwctl_uctx *uctx)
+{
+ struct mlx5ctl_dev *mcdev =
+ container_of(uctx->fwctl, struct mlx5ctl_dev, fwctl);
+ struct mlx5ctl_uctx *mfd =
+ container_of(uctx, struct mlx5ctl_uctx, uctx);
+
+ mlx5ctl_release_uid(mcdev, mfd->uctx_uid);
+}
+
+static void *mlx5ctl_info(struct fwctl_uctx *uctx, size_t *length)
+{
+ struct mlx5ctl_uctx *mfd =
+ container_of(uctx, struct mlx5ctl_uctx, uctx);
+ struct fwctl_info_mlx5 *info;
+
+ info = kzalloc(sizeof(*info), GFP_KERNEL);
+ if (!info)
+ return ERR_PTR(-ENOMEM);
+
+ info->uid = mfd->uctx_uid;
+ info->uctx_caps = mfd->uctx_caps;
+ *length = sizeof(*info);
+ return info;
+}
+
+static bool mlx5ctl_validate_rpc(const void *in, enum fwctl_rpc_scope scope)
+{
+ u16 opcode = MLX5_GET(mbox_in_hdr, in, opcode);
+ u16 op_mod = MLX5_GET(mbox_in_hdr, in, op_mod);
+
+ /*
+ * Currently the driver can't keep track of commands that allocate
+ * objects in the FW; such commands are safe from a security
+ * perspective, but nothing would free the FW memory when the FD is
+ * closed. For now permit only query commands and set commands that
+ * don't alter objects. Also, the caps for each scope have not been
+ * defined yet, so filter commands manually for now.
+ */
+ switch (opcode) {
+ case MLX5_CMD_OP_POSTPONE_CONNECTED_QP_TIMEOUT:
+ case MLX5_CMD_OP_QUERY_ADAPTER:
+ case MLX5_CMD_OP_QUERY_ESW_FUNCTIONS:
+ case MLX5_CMD_OP_QUERY_HCA_CAP:
+ case MLX5_CMD_OP_QUERY_HCA_VPORT_CONTEXT:
+ case MLX5_CMD_OP_QUERY_OTHER_HCA_CAP:
+ case MLX5_CMD_OP_QUERY_ROCE_ADDRESS:
+ case MLX5_CMD_OPCODE_QUERY_VUID:
+ /*
+ * FW limits SET_HCA_CAP on the tools UID to only the "other function"
+ * mode, which is used for function pre-configuration.
+ */
+ case MLX5_CMD_OP_SET_HCA_CAP:
+ return true; /* scope >= FWCTL_RPC_CONFIGURATION; */
+
+ case MLX5_CMD_OP_FPGA_QUERY_QP_COUNTERS:
+ case MLX5_CMD_OP_FPGA_QUERY_QP:
+ case MLX5_CMD_OP_NOP:
+ case MLX5_CMD_OP_QUERY_AFU:
+ case MLX5_CMD_OP_QUERY_BURST_SIZE:
+ case MLX5_CMD_OP_QUERY_CAPI_PEC:
+ case MLX5_CMD_OP_QUERY_CONG_PARAMS:
+ case MLX5_CMD_OP_QUERY_CONG_STATISTICS:
+ case MLX5_CMD_OP_QUERY_CONG_STATUS:
+ case MLX5_CMD_OP_QUERY_CQ:
+ case MLX5_CMD_OP_QUERY_CRYPTO_STATE:
+ case MLX5_CMD_OP_QUERY_DC_CNAK_TRACE:
+ case MLX5_CMD_OP_QUERY_DCT:
+ case MLX5_CMD_OP_QUERY_DELAY_DROP_PARAMS:
+ case MLX5_CMD_OP_QUERY_DIAGNOSTIC_COUNTERS:
+ case MLX5_CMD_OP_QUERY_DIAGNOSTIC_PARAMS:
+ case MLX5_CMD_OP_QUERY_DPA_PARTITION:
+ case MLX5_CMD_OP_QUERY_DPA_PARTITIONS:
+ case MLX5_CMD_OP_QUERY_DRIVER_VERSION:
+ case MLX5_CMD_OP_QUERY_EMULATED_FUNCTIONS_INFO:
+ case MLX5_CMD_OP_QUERY_EMULATED_RESOURCES_INFO:
+ case MLX5_CMD_OP_QUERY_EMULATION_DEVICE_EQ_MSIX_MAPPING:
+ case MLX5_CMD_OP_QUERY_EQ:
+ case MLX5_CMD_OP_QUERY_ESW_VPORT_CONTEXT:
+ case MLX5_CMD_OP_QUERY_FLOW_COUNTER:
+ case MLX5_CMD_OP_QUERY_FLOW_GROUP:
+ case MLX5_CMD_OP_QUERY_FLOW_TABLE_ENTRY:
+ case MLX5_CMD_OP_QUERY_FLOW_TABLE:
+ case MLX5_CMD_OP_QUERY_GENERAL_OBJECT:
+ case MLX5_CMD_OP_QUERY_HCA_VPORT_GID:
+ case MLX5_CMD_OP_QUERY_HCA_VPORT_PKEY:
+ case MLX5_CMD_OP_QUERY_ISSI:
+ case MLX5_CMD_OP_QUERY_L2_TABLE_ENTRY:
+ case MLX5_CMD_OP_QUERY_LAG:
+ case MLX5_CMD_OP_QUERY_MAD_DEMUX:
+ case MLX5_CMD_OP_QUERY_MATCH_SAMPLE_INFO:
+ case MLX5_CMD_OP_QUERY_MKEY:
+ case MLX5_CMD_OP_QUERY_MODIFY_HEADER_CONTEXT:
+ case MLX5_CMD_OP_QUERY_MTT:
+ case MLX5_CMD_OP_QUERY_NIC_VPORT_CONTEXT:
+ case MLX5_CMD_OP_QUERY_NVMF_BACKEND_CONTROLLER:
+ case MLX5_CMD_OP_QUERY_NVMF_CC_RESPONSE:
+ case MLX5_CMD_OP_QUERY_NVMF_NAMESPACE_CONTEXT:
+ case MLX5_CMD_OP_QUERY_PACKET_REFORMAT_CONTEXT:
+ case MLX5_CMD_OP_QUERY_PAGES:
+ case MLX5_CMD_OP_QUERY_PSV:
+ case MLX5_CMD_OP_QUERY_Q_COUNTER:
+ case MLX5_CMD_OP_QUERY_QP:
+ case MLX5_CMD_OP_QUERY_RATE_LIMIT:
+ case MLX5_CMD_OP_QUERY_RDB:
+ case MLX5_CMD_OP_QUERY_REGEXP_PARAMS:
+ case MLX5_CMD_OP_QUERY_REGEXP_REGISTER:
+ case MLX5_CMD_OP_QUERY_RMP:
+ case MLX5_CMD_OP_QUERY_RQ:
+ case MLX5_CMD_OP_QUERY_RQT:
+ case MLX5_CMD_OP_QUERY_RSV_RESOURCES:
+ case MLX5_CMD_OP_QUERY_SCHED_QUEUE:
+ case MLX5_CMD_OP_QUERY_SCHEDULING_ELEMENT:
+ case MLX5_CMD_OP_QUERY_SF_PARTITION:
+ case MLX5_CMD_OP_QUERY_SPECIAL_CONTEXTS:
+ case MLX5_CMD_OP_QUERY_SQ:
+ case MLX5_CMD_OP_QUERY_SRQ:
+ case MLX5_CMD_OP_QUERY_TIR:
+ case MLX5_CMD_OP_QUERY_TIS:
+ case MLX5_CMD_OP_QUERY_UCTX:
+ case MLX5_CMD_OP_QUERY_UMEM:
+ case MLX5_CMD_OP_QUERY_VHCA_MIGRATION_STATE:
+ case MLX5_CMD_OP_QUERY_VHCA_STATE:
+ case MLX5_CMD_OP_QUERY_VNIC_ENV:
+ case MLX5_CMD_OP_QUERY_VPORT_COUNTER:
+ case MLX5_CMD_OP_QUERY_VPORT_STATE:
+ case MLX5_CMD_OP_QUERY_WOL_ROL:
+ case MLX5_CMD_OP_QUERY_XRC_SRQ:
+ case MLX5_CMD_OP_QUERY_XRQ_DC_PARAMS_ENTRY:
+ case MLX5_CMD_OP_QUERY_XRQ_ERROR_PARAMS:
+ case MLX5_CMD_OP_QUERY_XRQ:
+ case MLX5_CMD_OP_USER_QUERY_XRQ_DC_PARAMS_ENTRY:
+ case MLX5_CMD_OP_USER_QUERY_XRQ_ERROR_PARAMS:
+ return scope >= FWCTL_RPC_DEBUG_READ_ONLY;
+
+ case MLX5_CMD_OP_SET_DIAGNOSTIC_PARAMS:
+ return scope >= FWCTL_RPC_DEBUG_WRITE;
+
+ case MLX5_CMD_OP_ACCESS_REG:
+ case MLX5_CMD_OP_ACCESS_REGISTER_USER:
+ if (op_mod == 0) /* write */
+ return true; /* scope >= FWCTL_RPC_CONFIGURATION; */
+ return scope >= FWCTL_RPC_DEBUG_READ_ONLY;
+ default:
+ return false;
+ }
+}
+
+static void *mlx5ctl_fw_rpc(struct fwctl_uctx *uctx, enum fwctl_rpc_scope scope,
+ void *rpc_in, size_t in_len, size_t *out_len)
+{
+ struct mlx5ctl_dev *mcdev =
+ container_of(uctx->fwctl, struct mlx5ctl_dev, fwctl);
+ struct mlx5ctl_uctx *mfd =
+ container_of(uctx, struct mlx5ctl_uctx, uctx);
+ void *rpc_out;
+ int ret;
+
+ if (in_len < MLX5_ST_SZ_BYTES(mbox_in_hdr) ||
+ *out_len < MLX5_ST_SZ_BYTES(mbox_out_hdr))
+ return ERR_PTR(-EMSGSIZE);
+
+ mlx5ctl_dbg(mcdev, "[UID %d] cmdif: opcode 0x%x inlen %zu outlen %zu\n",
+ mfd->uctx_uid, MLX5_GET(mbox_in_hdr, rpc_in, opcode),
+ in_len, *out_len);
+
+ if (!mlx5ctl_validate_rpc(rpc_in, scope))
+ return ERR_PTR(-EBADMSG);
+
+ /*
+ * mlx5_cmd_do() copies the input message to its own buffer before
+ * executing it, so we can reuse the allocation for the output.
+ */
+ if (*out_len <= in_len) {
+ rpc_out = rpc_in;
+ } else {
+ rpc_out = kvzalloc(*out_len, GFP_KERNEL);
+ if (!rpc_out)
+ return ERR_PTR(-ENOMEM);
+ }
+
+ /* Enforce the user context for the command */
+ MLX5_SET(mbox_in_hdr, rpc_in, uid, mfd->uctx_uid);
+ ret = mlx5_cmd_do(mcdev->mdev, rpc_in, in_len, rpc_out, *out_len);
+
+ mlx5ctl_dbg(mcdev,
+ "[UID %d] cmdif: opcode 0x%x status 0x%x retval %pe\n",
+ mfd->uctx_uid, MLX5_GET(mbox_in_hdr, rpc_in, opcode),
+ MLX5_GET(mbox_out_hdr, rpc_out, status), ERR_PTR(ret));
+
+ /*
+ * -EREMOTEIO means execution succeeded and rpc_out is valid, but an
+ * error code was returned inside it. Everything else means the RPC
+ * did not make it to the device.
+ */
+ if (ret && ret != -EREMOTEIO) {
+ if (rpc_out != rpc_in)
+ kfree(rpc_out);
+ return ERR_PTR(ret);
+ }
+ return rpc_out;
+}
+
+static const struct fwctl_ops mlx5ctl_ops = {
+ .device_type = FWCTL_DEVICE_TYPE_MLX5,
+ .uctx_size = sizeof(struct mlx5ctl_uctx),
+ .open_uctx = mlx5ctl_open_uctx,
+ .close_uctx = mlx5ctl_close_uctx,
+ .info = mlx5ctl_info,
+ .fw_rpc = mlx5ctl_fw_rpc,
+};
+
+static int mlx5ctl_probe(struct auxiliary_device *adev,
+ const struct auxiliary_device_id *id)
+{
+ struct mlx5_adev *madev = container_of(adev, struct mlx5_adev, adev);
+ struct mlx5_core_dev *mdev = madev->mdev;
+ struct mlx5ctl_dev *mcdev __free(mlx5ctl) = fwctl_alloc_device(
+ &mdev->pdev->dev, &mlx5ctl_ops, struct mlx5ctl_dev, fwctl);
+ int ret;
+
+ if (!mcdev)
+ return -ENOMEM;
+
+ mcdev->mdev = mdev;
+
+ ret = fwctl_register(&mcdev->fwctl);
+ if (ret)
+ return ret;
+ auxiliary_set_drvdata(adev, no_free_ptr(mcdev));
+ return 0;
+}
+
+static void mlx5ctl_remove(struct auxiliary_device *adev)
+{
+ struct mlx5ctl_dev *mcdev = auxiliary_get_drvdata(adev);
+
+ fwctl_unregister(&mcdev->fwctl);
+ fwctl_put(&mcdev->fwctl);
+}
+
+static const struct auxiliary_device_id mlx5ctl_id_table[] = {
+ {.name = MLX5_ADEV_NAME ".fwctl",},
+ {}
+};
+MODULE_DEVICE_TABLE(auxiliary, mlx5ctl_id_table);
+
+static struct auxiliary_driver mlx5ctl_driver = {
+ .name = "mlx5_fwctl",
+ .probe = mlx5ctl_probe,
+ .remove = mlx5ctl_remove,
+ .id_table = mlx5ctl_id_table,
+};
+
+module_auxiliary_driver(mlx5ctl_driver);
+
+MODULE_IMPORT_NS("FWCTL");
+MODULE_DESCRIPTION("mlx5 ConnectX fwctl driver");
+MODULE_AUTHOR("Saeed Mahameed <saeedm@nvidia.com>");
+MODULE_LICENSE("Dual BSD/GPL");
diff --git a/drivers/fwctl/pds/Makefile b/drivers/fwctl/pds/Makefile
new file mode 100644
index 000000000000..cc2317c07be1
--- /dev/null
+++ b/drivers/fwctl/pds/Makefile
@@ -0,0 +1,4 @@
+# SPDX-License-Identifier: GPL-2.0
+obj-$(CONFIG_FWCTL_PDS) += pds_fwctl.o
+
+pds_fwctl-y += main.o
diff --git a/drivers/fwctl/pds/main.c b/drivers/fwctl/pds/main.c
new file mode 100644
index 000000000000..284c4165fdd4
--- /dev/null
+++ b/drivers/fwctl/pds/main.c
@@ -0,0 +1,537 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright(c) Advanced Micro Devices, Inc */
+
+#include <linux/module.h>
+#include <linux/auxiliary_bus.h>
+#include <linux/pci.h>
+#include <linux/vmalloc.h>
+#include <linux/bitfield.h>
+
+#include <uapi/fwctl/fwctl.h>
+#include <uapi/fwctl/pds.h>
+#include <linux/fwctl.h>
+
+#include <linux/pds/pds_common.h>
+#include <linux/pds/pds_core_if.h>
+#include <linux/pds/pds_adminq.h>
+#include <linux/pds/pds_auxbus.h>
+
+struct pdsfc_uctx {
+ struct fwctl_uctx uctx;
+ u32 uctx_caps;
+};
+
+struct pdsfc_rpc_endpoint_info {
+ u32 endpoint;
+ dma_addr_t operations_pa;
+ struct pds_fwctl_query_data *operations;
+ struct mutex lock; /* lock for endpoint info management */
+};
+
+struct pdsfc_dev {
+ struct fwctl_device fwctl;
+ struct pds_auxiliary_dev *padev;
+ u32 caps;
+ struct pds_fwctl_ident ident;
+ dma_addr_t endpoints_pa;
+ struct pds_fwctl_query_data *endpoints;
+ struct pdsfc_rpc_endpoint_info *endpoint_info;
+};
+
+static int pdsfc_open_uctx(struct fwctl_uctx *uctx)
+{
+ struct pdsfc_dev *pdsfc = container_of(uctx->fwctl, struct pdsfc_dev, fwctl);
+ struct pdsfc_uctx *pdsfc_uctx = container_of(uctx, struct pdsfc_uctx, uctx);
+
+ pdsfc_uctx->uctx_caps = pdsfc->caps;
+
+ return 0;
+}
+
+static void pdsfc_close_uctx(struct fwctl_uctx *uctx)
+{
+}
+
+static void *pdsfc_info(struct fwctl_uctx *uctx, size_t *length)
+{
+ struct pdsfc_uctx *pdsfc_uctx = container_of(uctx, struct pdsfc_uctx, uctx);
+ struct fwctl_info_pds *info;
+
+ info = kzalloc(sizeof(*info), GFP_KERNEL);
+ if (!info)
+ return ERR_PTR(-ENOMEM);
+
+ info->uctx_caps = pdsfc_uctx->uctx_caps;
+
+ return info;
+}
+
+static int pdsfc_identify(struct pdsfc_dev *pdsfc)
+{
+ struct device *dev = &pdsfc->fwctl.dev;
+ union pds_core_adminq_comp comp = {0};
+ union pds_core_adminq_cmd cmd;
+ struct pds_fwctl_ident *ident;
+ dma_addr_t ident_pa;
+ int err;
+
+ ident = dma_alloc_coherent(dev->parent, sizeof(*ident), &ident_pa, GFP_KERNEL);
+ if (!ident) {
+ dev_err(dev, "Failed to map ident buffer\n");
+ return -ENOMEM;
+ }
+
+ cmd = (union pds_core_adminq_cmd) {
+ .fwctl_ident = {
+ .opcode = PDS_FWCTL_CMD_IDENT,
+ .version = 0,
+ .len = cpu_to_le32(sizeof(*ident)),
+ .ident_pa = cpu_to_le64(ident_pa),
+ }
+ };
+
+ err = pds_client_adminq_cmd(pdsfc->padev, &cmd, sizeof(cmd), &comp, 0);
+ if (err)
+ dev_err(dev, "Failed to send adminq cmd opcode: %u err: %d\n",
+ cmd.fwctl_ident.opcode, err);
+ else
+ pdsfc->ident = *ident;
+
+ dma_free_coherent(dev->parent, sizeof(*ident), ident, ident_pa);
+
+ return err;
+}
+
+static void pdsfc_free_endpoints(struct pdsfc_dev *pdsfc)
+{
+ struct device *dev = &pdsfc->fwctl.dev;
+ int i;
+
+ if (!pdsfc->endpoints)
+ return;
+
+ for (i = 0; pdsfc->endpoint_info && i < le32_to_cpu(pdsfc->endpoints->num_entries); i++)
+ mutex_destroy(&pdsfc->endpoint_info[i].lock);
+ vfree(pdsfc->endpoint_info);
+ pdsfc->endpoint_info = NULL;
+ dma_free_coherent(dev->parent, PAGE_SIZE,
+ pdsfc->endpoints, pdsfc->endpoints_pa);
+ pdsfc->endpoints = NULL;
+ pdsfc->endpoints_pa = DMA_MAPPING_ERROR;
+}
+
+static void pdsfc_free_operations(struct pdsfc_dev *pdsfc)
+{
+ struct device *dev = &pdsfc->fwctl.dev;
+ u32 num_endpoints;
+ int i;
+
+ num_endpoints = le32_to_cpu(pdsfc->endpoints->num_entries);
+ for (i = 0; i < num_endpoints; i++) {
+ struct pdsfc_rpc_endpoint_info *ei = &pdsfc->endpoint_info[i];
+
+ if (ei->operations) {
+ dma_free_coherent(dev->parent, PAGE_SIZE,
+ ei->operations, ei->operations_pa);
+ ei->operations = NULL;
+ ei->operations_pa = DMA_MAPPING_ERROR;
+ }
+ }
+}
+
+static struct pds_fwctl_query_data *pdsfc_get_endpoints(struct pdsfc_dev *pdsfc,
+ dma_addr_t *pa)
+{
+ struct device *dev = &pdsfc->fwctl.dev;
+ union pds_core_adminq_comp comp = {0};
+ struct pds_fwctl_query_data *data;
+ union pds_core_adminq_cmd cmd;
+ dma_addr_t data_pa;
+ int err;
+
+ data = dma_alloc_coherent(dev->parent, PAGE_SIZE, &data_pa, GFP_KERNEL);
+ if (!data) {
+ dev_err(dev, "Failed to map endpoint list\n");
+ return ERR_PTR(-ENOMEM);
+ }
+
+ cmd = (union pds_core_adminq_cmd) {
+ .fwctl_query = {
+ .opcode = PDS_FWCTL_CMD_QUERY,
+ .entity = PDS_FWCTL_RPC_ROOT,
+ .version = 0,
+ .query_data_buf_len = cpu_to_le32(PAGE_SIZE),
+ .query_data_buf_pa = cpu_to_le64(data_pa),
+ }
+ };
+
+ err = pds_client_adminq_cmd(pdsfc->padev, &cmd, sizeof(cmd), &comp, 0);
+ if (err) {
+ dev_err(dev, "Failed to send adminq cmd opcode: %u entity: %u err: %d\n",
+ cmd.fwctl_query.opcode, cmd.fwctl_query.entity, err);
+ dma_free_coherent(dev->parent, PAGE_SIZE, data, data_pa);
+ return ERR_PTR(err);
+ }
+
+ *pa = data_pa;
+
+ return data;
+}
+
+static int pdsfc_init_endpoints(struct pdsfc_dev *pdsfc)
+{
+ struct pds_fwctl_query_data_endpoint *ep_entry;
+ u32 num_endpoints;
+ int i;
+
+ pdsfc->endpoints = pdsfc_get_endpoints(pdsfc, &pdsfc->endpoints_pa);
+ if (IS_ERR(pdsfc->endpoints))
+ return PTR_ERR(pdsfc->endpoints);
+
+ num_endpoints = le32_to_cpu(pdsfc->endpoints->num_entries);
+ pdsfc->endpoint_info = vcalloc(num_endpoints,
+ sizeof(*pdsfc->endpoint_info));
+ if (!pdsfc->endpoint_info) {
+ pdsfc_free_endpoints(pdsfc);
+ return -ENOMEM;
+ }
+
+ ep_entry = (struct pds_fwctl_query_data_endpoint *)pdsfc->endpoints->entries;
+ for (i = 0; i < num_endpoints; i++) {
+ mutex_init(&pdsfc->endpoint_info[i].lock);
+ pdsfc->endpoint_info[i].endpoint = le32_to_cpu(ep_entry[i].id);
+ }
+
+ return 0;
+}
+
+static struct pds_fwctl_query_data *pdsfc_get_operations(struct pdsfc_dev *pdsfc,
+ dma_addr_t *pa, u32 ep)
+{
+ struct pds_fwctl_query_data_operation *entries;
+ struct device *dev = &pdsfc->fwctl.dev;
+ union pds_core_adminq_comp comp = {0};
+ struct pds_fwctl_query_data *data;
+ union pds_core_adminq_cmd cmd;
+ dma_addr_t data_pa;
+ u32 num_entries;
+ int err;
+ int i;
+
+ /* Query the operations list for the given endpoint */
+ data = dma_alloc_coherent(dev->parent, PAGE_SIZE, &data_pa, GFP_KERNEL);
+ if (!data) {
+ dev_err(dev, "Failed to map operations list\n");
+ return ERR_PTR(-ENOMEM);
+ }
+
+ cmd = (union pds_core_adminq_cmd) {
+ .fwctl_query = {
+ .opcode = PDS_FWCTL_CMD_QUERY,
+ .entity = PDS_FWCTL_RPC_ENDPOINT,
+ .version = 0,
+ .query_data_buf_len = cpu_to_le32(PAGE_SIZE),
+ .query_data_buf_pa = cpu_to_le64(data_pa),
+ .ep = cpu_to_le32(ep),
+ }
+ };
+
+ err = pds_client_adminq_cmd(pdsfc->padev, &cmd, sizeof(cmd), &comp, 0);
+ if (err) {
+ dev_err(dev, "Failed to send adminq cmd opcode: %u entity: %u err: %d\n",
+ cmd.fwctl_query.opcode, cmd.fwctl_query.entity, err);
+ dma_free_coherent(dev->parent, PAGE_SIZE, data, data_pa);
+ return ERR_PTR(err);
+ }
+
+ *pa = data_pa;
+
+ entries = (struct pds_fwctl_query_data_operation *)data->entries;
+ num_entries = le32_to_cpu(data->num_entries);
+ dev_dbg(dev, "num_entries %u\n", num_entries);
+ for (i = 0; i < num_entries; i++) {
+ /* Translate FW command attribute to fwctl scope */
+ switch (entries[i].scope) {
+ case PDSFC_FW_CMD_ATTR_READ:
+ case PDSFC_FW_CMD_ATTR_WRITE:
+ case PDSFC_FW_CMD_ATTR_SYNC:
+ entries[i].scope = FWCTL_RPC_CONFIGURATION;
+ break;
+ case PDSFC_FW_CMD_ATTR_DEBUG_READ:
+ entries[i].scope = FWCTL_RPC_DEBUG_READ_ONLY;
+ break;
+ case PDSFC_FW_CMD_ATTR_DEBUG_WRITE:
+ entries[i].scope = FWCTL_RPC_DEBUG_WRITE;
+ break;
+ default:
+ entries[i].scope = FWCTL_RPC_DEBUG_WRITE_FULL;
+ break;
+ }
+ dev_dbg(dev, "endpoint %d operation: id %x scope %d\n",
+ ep, entries[i].id, entries[i].scope);
+ }
+
+ return data;
+}
+
+static int pdsfc_validate_rpc(struct pdsfc_dev *pdsfc,
+ struct fwctl_rpc_pds *rpc,
+ enum fwctl_rpc_scope scope)
+{
+ struct pds_fwctl_query_data_operation *op_entry;
+ struct pdsfc_rpc_endpoint_info *ep_info = NULL;
+ struct device *dev = &pdsfc->fwctl.dev;
+ int i;
+
+ /* Validate rpc->in.len & rpc->out.len against the device's
+ * advertised ident.max_req_sz & max_resp_sz
+ */
+ if (rpc->in.len > pdsfc->ident.max_req_sz) {
+ dev_dbg(dev, "Invalid request size %u, max %u\n",
+ rpc->in.len, pdsfc->ident.max_req_sz);
+ return -EINVAL;
+ }
+
+ if (rpc->out.len > pdsfc->ident.max_resp_sz) {
+ dev_dbg(dev, "Invalid response size %u, max %u\n",
+ rpc->out.len, pdsfc->ident.max_resp_sz);
+ return -EINVAL;
+ }
+
+ for (i = 0; i < le32_to_cpu(pdsfc->endpoints->num_entries); i++) {
+ if (pdsfc->endpoint_info[i].endpoint == rpc->in.ep) {
+ ep_info = &pdsfc->endpoint_info[i];
+ break;
+ }
+ }
+ if (!ep_info) {
+ dev_dbg(dev, "Invalid endpoint %d\n", rpc->in.ep);
+ return -EINVAL;
+ }
+
+ /* query and cache this endpoint's operations */
+ mutex_lock(&ep_info->lock);
+ if (!ep_info->operations) {
+ struct pds_fwctl_query_data *operations;
+
+ operations = pdsfc_get_operations(pdsfc,
+ &ep_info->operations_pa,
+ rpc->in.ep);
+ if (IS_ERR(operations)) {
+ mutex_unlock(&ep_info->lock);
+ return PTR_ERR(operations);
+ }
+ ep_info->operations = operations;
+ }
+ mutex_unlock(&ep_info->lock);
+
+ /* reject unsupported and/or out of scope commands */
+ op_entry = (struct pds_fwctl_query_data_operation *)ep_info->operations->entries;
+ for (i = 0; i < le32_to_cpu(ep_info->operations->num_entries); i++) {
+ if (PDS_FWCTL_RPC_OPCODE_CMP(rpc->in.op, op_entry[i].id)) {
+ if (scope < op_entry[i].scope)
+ return -EPERM;
+ return 0;
+ }
+ }
+
+ dev_dbg(dev, "Invalid operation %d for endpoint %d\n", rpc->in.op, rpc->in.ep);
+
+ return -EINVAL;
+}
+
+static void *pdsfc_fw_rpc(struct fwctl_uctx *uctx, enum fwctl_rpc_scope scope,
+ void *in, size_t in_len, size_t *out_len)
+{
+ struct pdsfc_dev *pdsfc = container_of(uctx->fwctl, struct pdsfc_dev, fwctl);
+ struct device *dev = &uctx->fwctl->dev;
+ union pds_core_adminq_comp comp = {0};
+ dma_addr_t out_payload_dma_addr = 0;
+ dma_addr_t in_payload_dma_addr = 0;
+ struct fwctl_rpc_pds *rpc = in;
+ union pds_core_adminq_cmd cmd;
+ void *out_payload = NULL;
+ void *in_payload = NULL;
+ void *out = NULL;
+ int err;
+
+ err = pdsfc_validate_rpc(pdsfc, rpc, scope);
+ if (err)
+ return ERR_PTR(err);
+
+ if (rpc->in.len > 0) {
+ in_payload = kzalloc(rpc->in.len, GFP_KERNEL);
+ if (!in_payload) {
+ dev_err(dev, "Failed to allocate in_payload\n");
+ err = -ENOMEM;
+ goto err_out;
+ }
+
+ if (copy_from_user(in_payload, u64_to_user_ptr(rpc->in.payload),
+ rpc->in.len)) {
+ dev_dbg(dev, "Failed to copy in_payload from user\n");
+ err = -EFAULT;
+ goto err_in_payload;
+ }
+
+ in_payload_dma_addr = dma_map_single(dev->parent, in_payload,
+ rpc->in.len, DMA_TO_DEVICE);
+ err = dma_mapping_error(dev->parent, in_payload_dma_addr);
+ if (err) {
+ dev_dbg(dev, "Failed to map in_payload\n");
+ goto err_in_payload;
+ }
+ }
+
+ if (rpc->out.len > 0) {
+ out_payload = kzalloc(rpc->out.len, GFP_KERNEL);
+ if (!out_payload) {
+ dev_dbg(dev, "Failed to allocate out_payload\n");
+ err = -ENOMEM;
+ goto err_out_payload;
+ }
+
+ out_payload_dma_addr = dma_map_single(dev->parent, out_payload,
+ rpc->out.len, DMA_FROM_DEVICE);
+ err = dma_mapping_error(dev->parent, out_payload_dma_addr);
+ if (err) {
+ dev_dbg(dev, "Failed to map out_payload\n");
+ goto err_out_payload;
+ }
+ }
+
+ cmd = (union pds_core_adminq_cmd) {
+ .fwctl_rpc = {
+ .opcode = PDS_FWCTL_CMD_RPC,
+ .flags = PDS_FWCTL_RPC_IND_REQ | PDS_FWCTL_RPC_IND_RESP,
+ .ep = cpu_to_le32(rpc->in.ep),
+ .op = cpu_to_le32(rpc->in.op),
+ .req_pa = cpu_to_le64(in_payload_dma_addr),
+ .req_sz = cpu_to_le32(rpc->in.len),
+ .resp_pa = cpu_to_le64(out_payload_dma_addr),
+ .resp_sz = cpu_to_le32(rpc->out.len),
+ }
+ };
+
+ err = pds_client_adminq_cmd(pdsfc->padev, &cmd, sizeof(cmd), &comp, 0);
+ if (err) {
+ dev_dbg(dev, "%s: ep %d op %x req_pa %llx req_sz %d req_sg %d resp_pa %llx resp_sz %d resp_sg %d err %d\n",
+ __func__, rpc->in.ep, rpc->in.op,
+ cmd.fwctl_rpc.req_pa, cmd.fwctl_rpc.req_sz, cmd.fwctl_rpc.req_sg_elems,
+ cmd.fwctl_rpc.resp_pa, cmd.fwctl_rpc.resp_sz, cmd.fwctl_rpc.resp_sg_elems,
+ err);
+ goto done;
+ }
+
+ dynamic_hex_dump("out ", DUMP_PREFIX_OFFSET, 16, 1, out_payload, rpc->out.len, true);
+
+ if (copy_to_user(u64_to_user_ptr(rpc->out.payload), out_payload, rpc->out.len)) {
+ dev_dbg(dev, "Failed to copy out_payload to user\n");
+ out = ERR_PTR(-EFAULT);
+ goto done;
+ }
+
+ rpc->out.retval = le32_to_cpu(comp.fwctl_rpc.err);
+ *out_len = in_len;
+ out = in;
+
+done:
+ if (out_payload_dma_addr)
+ dma_unmap_single(dev->parent, out_payload_dma_addr,
+ rpc->out.len, DMA_FROM_DEVICE);
+err_out_payload:
+ kfree(out_payload);
+
+ if (in_payload_dma_addr)
+ dma_unmap_single(dev->parent, in_payload_dma_addr,
+ rpc->in.len, DMA_TO_DEVICE);
+err_in_payload:
+ kfree(in_payload);
+err_out:
+ if (err)
+ return ERR_PTR(err);
+
+ return out;
+}
+
+static const struct fwctl_ops pdsfc_ops = {
+ .device_type = FWCTL_DEVICE_TYPE_PDS,
+ .uctx_size = sizeof(struct pdsfc_uctx),
+ .open_uctx = pdsfc_open_uctx,
+ .close_uctx = pdsfc_close_uctx,
+ .info = pdsfc_info,
+ .fw_rpc = pdsfc_fw_rpc,
+};
+
+static int pdsfc_probe(struct auxiliary_device *adev,
+ const struct auxiliary_device_id *id)
+{
+ struct pds_auxiliary_dev *padev =
+ container_of(adev, struct pds_auxiliary_dev, aux_dev);
+ struct device *dev = &adev->dev;
+ struct pdsfc_dev *pdsfc;
+ int err;
+
+ pdsfc = fwctl_alloc_device(&padev->vf_pdev->dev, &pdsfc_ops,
+ struct pdsfc_dev, fwctl);
+ if (!pdsfc)
+ return dev_err_probe(dev, -ENOMEM, "Failed to allocate fwctl device struct\n");
+ pdsfc->padev = padev;
+
+ err = pdsfc_identify(pdsfc);
+ if (err) {
+ fwctl_put(&pdsfc->fwctl);
+ return dev_err_probe(dev, err, "Failed to identify device\n");
+ }
+
+ err = pdsfc_init_endpoints(pdsfc);
+ if (err) {
+ fwctl_put(&pdsfc->fwctl);
+ return dev_err_probe(dev, err, "Failed to init endpoints\n");
+ }
+
+ pdsfc->caps = PDS_FWCTL_QUERY_CAP | PDS_FWCTL_SEND_CAP;
+
+ err = fwctl_register(&pdsfc->fwctl);
+ if (err) {
+ pdsfc_free_endpoints(pdsfc);
+ fwctl_put(&pdsfc->fwctl);
+ return dev_err_probe(dev, err, "Failed to register device\n");
+ }
+
+ auxiliary_set_drvdata(adev, pdsfc);
+
+ return 0;
+}
+
+static void pdsfc_remove(struct auxiliary_device *adev)
+{
+ struct pdsfc_dev *pdsfc = auxiliary_get_drvdata(adev);
+
+ fwctl_unregister(&pdsfc->fwctl);
+ pdsfc_free_operations(pdsfc);
+ pdsfc_free_endpoints(pdsfc);
+
+ fwctl_put(&pdsfc->fwctl);
+}
+
+static const struct auxiliary_device_id pdsfc_id_table[] = {
+ {.name = PDS_CORE_DRV_NAME "." PDS_DEV_TYPE_FWCTL_STR },
+ {}
+};
+MODULE_DEVICE_TABLE(auxiliary, pdsfc_id_table);
+
+static struct auxiliary_driver pdsfc_driver = {
+ .name = "pds_fwctl",
+ .probe = pdsfc_probe,
+ .remove = pdsfc_remove,
+ .id_table = pdsfc_id_table,
+};
+
+module_auxiliary_driver(pdsfc_driver);
+
+MODULE_IMPORT_NS("FWCTL");
+MODULE_DESCRIPTION("pds fwctl driver");
+MODULE_AUTHOR("Shannon Nelson <shannon.nelson@amd.com>");
+MODULE_AUTHOR("Brett Creeley <brett.creeley@amd.com>");
+MODULE_LICENSE("GPL");
diff --git a/drivers/net/ethernet/amd/pds_core/auxbus.c b/drivers/net/ethernet/amd/pds_core/auxbus.c
index 2babea110991..eeb72b1809ea 100644
--- a/drivers/net/ethernet/amd/pds_core/auxbus.c
+++ b/drivers/net/ethernet/amd/pds_core/auxbus.c
@@ -175,34 +175,32 @@ static struct pds_auxiliary_dev *pdsc_auxbus_dev_register(struct pdsc *cf,
return padev;
}
-int pdsc_auxbus_dev_del(struct pdsc *cf, struct pdsc *pf)
+void pdsc_auxbus_dev_del(struct pdsc *cf, struct pdsc *pf,
+ struct pds_auxiliary_dev **pd_ptr)
{
struct pds_auxiliary_dev *padev;
- int err = 0;
- if (!cf)
- return -ENODEV;
+ if (!*pd_ptr)
+ return;
mutex_lock(&pf->config_lock);
- padev = pf->vfs[cf->vf_id].padev;
- if (padev) {
- pds_client_unregister(pf, padev->client_id);
- auxiliary_device_delete(&padev->aux_dev);
- auxiliary_device_uninit(&padev->aux_dev);
- padev->client_id = 0;
- }
- pf->vfs[cf->vf_id].padev = NULL;
+ padev = *pd_ptr;
+ pds_client_unregister(pf, padev->client_id);
+ auxiliary_device_delete(&padev->aux_dev);
+ auxiliary_device_uninit(&padev->aux_dev);
+ padev->client_id = 0;
+ *pd_ptr = NULL;
mutex_unlock(&pf->config_lock);
- return err;
}
-int pdsc_auxbus_dev_add(struct pdsc *cf, struct pdsc *pf)
+int pdsc_auxbus_dev_add(struct pdsc *cf, struct pdsc *pf,
+ enum pds_core_vif_types vt,
+ struct pds_auxiliary_dev **pd_ptr)
{
struct pds_auxiliary_dev *padev;
char devname[PDS_DEVNAME_LEN];
- enum pds_core_vif_types vt;
unsigned long mask;
u16 vt_support;
int client_id;
@@ -211,6 +209,9 @@ int pdsc_auxbus_dev_add(struct pdsc *cf, struct pdsc *pf)
if (!cf)
return -ENODEV;
+ if (vt >= PDS_DEV_TYPE_MAX)
+ return -EINVAL;
+
mutex_lock(&pf->config_lock);
mask = BIT_ULL(PDSC_S_FW_DEAD) |
@@ -222,17 +223,10 @@ int pdsc_auxbus_dev_add(struct pdsc *cf, struct pdsc *pf)
goto out_unlock;
}
- /* We only support vDPA so far, so it is the only one to
- * be verified that it is available in the Core device and
- * enabled in the devlink param. In the future this might
- * become a loop for several VIF types.
- */
-
/* Verify that the type is supported and enabled. It is not
- * an error if there is no auxbus device support for this
- * VF, it just means something else needs to happen with it.
+ * an error if the firmware doesn't support the feature, the
+ * driver just won't set up an auxiliary_device for it.
*/
- vt = PDS_DEV_TYPE_VDPA;
vt_support = !!le16_to_cpu(pf->dev_ident.vif_types[vt]);
if (!(vt_support &&
pf->viftype_status[vt].supported &&
@@ -258,7 +252,7 @@ int pdsc_auxbus_dev_add(struct pdsc *cf, struct pdsc *pf)
err = PTR_ERR(padev);
goto out_unlock;
}
- pf->vfs[cf->vf_id].padev = padev;
+ *pd_ptr = padev;
out_unlock:
mutex_unlock(&pf->config_lock);
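
The reworked pair above makes the caller own the padev slot. A hedged sketch
of the calling pattern, matching the pds_core call sites later in this diff:

	struct pds_auxiliary_dev *padev = NULL;
	int err;

	err = pdsc_auxbus_dev_add(cf, pf, PDS_DEV_TYPE_FWCTL, &padev);
	if (err)
		return err;

	/* later: tears the device down and NULLs the slot; calling it on
	 * an already-NULL slot is a safe no-op */
	pdsc_auxbus_dev_del(cf, pf, &padev);
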
diff --git a/drivers/net/ethernet/amd/pds_core/core.c b/drivers/net/ethernet/amd/pds_core/core.c
index 536635e57727..1eb0d92786f7 100644
--- a/drivers/net/ethernet/amd/pds_core/core.c
+++ b/drivers/net/ethernet/amd/pds_core/core.c
@@ -402,6 +402,9 @@ err_out_uninit:
}
static struct pdsc_viftype pdsc_viftype_defaults[] = {
+ [PDS_DEV_TYPE_FWCTL] = { .name = PDS_DEV_TYPE_FWCTL_STR,
+ .vif_id = PDS_DEV_TYPE_FWCTL,
+ .dl_id = -1 },
[PDS_DEV_TYPE_VDPA] = { .name = PDS_DEV_TYPE_VDPA_STR,
.vif_id = PDS_DEV_TYPE_VDPA,
.dl_id = DEVLINK_PARAM_GENERIC_ID_ENABLE_VNET },
@@ -428,6 +431,10 @@ static int pdsc_viftypes_init(struct pdsc *pdsc)
/* See what the Core device has for support */
vt_support = !!le16_to_cpu(pdsc->dev_ident.vif_types[vt]);
+
+ if (vt == PDS_DEV_TYPE_FWCTL)
+ pdsc->viftype_status[vt].enabled = true;
+
dev_dbg(pdsc->dev, "VIF %s is %ssupported\n",
pdsc->viftype_status[vt].name,
vt_support ? "" : "not ");
diff --git a/drivers/net/ethernet/amd/pds_core/core.h b/drivers/net/ethernet/amd/pds_core/core.h
index 14522d6d5f86..0bf320c43083 100644
--- a/drivers/net/ethernet/amd/pds_core/core.h
+++ b/drivers/net/ethernet/amd/pds_core/core.h
@@ -156,6 +156,7 @@ struct pdsc {
struct dentry *dentry;
struct device *dev;
struct pdsc_dev_bar bars[PDS_CORE_BARS_MAX];
+ struct pds_auxiliary_dev *padev;
struct pdsc_vf *vfs;
int num_vfs;
int vf_id;
@@ -303,8 +304,11 @@ void pdsc_health_thread(struct work_struct *work);
int pdsc_register_notify(struct notifier_block *nb);
void pdsc_unregister_notify(struct notifier_block *nb);
void pdsc_notify(unsigned long event, void *data);
-int pdsc_auxbus_dev_add(struct pdsc *cf, struct pdsc *pf);
-int pdsc_auxbus_dev_del(struct pdsc *cf, struct pdsc *pf);
+int pdsc_auxbus_dev_add(struct pdsc *cf, struct pdsc *pf,
+ enum pds_core_vif_types vt,
+ struct pds_auxiliary_dev **pd_ptr);
+void pdsc_auxbus_dev_del(struct pdsc *cf, struct pdsc *pf,
+ struct pds_auxiliary_dev **pd_ptr);
void pdsc_process_adminq(struct pdsc_qcq *qcq);
void pdsc_work_thread(struct work_struct *work);
diff --git a/drivers/net/ethernet/amd/pds_core/devlink.c b/drivers/net/ethernet/amd/pds_core/devlink.c
index 44971e71991f..c5c787df61a4 100644
--- a/drivers/net/ethernet/amd/pds_core/devlink.c
+++ b/drivers/net/ethernet/amd/pds_core/devlink.c
@@ -56,8 +56,11 @@ int pdsc_dl_enable_set(struct devlink *dl, u32 id,
for (vf_id = 0; vf_id < pdsc->num_vfs; vf_id++) {
struct pdsc *vf = pdsc->vfs[vf_id].vf;
- err = ctx->val.vbool ? pdsc_auxbus_dev_add(vf, pdsc) :
- pdsc_auxbus_dev_del(vf, pdsc);
+ if (ctx->val.vbool)
+ err = pdsc_auxbus_dev_add(vf, pdsc, vt_entry->vif_id,
+ &pdsc->vfs[vf_id].padev);
+ else
+ pdsc_auxbus_dev_del(vf, pdsc, &pdsc->vfs[vf_id].padev);
}
return err;
diff --git a/drivers/net/ethernet/amd/pds_core/main.c b/drivers/net/ethernet/amd/pds_core/main.c
index 660268ff9562..4843f9249a31 100644
--- a/drivers/net/ethernet/amd/pds_core/main.c
+++ b/drivers/net/ethernet/amd/pds_core/main.c
@@ -190,7 +190,8 @@ static int pdsc_init_vf(struct pdsc *vf)
devl_unlock(dl);
pf->vfs[vf->vf_id].vf = vf;
- err = pdsc_auxbus_dev_add(vf, pf);
+ err = pdsc_auxbus_dev_add(vf, pf, PDS_DEV_TYPE_VDPA,
+ &pf->vfs[vf->vf_id].padev);
if (err) {
devl_lock(dl);
devl_unregister(dl);
@@ -264,6 +265,10 @@ static int pdsc_init_pf(struct pdsc *pdsc)
mutex_unlock(&pdsc->config_lock);
+ err = pdsc_auxbus_dev_add(pdsc, pdsc, PDS_DEV_TYPE_FWCTL, &pdsc->padev);
+ if (err)
+ goto err_out_stop;
+
dl = priv_to_devlink(pdsc);
devl_lock(dl);
err = devl_params_register(dl, pdsc_dl_params,
@@ -272,7 +277,7 @@ static int pdsc_init_pf(struct pdsc *pdsc)
devl_unlock(dl);
dev_warn(pdsc->dev, "Failed to register devlink params: %pe\n",
ERR_PTR(err));
- goto err_out_stop;
+ goto err_out_del_dev;
}
hr = devl_health_reporter_create(dl, &pdsc_fw_reporter_ops, 0, pdsc);
@@ -295,6 +300,8 @@ static int pdsc_init_pf(struct pdsc *pdsc)
err_out_unreg_params:
devlink_params_unregister(dl, pdsc_dl_params,
ARRAY_SIZE(pdsc_dl_params));
+err_out_del_dev:
+ pdsc_auxbus_dev_del(pdsc, pdsc, &pdsc->padev);
err_out_stop:
pdsc_stop(pdsc);
err_out_teardown:
@@ -417,7 +424,7 @@ static void pdsc_remove(struct pci_dev *pdev)
pf = pdsc_get_pf_struct(pdsc->pdev);
if (!IS_ERR(pf)) {
- pdsc_auxbus_dev_del(pdsc, pf);
+ pdsc_auxbus_dev_del(pdsc, pf, &pf->vfs[pdsc->vf_id].padev);
pf->vfs[pdsc->vf_id].vf = NULL;
}
} else {
@@ -426,6 +433,7 @@ static void pdsc_remove(struct pci_dev *pdev)
* shut themselves down.
*/
pdsc_sriov_configure(pdev, 0);
+ pdsc_auxbus_dev_del(pdsc, pdsc, &pdsc->padev);
timer_shutdown_sync(&pdsc->wdtimer);
if (pdsc->wq)
@@ -482,7 +490,10 @@ static void pdsc_reset_prepare(struct pci_dev *pdev)
pf = pdsc_get_pf_struct(pdsc->pdev);
if (!IS_ERR(pf))
- pdsc_auxbus_dev_del(pdsc, pf);
+ pdsc_auxbus_dev_del(pdsc, pf,
+ &pf->vfs[pdsc->vf_id].padev);
+ } else {
+ pdsc_auxbus_dev_del(pdsc, pdsc, &pdsc->padev);
}
pdsc_unmap_bars(pdsc);
@@ -527,7 +538,11 @@ static void pdsc_reset_done(struct pci_dev *pdev)
pf = pdsc_get_pf_struct(pdsc->pdev);
if (!IS_ERR(pf))
- pdsc_auxbus_dev_add(pdsc, pf);
+ pdsc_auxbus_dev_add(pdsc, pf, PDS_DEV_TYPE_VDPA,
+ &pf->vfs[pdsc->vf_id].padev);
+ } else {
+ pdsc_auxbus_dev_add(pdsc, pdsc, PDS_DEV_TYPE_FWCTL,
+ &pdsc->padev);
}
}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/dev.c b/drivers/net/ethernet/mellanox/mlx5/core/dev.c
index 9a79674d27f1..891bbbbfbbf1 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/dev.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/dev.c
@@ -228,8 +228,15 @@ enum {
MLX5_INTERFACE_PROTOCOL_VNET,
MLX5_INTERFACE_PROTOCOL_DPLL,
+ MLX5_INTERFACE_PROTOCOL_FWCTL,
};
+static bool is_fwctl_supported(struct mlx5_core_dev *dev)
+{
+ /* fwctl is most useful on PFs; prevent fwctl on SFs for now */
+ return MLX5_CAP_GEN(dev, uctx_cap) && !mlx5_core_is_sf(dev);
+}
+
static const struct mlx5_adev_device {
const char *suffix;
bool (*is_supported)(struct mlx5_core_dev *dev);
@@ -252,6 +259,8 @@ static const struct mlx5_adev_device {
.is_supported = &is_mp_supported },
[MLX5_INTERFACE_PROTOCOL_DPLL] = { .suffix = "dpll",
.is_supported = &is_dpll_supported },
+ [MLX5_INTERFACE_PROTOCOL_FWCTL] = { .suffix = "fwctl",
+ .is_supported = &is_fwctl_supported },
};
int mlx5_adev_idx_alloc(void)