-rw-r--r--Documentation/admin-guide/tainted-kernels.rst5
-rw-r--r--Documentation/userspace-api/fwctl/fwctl-cxl.rst142
-rw-r--r--Documentation/userspace-api/fwctl/fwctl.rst286
-rw-r--r--Documentation/userspace-api/fwctl/index.rst14
-rw-r--r--Documentation/userspace-api/fwctl/pds_fwctl.rst46
-rw-r--r--Documentation/userspace-api/index.rst1
-rw-r--r--Documentation/userspace-api/ioctl/ioctl-number.rst1
-rw-r--r--MAINTAINERS26
-rw-r--r--drivers/Kconfig2
-rw-r--r--drivers/Makefile1
-rw-r--r--drivers/cxl/Kconfig12
-rw-r--r--drivers/cxl/core/Makefile1
-rw-r--r--drivers/cxl/core/core.h17
-rw-r--r--drivers/cxl/core/features.c708
-rw-r--r--drivers/cxl/core/mbox.c124
-rw-r--r--drivers/cxl/core/memdev.c22
-rw-r--r--drivers/cxl/cxlmem.h47
-rw-r--r--drivers/cxl/pci.c8
-rw-r--r--drivers/fwctl/Kconfig33
-rw-r--r--drivers/fwctl/Makefile6
-rw-r--r--drivers/fwctl/main.c421
-rw-r--r--drivers/fwctl/mlx5/Makefile4
-rw-r--r--drivers/fwctl/mlx5/main.c411
-rw-r--r--drivers/fwctl/pds/Makefile4
-rw-r--r--drivers/fwctl/pds/main.c536
-rw-r--r--drivers/net/ethernet/amd/pds_core/auxbus.c44
-rw-r--r--drivers/net/ethernet/amd/pds_core/core.c7
-rw-r--r--drivers/net/ethernet/amd/pds_core/core.h8
-rw-r--r--drivers/net/ethernet/amd/pds_core/devlink.c7
-rw-r--r--drivers/net/ethernet/amd/pds_core/main.c25
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/dev.c9
-rw-r--r--include/cxl/features.h87
-rw-r--r--include/cxl/mailbox.h44
-rw-r--r--include/linux/fwctl.h135
-rw-r--r--include/linux/panic.h3
-rw-r--r--include/linux/pds/pds_adminq.h277
-rw-r--r--include/linux/pds/pds_common.h2
-rw-r--r--include/uapi/cxl/features.h170
-rw-r--r--include/uapi/fwctl/cxl.h56
-rw-r--r--include/uapi/fwctl/fwctl.h141
-rw-r--r--include/uapi/fwctl/mlx5.h36
-rw-r--r--include/uapi/fwctl/pds.h62
-rw-r--r--kernel/panic.c1
-rwxr-xr-xtools/debugging/kernel-chktaint8
-rw-r--r--tools/testing/cxl/Kbuild1
-rw-r--r--tools/testing/cxl/test/mem.c185
46 files changed, 4054 insertions, 132 deletions
diff --git a/Documentation/admin-guide/tainted-kernels.rst b/Documentation/admin-guide/tainted-kernels.rst
index 700aa72eecb1..a0cc017e4424 100644
--- a/Documentation/admin-guide/tainted-kernels.rst
+++ b/Documentation/admin-guide/tainted-kernels.rst
@@ -101,6 +101,7 @@ Bit Log Number Reason that got the kernel tainted
16 _/X 65536 auxiliary taint, defined for and used by distros
17 _/T 131072 kernel was built with the struct randomization plugin
18 _/N 262144 an in-kernel test has been run
+ 19 _/J 524288 userspace used a mutating debug operation in fwctl
=== === ====== ========================================================
Note: The character ``_`` is representing a blank in this table to make reading
@@ -184,3 +185,7 @@ More detailed explanation for tainting
build time.
18) ``N`` if an in-kernel test, such as a KUnit test, has been run.
+
+ 19) ``J`` if userspace opened /dev/fwctl/* and performed a FWCTL_RPC_DEBUG_WRITE
+ to use the device's debugging features. Device debugging features could
+ cause the device to malfunction in undefined ways.
diff --git a/Documentation/userspace-api/fwctl/fwctl-cxl.rst b/Documentation/userspace-api/fwctl/fwctl-cxl.rst
new file mode 100644
index 000000000000..670b43b72949
--- /dev/null
+++ b/Documentation/userspace-api/fwctl/fwctl-cxl.rst
@@ -0,0 +1,142 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+================
+fwctl cxl driver
+================
+
+:Author: Dave Jiang
+
+Overview
+========
+
+The CXL spec defines a set of commands that can be issued to the mailbox of a
+CXL device or switch. It also leaves room for vendor-specific commands to be
+issued to the mailbox. fwctl provides a path to issue a set of allowed mailbox
+commands from user space to the device, moderated by the kernel driver.
+
+The following three commands are used to support CXL Features:
+
+ - CXL spec r3.1 8.2.9.6.1 Get Supported Features (Opcode 0500h)
+ - CXL spec r3.1 8.2.9.6.2 Get Feature (Opcode 0501h)
+ - CXL spec r3.1 8.2.9.6.3 Set Feature (Opcode 0502h)
+
+The "Get Supported Features" return data may be filtered by the kernel driver to
+drop any features that are forbidden by the kernel or being exclusively used by
+the kernel. The driver will set the "Set Feature Size" of the "Get Supported
+Features Supported Feature Entry" to 0 to indicate that the Feature cannot be
+modified. The "Get Supported Features" command and the "Get Features" falls
+under the fwctl policy of FWCTL_RPC_CONFIGURATION.
+
+For "Set Feature" command, the access policy currently is broken down into two
+categories depending on the Set Feature effects reported by the device. If the
+Set Feature will cause immediate change to the device, the fwctl access policy
+must be FWCTL_RPC_DEBUG_WRITE_FULL. The effects for this level are
+"immediate config change", "immediate data change", "immediate policy change",
+or "immediate log change" for the set effects mask. If the effects are "config
+change with cold reset" or "config change with conventional reset", then the
+fwctl access policy must be FWCTL_RPC_DEBUG_WRITE or higher.
+
+fwctl cxl User API
+==================
+
+.. kernel-doc:: include/uapi/fwctl/cxl.h
+
+1. Driver info query
+--------------------
+
+The first step for the application is to issue ioctl(FWCTL_CMD_INFO) with a
+filled out ``struct fwctl_info``. On success the kernel sets
+``fwctl_info.out_device_type`` to ``FWCTL_DEVICE_TYPE_CXL``, and successful
+invocation implies the Features capability is operational. The returned device
+data is a ``struct fwctl_info_cxl`` that contains a reserved 32-bit field that
+should be all zeros.
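+
+A minimal sketch of this query (the ioctl number macro ``FWCTL_INFO`` comes
+from include/uapi/fwctl/fwctl.h; the helper name is hypothetical):
+
+.. code-block:: c
+
+ #include <errno.h>
+ #include <stdint.h>
+ #include <sys/ioctl.h>
+ #include <fwctl/fwctl.h>
+ #include <fwctl/cxl.h>
+
+ static int cxl_fwctl_query_info(int fd)
+ {
+     struct fwctl_info_cxl dev_info = { 0 };
+     struct fwctl_info info = {
+         .size = sizeof(info),
+         .device_data_len = sizeof(dev_info),
+         .out_device_data = (uint64_t)(uintptr_t)&dev_info,
+     };
+
+     /* The kernel fills out_device_type and the device data */
+     if (ioctl(fd, FWCTL_INFO, &info))
+         return -errno;
+     if (info.out_device_type != FWCTL_DEVICE_TYPE_CXL)
+         return -EINVAL;
+
+     return 0;
+ }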
+
+2. Send hardware commands
+-------------------------
+
+The next step is to send the 'Get Supported Features' command to the driver
+from user space via ioctl(FWCTL_RPC). A ``struct fwctl_rpc_cxl`` is pointed to
+by ``fwctl_rpc.in``, and ``struct fwctl_rpc_cxl.in_payload`` points to the
+hardware input structure defined by the CXL spec. ``fwctl_rpc.out`` points to
+the buffer that contains a ``struct fwctl_rpc_cxl_out`` with the hardware
+output data inlined as ``fwctl_rpc_cxl_out.payload``. This command is called
+twice: first to retrieve the number of supported features, and a second time
+to retrieve the specific feature details as the output data.
+
+After getting the specific feature details, a Get/Set Feature command can be
+appropriately programmed and sent. For a "Set Feature" command, the retrieved
+feature info contains an effects field that details what the resulting
+"Set Feature" command will trigger. That informs the user whether the system
+is configured to allow the "Set Feature" command or not.
+
+Code example of a Get Feature
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+.. code-block:: c
+
+ static int cxl_fwctl_rpc_get_test_feature(int fd, struct test_feature *feat_ctx,
+ const uint32_t expected_data)
+ {
+ struct cxl_mbox_get_feat_in *feat_in;
+ struct fwctl_rpc_cxl_out *out;
+ struct fwctl_rpc rpc = {0};
+ struct fwctl_rpc_cxl *in;
+ size_t out_size, in_size;
+ uint32_t val;
+ void *data;
+ int rc;
+
+ in_size = sizeof(*in) + sizeof(*feat_in);
+ rc = posix_memalign((void **)&in, 16, in_size);
+ if (rc)
+ return -ENOMEM;
+ memset(in, 0, in_size);
+ feat_in = &in->get_feat_in;
+
+ uuid_copy(feat_in->uuid, feat_ctx->uuid);
+ feat_in->count = feat_ctx->get_size;
+
+ out_size = sizeof(*out) + feat_ctx->get_size;
+ rc = posix_memalign((void **)&out, 16, out_size);
+ if (rc) {
+ rc = -ENOMEM;
+ goto free_in;
+ }
+ memset(out, 0, out_size);
+
+ in->opcode = CXL_MBOX_OPCODE_GET_FEATURE;
+ in->op_size = sizeof(*feat_in);
+
+ rpc.size = sizeof(rpc);
+ rpc.scope = FWCTL_RPC_CONFIGURATION;
+ rpc.in_len = in_size;
+ rpc.out_len = out_size;
+ rpc.in = (uint64_t)(uintptr_t)in;
+ rpc.out = (uint64_t)(uintptr_t)out;
+
+ rc = send_command(fd, &rpc, out);
+ if (rc)
+ goto free_all;
+
+ data = out->payload;
+ val = le32toh(*(__le32 *)data);
+ if (memcmp(&val, &expected_data, sizeof(val)) != 0) {
+ rc = -ENXIO;
+ goto free_all;
+ }
+
+ free_all:
+ free(out);
+ free_in:
+ free(in);
+ return rc;
+ }
+
+See the CXL CLI test code at
+<https://github.com/pmem/ndctl/tree/main/test/fwctl.c> for detailed user code
+examples of how to exercise this path.
+
+
+fwctl cxl Kernel API
+====================
+
+.. kernel-doc:: drivers/cxl/core/features.c
+ :export:
+.. kernel-doc:: include/cxl/features.h
diff --git a/Documentation/userspace-api/fwctl/fwctl.rst b/Documentation/userspace-api/fwctl/fwctl.rst
new file mode 100644
index 000000000000..fdcfe418a83f
--- /dev/null
+++ b/Documentation/userspace-api/fwctl/fwctl.rst
@@ -0,0 +1,286 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+===============
+fwctl subsystem
+===============
+
+:Author: Jason Gunthorpe
+
+Overview
+========
+
+Modern devices contain extensive amounts of FW, and in many cases, are largely
+software-defined pieces of hardware. The evolution of this approach is largely a
+reaction to Moore's Law where a chip tape out is now highly expensive, and the
+chip design is extremely large. Replacing fixed HW logic with a flexible and
+tightly coupled FW/HW combination is an effective risk mitigation against chip
+respin. Problems in the HW design can be counteracted in device FW. This is
+especially true for devices which present a stable and backwards compatible
+interface to the operating system driver (such as NVMe).
+
+The FW layer in devices has grown to incredible size and devices frequently
+integrate clusters of fast processors to run it. For example, mlx5 devices have
+over 30MB of FW code, and big configurations operate with over 1GB of FW managed
+runtime state.
+
+The availability of such a flexible layer has created quite a variety in the
+industry where single pieces of silicon are now configurable software-defined
+devices and can operate in substantially different ways depending on the need.
+Further, we often see cases where specific sites wish to operate devices in ways
+that are highly specialized and require applications that have been tailored to
+their unique configuration.
+
+Further, devices have become multi-functional and integrated to the point they
+no longer fit neatly into the kernel's division of subsystems. Modern
+multi-functional devices have drivers, such as bnxt/ice/mlx5/pds, that span many
+subsystems while sharing the underlying hardware using the auxiliary device
+system.
+
+All together this creates a challenge for the operating system, where devices
+have an expansive FW environment that needs robust device-specific debugging
+support, and FW-driven functionality that is not well suited to “generic”
+interfaces. fwctl seeks to allow access to the full device functionality from
+user space in the areas of debuggability, management, and first-boot/nth-boot
+provisioning.
+
+fwctl is aimed at the common device design pattern where the OS and FW
+communicate via an RPC message layer constructed with a queue or mailbox scheme.
+In this case the driver will typically have some layer to deliver RPC messages
+and collect RPC responses from device FW. The in-kernel subsystem drivers that
+operate the device for its primary purposes will use these RPCs to build their
+drivers, but devices also usually have a set of ancillary RPCs that don't really
+fit into any specific subsystem. For example, a HW RAID controller is primarily
+operated by the block layer but also comes with a set of RPCs to administer the
+construction of drives within the HW RAID.
+
+In the past, when devices were more single-function, individual subsystems
+would grow different approaches to solving some of these common problems. For
+instance, monitoring device health, manipulating its FLASH, debugging the FW,
+and provisioning all have various unique interfaces across the kernel.
+
+fwctl's purpose is to define a common set of limited rules, described below,
+that allow user space to securely construct and execute RPCs inside device FW.
+The rules serve as an agreement between the operating system and FW on how to
+correctly design the RPC interface. As a uAPI the subsystem provides a thin
+layer of discovery and a generic uAPI to deliver the RPCs and collect the
+response. It supports a system of user space libraries and tools which will
+use this interface to control the device using the device native protocols.
+
+Scope of Action
+---------------
+
+fwctl drivers are strictly restricted to being a way to operate the device FW.
+It is not an avenue to access random kernel internals, or other operating system
+SW states.
+
+fwctl instances must operate on a well-defined device function, and the device
+should have a well-defined security model for what scope within the physical
+device the function is permitted to access. For instance, the most complex PCIe
+device today may broadly have several function-level scopes:
+
+ 1. A privileged function with full access to the on-device global state and
+ configuration
+
+ 2. Multiple hypervisor functions with control over itself and child functions
+ used with VMs
+
+ 3. Multiple VM functions tightly scoped within the VM
+
+The device may create a logical parent/child relationship between these scopes.
+For instance a child VM's FW may be within the scope of the hypervisor FW. It is
+quite common in the VFIO world that the hypervisor environment has a complex
+provisioning/profiling/configuration responsibility for the function VFIO
+assigns to the VM.
+
+Further, within the function, devices often have RPC commands that fall within
+some general scopes of action (see enum fwctl_rpc_scope):
+
+ 1. Access to function & child configuration, FLASH, etc. that becomes live at a
+ function reset. Access to function & child runtime configuration that is
+ transparent or non-disruptive to any driver or VM.
+
+ 2. Read-only access to function debug information that may report on FW objects
+ in the function & child, including FW objects owned by other kernel
+ subsystems.
+
+ 3. Write access to function & child debug information strictly compatible with
+ the principles of kernel lockdown and kernel integrity protection. Triggers
+ a kernel Taint.
+
+ 4. Full debug device access. Triggers a kernel Taint, requires CAP_SYS_RAWIO.
+
+User space will provide a scope label on each RPC and the kernel must enforce the
+above CAPs and taints based on that scope. A combination of kernel and FW can
+enforce that RPCs are placed in the correct scope by user space.
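+
+These scope levels are declared in include/uapi/fwctl/fwctl.h and map one to
+one to the four categories above::
+
+ enum fwctl_rpc_scope {
+     FWCTL_RPC_CONFIGURATION = 0,
+     FWCTL_RPC_DEBUG_READ_ONLY = 1,
+     FWCTL_RPC_DEBUG_WRITE = 2,
+     FWCTL_RPC_DEBUG_WRITE_FULL = 3,
+ };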
+
+Denied behavior
+---------------
+
+There are many things this interface must not allow user space to do (without a
+Taint or CAP), broadly derived from the principles of kernel lockdown. Some
+examples:
+
+ 1. DMA to/from arbitrary memory, hang the system, compromise FW integrity with
+ untrusted code, or otherwise compromise device or system security and
+ integrity.
+
+ 2. Provide an abnormal “back door” to kernel drivers. No manipulation of kernel
+ objects owned by kernel drivers.
+
+ 3. Directly configure or otherwise control kernel drivers. A subsystem kernel
+ driver can react to the device configuration at function reset/driver load
+ time, but otherwise must not be coupled to fwctl.
+
+ 4. Operate the HW in a way that overlaps with the core purpose of another
+ primary kernel subsystem, such as read/write to LBAs, send/receive of
+ network packets, or operate an accelerator's data plane.
+
+fwctl is not a replacement for device direct access subsystems like uacce or
+VFIO.
+
+Operations exposed through fwctl's non-tainting interfaces should be fully
+sharable with other users of the device. For instance, exposing an RPC through
+fwctl should never prevent a kernel subsystem from also concurrently using that
+same RPC or hardware unit down the road. In such cases fwctl will be less
+important than proper kernel subsystems that eventually emerge. Mistakes in this
+area resulting in clashes will be resolved in favour of a kernel implementation.
+
+fwctl User API
+==============
+
+.. kernel-doc:: include/uapi/fwctl/fwctl.h
+.. kernel-doc:: include/uapi/fwctl/mlx5.h
+.. kernel-doc:: include/uapi/fwctl/pds.h
+
+sysfs Class
+-----------
+
+fwctl has a sysfs class (/sys/class/fwctl/fwctlNN/) and character devices
+(/dev/fwctl/fwctlNN) with a simple numbered scheme. The character device
+operates the ioctl uAPI described above.
+
+fwctl devices can be related to driver components in other subsystems through
+sysfs::
+
+ $ ls /sys/class/fwctl/fwctl0/device/infiniband/
+ ibp0s10f0
+
+ $ ls /sys/class/infiniband/ibp0s10f0/device/fwctl/
+ fwctl0/
+
+ $ ls /sys/devices/pci0000:00/0000:00:0a.0/fwctl/fwctl0
+ dev device power subsystem uevent
+
+User space Community
+--------------------
+
+Drawing inspiration from nvme-cli, participating on the kernel side must come
+with a user space implementation in a common TBD git tree, at a minimum to
+usefully operate the kernel driver. Providing such an implementation is a
+pre-condition to merging a kernel driver.
+
+The goal is to build user space community around some of the shared problems
+we all have, and ideally develop some common user space programs with some
+starting themes of:
+
+ - Device in-field debugging
+
+ - HW provisioning
+
+ - VFIO child device profiling before VM boot
+
+ - Confidential Compute topics (attestation, secure provisioning)
+
+that stretch across all subsystems in the kernel. fwupd is a great example of
+how an excellent user space experience can emerge out of kernel-side diversity.
+
+fwctl Kernel API
+================
+
+.. kernel-doc:: drivers/fwctl/main.c
+ :export:
+.. kernel-doc:: include/linux/fwctl.h
+
+fwctl Driver design
+-------------------
+
+In many cases a fwctl driver is going to be part of a larger cross-subsystem
+device possibly using the auxiliary_device mechanism. In that case several
+subsystems are going to be sharing the same device and FW interface layer so the
+device design must already provide for isolation and cooperation between kernel
+subsystems. fwctl should fit into that same model.
+
+Part of the driver should include a description of how its scope restrictions
+and security model work. The driver and FW together must ensure that RPCs
+provided by user space are mapped to the appropriate scope. If the validation is
+done in the driver then the validation can read a 'command effects' report from
+the device, or hardwire the enforcement. If the validation is done in the FW,
+then the driver should pass the fwctl_rpc_scope to the FW along with the command.
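+
+A sketch of the resulting ops wiring, assuming hypothetical ``my_*`` names and
+following the ``struct fwctl_ops`` callbacks documented above::
+
+ static int my_open_uctx(struct fwctl_uctx *uctx)
+ {
+     return 0;
+ }
+
+ static void my_close_uctx(struct fwctl_uctx *uctx)
+ {
+ }
+
+ static void *my_fw_rpc(struct fwctl_uctx *uctx, enum fwctl_rpc_scope scope,
+                        void *in, size_t in_len, size_t *out_len)
+ {
+     /*
+      * Decide the scope this RPC requires, reject callers that declared
+      * a weaker scope, then deliver the command to FW and return the
+      * response buffer.
+      */
+     if (scope < FWCTL_RPC_DEBUG_WRITE)
+         return ERR_PTR(-EPERM);
+     return ERR_PTR(-EOPNOTSUPP); /* device-specific delivery goes here */
+ }
+
+ static const struct fwctl_ops my_fwctl_ops = {
+     .device_type = FWCTL_DEVICE_TYPE_CXL, /* this driver's type */
+     .uctx_size = sizeof(struct fwctl_uctx),
+     .open_uctx = my_open_uctx,
+     .close_uctx = my_close_uctx,
+     .fw_rpc = my_fw_rpc,
+ };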
+
+The driver and FW must cooperate to ensure that either fwctl cannot allocate
+any FW resources, or that any resources it does allocate are freed on FD
+closure. A driver primarily constructed around FW RPCs may find that its core
+PCI function and RPC layer belong under fwctl, with auxiliary devices
+connecting to other subsystems.
+
+Each device type must be mindful of Linux's philosophy for stable ABI. The FW
+RPC interface does not have to meet a strictly stable ABI, but it does need to
+meet an expectation that userspace tools that are deployed and in significant
+use don't needlessly break. FW upgrade and kernel upgrade should keep widely
+deployed tooling working.
+
+Development and debugging focused RPCs under more permissive scopes can have
+less stability if the tools using them are only run under exceptional
+circumstances and not for everyday use of the device. Debugging tools may even
+require exact version matching, as they may need something similar to DWARF
+debug information from the FW binary.
+
+Security Response
+=================
+
+The kernel remains the gatekeeper for this interface. If violations of the
+scopes, security or isolation principles are found, we have options to let
+devices fix them with a FW update, push a kernel patch to parse and block RPC
+commands, or push a kernel patch to block entire firmware versions/devices.
+
+While the kernel can always directly parse and restrict RPCs, it is expected
+that the existing kernel pattern of allowing drivers to delegate validation to
+FW will be a useful design.
+
+Existing Similar Examples
+=========================
+
+The approach described in this document is not a new idea. Direct, or near
+direct device access has been offered by the kernel in different areas for
+decades. With more devices wanting to follow this design pattern it is becoming
+clear that it is not entirely well understood and, more importantly, the
+security considerations are not well defined or agreed upon.
+
+Some examples:
+
+ - HW RAID controllers. This includes RPCs to do things like compose drives into
+ a RAID volume, configure RAID parameters, monitor the HW and more.
+
+ - Baseboard managers. RPCs for configuring settings in the device and more.
+
+ - NVMe vendor command capsules. nvme-cli provides access to some monitoring
+ functions that different products have defined, but more exist.
+
+ - CXL also has a NVMe-like vendor command system.
+
+ - DRM allows user space drivers to send commands to the device via kernel
+ mediation.
+
+ - RDMA allows user space drivers to directly push commands to the device
+ without kernel involvement.
+
+ - Various “raw” APIs, raw HID (SDL2), raw USB, NVMe Generic Interface, etc.
+
+The first four are examples of areas that fwctl intends to cover. The latter
+three are examples of denied behavior, as they fully overlap with the primary
+purpose of a kernel subsystem.
+
+A key lesson learned from these past efforts is the importance of having a
+common user space project to use as a pre-condition for obtaining a kernel
+driver. Developing a good community around useful software in user space is
+key to getting companies to fund participation to enable their products.
diff --git a/Documentation/userspace-api/fwctl/index.rst b/Documentation/userspace-api/fwctl/index.rst
new file mode 100644
index 000000000000..316ac456ad3b
--- /dev/null
+++ b/Documentation/userspace-api/fwctl/index.rst
@@ -0,0 +1,14 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+Firmware Control (FWCTL) Userspace API
+======================================
+
+A framework that defines a common set of limited rules allowing user space
+to securely construct and execute RPCs inside device firmware.
+
+.. toctree::
+ :maxdepth: 1
+
+ fwctl
+ fwctl-cxl
+ pds_fwctl
diff --git a/Documentation/userspace-api/fwctl/pds_fwctl.rst b/Documentation/userspace-api/fwctl/pds_fwctl.rst
new file mode 100644
index 000000000000..b5a31f82c883
--- /dev/null
+++ b/Documentation/userspace-api/fwctl/pds_fwctl.rst
@@ -0,0 +1,46 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+================
+fwctl pds driver
+================
+
+:Author: Shannon Nelson
+
+Overview
+========
+
+The PDS Core device makes a fwctl service available through an
+auxiliary_device named pds_core.fwctl.N. The pds_fwctl driver binds to
+this device and registers itself with the fwctl subsystem. The resulting
+userspace interface is used by an application that is part of the
+AMD Pensando software package for the Distributed Service Card (DSC).
+
+The pds_fwctl driver has little knowledge of the firmware's internals.
+It only knows how to send commands through pds_core's message queue to the
+firmware for fwctl requests. The set of fwctl operations available
+depends on the firmware in the DSC, and the userspace application
+version must match the firmware so that they can talk to each other.
+
+When a connection is created, the pds_fwctl driver requests from the
+firmware a list of firmware object endpoints and, for each endpoint, a
+list of the operations that endpoint supports.
+
+Each operation description includes a firmware-defined command attribute
+that maps to the FWCTL scope levels. The driver translates those firmware
+values into the FWCTL scope values, which can then be used for filtering
+scoped user requests.
+
+pds_fwctl User API
+==================
+
+Each RPC request includes the target endpoint, the operation id, and the in
+and out buffer lengths and pointers. The driver verifies the existence
+of the requested endpoint and operation, then checks the request scope
+against the required scope of the operation. The request is then put
+together with the request data and sent through pds_core's message queue
+to the firmware, and the results are returned to the caller.
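+
+As a minimal sketch, a request is issued through the generic FWCTL_RPC ioctl;
+the in/out buffers carry the pds request and response structures defined in
+include/uapi/fwctl/pds.h (their field layout is product specific and omitted
+here, and the scope value is illustrative):
+
+.. code-block:: c
+
+ #include <stdint.h>
+ #include <sys/ioctl.h>
+ #include <fwctl/fwctl.h>
+
+ static int pds_fwctl_rpc(int fd, void *req, size_t req_len,
+                          void *resp, size_t resp_len)
+ {
+     struct fwctl_rpc rpc = {
+         .size = sizeof(rpc),
+         .scope = FWCTL_RPC_CONFIGURATION,
+         .in_len = req_len,
+         .out_len = resp_len,
+         .in = (uint64_t)(uintptr_t)req,
+         .out = (uint64_t)(uintptr_t)resp,
+     };
+
+     /* The driver checks the endpoint, operation, and scope before
+      * relaying the request through pds_core's message queue. */
+     return ioctl(fd, FWCTL_RPC, &rpc);
+ }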
+
+The RPC endpoints, operations, and buffer contents are defined by the
+particular firmware package in the device, which varies across the
+available product configurations. The details are available in the
+specific product SDK documentation.
diff --git a/Documentation/userspace-api/index.rst b/Documentation/userspace-api/index.rst
index 9cbe4390c872..b8c73be4fb11 100644
--- a/Documentation/userspace-api/index.rst
+++ b/Documentation/userspace-api/index.rst
@@ -46,6 +46,7 @@ Devices and I/O
accelerators/ocxl
dma-buf-heaps
dma-buf-alloc-exchange
+ fwctl/index
gpio/index
iommufd
media/index
diff --git a/Documentation/userspace-api/ioctl/ioctl-number.rst b/Documentation/userspace-api/ioctl/ioctl-number.rst
index 3cf4183b767e..3d1cd7ad9d67 100644
--- a/Documentation/userspace-api/ioctl/ioctl-number.rst
+++ b/Documentation/userspace-api/ioctl/ioctl-number.rst
@@ -333,6 +333,7 @@ Code Seq# Include File Comments
0x97 00-7F fs/ceph/ioctl.h Ceph file system
0x99 00-0F 537-Addinboard driver
<mailto:buk@buks.ipn.de>
+0x9A 00-0F include/uapi/fwctl/fwctl.h
0xA0 all linux/sdp/sdp.h Industrial Device Project
<mailto:kenji@bitgate.com>
0xA1 0 linux/vtpm_proxy.h TPM Emulator Proxy Driver
diff --git a/MAINTAINERS b/MAINTAINERS
index 3347bd1dd615..85061326dcb0 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -5886,6 +5886,7 @@ M: Dan Williams <dan.j.williams@intel.com>
L: linux-cxl@vger.kernel.org
S: Maintained
F: Documentation/driver-api/cxl
+F: Documentation/userspace-api/fwctl/fwctl-cxl.rst
F: drivers/cxl/
F: include/cxl/
F: include/uapi/linux/cxl_mem.h
@@ -9686,6 +9687,31 @@ F: kernel/futex/*
F: tools/perf/bench/futex*
F: tools/testing/selftests/futex/
+FWCTL SUBSYSTEM
+M: Dave Jiang <dave.jiang@intel.com>
+M: Jason Gunthorpe <jgg@nvidia.com>
+M: Saeed Mahameed <saeedm@nvidia.com>
+R: Jonathan Cameron <Jonathan.Cameron@huawei.com>
+S: Maintained
+F: Documentation/userspace-api/fwctl/
+F: drivers/fwctl/
+F: include/linux/fwctl.h
+F: include/uapi/fwctl/
+
+FWCTL MLX5 DRIVER
+M: Saeed Mahameed <saeedm@nvidia.com>
+R: Itay Avraham <itayavr@nvidia.com>
+L: linux-kernel@vger.kernel.org
+S: Maintained
+F: drivers/fwctl/mlx5/
+
+FWCTL PDS DRIVER
+M: Brett Creeley <brett.creeley@amd.com>
+R: Shannon Nelson <shannon.nelson@amd.com>
+L: linux-kernel@vger.kernel.org
+S: Maintained
+F: drivers/fwctl/pds/
+
GALAXYCORE GC0308 CAMERA SENSOR DRIVER
M: Sebastian Reichel <sre@kernel.org>
L: linux-media@vger.kernel.org
diff --git a/drivers/Kconfig b/drivers/Kconfig
index 7bdad836fc62..7c556c5ac4fd 100644
--- a/drivers/Kconfig
+++ b/drivers/Kconfig
@@ -21,6 +21,8 @@ source "drivers/connector/Kconfig"
source "drivers/firmware/Kconfig"
+source "drivers/fwctl/Kconfig"
+
source "drivers/gnss/Kconfig"
source "drivers/mtd/Kconfig"
diff --git a/drivers/Makefile b/drivers/Makefile
index 45d1c3e630f7..b5749cf67044 100644
--- a/drivers/Makefile
+++ b/drivers/Makefile
@@ -135,6 +135,7 @@ obj-y += ufs/
obj-$(CONFIG_MEMSTICK) += memstick/
obj-$(CONFIG_INFINIBAND) += infiniband/
obj-y += firmware/
+obj-$(CONFIG_FWCTL) += fwctl/
obj-$(CONFIG_CRYPTO) += crypto/
obj-$(CONFIG_SUPERH) += sh/
obj-y += clocksource/
diff --git a/drivers/cxl/Kconfig b/drivers/cxl/Kconfig
index 876469e23f7a..8ac1e9d70eeb 100644
--- a/drivers/cxl/Kconfig
+++ b/drivers/cxl/Kconfig
@@ -7,6 +7,7 @@ menuconfig CXL_BUS
select PCI_DOE
select FIRMWARE_TABLE
select NUMA_KEEP_MEMINFO if NUMA_MEMBLKS
+ select FWCTL if CXL_FEATURES
help
CXL is a bus that is electrically compatible with PCI Express, but
layers three protocols on that signalling (CXL.io, CXL.cache, and
@@ -102,6 +103,17 @@ config CXL_MEM
If unsure say 'm'.
+config CXL_FEATURES
+ bool "CXL: Features"
+ depends on CXL_PCI
+ help
+ Enable support for CXL Features. A CXL device that includes a mailbox
+ supports commands that allow listing, getting, and setting of
+ optionally defined features such as memory sparing or post package
+ sparing. Vendors may define custom features for the device.
+
+ If unsure say 'n'.
+
config CXL_PORT
default CXL_BUS
tristate
diff --git a/drivers/cxl/core/Makefile b/drivers/cxl/core/Makefile
index 9259bcc6773c..b0bfbd9eac9b 100644
--- a/drivers/cxl/core/Makefile
+++ b/drivers/cxl/core/Makefile
@@ -16,3 +16,4 @@ cxl_core-y += pmu.o
cxl_core-y += cdat.o
cxl_core-$(CONFIG_TRACING) += trace.o
cxl_core-$(CONFIG_CXL_REGION) += region.o
+cxl_core-$(CONFIG_CXL_FEATURES) += features.o
diff --git a/drivers/cxl/core/core.h b/drivers/cxl/core/core.h
index 800466f96a68..17e99a25c29a 100644
--- a/drivers/cxl/core/core.h
+++ b/drivers/cxl/core/core.h
@@ -4,6 +4,8 @@
#ifndef __CXL_CORE_H__
#define __CXL_CORE_H__
+#include <cxl/mailbox.h>
+
extern const struct device_type cxl_nvdimm_bridge_type;
extern const struct device_type cxl_nvdimm_type;
extern const struct device_type cxl_pmu_type;
@@ -65,9 +67,9 @@ static inline void cxl_region_exit(void)
struct cxl_send_command;
struct cxl_mem_query_commands;
-int cxl_query_cmd(struct cxl_memdev *cxlmd,
+int cxl_query_cmd(struct cxl_mailbox *cxl_mbox,
struct cxl_mem_query_commands __user *q);
-int cxl_send_cmd(struct cxl_memdev *cxlmd, struct cxl_send_command __user *s);
+int cxl_send_cmd(struct cxl_mailbox *cxl_mbox, struct cxl_send_command __user *s);
void __iomem *devm_cxl_iomap_block(struct device *dev, resource_size_t addr,
resource_size_t length);
@@ -115,4 +117,15 @@ bool cxl_need_node_perf_attrs_update(int nid);
int cxl_port_get_switch_dport_bandwidth(struct cxl_port *port,
struct access_coordinate *c);
+#ifdef CONFIG_CXL_FEATURES
+size_t cxl_get_feature(struct cxl_mailbox *cxl_mbox, const uuid_t *feat_uuid,
+ enum cxl_get_feat_selection selection,
+ void *feat_out, size_t feat_out_size, u16 offset,
+ u16 *return_code);
+int cxl_set_feature(struct cxl_mailbox *cxl_mbox, const uuid_t *feat_uuid,
+ u8 feat_version, const void *feat_data,
+ size_t feat_data_size, u32 feat_flag, u16 offset,
+ u16 *return_code);
+#endif
+
#endif /* __CXL_CORE_H__ */
diff --git a/drivers/cxl/core/features.c b/drivers/cxl/core/features.c
new file mode 100644
index 000000000000..f4daefe3180e
--- /dev/null
+++ b/drivers/cxl/core/features.c
@@ -0,0 +1,708 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/* Copyright(c) 2024-2025 Intel Corporation. All rights reserved. */
+#include <linux/fwctl.h>
+#include <linux/device.h>
+#include <cxl/mailbox.h>
+#include <cxl/features.h>
+#include <uapi/fwctl/cxl.h>
+#include "cxl.h"
+#include "core.h"
+#include "cxlmem.h"
+
+/* All the features below are exclusive to the kernel */
+static const uuid_t cxl_exclusive_feats[] = {
+ CXL_FEAT_PATROL_SCRUB_UUID,
+ CXL_FEAT_ECS_UUID,
+ CXL_FEAT_SPPR_UUID,
+ CXL_FEAT_HPPR_UUID,
+ CXL_FEAT_CACHELINE_SPARING_UUID,
+ CXL_FEAT_ROW_SPARING_UUID,
+ CXL_FEAT_BANK_SPARING_UUID,
+ CXL_FEAT_RANK_SPARING_UUID,
+};
+
+static bool is_cxl_feature_exclusive_by_uuid(const uuid_t *uuid)
+{
+ for (int i = 0; i < ARRAY_SIZE(cxl_exclusive_feats); i++) {
+ if (uuid_equal(uuid, &cxl_exclusive_feats[i]))
+ return true;
+ }
+
+ return false;
+}
+
+static bool is_cxl_feature_exclusive(struct cxl_feat_entry *entry)
+{
+ return is_cxl_feature_exclusive_by_uuid(&entry->uuid);
+}
+
+inline struct cxl_features_state *to_cxlfs(struct cxl_dev_state *cxlds)
+{
+ return cxlds->cxlfs;
+}
+EXPORT_SYMBOL_NS_GPL(to_cxlfs, "CXL");
+
+static int cxl_get_supported_features_count(struct cxl_mailbox *cxl_mbox)
+{
+ struct cxl_mbox_get_sup_feats_out mbox_out;
+ struct cxl_mbox_get_sup_feats_in mbox_in;
+ struct cxl_mbox_cmd mbox_cmd;
+ int rc;
+
+ memset(&mbox_in, 0, sizeof(mbox_in));
+ mbox_in.count = cpu_to_le32(sizeof(mbox_out));
+ memset(&mbox_out, 0, sizeof(mbox_out));
+ mbox_cmd = (struct cxl_mbox_cmd) {
+ .opcode = CXL_MBOX_OP_GET_SUPPORTED_FEATURES,
+ .size_in = sizeof(mbox_in),
+ .payload_in = &mbox_in,
+ .size_out = sizeof(mbox_out),
+ .payload_out = &mbox_out,
+ .min_out = sizeof(mbox_out),
+ };
+ rc = cxl_internal_send_cmd(cxl_mbox, &mbox_cmd);
+ if (rc < 0)
+ return rc;
+
+ return le16_to_cpu(mbox_out.supported_feats);
+}
+
+static struct cxl_feat_entries *
+get_supported_features(struct cxl_features_state *cxlfs)
+{
+ int remain_feats, max_size, max_feats, start, rc, hdr_size;
+ struct cxl_mailbox *cxl_mbox = &cxlfs->cxlds->cxl_mbox;
+ int feat_size = sizeof(struct cxl_feat_entry);
+ struct cxl_mbox_get_sup_feats_in mbox_in;
+ struct cxl_feat_entry *entry;
+ struct cxl_mbox_cmd mbox_cmd;
+ int user_feats = 0;
+ int count;
+
+ count = cxl_get_supported_features_count(cxl_mbox);
+ if (count <= 0)
+ return NULL;
+
+ struct cxl_feat_entries *entries __free(kvfree) =
+ kvmalloc(struct_size(entries, ent, count), GFP_KERNEL);
+ if (!entries)
+ return NULL;
+
+ struct cxl_mbox_get_sup_feats_out *mbox_out __free(kvfree) =
+ kvmalloc(cxl_mbox->payload_size, GFP_KERNEL);
+ if (!mbox_out)
+ return NULL;
+
+ hdr_size = struct_size(mbox_out, ents, 0);
+ max_size = cxl_mbox->payload_size - hdr_size;
+ /* max feat entries that can fit in mailbox max payload size */
+ max_feats = max_size / feat_size;
+ entry = entries->ent;
+
+ start = 0;
+ remain_feats = count;
+ do {
+ int retrieved, alloc_size, copy_feats;
+ int num_entries;
+
+ if (remain_feats > max_feats) {
+ alloc_size = struct_size(mbox_out, ents, max_feats);
+ remain_feats = remain_feats - max_feats;
+ copy_feats = max_feats;
+ } else {
+ alloc_size = struct_size(mbox_out, ents, remain_feats);
+ copy_feats = remain_feats;
+ remain_feats = 0;
+ }
+
+ memset(&mbox_in, 0, sizeof(mbox_in));
+ mbox_in.count = cpu_to_le32(alloc_size);
+ mbox_in.start_idx = cpu_to_le16(start);
+ memset(mbox_out, 0, alloc_size);
+ mbox_cmd = (struct cxl_mbox_cmd) {
+ .opcode = CXL_MBOX_OP_GET_SUPPORTED_FEATURES,
+ .size_in = sizeof(mbox_in),
+ .payload_in = &mbox_in,
+ .size_out = alloc_size,
+ .payload_out = mbox_out,
+ .min_out = hdr_size,
+ };
+ rc = cxl_internal_send_cmd(cxl_mbox, &mbox_cmd);
+ if (rc < 0)
+ return NULL;
+
+ if (mbox_cmd.size_out <= hdr_size)
+ return NULL;
+
+ /*
+ * Make sure retrieved out buffer is multiple of feature
+ * entries.
+ */
+ retrieved = mbox_cmd.size_out - hdr_size;
+ if (retrieved % feat_size)
+ return NULL;
+
+ num_entries = le16_to_cpu(mbox_out->num_entries);
+ /*
+ * If the reported output entries * defined entry size !=
+ * retrieved output bytes, then the output package is incorrect.
+ */
+ if (num_entries * feat_size != retrieved)
+ return NULL;
+
+ memcpy(entry, mbox_out->ents, retrieved);
+ for (int i = 0; i < num_entries; i++) {
+ if (!is_cxl_feature_exclusive(entry + i))
+ user_feats++;
+ }
+ entry += num_entries;
+ /*
+ * If the number of output entries is less than expected, add the
+ * remaining entries to the next batch.
+ */
+ remain_feats += copy_feats - num_entries;
+ start += num_entries;
+ } while (remain_feats);
+
+ entries->num_features = count;
+ entries->num_user_features = user_feats;
+
+ return no_free_ptr(entries);
+}
+
+static void free_cxlfs(void *_cxlfs)
+{
+ struct cxl_features_state *cxlfs = _cxlfs;
+ struct cxl_dev_state *cxlds = cxlfs->cxlds;
+
+ cxlds->cxlfs = NULL;
+ kvfree(cxlfs->entries);
+ kfree(cxlfs);
+}
+
+/**
+ * devm_cxl_setup_features() - Allocate and initialize features context
+ * @cxlds: CXL device context
+ *
+ * Return: 0 on success or -errno on failure.
+ */
+int devm_cxl_setup_features(struct cxl_dev_state *cxlds)
+{
+ struct cxl_mailbox *cxl_mbox = &cxlds->cxl_mbox;
+
+ if (cxl_mbox->feat_cap < CXL_FEATURES_RO)
+ return -ENODEV;
+
+ struct cxl_features_state *cxlfs __free(kfree) =
+ kzalloc(sizeof(*cxlfs), GFP_KERNEL);
+ if (!cxlfs)
+ return -ENOMEM;
+
+ cxlfs->cxlds = cxlds;
+
+ cxlfs->entries = get_supported_features(cxlfs);
+ if (!cxlfs->entries)
+ return -ENOMEM;
+
+ cxlds->cxlfs = cxlfs;
+
+ return devm_add_action_or_reset(cxlds->dev, free_cxlfs, no_free_ptr(cxlfs));
+}
+EXPORT_SYMBOL_NS_GPL(devm_cxl_setup_features, "CXL");
+
+size_t cxl_get_feature(struct cxl_mailbox *cxl_mbox, const uuid_t *feat_uuid,
+ enum cxl_get_feat_selection selection,
+ void *feat_out, size_t feat_out_size, u16 offset,
+ u16 *return_code)
+{
+ size_t data_to_rd_size, size_out;
+ struct cxl_mbox_get_feat_in pi;
+ struct cxl_mbox_cmd mbox_cmd;
+ size_t data_rcvd_size = 0;
+ int rc;
+
+ if (return_code)
+ *return_code = CXL_MBOX_CMD_RC_INPUT;
+
+ if (!feat_out || !feat_out_size)
+ return 0;
+
+ size_out = min(feat_out_size, cxl_mbox->payload_size);
+ uuid_copy(&pi.uuid, feat_uuid);
+ pi.selection = selection;
+ do {
+ data_to_rd_size = min(feat_out_size - data_rcvd_size,
+ cxl_mbox->payload_size);
+ pi.offset = cpu_to_le16(offset + data_rcvd_size);
+ pi.count = cpu_to_le16(data_to_rd_size);
+
+ mbox_cmd = (struct cxl_mbox_cmd) {
+ .opcode = CXL_MBOX_OP_GET_FEATURE,
+ .size_in = sizeof(pi),
+ .payload_in = &pi,
+ .size_out = size_out,
+ .payload_out = feat_out + data_rcvd_size,
+ .min_out = data_to_rd_size,
+ };
+ rc = cxl_internal_send_cmd(cxl_mbox, &mbox_cmd);
+ if (rc < 0 || !mbox_cmd.size_out) {
+ if (return_code)
+ *return_code = mbox_cmd.return_code;
+ return 0;
+ }
+ data_rcvd_size += mbox_cmd.size_out;
+ } while (data_rcvd_size < feat_out_size);
+
+ if (return_code)
+ *return_code = CXL_MBOX_CMD_RC_SUCCESS;
+
+ return data_rcvd_size;
+}
+
+/*
+ * FEAT_DATA_MIN_PAYLOAD_SIZE - minimum number of extra bytes that should be
+ * available in the mailbox for storing the actual feature data so that
+ * the feature data transfer works as expected.
+ */
+#define FEAT_DATA_MIN_PAYLOAD_SIZE 10
+int cxl_set_feature(struct cxl_mailbox *cxl_mbox,
+ const uuid_t *feat_uuid, u8 feat_version,
+ const void *feat_data, size_t feat_data_size,
+ u32 feat_flag, u16 offset, u16 *return_code)
+{
+ size_t data_in_size, data_sent_size = 0;
+ struct cxl_mbox_cmd mbox_cmd;
+ size_t hdr_size;
+
+ if (return_code)
+ *return_code = CXL_MBOX_CMD_RC_INPUT;
+
+ struct cxl_mbox_set_feat_in *pi __free(kfree) =
+ kzalloc(cxl_mbox->payload_size, GFP_KERNEL);
+ if (!pi)
+ return -ENOMEM;
+
+ uuid_copy(&pi->uuid, feat_uuid);
+ pi->version = feat_version;
+ feat_flag &= ~CXL_SET_FEAT_FLAG_DATA_TRANSFER_MASK;
+ feat_flag |= CXL_SET_FEAT_FLAG_DATA_SAVED_ACROSS_RESET;
+ hdr_size = sizeof(pi->hdr);
+ /*
+ * Check minimum mbox payload size is available for
+ * the feature data transfer.
+ */
+ if (hdr_size + FEAT_DATA_MIN_PAYLOAD_SIZE > cxl_mbox->payload_size)
+ return -ENOMEM;
+
+ if (hdr_size + feat_data_size <= cxl_mbox->payload_size) {
+ pi->flags = cpu_to_le32(feat_flag |
+ CXL_SET_FEAT_FLAG_FULL_DATA_TRANSFER);
+ data_in_size = feat_data_size;
+ } else {
+ pi->flags = cpu_to_le32(feat_flag |
+ CXL_SET_FEAT_FLAG_INITIATE_DATA_TRANSFER);
+ data_in_size = cxl_mbox->payload_size - hdr_size;
+ }
+
+ do {
+ int rc;
+
+ pi->offset = cpu_to_le16(offset + data_sent_size);
+ memcpy(pi->feat_data, feat_data + data_sent_size, data_in_size);
+ mbox_cmd = (struct cxl_mbox_cmd) {
+ .opcode = CXL_MBOX_OP_SET_FEATURE,
+ .size_in = hdr_size + data_in_size,
+ .payload_in = pi,
+ };
+ rc = cxl_internal_send_cmd(cxl_mbox, &mbox_cmd);
+ if (rc < 0) {
+ if (return_code)
+ *return_code = mbox_cmd.return_code;
+ return rc;
+ }
+
+ data_sent_size += data_in_size;
+ if (data_sent_size >= feat_data_size) {
+ if (return_code)
+ *return_code = CXL_MBOX_CMD_RC_SUCCESS;
+ return 0;
+ }
+
+ if ((feat_data_size - data_sent_size) <= (cxl_mbox->payload_size - hdr_size)) {
+ data_in_size = feat_data_size - data_sent_size;
+ pi->flags = cpu_to_le32(feat_flag |
+ CXL_SET_FEAT_FLAG_FINISH_DATA_TRANSFER);
+ } else {
+ pi->flags = cpu_to_le32(feat_flag |
+ CXL_SET_FEAT_FLAG_CONTINUE_DATA_TRANSFER);
+ }
+ } while (true);
+}
+
+/* FWCTL support */
+
+static inline struct cxl_memdev *fwctl_to_memdev(struct fwctl_device *fwctl_dev)
+{
+ return to_cxl_memdev(fwctl_dev->dev.parent);
+}
+
+static int cxlctl_open_uctx(struct fwctl_uctx *uctx)
+{
+ return 0;
+}
+
+static void cxlctl_close_uctx(struct fwctl_uctx *uctx)
+{
+}
+
+static struct cxl_feat_entry *
+get_support_feature_info(struct cxl_features_state *cxlfs,
+ const struct fwctl_rpc_cxl *rpc_in)
+{
+ struct cxl_feat_entry *feat;
+ const uuid_t *uuid;
+
+ if (rpc_in->op_size < sizeof(*uuid))
+ return ERR_PTR(-EINVAL);
+
+ uuid = &rpc_in->set_feat_in.uuid;
+
+ for (int i = 0; i < cxlfs->entries->num_features; i++) {
+ feat = &cxlfs->entries->ent[i];
+ if (uuid_equal(uuid, &feat->uuid))
+ return feat;
+ }
+
+ return ERR_PTR(-EINVAL);
+}
+
+static void *cxlctl_get_supported_features(struct cxl_features_state *cxlfs,
+ const struct fwctl_rpc_cxl *rpc_in,
+ size_t *out_len)
+{
+ const struct cxl_mbox_get_sup_feats_in *feat_in;
+ struct cxl_mbox_get_sup_feats_out *feat_out;
+ struct cxl_feat_entry *pos;
+ size_t out_size;
+ int requested;
+ u32 count;
+ u16 start;
+ int i;
+
+ if (rpc_in->op_size != sizeof(*feat_in))
+ return ERR_PTR(-EINVAL);
+
+ feat_in = &rpc_in->get_sup_feats_in;
+ count = le32_to_cpu(feat_in->count);
+ start = le16_to_cpu(feat_in->start_idx);
+ requested = count / sizeof(*pos);
+
+ /*
+ * Make sure that the total requested number of entries is not greater
+ * than the total number of supported features allowed for userspace.
+ */
+ if (start >= cxlfs->entries->num_features)
+ return ERR_PTR(-EINVAL);
+
+ requested = min_t(int, requested, cxlfs->entries->num_features - start);
+
+ out_size = sizeof(struct fwctl_rpc_cxl_out) +
+ struct_size(feat_out, ents, requested);
+
+ struct fwctl_rpc_cxl_out *rpc_out __free(kvfree) =
+ kvzalloc(out_size, GFP_KERNEL);
+ if (!rpc_out)
+ return ERR_PTR(-ENOMEM);
+
+ rpc_out->size = struct_size(feat_out, ents, requested);
+ feat_out = &rpc_out->get_sup_feats_out;
+ if (requested == 0) {
+ feat_out->num_entries = cpu_to_le16(requested);
+ feat_out->supported_feats =
+ cpu_to_le16(cxlfs->entries->num_features);
+ rpc_out->retval = CXL_MBOX_CMD_RC_SUCCESS;
+ *out_len = out_size;
+ return no_free_ptr(rpc_out);
+ }
+
+ for (i = start, pos = &feat_out->ents[0];
+ i < cxlfs->entries->num_features; i++, pos++) {
+ if (i - start == requested)
+ break;
+
+ memcpy(pos, &cxlfs->entries->ent[i], sizeof(*pos));
+ /*
+ * If the feature is exclusive, set the set_feat_size to 0 to
+ * indicate that the feature is not changeable.
+ */
+ if (is_cxl_feature_exclusive(pos)) {
+ u32 flags;
+
+ pos->set_feat_size = 0;
+ flags = le32_to_cpu(pos->flags);
+ flags &= ~CXL_FEATURE_F_CHANGEABLE;
+ pos->flags = cpu_to_le32(flags);
+ }
+ }
+
+ feat_out->num_entries = cpu_to_le16(requested);
+ feat_out->supported_feats = cpu_to_le16(cxlfs->entries->num_features);
+ rpc_out->retval = CXL_MBOX_CMD_RC_SUCCESS;
+ *out_len = out_size;
+
+ return no_free_ptr(rpc_out);
+}
+
+static void *cxlctl_get_feature(struct cxl_features_state *cxlfs,
+ const struct fwctl_rpc_cxl *rpc_in,
+ size_t *out_len)
+{
+ struct cxl_mailbox *cxl_mbox = &cxlfs->cxlds->cxl_mbox;
+ const struct cxl_mbox_get_feat_in *feat_in;
+ u16 offset, count, return_code;
+ size_t out_size = *out_len;
+
+ if (rpc_in->op_size != sizeof(*feat_in))
+ return ERR_PTR(-EINVAL);
+
+ feat_in = &rpc_in->get_feat_in;
+ offset = le16_to_cpu(feat_in->offset);
+ count = le16_to_cpu(feat_in->count);
+
+ if (!count)
+ return ERR_PTR(-EINVAL);
+
+ struct fwctl_rpc_cxl_out *rpc_out __free(kvfree) =
+ kvzalloc(out_size, GFP_KERNEL);
+ if (!rpc_out)
+ return ERR_PTR(-ENOMEM);
+
+ out_size = cxl_get_feature(cxl_mbox, &feat_in->uuid,
+ feat_in->selection, rpc_out->payload,
+ count, offset, &return_code);
+ *out_len = sizeof(struct fwctl_rpc_cxl_out);
+ if (!out_size) {
+ rpc_out->size = 0;
+ rpc_out->retval = return_code;
+ return no_free_ptr(rpc_out);
+ }
+
+ rpc_out->size = out_size;
+ rpc_out->retval = CXL_MBOX_CMD_RC_SUCCESS;
+ *out_len += out_size;
+
+ return no_free_ptr(rpc_out);
+}
+
+static void *cxlctl_set_feature(struct cxl_features_state *cxlfs,
+ const struct fwctl_rpc_cxl *rpc_in,
+ size_t *out_len)
+{
+ struct cxl_mailbox *cxl_mbox = &cxlfs->cxlds->cxl_mbox;
+ const struct cxl_mbox_set_feat_in *feat_in;
+ size_t out_size, data_size;
+ u16 offset, return_code;
+ u32 flags;
+ int rc;
+
+ if (rpc_in->op_size <= sizeof(feat_in->hdr))
+ return ERR_PTR(-EINVAL);
+
+ feat_in = &rpc_in->set_feat_in;
+
+ if (is_cxl_feature_exclusive_by_uuid(&feat_in->uuid))
+ return ERR_PTR(-EPERM);
+
+ offset = le16_to_cpu(feat_in->offset);
+ flags = le32_to_cpu(feat_in->flags);
+ out_size = *out_len;
+
+ struct fwctl_rpc_cxl_out *rpc_out __free(kvfree) =
+ kvzalloc(out_size, GFP_KERNEL);
+ if (!rpc_out)
+ return ERR_PTR(-ENOMEM);
+
+ rpc_out->size = 0;
+
+ data_size = rpc_in->op_size - sizeof(feat_in->hdr);
+ rc = cxl_set_feature(cxl_mbox, &feat_in->uuid,
+ feat_in->version, feat_in->feat_data,
+ data_size, flags, offset, &return_code);
+ if (rc) {
+ rpc_out->retval = return_code;
+ return no_free_ptr(rpc_out);
+ }
+
+ rpc_out->retval = CXL_MBOX_CMD_RC_SUCCESS;
+ *out_len = sizeof(*rpc_out);
+
+ return no_free_ptr(rpc_out);
+}
+
+static bool cxlctl_validate_set_features(struct cxl_features_state *cxlfs,
+ const struct fwctl_rpc_cxl *rpc_in,
+ enum fwctl_rpc_scope scope)
+{
+ u16 effects, imm_mask, reset_mask;
+ struct cxl_feat_entry *feat;
+ u32 flags;
+
+ feat = get_support_feature_info(cxlfs, rpc_in);
+ if (IS_ERR(feat))
+ return false;
+
+ /* Ensure that the attribute is changeable */
+ flags = le32_to_cpu(feat->flags);
+ if (!(flags & CXL_FEATURE_F_CHANGEABLE))
+ return false;
+
+ effects = le16_to_cpu(feat->effects);
+
+ /*
+ * Reserved bits are set; reject since the effects are not
+ * comprehended by the driver.
+ */
+ if (effects & CXL_CMD_EFFECTS_RESERVED) {
+ dev_warn_once(cxlfs->cxlds->dev,
+ "Reserved bits set in the Feature effects field!\n");
+ return false;
+ }
+
+ /* Currently no user background command support */
+ if (effects & CXL_CMD_BACKGROUND)
+ return false;
+
+ /* Effects cause immediate change, highest security scope is needed */
+ imm_mask = CXL_CMD_CONFIG_CHANGE_IMMEDIATE |
+ CXL_CMD_DATA_CHANGE_IMMEDIATE |
+ CXL_CMD_POLICY_CHANGE_IMMEDIATE |
+ CXL_CMD_LOG_CHANGE_IMMEDIATE;
+
+ reset_mask = CXL_CMD_CONFIG_CHANGE_COLD_RESET |
+ CXL_CMD_CONFIG_CHANGE_CONV_RESET |
+ CXL_CMD_CONFIG_CHANGE_CXL_RESET;
+
+ /* If no immediate or reset effect is set, the hardware has a bug */
+ if (!(effects & imm_mask) && !(effects & reset_mask))
+ return false;
+
+ /*
+ * If the Feature setting causes immediate configuration change
+ * then we need the full write permission policy.
+ */
+ if (effects & imm_mask && scope >= FWCTL_RPC_DEBUG_WRITE_FULL)
+ return true;
+
+ /*
+ * If the Feature setting only causes configuration change
+ * after a reset, then the lesser level of write permission
+ * policy is ok.
+ */
+ if (!(effects & imm_mask) && scope >= FWCTL_RPC_DEBUG_WRITE)
+ return true;
+
+ return false;
+}
+
+static bool cxlctl_validate_hw_command(struct cxl_features_state *cxlfs,
+ const struct fwctl_rpc_cxl *rpc_in,
+ enum fwctl_rpc_scope scope,
+ u16 opcode)
+{
+ struct cxl_mailbox *cxl_mbox = &cxlfs->cxlds->cxl_mbox;
+
+ switch (opcode) {
+ case CXL_MBOX_OP_GET_SUPPORTED_FEATURES:
+ case CXL_MBOX_OP_GET_FEATURE:
+ if (cxl_mbox->feat_cap < CXL_FEATURES_RO)
+ return false;
+ if (scope >= FWCTL_RPC_CONFIGURATION)
+ return true;
+ return false;
+ case CXL_MBOX_OP_SET_FEATURE:
+ if (cxl_mbox->feat_cap < CXL_FEATURES_RW)
+ return false;
+ return cxlctl_validate_set_features(cxlfs, rpc_in, scope);
+ default:
+ return false;
+ }
+}
+
+static void *cxlctl_handle_commands(struct cxl_features_state *cxlfs,
+ const struct fwctl_rpc_cxl *rpc_in,
+ size_t *out_len, u16 opcode)
+{
+ switch (opcode) {
+ case CXL_MBOX_OP_GET_SUPPORTED_FEATURES:
+ return cxlctl_get_supported_features(cxlfs, rpc_in, out_len);
+ case CXL_MBOX_OP_GET_FEATURE:
+ return cxlctl_get_feature(cxlfs, rpc_in, out_len);
+ case CXL_MBOX_OP_SET_FEATURE:
+ return cxlctl_set_feature(cxlfs, rpc_in, out_len);
+ default:
+ return ERR_PTR(-EOPNOTSUPP);
+ }
+}
+
+static void *cxlctl_fw_rpc(struct fwctl_uctx *uctx, enum fwctl_rpc_scope scope,
+ void *in, size_t in_len, size_t *out_len)
+{
+ struct fwctl_device *fwctl_dev = uctx->fwctl;
+ struct cxl_memdev *cxlmd = fwctl_to_memdev(fwctl_dev);
+ struct cxl_features_state *cxlfs = to_cxlfs(cxlmd->cxlds);
+ const struct fwctl_rpc_cxl *rpc_in = in;
+ u16 opcode = rpc_in->opcode;
+
+ if (!cxlctl_validate_hw_command(cxlfs, rpc_in, scope, opcode))
+ return ERR_PTR(-EINVAL);
+
+ return cxlctl_handle_commands(cxlfs, rpc_in, out_len, opcode);
+}
+
+static const struct fwctl_ops cxlctl_ops = {
+ .device_type = FWCTL_DEVICE_TYPE_CXL,
+ .uctx_size = sizeof(struct fwctl_uctx),
+ .open_uctx = cxlctl_open_uctx,
+ .close_uctx = cxlctl_close_uctx,
+ .fw_rpc = cxlctl_fw_rpc,
+};
+
+DEFINE_FREE(free_fwctl_dev, struct fwctl_device *, if (_T) fwctl_put(_T))
+
+static void free_memdev_fwctl(void *_fwctl_dev)
+{
+ struct fwctl_device *fwctl_dev = _fwctl_dev;
+
+ fwctl_unregister(fwctl_dev);
+ fwctl_put(fwctl_dev);
+}
+
+int devm_cxl_setup_fwctl(struct cxl_memdev *cxlmd)
+{
+ struct cxl_dev_state *cxlds = cxlmd->cxlds;
+ struct cxl_features_state *cxlfs;
+ int rc;
+
+ cxlfs = to_cxlfs(cxlds);
+ if (!cxlfs)
+ return -ENODEV;
+
+ /* No need to set up FWCTL if no user-allowed features are found */
+ if (!cxlfs->entries->num_user_features)
+ return -ENODEV;
+
+ struct fwctl_device *fwctl_dev __free(free_fwctl_dev) =
+ _fwctl_alloc_device(&cxlmd->dev, &cxlctl_ops, sizeof(*fwctl_dev));
+ if (!fwctl_dev)
+ return -ENOMEM;
+
+ rc = fwctl_register(fwctl_dev);
+ if (rc)
+ return rc;
+
+ return devm_add_action_or_reset(&cxlmd->dev, free_memdev_fwctl,
+ no_free_ptr(fwctl_dev));
+}
+EXPORT_SYMBOL_NS_GPL(devm_cxl_setup_fwctl, "CXL");
+
+MODULE_IMPORT_NS("FWCTL");
diff --git a/drivers/cxl/core/mbox.c b/drivers/cxl/core/mbox.c
index 548564c770c0..78c5346e3e89 100644
--- a/drivers/cxl/core/mbox.c
+++ b/drivers/cxl/core/mbox.c
@@ -349,40 +349,39 @@ static bool cxl_payload_from_user_allowed(u16 opcode, void *payload_in)
return true;
}
-static int cxl_mbox_cmd_ctor(struct cxl_mbox_cmd *mbox,
- struct cxl_memdev_state *mds, u16 opcode,
+static int cxl_mbox_cmd_ctor(struct cxl_mbox_cmd *mbox_cmd,
+ struct cxl_mailbox *cxl_mbox, u16 opcode,
size_t in_size, size_t out_size, u64 in_payload)
{
- struct cxl_mailbox *cxl_mbox = &mds->cxlds.cxl_mbox;
- *mbox = (struct cxl_mbox_cmd) {
+ *mbox_cmd = (struct cxl_mbox_cmd) {
.opcode = opcode,
.size_in = in_size,
};
if (in_size) {
- mbox->payload_in = vmemdup_user(u64_to_user_ptr(in_payload),
- in_size);
- if (IS_ERR(mbox->payload_in))
- return PTR_ERR(mbox->payload_in);
+ mbox_cmd->payload_in = vmemdup_user(u64_to_user_ptr(in_payload),
+ in_size);
+ if (IS_ERR(mbox_cmd->payload_in))
+ return PTR_ERR(mbox_cmd->payload_in);
- if (!cxl_payload_from_user_allowed(opcode, mbox->payload_in)) {
- dev_dbg(mds->cxlds.dev, "%s: input payload not allowed\n",
+ if (!cxl_payload_from_user_allowed(opcode, mbox_cmd->payload_in)) {
+ dev_dbg(cxl_mbox->host, "%s: input payload not allowed\n",
cxl_mem_opcode_to_name(opcode));
- kvfree(mbox->payload_in);
+ kvfree(mbox_cmd->payload_in);
return -EBUSY;
}
}
/* Prepare to handle a full payload for variable sized output */
if (out_size == CXL_VARIABLE_PAYLOAD)
- mbox->size_out = cxl_mbox->payload_size;
+ mbox_cmd->size_out = cxl_mbox->payload_size;
else
- mbox->size_out = out_size;
+ mbox_cmd->size_out = out_size;
- if (mbox->size_out) {
- mbox->payload_out = kvzalloc(mbox->size_out, GFP_KERNEL);
- if (!mbox->payload_out) {
- kvfree(mbox->payload_in);
+ if (mbox_cmd->size_out) {
+ mbox_cmd->payload_out = kvzalloc(mbox_cmd->size_out, GFP_KERNEL);
+ if (!mbox_cmd->payload_out) {
+ kvfree(mbox_cmd->payload_in);
return -ENOMEM;
}
}
@@ -397,10 +396,8 @@ static void cxl_mbox_cmd_dtor(struct cxl_mbox_cmd *mbox)
static int cxl_to_mem_cmd_raw(struct cxl_mem_command *mem_cmd,
const struct cxl_send_command *send_cmd,
- struct cxl_memdev_state *mds)
+ struct cxl_mailbox *cxl_mbox)
{
- struct cxl_mailbox *cxl_mbox = &mds->cxlds.cxl_mbox;
-
if (send_cmd->raw.rsvd)
return -EINVAL;
@@ -415,7 +412,7 @@ static int cxl_to_mem_cmd_raw(struct cxl_mem_command *mem_cmd,
if (!cxl_mem_raw_command_allowed(send_cmd->raw.opcode))
return -EPERM;
- dev_WARN_ONCE(mds->cxlds.dev, true, "raw command path used\n");
+ dev_WARN_ONCE(cxl_mbox->host, true, "raw command path used\n");
*mem_cmd = (struct cxl_mem_command) {
.info = {
@@ -431,7 +428,7 @@ static int cxl_to_mem_cmd_raw(struct cxl_mem_command *mem_cmd,
static int cxl_to_mem_cmd(struct cxl_mem_command *mem_cmd,
const struct cxl_send_command *send_cmd,
- struct cxl_memdev_state *mds)
+ struct cxl_mailbox *cxl_mbox)
{
struct cxl_mem_command *c = &cxl_mem_commands[send_cmd->id];
const struct cxl_command_info *info = &c->info;
@@ -446,11 +443,11 @@ static int cxl_to_mem_cmd(struct cxl_mem_command *mem_cmd,
return -EINVAL;
/* Check that the command is enabled for hardware */
- if (!test_bit(info->id, mds->enabled_cmds))
+ if (!test_bit(info->id, cxl_mbox->enabled_cmds))
return -ENOTTY;
/* Check that the command is not claimed for exclusive kernel use */
- if (test_bit(info->id, mds->exclusive_cmds))
+ if (test_bit(info->id, cxl_mbox->exclusive_cmds))
return -EBUSY;
/* Check the input buffer is the expected size */
@@ -479,7 +476,7 @@ static int cxl_to_mem_cmd(struct cxl_mem_command *mem_cmd,
/**
* cxl_validate_cmd_from_user() - Check fields for CXL_MEM_SEND_COMMAND.
* @mbox_cmd: Sanitized and populated &struct cxl_mbox_cmd.
- * @mds: The driver data for the operation
+ * @cxl_mbox: CXL mailbox context
* @send_cmd: &struct cxl_send_command copied in from userspace.
*
* Return:
@@ -494,10 +491,9 @@ static int cxl_to_mem_cmd(struct cxl_mem_command *mem_cmd,
* safe to send to the hardware.
*/
static int cxl_validate_cmd_from_user(struct cxl_mbox_cmd *mbox_cmd,
- struct cxl_memdev_state *mds,
+ struct cxl_mailbox *cxl_mbox,
const struct cxl_send_command *send_cmd)
{
- struct cxl_mailbox *cxl_mbox = &mds->cxlds.cxl_mbox;
struct cxl_mem_command mem_cmd;
int rc;
@@ -514,24 +510,23 @@ static int cxl_validate_cmd_from_user(struct cxl_mbox_cmd *mbox_cmd,
/* Sanitize and construct a cxl_mem_command */
if (send_cmd->id == CXL_MEM_COMMAND_ID_RAW)
- rc = cxl_to_mem_cmd_raw(&mem_cmd, send_cmd, mds);
+ rc = cxl_to_mem_cmd_raw(&mem_cmd, send_cmd, cxl_mbox);
else
- rc = cxl_to_mem_cmd(&mem_cmd, send_cmd, mds);
+ rc = cxl_to_mem_cmd(&mem_cmd, send_cmd, cxl_mbox);
if (rc)
return rc;
/* Sanitize and construct a cxl_mbox_cmd */
- return cxl_mbox_cmd_ctor(mbox_cmd, mds, mem_cmd.opcode,
+ return cxl_mbox_cmd_ctor(mbox_cmd, cxl_mbox, mem_cmd.opcode,
mem_cmd.info.size_in, mem_cmd.info.size_out,
send_cmd->in.payload);
}
-int cxl_query_cmd(struct cxl_memdev *cxlmd,
+int cxl_query_cmd(struct cxl_mailbox *cxl_mbox,
struct cxl_mem_query_commands __user *q)
{
- struct cxl_memdev_state *mds = to_cxl_memdev_state(cxlmd->cxlds);
- struct device *dev = &cxlmd->dev;
+ struct device *dev = cxl_mbox->host;
struct cxl_mem_command *cmd;
u32 n_commands;
int j = 0;
@@ -552,9 +547,9 @@ int cxl_query_cmd(struct cxl_memdev *cxlmd,
cxl_for_each_cmd(cmd) {
struct cxl_command_info info = cmd->info;
- if (test_bit(info.id, mds->enabled_cmds))
+ if (test_bit(info.id, cxl_mbox->enabled_cmds))
info.flags |= CXL_MEM_COMMAND_FLAG_ENABLED;
- if (test_bit(info.id, mds->exclusive_cmds))
+ if (test_bit(info.id, cxl_mbox->exclusive_cmds))
info.flags |= CXL_MEM_COMMAND_FLAG_EXCLUSIVE;
if (copy_to_user(&q->commands[j++], &info, sizeof(info)))
@@ -569,7 +564,7 @@ int cxl_query_cmd(struct cxl_memdev *cxlmd,
/**
* handle_mailbox_cmd_from_user() - Dispatch a mailbox command for userspace.
- * @mds: The driver data for the operation
+ * @cxl_mbox: The mailbox context for the operation.
* @mbox_cmd: The validated mailbox command.
* @out_payload: Pointer to userspace's output payload.
* @size_out: (Input) Max payload size to copy out.
@@ -590,13 +585,12 @@ int cxl_query_cmd(struct cxl_memdev *cxlmd,
*
* See cxl_send_cmd().
*/
-static int handle_mailbox_cmd_from_user(struct cxl_memdev_state *mds,
+static int handle_mailbox_cmd_from_user(struct cxl_mailbox *cxl_mbox,
struct cxl_mbox_cmd *mbox_cmd,
u64 out_payload, s32 *size_out,
u32 *retval)
{
- struct cxl_mailbox *cxl_mbox = &mds->cxlds.cxl_mbox;
- struct device *dev = mds->cxlds.dev;
+ struct device *dev = cxl_mbox->host;
int rc;
dev_dbg(dev,
@@ -633,10 +627,9 @@ out:
return rc;
}
-int cxl_send_cmd(struct cxl_memdev *cxlmd, struct cxl_send_command __user *s)
+int cxl_send_cmd(struct cxl_mailbox *cxl_mbox, struct cxl_send_command __user *s)
{
- struct cxl_memdev_state *mds = to_cxl_memdev_state(cxlmd->cxlds);
- struct device *dev = &cxlmd->dev;
+ struct device *dev = cxl_mbox->host;
struct cxl_send_command send;
struct cxl_mbox_cmd mbox_cmd;
int rc;
@@ -646,11 +639,11 @@ int cxl_send_cmd(struct cxl_memdev *cxlmd, struct cxl_send_command __user *s)
if (copy_from_user(&send, s, sizeof(send)))
return -EFAULT;
- rc = cxl_validate_cmd_from_user(&mbox_cmd, mds, &send);
+ rc = cxl_validate_cmd_from_user(&mbox_cmd, cxl_mbox, &send);
if (rc)
return rc;
- rc = handle_mailbox_cmd_from_user(mds, &mbox_cmd, send.out.payload,
+ rc = handle_mailbox_cmd_from_user(cxl_mbox, &mbox_cmd, send.out.payload,
&send.out.size, &send.retval);
if (rc)
return rc;
@@ -713,6 +706,35 @@ static int cxl_xfer_log(struct cxl_memdev_state *mds, uuid_t *uuid,
return 0;
}
+static int check_features_opcodes(u16 opcode, int *ro_cmds, int *wr_cmds)
+{
+ switch (opcode) {
+ case CXL_MBOX_OP_GET_SUPPORTED_FEATURES:
+ case CXL_MBOX_OP_GET_FEATURE:
+ (*ro_cmds)++;
+ return 1;
+ case CXL_MBOX_OP_SET_FEATURE:
+ (*wr_cmds)++;
+ return 1;
+ default:
+ return 0;
+ }
+}
+
+/* 'Get Supported Features' and 'Get Feature' */
+#define MAX_FEATURES_READ_CMDS 2
+static void set_features_cap(struct cxl_mailbox *cxl_mbox,
+ int ro_cmds, int wr_cmds)
+{
+ /* Setting up Features capability while walking the CEL */
+ if (ro_cmds == MAX_FEATURES_READ_CMDS) {
+ if (wr_cmds)
+ cxl_mbox->feat_cap = CXL_FEATURES_RW;
+ else
+ cxl_mbox->feat_cap = CXL_FEATURES_RO;
+ }
+}
+
/**
* cxl_walk_cel() - Walk through the Command Effects Log.
* @mds: The driver data for the operation
@@ -724,10 +746,11 @@ static int cxl_xfer_log(struct cxl_memdev_state *mds, uuid_t *uuid,
*/
static void cxl_walk_cel(struct cxl_memdev_state *mds, size_t size, u8 *cel)
{
+ struct cxl_mailbox *cxl_mbox = &mds->cxlds.cxl_mbox;
struct cxl_cel_entry *cel_entry;
const int cel_entries = size / sizeof(*cel_entry);
struct device *dev = mds->cxlds.dev;
- int i;
+ int i, ro_cmds = 0, wr_cmds = 0;
cel_entry = (struct cxl_cel_entry *) cel;
@@ -737,10 +760,13 @@ static void cxl_walk_cel(struct cxl_memdev_state *mds, size_t size, u8 *cel)
int enabled = 0;
if (cmd) {
- set_bit(cmd->info.id, mds->enabled_cmds);
+ set_bit(cmd->info.id, cxl_mbox->enabled_cmds);
enabled++;
}
+ enabled += check_features_opcodes(opcode, &ro_cmds,
+ &wr_cmds);
+
if (cxl_is_poison_command(opcode)) {
cxl_set_poison_cmd_enabled(&mds->poison, opcode);
enabled++;
@@ -754,6 +780,8 @@ static void cxl_walk_cel(struct cxl_memdev_state *mds, size_t size, u8 *cel)
dev_dbg(dev, "Opcode 0x%04x %s\n", opcode,
enabled ? "enabled" : "unsupported by driver");
}
+
+ set_features_cap(cxl_mbox, ro_cmds, wr_cmds);
}
static struct cxl_mbox_get_supported_logs *cxl_get_gsl(struct cxl_memdev_state *mds)
@@ -807,6 +835,7 @@ static const uuid_t log_uuid[] = {
*/
int cxl_enumerate_cmds(struct cxl_memdev_state *mds)
{
+ struct cxl_mailbox *cxl_mbox = &mds->cxlds.cxl_mbox;
struct cxl_mbox_get_supported_logs *gsl;
struct device *dev = mds->cxlds.dev;
struct cxl_mem_command *cmd;
@@ -845,7 +874,7 @@ int cxl_enumerate_cmds(struct cxl_memdev_state *mds)
/* In case CEL was bogus, enable some default commands. */
cxl_for_each_cmd(cmd)
if (cmd->flags & CXL_CMD_FLAG_FORCE_ENABLE)
- set_bit(cmd->info.id, mds->enabled_cmds);
+ set_bit(cmd->info.id, cxl_mbox->enabled_cmds);
/* Found the required CEL */
rc = 0;
@@ -1448,6 +1477,7 @@ struct cxl_memdev_state *cxl_memdev_state_create(struct device *dev)
mutex_init(&mds->event.log_lock);
mds->cxlds.dev = dev;
mds->cxlds.reg_map.host = dev;
+ mds->cxlds.cxl_mbox.host = dev;
mds->cxlds.reg_map.resource = CXL_RESOURCE_NONE;
mds->cxlds.type = CXL_DEVTYPE_CLASSMEM;
mds->ram_perf.qos_class = CXL_QOS_CLASS_INVALID;
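
A compact illustration of the capability derivation above: both read-side
feature opcodes must appear in the CEL before any Features capability is
advertised, and a Set Feature occurrence upgrades it to read-write. The
following is a standalone userspace mimic of that logic, not kernel code;
the opcode values mirror the enum added to cxlmem.h later in this patch.

#include <stdio.h>

enum {
	OP_GET_SUPPORTED_FEATURES = 0x0500,
	OP_GET_FEATURE            = 0x0501,
	OP_SET_FEATURE            = 0x0502,
};
enum feat_cap { FEAT_NONE, FEAT_RO, FEAT_RW };

static enum feat_cap derive_cap(const unsigned short *cel, int n)
{
	int ro = 0, wr = 0;

	for (int i = 0; i < n; i++) {
		if (cel[i] == OP_GET_SUPPORTED_FEATURES ||
		    cel[i] == OP_GET_FEATURE)
			ro++;
		else if (cel[i] == OP_SET_FEATURE)
			wr++;
	}
	if (ro != 2)	/* MAX_FEATURES_READ_CMDS: both read opcodes seen */
		return FEAT_NONE;
	return wr ? FEAT_RW : FEAT_RO;
}

int main(void)
{
	unsigned short cel[] = { 0x0500, 0x0501, 0x0502 };

	printf("cap=%d\n", derive_cap(cel, 3));	/* prints cap=2 (RW) */
	return 0;
}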
diff --git a/drivers/cxl/core/memdev.c b/drivers/cxl/core/memdev.c
index ae3dfcbe8938..2e2e035abdaa 100644
--- a/drivers/cxl/core/memdev.c
+++ b/drivers/cxl/core/memdev.c
@@ -564,9 +564,11 @@ EXPORT_SYMBOL_NS_GPL(is_cxl_memdev, "CXL");
void set_exclusive_cxl_commands(struct cxl_memdev_state *mds,
unsigned long *cmds)
{
+ struct cxl_mailbox *cxl_mbox = &mds->cxlds.cxl_mbox;
+
down_write(&cxl_memdev_rwsem);
- bitmap_or(mds->exclusive_cmds, mds->exclusive_cmds, cmds,
- CXL_MEM_COMMAND_ID_MAX);
+ bitmap_or(cxl_mbox->exclusive_cmds, cxl_mbox->exclusive_cmds,
+ cmds, CXL_MEM_COMMAND_ID_MAX);
up_write(&cxl_memdev_rwsem);
}
EXPORT_SYMBOL_NS_GPL(set_exclusive_cxl_commands, "CXL");
@@ -579,9 +581,11 @@ EXPORT_SYMBOL_NS_GPL(set_exclusive_cxl_commands, "CXL");
void clear_exclusive_cxl_commands(struct cxl_memdev_state *mds,
unsigned long *cmds)
{
+ struct cxl_mailbox *cxl_mbox = &mds->cxlds.cxl_mbox;
+
down_write(&cxl_memdev_rwsem);
- bitmap_andnot(mds->exclusive_cmds, mds->exclusive_cmds, cmds,
- CXL_MEM_COMMAND_ID_MAX);
+ bitmap_andnot(cxl_mbox->exclusive_cmds, cxl_mbox->exclusive_cmds,
+ cmds, CXL_MEM_COMMAND_ID_MAX);
up_write(&cxl_memdev_rwsem);
}
EXPORT_SYMBOL_NS_GPL(clear_exclusive_cxl_commands, "CXL");
@@ -656,11 +660,14 @@ err:
static long __cxl_memdev_ioctl(struct cxl_memdev *cxlmd, unsigned int cmd,
unsigned long arg)
{
+ struct cxl_memdev_state *mds = to_cxl_memdev_state(cxlmd->cxlds);
+ struct cxl_mailbox *cxl_mbox = &mds->cxlds.cxl_mbox;
+
switch (cmd) {
case CXL_MEM_QUERY_COMMANDS:
- return cxl_query_cmd(cxlmd, (void __user *)arg);
+ return cxl_query_cmd(cxl_mbox, (void __user *)arg);
case CXL_MEM_SEND_COMMAND:
- return cxl_send_cmd(cxlmd, (void __user *)arg);
+ return cxl_send_cmd(cxl_mbox, (void __user *)arg);
default:
return -ENOTTY;
}
@@ -994,10 +1001,11 @@ static void cxl_remove_fw_upload(void *fwl)
int devm_cxl_setup_fw_upload(struct device *host, struct cxl_memdev_state *mds)
{
struct cxl_dev_state *cxlds = &mds->cxlds;
+ struct cxl_mailbox *cxl_mbox = &cxlds->cxl_mbox;
struct device *dev = &cxlds->cxlmd->dev;
struct fw_upload *fwl;
- if (!test_bit(CXL_MEM_COMMAND_ID_GET_FW_INFO, mds->enabled_cmds))
+ if (!test_bit(CXL_MEM_COMMAND_ID_GET_FW_INFO, cxl_mbox->enabled_cmds))
return 0;
fwl = firmware_upload_register(THIS_MODULE, dev, dev_name(dev),
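
The ioctl plumbing above keeps the existing uapi while swapping the context
argument. As a reminder of how that uapi is driven, here is a minimal
userspace sketch of CXL_MEM_QUERY_COMMANDS; a sketch under assumptions: the
installed <linux/cxl_mem.h> header, and the /dev/cxl/mem0 node name is
illustrative. Passing n_commands == 0 asks the kernel for the count only:

#include <fcntl.h>
#include <stdio.h>
#include <stdlib.h>
#include <sys/ioctl.h>
#include <unistd.h>
#include <linux/cxl_mem.h>

int main(void)
{
	struct cxl_mem_query_commands probe = { .n_commands = 0 };
	struct cxl_mem_query_commands *q;
	int fd = open("/dev/cxl/mem0", O_RDWR);

	if (fd < 0)
		return 1;

	/* First pass: the kernel fills in the total command count */
	if (ioctl(fd, CXL_MEM_QUERY_COMMANDS, &probe) < 0)
		return 1;

	q = calloc(1, sizeof(*q) + probe.n_commands * sizeof(q->commands[0]));
	if (!q)
		return 1;
	q->n_commands = probe.n_commands;

	/* Second pass: fetch the per-command info array */
	if (ioctl(fd, CXL_MEM_QUERY_COMMANDS, q) == 0)
		for (__u32 i = 0; i < q->n_commands; i++)
			printf("cmd %u flags 0x%x\n", q->commands[i].id,
			       q->commands[i].flags);

	free(q);
	close(fd);
	return 0;
}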
diff --git a/drivers/cxl/cxlmem.h b/drivers/cxl/cxlmem.h
index 2a25d1957ddb..dd2b7060d501 100644
--- a/drivers/cxl/cxlmem.h
+++ b/drivers/cxl/cxlmem.h
@@ -106,42 +106,6 @@ static inline struct cxl_ep *cxl_ep_load(struct cxl_port *port,
return xa_load(&port->endpoints, (unsigned long)&cxlmd->dev);
}
-/**
- * struct cxl_mbox_cmd - A command to be submitted to hardware.
- * @opcode: (input) The command set and command submitted to hardware.
- * @payload_in: (input) Pointer to the input payload.
- * @payload_out: (output) Pointer to the output payload. Must be allocated by
- * the caller.
- * @size_in: (input) Number of bytes to load from @payload_in.
- * @size_out: (input) Max number of bytes loaded into @payload_out.
- * (output) Number of bytes generated by the device. For fixed size
- * outputs commands this is always expected to be deterministic. For
- * variable sized output commands, it tells the exact number of bytes
- * written.
- * @min_out: (input) internal command output payload size validation
- * @poll_count: (input) Number of timeouts to attempt.
- * @poll_interval_ms: (input) Time between mailbox background command polling
- * interval timeouts.
- * @return_code: (output) Error code returned from hardware.
- *
- * This is the primary mechanism used to send commands to the hardware.
- * All the fields except @payload_* correspond exactly to the fields described in
- * Command Register section of the CXL 2.0 8.2.8.4.5. @payload_in and
- * @payload_out are written to, and read from the Command Payload Registers
- * defined in CXL 2.0 8.2.8.4.8.
- */
-struct cxl_mbox_cmd {
- u16 opcode;
- void *payload_in;
- void *payload_out;
- size_t size_in;
- size_t size_out;
- size_t min_out;
- int poll_count;
- int poll_interval_ms;
- u16 return_code;
-};
-
/*
* Per CXL 3.0 Section 8.2.8.4.5.1
*/
@@ -428,6 +392,7 @@ struct cxl_dpa_perf {
* @serial: PCIe Device Serial Number
* @type: Generic Memory Class device or Vendor Specific Memory device
* @cxl_mbox: CXL mailbox context
+ * @cxlfs: CXL features context
*/
struct cxl_dev_state {
struct device *dev;
@@ -443,6 +408,9 @@ struct cxl_dev_state {
u64 serial;
enum cxl_devtype type;
struct cxl_mailbox cxl_mbox;
+#ifdef CONFIG_CXL_FEATURES
+ struct cxl_features_state *cxlfs;
+#endif
};
static inline struct cxl_dev_state *mbox_to_cxlds(struct cxl_mailbox *cxl_mbox)
@@ -461,8 +429,6 @@ static inline struct cxl_dev_state *mbox_to_cxlds(struct cxl_mailbox *cxl_mbox)
* @lsa_size: Size of Label Storage Area
* (CXL 2.0 8.2.9.5.1.1 Identify Memory Device)
* @firmware_version: Firmware version for the memory device.
- * @enabled_cmds: Hardware commands found enabled in CEL.
- * @exclusive_cmds: Commands that are kernel-internal only
* @total_bytes: sum of all possible capacities
* @volatile_only_bytes: hard volatile capacity
* @persistent_only_bytes: hard persistent capacity
@@ -485,8 +451,6 @@ struct cxl_memdev_state {
struct cxl_dev_state cxlds;
size_t lsa_size;
char firmware_version[0x10];
- DECLARE_BITMAP(enabled_cmds, CXL_MEM_COMMAND_ID_MAX);
- DECLARE_BITMAP(exclusive_cmds, CXL_MEM_COMMAND_ID_MAX);
u64 total_bytes;
u64 volatile_only_bytes;
u64 persistent_only_bytes;
@@ -530,6 +494,9 @@ enum cxl_opcode {
CXL_MBOX_OP_GET_LOG_CAPS = 0x0402,
CXL_MBOX_OP_CLEAR_LOG = 0x0403,
CXL_MBOX_OP_GET_SUP_LOG_SUBLIST = 0x0405,
+ CXL_MBOX_OP_GET_SUPPORTED_FEATURES = 0x0500,
+ CXL_MBOX_OP_GET_FEATURE = 0x0501,
+ CXL_MBOX_OP_SET_FEATURE = 0x0502,
CXL_MBOX_OP_IDENTIFY = 0x4000,
CXL_MBOX_OP_GET_PARTITION_INFO = 0x4100,
CXL_MBOX_OP_SET_PARTITION_INFO = 0x4101,
diff --git a/drivers/cxl/pci.c b/drivers/cxl/pci.c
index a96e54c6259e..993fa60fe453 100644
--- a/drivers/cxl/pci.c
+++ b/drivers/cxl/pci.c
@@ -997,6 +997,10 @@ static int cxl_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id)
if (rc)
return rc;
+ rc = devm_cxl_setup_features(cxlds);
+ if (rc)
+ dev_dbg(&pdev->dev, "No CXL Features discovered\n");
+
cxlmd = devm_cxl_add_memdev(&pdev->dev, cxlds);
if (IS_ERR(cxlmd))
return PTR_ERR(cxlmd);
@@ -1009,6 +1013,10 @@ static int cxl_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id)
if (rc)
return rc;
+ rc = devm_cxl_setup_fwctl(cxlmd);
+ if (rc)
+ dev_dbg(&pdev->dev, "No CXL FWCTL setup\n");
+
pmu_count = cxl_count_regblock(pdev, CXL_REGLOC_RBI_PMU);
if (pmu_count < 0)
return pmu_count;
diff --git a/drivers/fwctl/Kconfig b/drivers/fwctl/Kconfig
new file mode 100644
index 000000000000..b5583b12a011
--- /dev/null
+++ b/drivers/fwctl/Kconfig
@@ -0,0 +1,33 @@
+# SPDX-License-Identifier: GPL-2.0-only
+menuconfig FWCTL
+ tristate "fwctl device firmware access framework"
+ help
+ fwctl provides a userspace API for restricted access to communicate
+ with on-device firmware. The communication channel is intended to
+ support a wide range of lockdown compatible device behaviors including
+ manipulating device FLASH, debugging, and other activities that don't
+ fit neatly into an existing subsystem.
+
+if FWCTL
+config FWCTL_MLX5
+ tristate "mlx5 ConnectX control fwctl driver"
+ depends on MLX5_CORE
+ help
+	  MLX5 provides an interface for user processes to access the debug and
+	  configuration registers of the ConnectX hardware family
+	  (NICs, PCI switches and SmartNIC SoCs).
+	  This allows configuration and debug tools to work out of the box on
+	  mainstream kernels.
+
+ If you don't know what to do here, say N.
+
+config FWCTL_PDS
+ tristate "AMD/Pensando pds fwctl driver"
+ depends on PDS_CORE
+ help
+ The pds_fwctl driver provides an fwctl interface for a user process
+ to access the debug and configuration information of the AMD/Pensando
+ DSC hardware family.
+
+ If you don't know what to do here, say N.
+endif
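
For reference, enabling the new subsystem and both drivers as modules would
look like this in a kernel .config (MLX5_CORE and PDS_CORE must already be
enabled to satisfy the dependencies above):

CONFIG_FWCTL=m
CONFIG_FWCTL_MLX5=m
CONFIG_FWCTL_PDS=m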
diff --git a/drivers/fwctl/Makefile b/drivers/fwctl/Makefile
new file mode 100644
index 000000000000..c093b5f661d6
--- /dev/null
+++ b/drivers/fwctl/Makefile
@@ -0,0 +1,6 @@
+# SPDX-License-Identifier: GPL-2.0
+obj-$(CONFIG_FWCTL) += fwctl.o
+obj-$(CONFIG_FWCTL_MLX5) += mlx5/
+obj-$(CONFIG_FWCTL_PDS) += pds/
+
+fwctl-y += main.o
diff --git a/drivers/fwctl/main.c b/drivers/fwctl/main.c
new file mode 100644
index 000000000000..cb1ac9c40239
--- /dev/null
+++ b/drivers/fwctl/main.c
@@ -0,0 +1,421 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright (c) 2024-2025, NVIDIA CORPORATION & AFFILIATES
+ */
+#define pr_fmt(fmt) "fwctl: " fmt
+#include <linux/fwctl.h>
+
+#include <linux/container_of.h>
+#include <linux/fs.h>
+#include <linux/module.h>
+#include <linux/sizes.h>
+#include <linux/slab.h>
+
+#include <uapi/fwctl/fwctl.h>
+
+enum {
+ FWCTL_MAX_DEVICES = 4096,
+ MAX_RPC_LEN = SZ_2M,
+};
+static_assert(FWCTL_MAX_DEVICES < (1U << MINORBITS));
+
+static dev_t fwctl_dev;
+static DEFINE_IDA(fwctl_ida);
+static unsigned long fwctl_tainted;
+
+struct fwctl_ucmd {
+ struct fwctl_uctx *uctx;
+ void __user *ubuffer;
+ void *cmd;
+ u32 user_size;
+};
+
+static int ucmd_respond(struct fwctl_ucmd *ucmd, size_t cmd_len)
+{
+ if (copy_to_user(ucmd->ubuffer, ucmd->cmd,
+ min_t(size_t, ucmd->user_size, cmd_len)))
+ return -EFAULT;
+ return 0;
+}
+
+static int copy_to_user_zero_pad(void __user *to, const void *from,
+ size_t from_len, size_t user_len)
+{
+ size_t copy_len;
+
+ copy_len = min(from_len, user_len);
+ if (copy_to_user(to, from, copy_len))
+ return -EFAULT;
+ if (copy_len < user_len) {
+ if (clear_user(to + copy_len, user_len - copy_len))
+ return -EFAULT;
+ }
+ return 0;
+}
+
+static int fwctl_cmd_info(struct fwctl_ucmd *ucmd)
+{
+ struct fwctl_device *fwctl = ucmd->uctx->fwctl;
+ struct fwctl_info *cmd = ucmd->cmd;
+ size_t driver_info_len = 0;
+
+ if (cmd->flags)
+ return -EOPNOTSUPP;
+
+ if (!fwctl->ops->info && cmd->device_data_len) {
+ if (clear_user(u64_to_user_ptr(cmd->out_device_data),
+ cmd->device_data_len))
+ return -EFAULT;
+ } else if (cmd->device_data_len) {
+ void *driver_info __free(kfree) =
+ fwctl->ops->info(ucmd->uctx, &driver_info_len);
+ if (IS_ERR(driver_info))
+ return PTR_ERR(driver_info);
+
+ if (copy_to_user_zero_pad(u64_to_user_ptr(cmd->out_device_data),
+ driver_info, driver_info_len,
+ cmd->device_data_len))
+ return -EFAULT;
+ }
+
+ cmd->out_device_type = fwctl->ops->device_type;
+ cmd->device_data_len = driver_info_len;
+ return ucmd_respond(ucmd, sizeof(*cmd));
+}
+
+static int fwctl_cmd_rpc(struct fwctl_ucmd *ucmd)
+{
+ struct fwctl_device *fwctl = ucmd->uctx->fwctl;
+ struct fwctl_rpc *cmd = ucmd->cmd;
+ size_t out_len;
+
+ if (cmd->in_len > MAX_RPC_LEN || cmd->out_len > MAX_RPC_LEN)
+ return -EMSGSIZE;
+
+ switch (cmd->scope) {
+ case FWCTL_RPC_CONFIGURATION:
+ case FWCTL_RPC_DEBUG_READ_ONLY:
+ break;
+
+ case FWCTL_RPC_DEBUG_WRITE_FULL:
+ if (!capable(CAP_SYS_RAWIO))
+ return -EPERM;
+ fallthrough;
+ case FWCTL_RPC_DEBUG_WRITE:
+ if (!test_and_set_bit(0, &fwctl_tainted)) {
+ dev_warn(
+ &fwctl->dev,
+ "%s(%d): has requested full access to the physical device device",
+ current->comm, task_pid_nr(current));
+ add_taint(TAINT_FWCTL, LOCKDEP_STILL_OK);
+ }
+ break;
+ default:
+ return -EOPNOTSUPP;
+ }
+
+ void *inbuf __free(kvfree) = kvzalloc(cmd->in_len, GFP_KERNEL_ACCOUNT);
+ if (!inbuf)
+ return -ENOMEM;
+ if (copy_from_user(inbuf, u64_to_user_ptr(cmd->in), cmd->in_len))
+ return -EFAULT;
+
+ out_len = cmd->out_len;
+ void *outbuf __free(kvfree) = fwctl->ops->fw_rpc(
+ ucmd->uctx, cmd->scope, inbuf, cmd->in_len, &out_len);
+ if (IS_ERR(outbuf))
+ return PTR_ERR(outbuf);
+ if (outbuf == inbuf) {
+ /* The driver can re-use inbuf as outbuf */
+ inbuf = NULL;
+ }
+
+ if (copy_to_user(u64_to_user_ptr(cmd->out), outbuf,
+ min(cmd->out_len, out_len)))
+ return -EFAULT;
+
+ cmd->out_len = out_len;
+ return ucmd_respond(ucmd, sizeof(*cmd));
+}
+
+/* On stack memory for the ioctl structs */
+union fwctl_ucmd_buffer {
+ struct fwctl_info info;
+ struct fwctl_rpc rpc;
+};
+
+struct fwctl_ioctl_op {
+ unsigned int size;
+ unsigned int min_size;
+ unsigned int ioctl_num;
+ int (*execute)(struct fwctl_ucmd *ucmd);
+};
+
+#define IOCTL_OP(_ioctl, _fn, _struct, _last) \
+ [_IOC_NR(_ioctl) - FWCTL_CMD_BASE] = { \
+ .size = sizeof(_struct) + \
+ BUILD_BUG_ON_ZERO(sizeof(union fwctl_ucmd_buffer) < \
+ sizeof(_struct)), \
+ .min_size = offsetofend(_struct, _last), \
+ .ioctl_num = _ioctl, \
+ .execute = _fn, \
+ }
+static const struct fwctl_ioctl_op fwctl_ioctl_ops[] = {
+ IOCTL_OP(FWCTL_INFO, fwctl_cmd_info, struct fwctl_info, out_device_data),
+ IOCTL_OP(FWCTL_RPC, fwctl_cmd_rpc, struct fwctl_rpc, out),
+};
+
+static long fwctl_fops_ioctl(struct file *filp, unsigned int cmd,
+ unsigned long arg)
+{
+ struct fwctl_uctx *uctx = filp->private_data;
+ const struct fwctl_ioctl_op *op;
+ struct fwctl_ucmd ucmd = {};
+ union fwctl_ucmd_buffer buf;
+ unsigned int nr;
+ int ret;
+
+ nr = _IOC_NR(cmd);
+ if ((nr - FWCTL_CMD_BASE) >= ARRAY_SIZE(fwctl_ioctl_ops))
+ return -ENOIOCTLCMD;
+
+ op = &fwctl_ioctl_ops[nr - FWCTL_CMD_BASE];
+ if (op->ioctl_num != cmd)
+ return -ENOIOCTLCMD;
+
+ ucmd.uctx = uctx;
+ ucmd.cmd = &buf;
+ ucmd.ubuffer = (void __user *)arg;
+ ret = get_user(ucmd.user_size, (u32 __user *)ucmd.ubuffer);
+ if (ret)
+ return ret;
+
+ if (ucmd.user_size < op->min_size)
+ return -EINVAL;
+
+ ret = copy_struct_from_user(ucmd.cmd, op->size, ucmd.ubuffer,
+ ucmd.user_size);
+ if (ret)
+ return ret;
+
+ guard(rwsem_read)(&uctx->fwctl->registration_lock);
+ if (!uctx->fwctl->ops)
+ return -ENODEV;
+ return op->execute(&ucmd);
+}
+
+static int fwctl_fops_open(struct inode *inode, struct file *filp)
+{
+ struct fwctl_device *fwctl =
+ container_of(inode->i_cdev, struct fwctl_device, cdev);
+ int ret;
+
+ guard(rwsem_read)(&fwctl->registration_lock);
+ if (!fwctl->ops)
+ return -ENODEV;
+
+ struct fwctl_uctx *uctx __free(kfree) =
+ kzalloc(fwctl->ops->uctx_size, GFP_KERNEL_ACCOUNT);
+ if (!uctx)
+ return -ENOMEM;
+
+ uctx->fwctl = fwctl;
+ ret = fwctl->ops->open_uctx(uctx);
+ if (ret)
+ return ret;
+
+ scoped_guard(mutex, &fwctl->uctx_list_lock) {
+ list_add_tail(&uctx->uctx_list_entry, &fwctl->uctx_list);
+ }
+
+ get_device(&fwctl->dev);
+ filp->private_data = no_free_ptr(uctx);
+ return 0;
+}
+
+static void fwctl_destroy_uctx(struct fwctl_uctx *uctx)
+{
+ lockdep_assert_held(&uctx->fwctl->uctx_list_lock);
+ list_del(&uctx->uctx_list_entry);
+ uctx->fwctl->ops->close_uctx(uctx);
+}
+
+static int fwctl_fops_release(struct inode *inode, struct file *filp)
+{
+ struct fwctl_uctx *uctx = filp->private_data;
+ struct fwctl_device *fwctl = uctx->fwctl;
+
+ scoped_guard(rwsem_read, &fwctl->registration_lock) {
+ /*
+ * NULL ops means fwctl_unregister() has already removed the
+ * driver and destroyed the uctx.
+ */
+ if (fwctl->ops) {
+ guard(mutex)(&fwctl->uctx_list_lock);
+ fwctl_destroy_uctx(uctx);
+ }
+ }
+
+ kfree(uctx);
+ fwctl_put(fwctl);
+ return 0;
+}
+
+static const struct file_operations fwctl_fops = {
+ .owner = THIS_MODULE,
+ .open = fwctl_fops_open,
+ .release = fwctl_fops_release,
+ .unlocked_ioctl = fwctl_fops_ioctl,
+};
+
+static void fwctl_device_release(struct device *device)
+{
+ struct fwctl_device *fwctl =
+ container_of(device, struct fwctl_device, dev);
+
+ ida_free(&fwctl_ida, fwctl->dev.devt - fwctl_dev);
+ mutex_destroy(&fwctl->uctx_list_lock);
+ kfree(fwctl);
+}
+
+static char *fwctl_devnode(const struct device *dev, umode_t *mode)
+{
+ return kasprintf(GFP_KERNEL, "fwctl/%s", dev_name(dev));
+}
+
+static struct class fwctl_class = {
+ .name = "fwctl",
+ .dev_release = fwctl_device_release,
+ .devnode = fwctl_devnode,
+};
+
+static struct fwctl_device *
+_alloc_device(struct device *parent, const struct fwctl_ops *ops, size_t size)
+{
+ struct fwctl_device *fwctl __free(kfree) = kzalloc(size, GFP_KERNEL);
+ int devnum;
+
+ if (!fwctl)
+ return NULL;
+
+ devnum = ida_alloc_max(&fwctl_ida, FWCTL_MAX_DEVICES - 1, GFP_KERNEL);
+ if (devnum < 0)
+ return NULL;
+
+ fwctl->dev.devt = fwctl_dev + devnum;
+ fwctl->dev.class = &fwctl_class;
+ fwctl->dev.parent = parent;
+
+ init_rwsem(&fwctl->registration_lock);
+ mutex_init(&fwctl->uctx_list_lock);
+ INIT_LIST_HEAD(&fwctl->uctx_list);
+
+ device_initialize(&fwctl->dev);
+ return_ptr(fwctl);
+}
+
+/* Drivers use the fwctl_alloc_device() wrapper */
+struct fwctl_device *_fwctl_alloc_device(struct device *parent,
+ const struct fwctl_ops *ops,
+ size_t size)
+{
+ struct fwctl_device *fwctl __free(fwctl) =
+ _alloc_device(parent, ops, size);
+
+ if (!fwctl)
+ return NULL;
+
+ cdev_init(&fwctl->cdev, &fwctl_fops);
+ /*
+	 * The driver module is protected by fwctl_register/unregister();
+	 * unregister won't complete until we are done with the driver's module.
+ */
+ fwctl->cdev.owner = THIS_MODULE;
+
+ if (dev_set_name(&fwctl->dev, "fwctl%d", fwctl->dev.devt - fwctl_dev))
+ return NULL;
+
+ fwctl->ops = ops;
+ return_ptr(fwctl);
+}
+EXPORT_SYMBOL_NS_GPL(_fwctl_alloc_device, "FWCTL");
+
+/**
+ * fwctl_register - Register a new device to the subsystem
+ * @fwctl: Previously allocated fwctl_device
+ *
+ * On return the device is visible through sysfs and /dev, driver ops may be
+ * called.
+ */
+int fwctl_register(struct fwctl_device *fwctl)
+{
+ return cdev_device_add(&fwctl->cdev, &fwctl->dev);
+}
+EXPORT_SYMBOL_NS_GPL(fwctl_register, "FWCTL");
+
+/**
+ * fwctl_unregister - Unregister a device from the subsystem
+ * @fwctl: Previously allocated and registered fwctl_device
+ *
+ * Undoes fwctl_register(). On return no driver ops will be called. The
+ * caller must still call fwctl_put() to free the fwctl.
+ *
+ * Unregister will return even if userspace still has file descriptors open.
+ * This will call ops->close_uctx() on any open FDs and after return no driver
+ * op will be called. The FDs remain open but all fops will return -ENODEV.
+ *
+ * The design of fwctl allows this sort of disassociation of the driver from the
+ * subsystem primarily by keeping memory allocations owned by the core subsystem.
+ * The fwctl_device and fwctl_uctx can both be freed without requiring a driver
+ * callback. This allows the module to remain unlocked while FDs are open.
+ */
+void fwctl_unregister(struct fwctl_device *fwctl)
+{
+ struct fwctl_uctx *uctx;
+
+ cdev_device_del(&fwctl->cdev, &fwctl->dev);
+
+ /* Disable and free the driver's resources for any still open FDs. */
+ guard(rwsem_write)(&fwctl->registration_lock);
+ guard(mutex)(&fwctl->uctx_list_lock);
+ while ((uctx = list_first_entry_or_null(&fwctl->uctx_list,
+ struct fwctl_uctx,
+ uctx_list_entry)))
+ fwctl_destroy_uctx(uctx);
+
+ /*
+ * The driver module may unload after this returns, the op pointer will
+ * not be valid.
+ */
+ fwctl->ops = NULL;
+}
+EXPORT_SYMBOL_NS_GPL(fwctl_unregister, "FWCTL");
+
+static int __init fwctl_init(void)
+{
+ int ret;
+
+ ret = alloc_chrdev_region(&fwctl_dev, 0, FWCTL_MAX_DEVICES, "fwctl");
+ if (ret)
+ return ret;
+
+ ret = class_register(&fwctl_class);
+ if (ret)
+ goto err_chrdev;
+ return 0;
+
+err_chrdev:
+ unregister_chrdev_region(fwctl_dev, FWCTL_MAX_DEVICES);
+ return ret;
+}
+
+static void __exit fwctl_exit(void)
+{
+ class_unregister(&fwctl_class);
+ unregister_chrdev_region(fwctl_dev, FWCTL_MAX_DEVICES);
+}
+
+module_init(fwctl_init);
+module_exit(fwctl_exit);
+MODULE_DESCRIPTION("fwctl device firmware access framework");
+MODULE_LICENSE("GPL");
diff --git a/drivers/fwctl/mlx5/Makefile b/drivers/fwctl/mlx5/Makefile
new file mode 100644
index 000000000000..139a23e3c7c5
--- /dev/null
+++ b/drivers/fwctl/mlx5/Makefile
@@ -0,0 +1,4 @@
+# SPDX-License-Identifier: GPL-2.0
+obj-$(CONFIG_FWCTL_MLX5) += mlx5_fwctl.o
+
+mlx5_fwctl-y += main.o
diff --git a/drivers/fwctl/mlx5/main.c b/drivers/fwctl/mlx5/main.c
new file mode 100644
index 000000000000..f93aa0cecdb9
--- /dev/null
+++ b/drivers/fwctl/mlx5/main.c
@@ -0,0 +1,411 @@
+// SPDX-License-Identifier: BSD-3-Clause OR GPL-2.0
+/*
+ * Copyright (c) 2024-2025, NVIDIA CORPORATION & AFFILIATES
+ */
+#include <linux/fwctl.h>
+#include <linux/auxiliary_bus.h>
+#include <linux/mlx5/device.h>
+#include <linux/mlx5/driver.h>
+#include <uapi/fwctl/mlx5.h>
+
+#define mlx5ctl_err(mcdev, format, ...) \
+ dev_err(&mcdev->fwctl.dev, format, ##__VA_ARGS__)
+
+#define mlx5ctl_dbg(mcdev, format, ...) \
+ dev_dbg(&mcdev->fwctl.dev, "PID %u: " format, current->pid, \
+ ##__VA_ARGS__)
+
+struct mlx5ctl_uctx {
+ struct fwctl_uctx uctx;
+ u32 uctx_caps;
+ u32 uctx_uid;
+};
+
+struct mlx5ctl_dev {
+ struct fwctl_device fwctl;
+ struct mlx5_core_dev *mdev;
+};
+DEFINE_FREE(mlx5ctl, struct mlx5ctl_dev *, if (_T) fwctl_put(&_T->fwctl));
+
+struct mlx5_ifc_mbox_in_hdr_bits {
+ u8 opcode[0x10];
+ u8 uid[0x10];
+
+ u8 reserved_at_20[0x10];
+ u8 op_mod[0x10];
+
+ u8 reserved_at_40[0x40];
+};
+
+struct mlx5_ifc_mbox_out_hdr_bits {
+ u8 status[0x8];
+ u8 reserved_at_8[0x18];
+
+ u8 syndrome[0x20];
+
+ u8 reserved_at_40[0x40];
+};
+
+enum {
+ MLX5_UCTX_OBJECT_CAP_TOOLS_RESOURCES = 0x4,
+};
+
+enum {
+ MLX5_CMD_OP_QUERY_DRIVER_VERSION = 0x10c,
+ MLX5_CMD_OP_QUERY_OTHER_HCA_CAP = 0x10e,
+ MLX5_CMD_OP_QUERY_RDB = 0x512,
+ MLX5_CMD_OP_QUERY_PSV = 0x602,
+ MLX5_CMD_OP_QUERY_DC_CNAK_TRACE = 0x716,
+ MLX5_CMD_OP_QUERY_NVMF_BACKEND_CONTROLLER = 0x722,
+ MLX5_CMD_OP_QUERY_NVMF_NAMESPACE_CONTEXT = 0x728,
+ MLX5_CMD_OP_QUERY_BURST_SIZE = 0x813,
+ MLX5_CMD_OP_QUERY_DIAGNOSTIC_PARAMS = 0x819,
+ MLX5_CMD_OP_SET_DIAGNOSTIC_PARAMS = 0x820,
+ MLX5_CMD_OP_QUERY_DIAGNOSTIC_COUNTERS = 0x821,
+ MLX5_CMD_OP_QUERY_DELAY_DROP_PARAMS = 0x911,
+ MLX5_CMD_OP_QUERY_AFU = 0x971,
+ MLX5_CMD_OP_QUERY_CAPI_PEC = 0x981,
+ MLX5_CMD_OP_QUERY_UCTX = 0xa05,
+ MLX5_CMD_OP_QUERY_UMEM = 0xa09,
+ MLX5_CMD_OP_QUERY_NVMF_CC_RESPONSE = 0xb02,
+ MLX5_CMD_OP_QUERY_EMULATED_FUNCTIONS_INFO = 0xb03,
+ MLX5_CMD_OP_QUERY_REGEXP_PARAMS = 0xb05,
+ MLX5_CMD_OP_QUERY_REGEXP_REGISTER = 0xb07,
+ MLX5_CMD_OP_USER_QUERY_XRQ_DC_PARAMS_ENTRY = 0xb08,
+ MLX5_CMD_OP_USER_QUERY_XRQ_ERROR_PARAMS = 0xb0a,
+ MLX5_CMD_OP_ACCESS_REGISTER_USER = 0xb0c,
+ MLX5_CMD_OP_QUERY_EMULATION_DEVICE_EQ_MSIX_MAPPING = 0xb0f,
+ MLX5_CMD_OP_QUERY_MATCH_SAMPLE_INFO = 0xb13,
+ MLX5_CMD_OP_QUERY_CRYPTO_STATE = 0xb14,
+ MLX5_CMD_OP_QUERY_VUID = 0xb22,
+ MLX5_CMD_OP_QUERY_DPA_PARTITION = 0xb28,
+ MLX5_CMD_OP_QUERY_DPA_PARTITIONS = 0xb2a,
+ MLX5_CMD_OP_POSTPONE_CONNECTED_QP_TIMEOUT = 0xb2e,
+ MLX5_CMD_OP_QUERY_EMULATED_RESOURCES_INFO = 0xb2f,
+ MLX5_CMD_OP_QUERY_RSV_RESOURCES = 0x8000,
+ MLX5_CMD_OP_QUERY_MTT = 0x8001,
+ MLX5_CMD_OP_QUERY_SCHED_QUEUE = 0x8006,
+};
+
+static int mlx5ctl_alloc_uid(struct mlx5ctl_dev *mcdev, u32 cap)
+{
+ u32 out[MLX5_ST_SZ_DW(create_uctx_out)] = {};
+ u32 in[MLX5_ST_SZ_DW(create_uctx_in)] = {};
+ void *uctx;
+ int ret;
+ u16 uid;
+
+ uctx = MLX5_ADDR_OF(create_uctx_in, in, uctx);
+
+ mlx5ctl_dbg(mcdev, "%s: caps 0x%x\n", __func__, cap);
+ MLX5_SET(create_uctx_in, in, opcode, MLX5_CMD_OP_CREATE_UCTX);
+ MLX5_SET(uctx, uctx, cap, cap);
+
+ ret = mlx5_cmd_exec(mcdev->mdev, in, sizeof(in), out, sizeof(out));
+ if (ret)
+ return ret;
+
+ uid = MLX5_GET(create_uctx_out, out, uid);
+ mlx5ctl_dbg(mcdev, "allocated uid %u with caps 0x%x\n", uid, cap);
+ return uid;
+}
+
+static void mlx5ctl_release_uid(struct mlx5ctl_dev *mcdev, u16 uid)
+{
+ u32 in[MLX5_ST_SZ_DW(destroy_uctx_in)] = {};
+ struct mlx5_core_dev *mdev = mcdev->mdev;
+ int ret;
+
+ MLX5_SET(destroy_uctx_in, in, opcode, MLX5_CMD_OP_DESTROY_UCTX);
+ MLX5_SET(destroy_uctx_in, in, uid, uid);
+
+ ret = mlx5_cmd_exec_in(mdev, destroy_uctx, in);
+ mlx5ctl_dbg(mcdev, "released uid %u %pe\n", uid, ERR_PTR(ret));
+}
+
+static int mlx5ctl_open_uctx(struct fwctl_uctx *uctx)
+{
+ struct mlx5ctl_uctx *mfd =
+ container_of(uctx, struct mlx5ctl_uctx, uctx);
+ struct mlx5ctl_dev *mcdev =
+ container_of(uctx->fwctl, struct mlx5ctl_dev, fwctl);
+ int uid;
+
+ /*
+ * New FW supports the TOOLS_RESOURCES uid security label
+ * which allows commands to manipulate the global device state.
+	 * Otherwise only the basic existing RDMA devx privileges are allowed.
+ */
+ if (MLX5_CAP_GEN(mcdev->mdev, uctx_cap) &
+ MLX5_UCTX_OBJECT_CAP_TOOLS_RESOURCES)
+ mfd->uctx_caps |= MLX5_UCTX_OBJECT_CAP_TOOLS_RESOURCES;
+
+ uid = mlx5ctl_alloc_uid(mcdev, mfd->uctx_caps);
+ if (uid < 0)
+ return uid;
+
+ mfd->uctx_uid = uid;
+ return 0;
+}
+
+static void mlx5ctl_close_uctx(struct fwctl_uctx *uctx)
+{
+ struct mlx5ctl_dev *mcdev =
+ container_of(uctx->fwctl, struct mlx5ctl_dev, fwctl);
+ struct mlx5ctl_uctx *mfd =
+ container_of(uctx, struct mlx5ctl_uctx, uctx);
+
+ mlx5ctl_release_uid(mcdev, mfd->uctx_uid);
+}
+
+static void *mlx5ctl_info(struct fwctl_uctx *uctx, size_t *length)
+{
+ struct mlx5ctl_uctx *mfd =
+ container_of(uctx, struct mlx5ctl_uctx, uctx);
+ struct fwctl_info_mlx5 *info;
+
+ info = kzalloc(sizeof(*info), GFP_KERNEL);
+ if (!info)
+ return ERR_PTR(-ENOMEM);
+
+ info->uid = mfd->uctx_uid;
+ info->uctx_caps = mfd->uctx_caps;
+ *length = sizeof(*info);
+ return info;
+}
+
+static bool mlx5ctl_validate_rpc(const void *in, enum fwctl_rpc_scope scope)
+{
+ u16 opcode = MLX5_GET(mbox_in_hdr, in, opcode);
+ u16 op_mod = MLX5_GET(mbox_in_hdr, in, op_mod);
+
+ /*
+	 * Currently the driver can't keep track of commands that allocate
+	 * objects in the FW; these commands are safe from a security
+	 * perspective, but nothing would free the memory when the FD is
+	 * closed. For now permit only query commands and set commands that
+	 * don't alter objects. Also, the caps for the scopes have not been
+	 * defined yet, so filter commands manually for now.
+ */
+ switch (opcode) {
+ case MLX5_CMD_OP_POSTPONE_CONNECTED_QP_TIMEOUT:
+ case MLX5_CMD_OP_QUERY_ADAPTER:
+ case MLX5_CMD_OP_QUERY_ESW_FUNCTIONS:
+ case MLX5_CMD_OP_QUERY_HCA_CAP:
+ case MLX5_CMD_OP_QUERY_HCA_VPORT_CONTEXT:
+ case MLX5_CMD_OP_QUERY_OTHER_HCA_CAP:
+ case MLX5_CMD_OP_QUERY_ROCE_ADDRESS:
+ case MLX5_CMD_OPCODE_QUERY_VUID:
+ /*
+	 * FW limits SET_HCA_CAP on the tools UID to the other-function
+	 * mode only, which is used for function pre-configuration.
+ */
+ case MLX5_CMD_OP_SET_HCA_CAP:
+ return true; /* scope >= FWCTL_RPC_CONFIGURATION; */
+
+ case MLX5_CMD_OP_FPGA_QUERY_QP_COUNTERS:
+ case MLX5_CMD_OP_FPGA_QUERY_QP:
+ case MLX5_CMD_OP_NOP:
+ case MLX5_CMD_OP_QUERY_AFU:
+ case MLX5_CMD_OP_QUERY_BURST_SIZE:
+ case MLX5_CMD_OP_QUERY_CAPI_PEC:
+ case MLX5_CMD_OP_QUERY_CONG_PARAMS:
+ case MLX5_CMD_OP_QUERY_CONG_STATISTICS:
+ case MLX5_CMD_OP_QUERY_CONG_STATUS:
+ case MLX5_CMD_OP_QUERY_CQ:
+ case MLX5_CMD_OP_QUERY_CRYPTO_STATE:
+ case MLX5_CMD_OP_QUERY_DC_CNAK_TRACE:
+ case MLX5_CMD_OP_QUERY_DCT:
+ case MLX5_CMD_OP_QUERY_DELAY_DROP_PARAMS:
+ case MLX5_CMD_OP_QUERY_DIAGNOSTIC_COUNTERS:
+ case MLX5_CMD_OP_QUERY_DIAGNOSTIC_PARAMS:
+ case MLX5_CMD_OP_QUERY_DPA_PARTITION:
+ case MLX5_CMD_OP_QUERY_DPA_PARTITIONS:
+ case MLX5_CMD_OP_QUERY_DRIVER_VERSION:
+ case MLX5_CMD_OP_QUERY_EMULATED_FUNCTIONS_INFO:
+ case MLX5_CMD_OP_QUERY_EMULATED_RESOURCES_INFO:
+ case MLX5_CMD_OP_QUERY_EMULATION_DEVICE_EQ_MSIX_MAPPING:
+ case MLX5_CMD_OP_QUERY_EQ:
+ case MLX5_CMD_OP_QUERY_ESW_VPORT_CONTEXT:
+ case MLX5_CMD_OP_QUERY_FLOW_COUNTER:
+ case MLX5_CMD_OP_QUERY_FLOW_GROUP:
+ case MLX5_CMD_OP_QUERY_FLOW_TABLE_ENTRY:
+ case MLX5_CMD_OP_QUERY_FLOW_TABLE:
+ case MLX5_CMD_OP_QUERY_GENERAL_OBJECT:
+ case MLX5_CMD_OP_QUERY_HCA_VPORT_GID:
+ case MLX5_CMD_OP_QUERY_HCA_VPORT_PKEY:
+ case MLX5_CMD_OP_QUERY_ISSI:
+ case MLX5_CMD_OP_QUERY_L2_TABLE_ENTRY:
+ case MLX5_CMD_OP_QUERY_LAG:
+ case MLX5_CMD_OP_QUERY_MAD_DEMUX:
+ case MLX5_CMD_OP_QUERY_MATCH_SAMPLE_INFO:
+ case MLX5_CMD_OP_QUERY_MKEY:
+ case MLX5_CMD_OP_QUERY_MODIFY_HEADER_CONTEXT:
+ case MLX5_CMD_OP_QUERY_MTT:
+ case MLX5_CMD_OP_QUERY_NIC_VPORT_CONTEXT:
+ case MLX5_CMD_OP_QUERY_NVMF_BACKEND_CONTROLLER:
+ case MLX5_CMD_OP_QUERY_NVMF_CC_RESPONSE:
+ case MLX5_CMD_OP_QUERY_NVMF_NAMESPACE_CONTEXT:
+ case MLX5_CMD_OP_QUERY_PACKET_REFORMAT_CONTEXT:
+ case MLX5_CMD_OP_QUERY_PAGES:
+ case MLX5_CMD_OP_QUERY_PSV:
+ case MLX5_CMD_OP_QUERY_Q_COUNTER:
+ case MLX5_CMD_OP_QUERY_QP:
+ case MLX5_CMD_OP_QUERY_RATE_LIMIT:
+ case MLX5_CMD_OP_QUERY_RDB:
+ case MLX5_CMD_OP_QUERY_REGEXP_PARAMS:
+ case MLX5_CMD_OP_QUERY_REGEXP_REGISTER:
+ case MLX5_CMD_OP_QUERY_RMP:
+ case MLX5_CMD_OP_QUERY_RQ:
+ case MLX5_CMD_OP_QUERY_RQT:
+ case MLX5_CMD_OP_QUERY_RSV_RESOURCES:
+ case MLX5_CMD_OP_QUERY_SCHED_QUEUE:
+ case MLX5_CMD_OP_QUERY_SCHEDULING_ELEMENT:
+ case MLX5_CMD_OP_QUERY_SF_PARTITION:
+ case MLX5_CMD_OP_QUERY_SPECIAL_CONTEXTS:
+ case MLX5_CMD_OP_QUERY_SQ:
+ case MLX5_CMD_OP_QUERY_SRQ:
+ case MLX5_CMD_OP_QUERY_TIR:
+ case MLX5_CMD_OP_QUERY_TIS:
+ case MLX5_CMD_OP_QUERY_UCTX:
+ case MLX5_CMD_OP_QUERY_UMEM:
+ case MLX5_CMD_OP_QUERY_VHCA_MIGRATION_STATE:
+ case MLX5_CMD_OP_QUERY_VHCA_STATE:
+ case MLX5_CMD_OP_QUERY_VNIC_ENV:
+ case MLX5_CMD_OP_QUERY_VPORT_COUNTER:
+ case MLX5_CMD_OP_QUERY_VPORT_STATE:
+ case MLX5_CMD_OP_QUERY_WOL_ROL:
+ case MLX5_CMD_OP_QUERY_XRC_SRQ:
+ case MLX5_CMD_OP_QUERY_XRQ_DC_PARAMS_ENTRY:
+ case MLX5_CMD_OP_QUERY_XRQ_ERROR_PARAMS:
+ case MLX5_CMD_OP_QUERY_XRQ:
+ case MLX5_CMD_OP_USER_QUERY_XRQ_DC_PARAMS_ENTRY:
+ case MLX5_CMD_OP_USER_QUERY_XRQ_ERROR_PARAMS:
+ return scope >= FWCTL_RPC_DEBUG_READ_ONLY;
+
+ case MLX5_CMD_OP_SET_DIAGNOSTIC_PARAMS:
+ return scope >= FWCTL_RPC_DEBUG_WRITE;
+
+ case MLX5_CMD_OP_ACCESS_REG:
+ case MLX5_CMD_OP_ACCESS_REGISTER_USER:
+ if (op_mod == 0) /* write */
+ return true; /* scope >= FWCTL_RPC_CONFIGURATION; */
+ return scope >= FWCTL_RPC_DEBUG_READ_ONLY;
+ default:
+ return false;
+ }
+}
+
+static void *mlx5ctl_fw_rpc(struct fwctl_uctx *uctx, enum fwctl_rpc_scope scope,
+ void *rpc_in, size_t in_len, size_t *out_len)
+{
+ struct mlx5ctl_dev *mcdev =
+ container_of(uctx->fwctl, struct mlx5ctl_dev, fwctl);
+ struct mlx5ctl_uctx *mfd =
+ container_of(uctx, struct mlx5ctl_uctx, uctx);
+ void *rpc_out;
+ int ret;
+
+ if (in_len < MLX5_ST_SZ_BYTES(mbox_in_hdr) ||
+ *out_len < MLX5_ST_SZ_BYTES(mbox_out_hdr))
+ return ERR_PTR(-EMSGSIZE);
+
+ mlx5ctl_dbg(mcdev, "[UID %d] cmdif: opcode 0x%x inlen %zu outlen %zu\n",
+ mfd->uctx_uid, MLX5_GET(mbox_in_hdr, rpc_in, opcode),
+ in_len, *out_len);
+
+ if (!mlx5ctl_validate_rpc(rpc_in, scope))
+ return ERR_PTR(-EBADMSG);
+
+ /*
+ * mlx5_cmd_do() copies the input message to its own buffer before
+ * executing it, so we can reuse the allocation for the output.
+ */
+ if (*out_len <= in_len) {
+ rpc_out = rpc_in;
+ } else {
+ rpc_out = kvzalloc(*out_len, GFP_KERNEL);
+ if (!rpc_out)
+ return ERR_PTR(-ENOMEM);
+ }
+
+ /* Enforce the user context for the command */
+ MLX5_SET(mbox_in_hdr, rpc_in, uid, mfd->uctx_uid);
+ ret = mlx5_cmd_do(mcdev->mdev, rpc_in, in_len, rpc_out, *out_len);
+
+ mlx5ctl_dbg(mcdev,
+ "[UID %d] cmdif: opcode 0x%x status 0x%x retval %pe\n",
+ mfd->uctx_uid, MLX5_GET(mbox_in_hdr, rpc_in, opcode),
+ MLX5_GET(mbox_out_hdr, rpc_out, status), ERR_PTR(ret));
+
+ /*
+	 * -EREMOTEIO means execution succeeded and the out buffer is
+	 * valid, but it carries an error code returned by the device.
+	 * Everything else means the RPC did not make it to the device.
+ */
+ if (ret && ret != -EREMOTEIO) {
+ if (rpc_out != rpc_in)
+			kvfree(rpc_out);
+ return ERR_PTR(ret);
+ }
+ return rpc_out;
+}
+
+static const struct fwctl_ops mlx5ctl_ops = {
+ .device_type = FWCTL_DEVICE_TYPE_MLX5,
+ .uctx_size = sizeof(struct mlx5ctl_uctx),
+ .open_uctx = mlx5ctl_open_uctx,
+ .close_uctx = mlx5ctl_close_uctx,
+ .info = mlx5ctl_info,
+ .fw_rpc = mlx5ctl_fw_rpc,
+};
+
+static int mlx5ctl_probe(struct auxiliary_device *adev,
+ const struct auxiliary_device_id *id)
+
+{
+ struct mlx5_adev *madev = container_of(adev, struct mlx5_adev, adev);
+ struct mlx5_core_dev *mdev = madev->mdev;
+ struct mlx5ctl_dev *mcdev __free(mlx5ctl) = fwctl_alloc_device(
+ &mdev->pdev->dev, &mlx5ctl_ops, struct mlx5ctl_dev, fwctl);
+ int ret;
+
+ if (!mcdev)
+ return -ENOMEM;
+
+ mcdev->mdev = mdev;
+
+ ret = fwctl_register(&mcdev->fwctl);
+ if (ret)
+ return ret;
+ auxiliary_set_drvdata(adev, no_free_ptr(mcdev));
+ return 0;
+}
+
+static void mlx5ctl_remove(struct auxiliary_device *adev)
+{
+ struct mlx5ctl_dev *mcdev = auxiliary_get_drvdata(adev);
+
+ fwctl_unregister(&mcdev->fwctl);
+ fwctl_put(&mcdev->fwctl);
+}
+
+static const struct auxiliary_device_id mlx5ctl_id_table[] = {
+ {.name = MLX5_ADEV_NAME ".fwctl",},
+ {}
+};
+MODULE_DEVICE_TABLE(auxiliary, mlx5ctl_id_table);
+
+static struct auxiliary_driver mlx5ctl_driver = {
+ .name = "mlx5_fwctl",
+ .probe = mlx5ctl_probe,
+ .remove = mlx5ctl_remove,
+ .id_table = mlx5ctl_id_table,
+};
+
+module_auxiliary_driver(mlx5ctl_driver);
+
+MODULE_IMPORT_NS("FWCTL");
+MODULE_DESCRIPTION("mlx5 ConnectX fwctl driver");
+MODULE_AUTHOR("Saeed Mahameed <saeedm@nvidia.com>");
+MODULE_LICENSE("Dual BSD/GPL");
diff --git a/drivers/fwctl/pds/Makefile b/drivers/fwctl/pds/Makefile
new file mode 100644
index 000000000000..cc2317c07be1
--- /dev/null
+++ b/drivers/fwctl/pds/Makefile
@@ -0,0 +1,4 @@
+# SPDX-License-Identifier: GPL-2.0
+obj-$(CONFIG_FWCTL_PDS) += pds_fwctl.o
+
+pds_fwctl-y += main.o
diff --git a/drivers/fwctl/pds/main.c b/drivers/fwctl/pds/main.c
new file mode 100644
index 000000000000..284c4165fdd4
--- /dev/null
+++ b/drivers/fwctl/pds/main.c
@@ -0,0 +1,536 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright(c) Advanced Micro Devices, Inc */
+
+#include <linux/module.h>
+#include <linux/auxiliary_bus.h>
+#include <linux/pci.h>
+#include <linux/vmalloc.h>
+#include <linux/bitfield.h>
+
+#include <uapi/fwctl/fwctl.h>
+#include <uapi/fwctl/pds.h>
+#include <linux/fwctl.h>
+
+#include <linux/pds/pds_common.h>
+#include <linux/pds/pds_core_if.h>
+#include <linux/pds/pds_adminq.h>
+#include <linux/pds/pds_auxbus.h>
+
+struct pdsfc_uctx {
+ struct fwctl_uctx uctx;
+ u32 uctx_caps;
+};
+
+struct pdsfc_rpc_endpoint_info {
+ u32 endpoint;
+ dma_addr_t operations_pa;
+ struct pds_fwctl_query_data *operations;
+ struct mutex lock; /* lock for endpoint info management */
+};
+
+struct pdsfc_dev {
+ struct fwctl_device fwctl;
+ struct pds_auxiliary_dev *padev;
+ u32 caps;
+ struct pds_fwctl_ident ident;
+ dma_addr_t endpoints_pa;
+ struct pds_fwctl_query_data *endpoints;
+ struct pdsfc_rpc_endpoint_info *endpoint_info;
+};
+
+static int pdsfc_open_uctx(struct fwctl_uctx *uctx)
+{
+ struct pdsfc_dev *pdsfc = container_of(uctx->fwctl, struct pdsfc_dev, fwctl);
+ struct pdsfc_uctx *pdsfc_uctx = container_of(uctx, struct pdsfc_uctx, uctx);
+
+ pdsfc_uctx->uctx_caps = pdsfc->caps;
+
+ return 0;
+}
+
+static void pdsfc_close_uctx(struct fwctl_uctx *uctx)
+{
+}
+
+static void *pdsfc_info(struct fwctl_uctx *uctx, size_t *length)
+{
+ struct pdsfc_uctx *pdsfc_uctx = container_of(uctx, struct pdsfc_uctx, uctx);
+ struct fwctl_info_pds *info;
+
+ info = kzalloc(sizeof(*info), GFP_KERNEL);
+ if (!info)
+ return ERR_PTR(-ENOMEM);
+
+	info->uctx_caps = pdsfc_uctx->uctx_caps;
+	*length = sizeof(*info);
+
+	return info;
+}
+
+static int pdsfc_identify(struct pdsfc_dev *pdsfc)
+{
+ struct device *dev = &pdsfc->fwctl.dev;
+ union pds_core_adminq_comp comp = {0};
+ union pds_core_adminq_cmd cmd;
+ struct pds_fwctl_ident *ident;
+ dma_addr_t ident_pa;
+ int err;
+
+ ident = dma_alloc_coherent(dev->parent, sizeof(*ident), &ident_pa, GFP_KERNEL);
+ if (!ident) {
+ dev_err(dev, "Failed to map ident buffer\n");
+ return -ENOMEM;
+ }
+
+ cmd = (union pds_core_adminq_cmd) {
+ .fwctl_ident = {
+ .opcode = PDS_FWCTL_CMD_IDENT,
+ .version = 0,
+ .len = cpu_to_le32(sizeof(*ident)),
+ .ident_pa = cpu_to_le64(ident_pa),
+ }
+ };
+
+ err = pds_client_adminq_cmd(pdsfc->padev, &cmd, sizeof(cmd), &comp, 0);
+ if (err)
+ dev_err(dev, "Failed to send adminq cmd opcode: %u err: %d\n",
+ cmd.fwctl_ident.opcode, err);
+ else
+ pdsfc->ident = *ident;
+
+ dma_free_coherent(dev->parent, sizeof(*ident), ident, ident_pa);
+
+ return err;
+}
+
+static void pdsfc_free_endpoints(struct pdsfc_dev *pdsfc)
+{
+ struct device *dev = &pdsfc->fwctl.dev;
+ int i;
+
+ if (!pdsfc->endpoints)
+ return;
+
+	for (i = 0; pdsfc->endpoint_info && i < le32_to_cpu(pdsfc->endpoints->num_entries); i++)
+ mutex_destroy(&pdsfc->endpoint_info[i].lock);
+ vfree(pdsfc->endpoint_info);
+ pdsfc->endpoint_info = NULL;
+ dma_free_coherent(dev->parent, PAGE_SIZE,
+ pdsfc->endpoints, pdsfc->endpoints_pa);
+ pdsfc->endpoints = NULL;
+ pdsfc->endpoints_pa = DMA_MAPPING_ERROR;
+}
+
+static void pdsfc_free_operations(struct pdsfc_dev *pdsfc)
+{
+ struct device *dev = &pdsfc->fwctl.dev;
+ u32 num_endpoints;
+ int i;
+
+ num_endpoints = le32_to_cpu(pdsfc->endpoints->num_entries);
+ for (i = 0; i < num_endpoints; i++) {
+ struct pdsfc_rpc_endpoint_info *ei = &pdsfc->endpoint_info[i];
+
+ if (ei->operations) {
+ dma_free_coherent(dev->parent, PAGE_SIZE,
+ ei->operations, ei->operations_pa);
+ ei->operations = NULL;
+ ei->operations_pa = DMA_MAPPING_ERROR;
+ }
+ }
+}
+
+static struct pds_fwctl_query_data *pdsfc_get_endpoints(struct pdsfc_dev *pdsfc,
+ dma_addr_t *pa)
+{
+ struct device *dev = &pdsfc->fwctl.dev;
+ union pds_core_adminq_comp comp = {0};
+ struct pds_fwctl_query_data *data;
+ union pds_core_adminq_cmd cmd;
+ dma_addr_t data_pa;
+ int err;
+
+ data = dma_alloc_coherent(dev->parent, PAGE_SIZE, &data_pa, GFP_KERNEL);
+ if (!data) {
+ dev_err(dev, "Failed to map endpoint list\n");
+ return ERR_PTR(-ENOMEM);
+ }
+
+ cmd = (union pds_core_adminq_cmd) {
+ .fwctl_query = {
+ .opcode = PDS_FWCTL_CMD_QUERY,
+ .entity = PDS_FWCTL_RPC_ROOT,
+ .version = 0,
+ .query_data_buf_len = cpu_to_le32(PAGE_SIZE),
+ .query_data_buf_pa = cpu_to_le64(data_pa),
+ }
+ };
+
+ err = pds_client_adminq_cmd(pdsfc->padev, &cmd, sizeof(cmd), &comp, 0);
+ if (err) {
+ dev_err(dev, "Failed to send adminq cmd opcode: %u entity: %u err: %d\n",
+ cmd.fwctl_query.opcode, cmd.fwctl_query.entity, err);
+ dma_free_coherent(dev->parent, PAGE_SIZE, data, data_pa);
+ return ERR_PTR(err);
+ }
+
+ *pa = data_pa;
+
+ return data;
+}
+
+static int pdsfc_init_endpoints(struct pdsfc_dev *pdsfc)
+{
+ struct pds_fwctl_query_data_endpoint *ep_entry;
+ u32 num_endpoints;
+ int i;
+
+ pdsfc->endpoints = pdsfc_get_endpoints(pdsfc, &pdsfc->endpoints_pa);
+ if (IS_ERR(pdsfc->endpoints))
+ return PTR_ERR(pdsfc->endpoints);
+
+ num_endpoints = le32_to_cpu(pdsfc->endpoints->num_entries);
+ pdsfc->endpoint_info = vcalloc(num_endpoints,
+ sizeof(*pdsfc->endpoint_info));
+ if (!pdsfc->endpoint_info) {
+ pdsfc_free_endpoints(pdsfc);
+ return -ENOMEM;
+ }
+
+ ep_entry = (struct pds_fwctl_query_data_endpoint *)pdsfc->endpoints->entries;
+ for (i = 0; i < num_endpoints; i++) {
+ mutex_init(&pdsfc->endpoint_info[i].lock);
+		pdsfc->endpoint_info[i].endpoint = le32_to_cpu(ep_entry[i].id);
+ }
+
+ return 0;
+}
+
+static struct pds_fwctl_query_data *pdsfc_get_operations(struct pdsfc_dev *pdsfc,
+ dma_addr_t *pa, u32 ep)
+{
+ struct pds_fwctl_query_data_operation *entries;
+ struct device *dev = &pdsfc->fwctl.dev;
+ union pds_core_adminq_comp comp = {0};
+ struct pds_fwctl_query_data *data;
+ union pds_core_adminq_cmd cmd;
+ dma_addr_t data_pa;
+ int err;
+ int i;
+
+ /* Query the operations list for the given endpoint */
+ data = dma_alloc_coherent(dev->parent, PAGE_SIZE, &data_pa, GFP_KERNEL);
+ if (!data) {
+ dev_err(dev, "Failed to map operations list\n");
+ return ERR_PTR(-ENOMEM);
+ }
+
+ cmd = (union pds_core_adminq_cmd) {
+ .fwctl_query = {
+ .opcode = PDS_FWCTL_CMD_QUERY,
+ .entity = PDS_FWCTL_RPC_ENDPOINT,
+ .version = 0,
+ .query_data_buf_len = cpu_to_le32(PAGE_SIZE),
+ .query_data_buf_pa = cpu_to_le64(data_pa),
+ .ep = cpu_to_le32(ep),
+ }
+ };
+
+ err = pds_client_adminq_cmd(pdsfc->padev, &cmd, sizeof(cmd), &comp, 0);
+ if (err) {
+ dev_err(dev, "Failed to send adminq cmd opcode: %u entity: %u err: %d\n",
+ cmd.fwctl_query.opcode, cmd.fwctl_query.entity, err);
+ dma_free_coherent(dev->parent, PAGE_SIZE, data, data_pa);
+ return ERR_PTR(err);
+ }
+
+ *pa = data_pa;
+
+ entries = (struct pds_fwctl_query_data_operation *)data->entries;
+ dev_dbg(dev, "num_entries %d\n", data->num_entries);
+ for (i = 0; i < data->num_entries; i++) {
+
+ /* Translate FW command attribute to fwctl scope */
+ switch (entries[i].scope) {
+ case PDSFC_FW_CMD_ATTR_READ:
+ case PDSFC_FW_CMD_ATTR_WRITE:
+ case PDSFC_FW_CMD_ATTR_SYNC:
+ entries[i].scope = FWCTL_RPC_CONFIGURATION;
+ break;
+ case PDSFC_FW_CMD_ATTR_DEBUG_READ:
+ entries[i].scope = FWCTL_RPC_DEBUG_READ_ONLY;
+ break;
+ case PDSFC_FW_CMD_ATTR_DEBUG_WRITE:
+ entries[i].scope = FWCTL_RPC_DEBUG_WRITE;
+ break;
+ default:
+ entries[i].scope = FWCTL_RPC_DEBUG_WRITE_FULL;
+ break;
+ }
+ dev_dbg(dev, "endpoint %d operation: id %x scope %d\n",
+ ep, entries[i].id, entries[i].scope);
+ }
+
+ return data;
+}
+
+static int pdsfc_validate_rpc(struct pdsfc_dev *pdsfc,
+ struct fwctl_rpc_pds *rpc,
+ enum fwctl_rpc_scope scope)
+{
+ struct pds_fwctl_query_data_operation *op_entry;
+ struct pdsfc_rpc_endpoint_info *ep_info = NULL;
+ struct device *dev = &pdsfc->fwctl.dev;
+ int i;
+
+	/* Validate rpc in_len and out_len against the device's advertised
+	 * ident.max_req_sz and max_resp_sz limits.
+	 */
+ if (rpc->in.len > pdsfc->ident.max_req_sz) {
+ dev_dbg(dev, "Invalid request size %u, max %u\n",
+ rpc->in.len, pdsfc->ident.max_req_sz);
+ return -EINVAL;
+ }
+
+ if (rpc->out.len > pdsfc->ident.max_resp_sz) {
+ dev_dbg(dev, "Invalid response size %u, max %u\n",
+ rpc->out.len, pdsfc->ident.max_resp_sz);
+ return -EINVAL;
+ }
+
+	for (i = 0; i < le32_to_cpu(pdsfc->endpoints->num_entries); i++) {
+ if (pdsfc->endpoint_info[i].endpoint == rpc->in.ep) {
+ ep_info = &pdsfc->endpoint_info[i];
+ break;
+ }
+ }
+ if (!ep_info) {
+ dev_dbg(dev, "Invalid endpoint %d\n", rpc->in.ep);
+ return -EINVAL;
+ }
+
+ /* query and cache this endpoint's operations */
+ mutex_lock(&ep_info->lock);
+ if (!ep_info->operations) {
+ struct pds_fwctl_query_data *operations;
+
+ operations = pdsfc_get_operations(pdsfc,
+ &ep_info->operations_pa,
+ rpc->in.ep);
+ if (IS_ERR(operations)) {
+ mutex_unlock(&ep_info->lock);
+			return PTR_ERR(operations);
+ }
+ ep_info->operations = operations;
+ }
+ mutex_unlock(&ep_info->lock);
+
+ /* reject unsupported and/or out of scope commands */
+ op_entry = (struct pds_fwctl_query_data_operation *)ep_info->operations->entries;
+	for (i = 0; i < le32_to_cpu(ep_info->operations->num_entries); i++) {
+ if (PDS_FWCTL_RPC_OPCODE_CMP(rpc->in.op, op_entry[i].id)) {
+ if (scope < op_entry[i].scope)
+ return -EPERM;
+ return 0;
+ }
+ }
+
+ dev_dbg(dev, "Invalid operation %d for endpoint %d\n", rpc->in.op, rpc->in.ep);
+
+ return -EINVAL;
+}
+
+static void *pdsfc_fw_rpc(struct fwctl_uctx *uctx, enum fwctl_rpc_scope scope,
+ void *in, size_t in_len, size_t *out_len)
+{
+ struct pdsfc_dev *pdsfc = container_of(uctx->fwctl, struct pdsfc_dev, fwctl);
+ struct device *dev = &uctx->fwctl->dev;
+ union pds_core_adminq_comp comp = {0};
+ dma_addr_t out_payload_dma_addr = 0;
+ dma_addr_t in_payload_dma_addr = 0;
+ struct fwctl_rpc_pds *rpc = in;
+ union pds_core_adminq_cmd cmd;
+ void *out_payload = NULL;
+ void *in_payload = NULL;
+ void *out = NULL;
+ int err;
+
+ err = pdsfc_validate_rpc(pdsfc, rpc, scope);
+ if (err)
+ return ERR_PTR(err);
+
+ if (rpc->in.len > 0) {
+ in_payload = kzalloc(rpc->in.len, GFP_KERNEL);
+ if (!in_payload) {
+ dev_err(dev, "Failed to allocate in_payload\n");
+ err = -ENOMEM;
+ goto err_out;
+ }
+
+ if (copy_from_user(in_payload, u64_to_user_ptr(rpc->in.payload),
+ rpc->in.len)) {
+ dev_dbg(dev, "Failed to copy in_payload from user\n");
+ err = -EFAULT;
+ goto err_in_payload;
+ }
+
+ in_payload_dma_addr = dma_map_single(dev->parent, in_payload,
+ rpc->in.len, DMA_TO_DEVICE);
+ err = dma_mapping_error(dev->parent, in_payload_dma_addr);
+ if (err) {
+ dev_dbg(dev, "Failed to map in_payload\n");
+ goto err_in_payload;
+ }
+ }
+
+ if (rpc->out.len > 0) {
+ out_payload = kzalloc(rpc->out.len, GFP_KERNEL);
+ if (!out_payload) {
+ dev_dbg(dev, "Failed to allocate out_payload\n");
+ err = -ENOMEM;
+ goto err_out_payload;
+ }
+
+ out_payload_dma_addr = dma_map_single(dev->parent, out_payload,
+ rpc->out.len, DMA_FROM_DEVICE);
+ err = dma_mapping_error(dev->parent, out_payload_dma_addr);
+ if (err) {
+ dev_dbg(dev, "Failed to map out_payload\n");
+ goto err_out_payload;
+ }
+ }
+
+ cmd = (union pds_core_adminq_cmd) {
+ .fwctl_rpc = {
+ .opcode = PDS_FWCTL_CMD_RPC,
+ .flags = PDS_FWCTL_RPC_IND_REQ | PDS_FWCTL_RPC_IND_RESP,
+ .ep = cpu_to_le32(rpc->in.ep),
+ .op = cpu_to_le32(rpc->in.op),
+ .req_pa = cpu_to_le64(in_payload_dma_addr),
+ .req_sz = cpu_to_le32(rpc->in.len),
+ .resp_pa = cpu_to_le64(out_payload_dma_addr),
+ .resp_sz = cpu_to_le32(rpc->out.len),
+ }
+ };
+
+ err = pds_client_adminq_cmd(pdsfc->padev, &cmd, sizeof(cmd), &comp, 0);
+ if (err) {
+ dev_dbg(dev, "%s: ep %d op %x req_pa %llx req_sz %d req_sg %d resp_pa %llx resp_sz %d resp_sg %d err %d\n",
+ __func__, rpc->in.ep, rpc->in.op,
+ cmd.fwctl_rpc.req_pa, cmd.fwctl_rpc.req_sz, cmd.fwctl_rpc.req_sg_elems,
+ cmd.fwctl_rpc.resp_pa, cmd.fwctl_rpc.resp_sz, cmd.fwctl_rpc.resp_sg_elems,
+ err);
+ goto done;
+ }
+
+ dynamic_hex_dump("out ", DUMP_PREFIX_OFFSET, 16, 1, out_payload, rpc->out.len, true);
+
+ if (copy_to_user(u64_to_user_ptr(rpc->out.payload), out_payload, rpc->out.len)) {
+ dev_dbg(dev, "Failed to copy out_payload to user\n");
+ out = ERR_PTR(-EFAULT);
+ goto done;
+ }
+
+ rpc->out.retval = le32_to_cpu(comp.fwctl_rpc.err);
+ *out_len = in_len;
+ out = in;
+
+done:
+ if (out_payload_dma_addr)
+ dma_unmap_single(dev->parent, out_payload_dma_addr,
+ rpc->out.len, DMA_FROM_DEVICE);
+err_out_payload:
+ kfree(out_payload);
+
+ if (in_payload_dma_addr)
+ dma_unmap_single(dev->parent, in_payload_dma_addr,
+ rpc->in.len, DMA_TO_DEVICE);
+err_in_payload:
+ kfree(in_payload);
+err_out:
+ if (err)
+ return ERR_PTR(err);
+
+ return out;
+}
+
+static const struct fwctl_ops pdsfc_ops = {
+ .device_type = FWCTL_DEVICE_TYPE_PDS,
+ .uctx_size = sizeof(struct pdsfc_uctx),
+ .open_uctx = pdsfc_open_uctx,
+ .close_uctx = pdsfc_close_uctx,
+ .info = pdsfc_info,
+ .fw_rpc = pdsfc_fw_rpc,
+};
+
+static int pdsfc_probe(struct auxiliary_device *adev,
+ const struct auxiliary_device_id *id)
+{
+ struct pds_auxiliary_dev *padev =
+ container_of(adev, struct pds_auxiliary_dev, aux_dev);
+ struct device *dev = &adev->dev;
+ struct pdsfc_dev *pdsfc;
+ int err;
+
+ pdsfc = fwctl_alloc_device(&padev->vf_pdev->dev, &pdsfc_ops,
+ struct pdsfc_dev, fwctl);
+ if (!pdsfc)
+ return dev_err_probe(dev, -ENOMEM, "Failed to allocate fwctl device struct\n");
+ pdsfc->padev = padev;
+
+ err = pdsfc_identify(pdsfc);
+ if (err) {
+ fwctl_put(&pdsfc->fwctl);
+ return dev_err_probe(dev, err, "Failed to identify device\n");
+ }
+
+ err = pdsfc_init_endpoints(pdsfc);
+ if (err) {
+ fwctl_put(&pdsfc->fwctl);
+ return dev_err_probe(dev, err, "Failed to init endpoints\n");
+ }
+
+ pdsfc->caps = PDS_FWCTL_QUERY_CAP | PDS_FWCTL_SEND_CAP;
+
+ err = fwctl_register(&pdsfc->fwctl);
+ if (err) {
+ pdsfc_free_endpoints(pdsfc);
+ fwctl_put(&pdsfc->fwctl);
+ return dev_err_probe(dev, err, "Failed to register device\n");
+ }
+
+ auxiliary_set_drvdata(adev, pdsfc);
+
+ return 0;
+}
+
+static void pdsfc_remove(struct auxiliary_device *adev)
+{
+ struct pdsfc_dev *pdsfc = auxiliary_get_drvdata(adev);
+
+ fwctl_unregister(&pdsfc->fwctl);
+ pdsfc_free_operations(pdsfc);
+ pdsfc_free_endpoints(pdsfc);
+
+ fwctl_put(&pdsfc->fwctl);
+}
+
+static const struct auxiliary_device_id pdsfc_id_table[] = {
+ {.name = PDS_CORE_DRV_NAME "." PDS_DEV_TYPE_FWCTL_STR },
+ {}
+};
+MODULE_DEVICE_TABLE(auxiliary, pdsfc_id_table);
+
+static struct auxiliary_driver pdsfc_driver = {
+ .name = "pds_fwctl",
+ .probe = pdsfc_probe,
+ .remove = pdsfc_remove,
+ .id_table = pdsfc_id_table,
+};
+
+module_auxiliary_driver(pdsfc_driver);
+
+MODULE_IMPORT_NS("FWCTL");
+MODULE_DESCRIPTION("pds fwctl driver");
+MODULE_AUTHOR("Shannon Nelson <shannon.nelson@amd.com>");
+MODULE_AUTHOR("Brett Creeley <brett.creeley@amd.com>");
+MODULE_LICENSE("GPL");
diff --git a/drivers/net/ethernet/amd/pds_core/auxbus.c b/drivers/net/ethernet/amd/pds_core/auxbus.c
index 2babea110991..eeb72b1809ea 100644
--- a/drivers/net/ethernet/amd/pds_core/auxbus.c
+++ b/drivers/net/ethernet/amd/pds_core/auxbus.c
@@ -175,34 +175,32 @@ static struct pds_auxiliary_dev *pdsc_auxbus_dev_register(struct pdsc *cf,
return padev;
}
-int pdsc_auxbus_dev_del(struct pdsc *cf, struct pdsc *pf)
+void pdsc_auxbus_dev_del(struct pdsc *cf, struct pdsc *pf,
+ struct pds_auxiliary_dev **pd_ptr)
{
struct pds_auxiliary_dev *padev;
- int err = 0;
- if (!cf)
- return -ENODEV;
+ if (!*pd_ptr)
+ return;
mutex_lock(&pf->config_lock);
- padev = pf->vfs[cf->vf_id].padev;
- if (padev) {
- pds_client_unregister(pf, padev->client_id);
- auxiliary_device_delete(&padev->aux_dev);
- auxiliary_device_uninit(&padev->aux_dev);
- padev->client_id = 0;
- }
- pf->vfs[cf->vf_id].padev = NULL;
+ padev = *pd_ptr;
+ pds_client_unregister(pf, padev->client_id);
+ auxiliary_device_delete(&padev->aux_dev);
+ auxiliary_device_uninit(&padev->aux_dev);
+ padev->client_id = 0;
+ *pd_ptr = NULL;
mutex_unlock(&pf->config_lock);
- return err;
}
-int pdsc_auxbus_dev_add(struct pdsc *cf, struct pdsc *pf)
+int pdsc_auxbus_dev_add(struct pdsc *cf, struct pdsc *pf,
+ enum pds_core_vif_types vt,
+ struct pds_auxiliary_dev **pd_ptr)
{
struct pds_auxiliary_dev *padev;
char devname[PDS_DEVNAME_LEN];
- enum pds_core_vif_types vt;
unsigned long mask;
u16 vt_support;
int client_id;
@@ -211,6 +209,9 @@ int pdsc_auxbus_dev_add(struct pdsc *cf, struct pdsc *pf)
if (!cf)
return -ENODEV;
+ if (vt >= PDS_DEV_TYPE_MAX)
+ return -EINVAL;
+
mutex_lock(&pf->config_lock);
mask = BIT_ULL(PDSC_S_FW_DEAD) |
@@ -222,17 +223,10 @@ int pdsc_auxbus_dev_add(struct pdsc *cf, struct pdsc *pf)
goto out_unlock;
}
- /* We only support vDPA so far, so it is the only one to
- * be verified that it is available in the Core device and
- * enabled in the devlink param. In the future this might
- * become a loop for several VIF types.
- */
-
/* Verify that the type is supported and enabled. It is not
- * an error if there is no auxbus device support for this
- * VF, it just means something else needs to happen with it.
+	 * an error if the firmware doesn't support the feature; the
+ * driver just won't set up an auxiliary_device for it.
*/
- vt = PDS_DEV_TYPE_VDPA;
vt_support = !!le16_to_cpu(pf->dev_ident.vif_types[vt]);
if (!(vt_support &&
pf->viftype_status[vt].supported &&
@@ -258,7 +252,7 @@ int pdsc_auxbus_dev_add(struct pdsc *cf, struct pdsc *pf)
err = PTR_ERR(padev);
goto out_unlock;
}
- pf->vfs[cf->vf_id].padev = padev;
+ *pd_ptr = padev;
out_unlock:
mutex_unlock(&pf->config_lock);
diff --git a/drivers/net/ethernet/amd/pds_core/core.c b/drivers/net/ethernet/amd/pds_core/core.c
index 536635e57727..1eb0d92786f7 100644
--- a/drivers/net/ethernet/amd/pds_core/core.c
+++ b/drivers/net/ethernet/amd/pds_core/core.c
@@ -402,6 +402,9 @@ err_out_uninit:
}
static struct pdsc_viftype pdsc_viftype_defaults[] = {
+ [PDS_DEV_TYPE_FWCTL] = { .name = PDS_DEV_TYPE_FWCTL_STR,
+ .vif_id = PDS_DEV_TYPE_FWCTL,
+ .dl_id = -1 },
[PDS_DEV_TYPE_VDPA] = { .name = PDS_DEV_TYPE_VDPA_STR,
.vif_id = PDS_DEV_TYPE_VDPA,
.dl_id = DEVLINK_PARAM_GENERIC_ID_ENABLE_VNET },
@@ -428,6 +431,10 @@ static int pdsc_viftypes_init(struct pdsc *pdsc)
/* See what the Core device has for support */
vt_support = !!le16_to_cpu(pdsc->dev_ident.vif_types[vt]);
+
+ if (vt == PDS_DEV_TYPE_FWCTL)
+ pdsc->viftype_status[vt].enabled = true;
+
dev_dbg(pdsc->dev, "VIF %s is %ssupported\n",
pdsc->viftype_status[vt].name,
vt_support ? "" : "not ");
diff --git a/drivers/net/ethernet/amd/pds_core/core.h b/drivers/net/ethernet/amd/pds_core/core.h
index 14522d6d5f86..0bf320c43083 100644
--- a/drivers/net/ethernet/amd/pds_core/core.h
+++ b/drivers/net/ethernet/amd/pds_core/core.h
@@ -156,6 +156,7 @@ struct pdsc {
struct dentry *dentry;
struct device *dev;
struct pdsc_dev_bar bars[PDS_CORE_BARS_MAX];
+ struct pds_auxiliary_dev *padev;
struct pdsc_vf *vfs;
int num_vfs;
int vf_id;
@@ -303,8 +304,11 @@ void pdsc_health_thread(struct work_struct *work);
int pdsc_register_notify(struct notifier_block *nb);
void pdsc_unregister_notify(struct notifier_block *nb);
void pdsc_notify(unsigned long event, void *data);
-int pdsc_auxbus_dev_add(struct pdsc *cf, struct pdsc *pf);
-int pdsc_auxbus_dev_del(struct pdsc *cf, struct pdsc *pf);
+int pdsc_auxbus_dev_add(struct pdsc *cf, struct pdsc *pf,
+ enum pds_core_vif_types vt,
+ struct pds_auxiliary_dev **pd_ptr);
+void pdsc_auxbus_dev_del(struct pdsc *cf, struct pdsc *pf,
+ struct pds_auxiliary_dev **pd_ptr);
void pdsc_process_adminq(struct pdsc_qcq *qcq);
void pdsc_work_thread(struct work_struct *work);
diff --git a/drivers/net/ethernet/amd/pds_core/devlink.c b/drivers/net/ethernet/amd/pds_core/devlink.c
index 44971e71991f..c5c787df61a4 100644
--- a/drivers/net/ethernet/amd/pds_core/devlink.c
+++ b/drivers/net/ethernet/amd/pds_core/devlink.c
@@ -56,8 +56,11 @@ int pdsc_dl_enable_set(struct devlink *dl, u32 id,
for (vf_id = 0; vf_id < pdsc->num_vfs; vf_id++) {
struct pdsc *vf = pdsc->vfs[vf_id].vf;
- err = ctx->val.vbool ? pdsc_auxbus_dev_add(vf, pdsc) :
- pdsc_auxbus_dev_del(vf, pdsc);
+ if (ctx->val.vbool)
+ err = pdsc_auxbus_dev_add(vf, pdsc, vt_entry->vif_id,
+ &pdsc->vfs[vf_id].padev);
+ else
+ pdsc_auxbus_dev_del(vf, pdsc, &pdsc->vfs[vf_id].padev);
}
return err;
diff --git a/drivers/net/ethernet/amd/pds_core/main.c b/drivers/net/ethernet/amd/pds_core/main.c
index 660268ff9562..4843f9249a31 100644
--- a/drivers/net/ethernet/amd/pds_core/main.c
+++ b/drivers/net/ethernet/amd/pds_core/main.c
@@ -190,7 +190,8 @@ static int pdsc_init_vf(struct pdsc *vf)
devl_unlock(dl);
pf->vfs[vf->vf_id].vf = vf;
- err = pdsc_auxbus_dev_add(vf, pf);
+ err = pdsc_auxbus_dev_add(vf, pf, PDS_DEV_TYPE_VDPA,
+ &pf->vfs[vf->vf_id].padev);
if (err) {
devl_lock(dl);
devl_unregister(dl);
@@ -264,6 +265,10 @@ static int pdsc_init_pf(struct pdsc *pdsc)
mutex_unlock(&pdsc->config_lock);
+ err = pdsc_auxbus_dev_add(pdsc, pdsc, PDS_DEV_TYPE_FWCTL, &pdsc->padev);
+ if (err)
+ goto err_out_stop;
+
dl = priv_to_devlink(pdsc);
devl_lock(dl);
err = devl_params_register(dl, pdsc_dl_params,
@@ -272,7 +277,7 @@ static int pdsc_init_pf(struct pdsc *pdsc)
devl_unlock(dl);
dev_warn(pdsc->dev, "Failed to register devlink params: %pe\n",
ERR_PTR(err));
- goto err_out_stop;
+ goto err_out_del_dev;
}
hr = devl_health_reporter_create(dl, &pdsc_fw_reporter_ops, 0, pdsc);
@@ -295,6 +300,8 @@ static int pdsc_init_pf(struct pdsc *pdsc)
err_out_unreg_params:
devlink_params_unregister(dl, pdsc_dl_params,
ARRAY_SIZE(pdsc_dl_params));
+err_out_del_dev:
+ pdsc_auxbus_dev_del(pdsc, pdsc, &pdsc->padev);
err_out_stop:
pdsc_stop(pdsc);
err_out_teardown:
@@ -417,7 +424,7 @@ static void pdsc_remove(struct pci_dev *pdev)
pf = pdsc_get_pf_struct(pdsc->pdev);
if (!IS_ERR(pf)) {
- pdsc_auxbus_dev_del(pdsc, pf);
+ pdsc_auxbus_dev_del(pdsc, pf, &pf->vfs[pdsc->vf_id].padev);
pf->vfs[pdsc->vf_id].vf = NULL;
}
} else {
@@ -426,6 +433,7 @@ static void pdsc_remove(struct pci_dev *pdev)
* shut themselves down.
*/
pdsc_sriov_configure(pdev, 0);
+ pdsc_auxbus_dev_del(pdsc, pdsc, &pdsc->padev);
timer_shutdown_sync(&pdsc->wdtimer);
if (pdsc->wq)
@@ -482,7 +490,10 @@ static void pdsc_reset_prepare(struct pci_dev *pdev)
pf = pdsc_get_pf_struct(pdsc->pdev);
if (!IS_ERR(pf))
- pdsc_auxbus_dev_del(pdsc, pf);
+ pdsc_auxbus_dev_del(pdsc, pf,
+ &pf->vfs[pdsc->vf_id].padev);
+ } else {
+ pdsc_auxbus_dev_del(pdsc, pdsc, &pdsc->padev);
}
pdsc_unmap_bars(pdsc);
@@ -527,7 +538,11 @@ static void pdsc_reset_done(struct pci_dev *pdev)
pf = pdsc_get_pf_struct(pdsc->pdev);
if (!IS_ERR(pf))
- pdsc_auxbus_dev_add(pdsc, pf);
+ pdsc_auxbus_dev_add(pdsc, pf, PDS_DEV_TYPE_VDPA,
+ &pf->vfs[pdsc->vf_id].padev);
+ } else {
+ pdsc_auxbus_dev_add(pdsc, pdsc, PDS_DEV_TYPE_FWCTL,
+ &pdsc->padev);
}
}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/dev.c b/drivers/net/ethernet/mellanox/mlx5/core/dev.c
index 9a79674d27f1..891bbbbfbbf1 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/dev.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/dev.c
@@ -228,8 +228,15 @@ enum {
MLX5_INTERFACE_PROTOCOL_VNET,
MLX5_INTERFACE_PROTOCOL_DPLL,
+ MLX5_INTERFACE_PROTOCOL_FWCTL,
};
+static bool is_fwctl_supported(struct mlx5_core_dev *dev)
+{
+	/* fwctl is most useful on PFs; prevent fwctl on SFs for now */
+ return MLX5_CAP_GEN(dev, uctx_cap) && !mlx5_core_is_sf(dev);
+}
+
static const struct mlx5_adev_device {
const char *suffix;
bool (*is_supported)(struct mlx5_core_dev *dev);
@@ -252,6 +259,8 @@ static const struct mlx5_adev_device {
.is_supported = &is_mp_supported },
[MLX5_INTERFACE_PROTOCOL_DPLL] = { .suffix = "dpll",
.is_supported = &is_dpll_supported },
+ [MLX5_INTERFACE_PROTOCOL_FWCTL] = { .suffix = "fwctl",
+ .is_supported = &is_fwctl_supported },
};
int mlx5_adev_idx_alloc(void)
diff --git a/include/cxl/features.h b/include/cxl/features.h
new file mode 100644
index 000000000000..a3bb34694c06
--- /dev/null
+++ b/include/cxl/features.h
@@ -0,0 +1,87 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/* Copyright(c) 2024-2025 Intel Corporation. */
+#ifndef __CXL_FEATURES_H__
+#define __CXL_FEATURES_H__
+
+#include <linux/uuid.h>
+#include <linux/fwctl.h>
+#include <uapi/cxl/features.h>
+
+/* Feature UUIDs used by the kernel */
+#define CXL_FEAT_PATROL_SCRUB_UUID \
+ UUID_INIT(0x96dad7d6, 0xfde8, 0x482b, 0xa7, 0x33, 0x75, 0x77, 0x4e, \
+ 0x06, 0xdb, 0x8a)
+
+#define CXL_FEAT_ECS_UUID \
+ UUID_INIT(0xe5b13f22, 0x2328, 0x4a14, 0xb8, 0xba, 0xb9, 0x69, 0x1e, \
+ 0x89, 0x33, 0x86)
+
+#define CXL_FEAT_SPPR_UUID \
+ UUID_INIT(0x892ba475, 0xfad8, 0x474e, 0x9d, 0x3e, 0x69, 0x2c, 0x91, \
+ 0x75, 0x68, 0xbb)
+
+#define CXL_FEAT_HPPR_UUID \
+ UUID_INIT(0x80ea4521, 0x786f, 0x4127, 0xaf, 0xb1, 0xec, 0x74, 0x59, \
+ 0xfb, 0x0e, 0x24)
+
+#define CXL_FEAT_CACHELINE_SPARING_UUID \
+ UUID_INIT(0x96C33386, 0x91dd, 0x44c7, 0x9e, 0xcb, 0xfd, 0xaf, 0x65, \
+ 0x03, 0xba, 0xc4)
+
+#define CXL_FEAT_ROW_SPARING_UUID \
+ UUID_INIT(0x450ebf67, 0xb135, 0x4f97, 0xa4, 0x98, 0xc2, 0xd5, 0x7f, \
+ 0x27, 0x9b, 0xed)
+
+#define CXL_FEAT_BANK_SPARING_UUID \
+ UUID_INIT(0x78b79636, 0x90ac, 0x4b64, 0xa4, 0xef, 0xfa, 0xac, 0x5d, \
+ 0x18, 0xa8, 0x63)
+
+#define CXL_FEAT_RANK_SPARING_UUID \
+ UUID_INIT(0x34dbaff5, 0x0552, 0x4281, 0x8f, 0x76, 0xda, 0x0b, 0x5e, \
+ 0x7a, 0x76, 0xa7)
+
+/* Feature commands capability supported by a device */
+enum cxl_features_capability {
+ CXL_FEATURES_NONE = 0,
+ CXL_FEATURES_RO,
+ CXL_FEATURES_RW,
+};
+
+/**
+ * struct cxl_features_state - The Features state for the device
+ * @cxlds: Pointer to CXL device state
+ * @entries: CXL feature entry context
+ */
+struct cxl_features_state {
+ struct cxl_dev_state *cxlds;
+ struct cxl_feat_entries {
+ int num_features;
+ int num_user_features;
+ struct cxl_feat_entry ent[] __counted_by(num_features);
+ } *entries;
+};
+
+struct cxl_mailbox;
+struct cxl_memdev;
+#ifdef CONFIG_CXL_FEATURES
+struct cxl_features_state *to_cxlfs(struct cxl_dev_state *cxlds);
+int devm_cxl_setup_features(struct cxl_dev_state *cxlds);
+int devm_cxl_setup_fwctl(struct cxl_memdev *cxlmd);
+#else
+static inline struct cxl_features_state *to_cxlfs(struct cxl_dev_state *cxlds)
+{
+ return NULL;
+}
+
+static inline int devm_cxl_setup_features(struct cxl_dev_state *cxlds)
+{
+ return -EOPNOTSUPP;
+}
+
+static inline int devm_cxl_setup_fwctl(struct cxl_memdev *cxlmd)
+{
+ return -EOPNOTSUPP;
+}
+#endif
+
+#endif
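
For illustration, a minimal sketch of how a probe path can consume the stub pattern above: with CONFIG_CXL_FEATURES=n both setup calls return -EOPNOTSUPP and the caller treats that as a soft failure, much like the mock device probe later in this series. The function name here is hypothetical; only the devm_cxl_setup_*() calls come from this header.

static int example_setup(struct cxl_dev_state *cxlds,
			 struct cxl_memdev *cxlmd)
{
	int rc;

	/* Features support is optional; log and carry on */
	rc = devm_cxl_setup_features(cxlds);
	if (rc)
		dev_dbg(cxlds->dev, "no CXL Features: %d\n", rc);

	/* fwctl exposure is likewise optional */
	rc = devm_cxl_setup_fwctl(cxlmd);
	if (rc)
		dev_dbg(cxlds->dev, "no fwctl device: %d\n", rc);

	return 0;
}
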
diff --git a/include/cxl/mailbox.h b/include/cxl/mailbox.h
index bacd111e75f1..c4e99e2e3a9d 100644
--- a/include/cxl/mailbox.h
+++ b/include/cxl/mailbox.h
@@ -3,24 +3,66 @@
#ifndef __CXL_MBOX_H__
#define __CXL_MBOX_H__
#include <linux/rcuwait.h>
+#include <cxl/features.h>
+#include <uapi/linux/cxl_mem.h>
-struct cxl_mbox_cmd;
+/**
+ * struct cxl_mbox_cmd - A command to be submitted to hardware.
+ * @opcode: (input) The command set and command submitted to hardware.
+ * @payload_in: (input) Pointer to the input payload.
+ * @payload_out: (output) Pointer to the output payload. Must be allocated by
+ * the caller.
+ * @size_in: (input) Number of bytes to load from @payload_in.
+ * @size_out: (input) Max number of bytes loaded into @payload_out.
+ * (output) Number of bytes generated by the device. For fixed size
+ * outputs commands this is always expected to be deterministic. For
+ * variable sized output commands, it tells the exact number of bytes
+ * written.
+ * @min_out: (input) internal command output payload size validation
+ * @poll_count: (input) Number of polling attempts before timing out.
+ * @poll_interval_ms: (input) Time in milliseconds between background command
+ * polling attempts.
+ * @return_code: (output) Error code returned from hardware.
+ *
+ * This is the primary mechanism used to send commands to the hardware.
+ * All the fields except @payload_* correspond exactly to the fields described
+ * in the Command Register section of CXL 2.0 8.2.8.4.5. @payload_in and
+ * @payload_out are written to, and read from the Command Payload Registers
+ * defined in CXL 2.0 8.2.8.4.8.
+ */
+struct cxl_mbox_cmd {
+ u16 opcode;
+ void *payload_in;
+ void *payload_out;
+ size_t size_in;
+ size_t size_out;
+ size_t min_out;
+ int poll_count;
+ int poll_interval_ms;
+ u16 return_code;
+};
/**
* struct cxl_mailbox - context for CXL mailbox operations
* @host: device that hosts the mailbox
+ * @enabled_cmds: mailbox commands that are enabled by the driver
+ * @exclusive_cmds: mailbox commands that are exclusive to the kernel
* @payload_size: Size of space for payload
* (CXL 3.1 8.2.8.4.3 Mailbox Capabilities Register)
* @mbox_mutex: mutex protects device mailbox and firmware
* @mbox_wait: rcuwait for mailbox
* @mbox_send: @dev specific transport for transmitting mailbox commands
+ * @feat_cap: Features capability
*/
struct cxl_mailbox {
struct device *host;
+ DECLARE_BITMAP(enabled_cmds, CXL_MEM_COMMAND_ID_MAX);
+ DECLARE_BITMAP(exclusive_cmds, CXL_MEM_COMMAND_ID_MAX);
size_t payload_size;
struct mutex mbox_mutex; /* lock to protect mailbox context */
struct rcuwait mbox_wait;
int (*mbox_send)(struct cxl_mailbox *cxl_mbox, struct cxl_mbox_cmd *cmd);
+ enum cxl_features_capability feat_cap;
};
int cxl_mailbox_init(struct cxl_mailbox *cxl_mbox, struct device *host);
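
For illustration, how a caller might populate the newly exported struct cxl_mbox_cmd for a variable-sized output command, then dispatch it through whatever transport the driver wired into @mbox_send. The function name and the caller-allocated out_buf are hypothetical; 0x0500 is the Get Supported Features opcode from the uapi header later in this patch.

static int example_query_feats(struct cxl_mailbox *cxl_mbox, void *out_buf)
{
	struct cxl_mbox_get_sup_feats_in feats_in = {
		.count = cpu_to_le32(cxl_mbox->payload_size),
		.start_idx = cpu_to_le16(0),
	};
	struct cxl_mbox_cmd cmd = {
		.opcode = 0x0500,	/* Get Supported Features */
		.payload_in = &feats_in,
		.size_in = sizeof(feats_in),
		.payload_out = out_buf,	/* payload_size bytes, caller-allocated */
		.size_out = cxl_mbox->payload_size,
		.min_out = sizeof(struct cxl_mbox_get_sup_feats_out_hdr),
	};

	return cxl_mbox->mbox_send(cxl_mbox, &cmd);
}
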
diff --git a/include/linux/fwctl.h b/include/linux/fwctl.h
new file mode 100644
index 000000000000..5d61fc8a6871
--- /dev/null
+++ b/include/linux/fwctl.h
@@ -0,0 +1,135 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * Copyright (c) 2024-2025, NVIDIA CORPORATION & AFFILIATES
+ */
+#ifndef __LINUX_FWCTL_H
+#define __LINUX_FWCTL_H
+#include <linux/device.h>
+#include <linux/cdev.h>
+#include <linux/cleanup.h>
+#include <uapi/fwctl/fwctl.h>
+
+struct fwctl_device;
+struct fwctl_uctx;
+
+/**
+ * struct fwctl_ops - Driver provided operations
+ *
+ * fwctl_unregister() will wait until all executing ops are completed before it
+ * returns. Drivers should be mindful to not let their ops run for too long as
+ * it will block device hot unplug and module unloading.
+ */
+struct fwctl_ops {
+ /**
+	 * @device_type: The driver's assigned device_type number. This is uABI.
+ */
+ enum fwctl_device_type device_type;
+ /**
+ * @uctx_size: The size of the fwctl_uctx struct to allocate. The first
+ * bytes of this memory will be a fwctl_uctx. The driver can use the
+ * remaining bytes as its private memory.
+ */
+ size_t uctx_size;
+ /**
+ * @open_uctx: Called when a file descriptor is opened before the uctx
+ * is ever used.
+ */
+ int (*open_uctx)(struct fwctl_uctx *uctx);
+ /**
+ * @close_uctx: Called when the uctx is destroyed, usually when the FD
+ * is closed.
+ */
+ void (*close_uctx)(struct fwctl_uctx *uctx);
+ /**
+	 * @info: Implement FWCTL_INFO. Return kmalloc()'d memory that is copied
+	 * to out_device_data. On input, length indicates the size of the user
+	 * buffer; on output, it indicates the size of the memory. The driver
+	 * can ignore length on input; the core code will handle everything.
+ */
+ void *(*info)(struct fwctl_uctx *uctx, size_t *length);
+ /**
+ * @fw_rpc: Implement FWCTL_RPC. Deliver rpc_in/in_len to the FW and
+ * return the response and set out_len. rpc_in can be returned as the
+ * response pointer. Otherwise the returned pointer is freed with
+ * kvfree().
+ */
+ void *(*fw_rpc)(struct fwctl_uctx *uctx, enum fwctl_rpc_scope scope,
+ void *rpc_in, size_t in_len, size_t *out_len);
+};
+
+/**
+ * struct fwctl_device - Per-driver registration struct
+ * @dev: The sysfs (class/fwctl/fwctlXX) device
+ *
+ * Each driver instance will have one of these structs with the driver private
+ * data following immediately after. This struct is refcounted, it is freed by
+ * calling fwctl_put().
+ */
+struct fwctl_device {
+ struct device dev;
+ /* private: */
+ struct cdev cdev;
+
+ /* Protect uctx_list */
+ struct mutex uctx_list_lock;
+ struct list_head uctx_list;
+ /*
+ * Protect ops, held for write when ops becomes NULL during unregister,
+ * held for read whenever ops is loaded or an ops function is running.
+ */
+ struct rw_semaphore registration_lock;
+ const struct fwctl_ops *ops;
+};
+
+struct fwctl_device *_fwctl_alloc_device(struct device *parent,
+ const struct fwctl_ops *ops,
+ size_t size);
+/**
+ * fwctl_alloc_device - Allocate a fwctl
+ * @parent: Physical device that provides the FW interface
+ * @ops: Driver ops to register
+ * @drv_struct: 'struct driver_fwctl' that holds the struct fwctl_device
+ * @member: Name of the struct fwctl_device in @drv_struct
+ *
+ * This allocates and initializes the fwctl_device embedded in the drv_struct.
+ * Upon success the pointer must be freed via fwctl_put(). Returns a 'drv_struct
+ * \*' on success, NULL on error.
+ */
+#define fwctl_alloc_device(parent, ops, drv_struct, member) \
+ ({ \
+ static_assert(__same_type(struct fwctl_device, \
+ ((drv_struct *)NULL)->member)); \
+ static_assert(offsetof(drv_struct, member) == 0); \
+ (drv_struct *)_fwctl_alloc_device(parent, ops, \
+ sizeof(drv_struct)); \
+ })
+
+static inline struct fwctl_device *fwctl_get(struct fwctl_device *fwctl)
+{
+ get_device(&fwctl->dev);
+ return fwctl;
+}
+static inline void fwctl_put(struct fwctl_device *fwctl)
+{
+ put_device(&fwctl->dev);
+}
+DEFINE_FREE(fwctl, struct fwctl_device *, if (_T) fwctl_put(_T));
+
+int fwctl_register(struct fwctl_device *fwctl);
+void fwctl_unregister(struct fwctl_device *fwctl);
+
+/**
+ * struct fwctl_uctx - Per user FD context
+ * @fwctl: fwctl instance that owns the context
+ *
+ * Every FD opened by userspace will get a unique context allocation. Any driver
+ * private data will follow immediately after.
+ */
+struct fwctl_uctx {
+ struct fwctl_device *fwctl;
+ /* private: */
+ /* Head at fwctl_device::uctx_list */
+ struct list_head uctx_list_entry;
+};
+
+#endif
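
For illustration, the registration pattern this header implies, using a hypothetical example_fwctl driver; only the fwctl_*() calls and types come from this header. struct fwctl_device must be the first member, as the static_asserts in fwctl_alloc_device() enforce.

struct example_fwctl {
	struct fwctl_device fwctl;	/* must be member 0 */
	void *hw;			/* driver private data follows */
};

static const struct fwctl_ops example_ops = {
	.device_type = FWCTL_DEVICE_TYPE_PDS,	/* illustrative type */
	.uctx_size = sizeof(struct fwctl_uctx),
	/* .open_uctx, .close_uctx, .info and .fw_rpc as required */
};

static int example_bind(struct device *parent)
{
	struct example_fwctl *ef;
	int rc;

	ef = fwctl_alloc_device(parent, &example_ops,
				struct example_fwctl, fwctl);
	if (!ef)
		return -ENOMEM;

	rc = fwctl_register(&ef->fwctl);
	if (rc) {
		fwctl_put(&ef->fwctl);
		return rc;
	}
	return 0;	/* teardown: fwctl_unregister(), then fwctl_put() */
}
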
diff --git a/include/linux/panic.h b/include/linux/panic.h
index 54d90b6c5f47..2494d51707ef 100644
--- a/include/linux/panic.h
+++ b/include/linux/panic.h
@@ -74,7 +74,8 @@ static inline void set_arch_panic_timeout(int timeout, int arch_default_timeout)
#define TAINT_AUX 16
#define TAINT_RANDSTRUCT 17
#define TAINT_TEST 18
-#define TAINT_FLAGS_COUNT 19
+#define TAINT_FWCTL 19
+#define TAINT_FLAGS_COUNT 20
#define TAINT_FLAGS_MAX ((1UL << TAINT_FLAGS_COUNT) - 1)
struct taint_flag {
diff --git a/include/linux/pds/pds_adminq.h b/include/linux/pds/pds_adminq.h
index 4b4e9a98b37b..ddd111f04ca0 100644
--- a/include/linux/pds/pds_adminq.h
+++ b/include/linux/pds/pds_adminq.h
@@ -1179,6 +1179,274 @@ struct pds_lm_host_vf_status_cmd {
u8 status;
};
+enum pds_fwctl_cmd_opcode {
+ PDS_FWCTL_CMD_IDENT = 70,
+ PDS_FWCTL_CMD_RPC = 71,
+ PDS_FWCTL_CMD_QUERY = 72,
+};
+
+/**
+ * struct pds_fwctl_cmd - Firmware control command structure
+ * @opcode: Opcode
+ * @rsvd: Reserved
+ * @ep: Endpoint identifier
+ * @op: Operation identifier
+ */
+struct pds_fwctl_cmd {
+ u8 opcode;
+ u8 rsvd[3];
+ __le32 ep;
+ __le32 op;
+} __packed;
+
+/**
+ * struct pds_fwctl_comp - Firmware control completion structure
+ * @status: Status of the firmware control operation
+ * @rsvd: Reserved
+ * @comp_index: Completion index in little-endian format
+ * @rsvd2: Reserved
+ * @color: Color bit indicating the state of the completion
+ */
+struct pds_fwctl_comp {
+ u8 status;
+ u8 rsvd;
+ __le16 comp_index;
+ u8 rsvd2[11];
+ u8 color;
+} __packed;
+
+/**
+ * struct pds_fwctl_ident_cmd - Firmware control identification command structure
+ * @opcode: Operation code for the command
+ * @rsvd: Reserved
+ * @version: Interface version
+ * @rsvd2: Reserved
+ * @len: Length of the identification data
+ * @ident_pa: Physical address of the identification data
+ */
+struct pds_fwctl_ident_cmd {
+ u8 opcode;
+ u8 rsvd;
+ u8 version;
+ u8 rsvd2;
+ __le32 len;
+ __le64 ident_pa;
+} __packed;
+
+/*
+ * Future feature bits will be defined here:
+ * enum pds_fwctl_features {
+ * };
+ * (compilers don't like empty enums)
+ */
+
+/**
+ * struct pds_fwctl_ident - Firmware control identification structure
+ * @features: Supported features (enum pds_fwctl_features)
+ * @version: Interface version
+ * @rsvd: Reserved
+ * @max_req_sz: Maximum request size
+ * @max_resp_sz: Maximum response size
+ * @max_req_sg_elems: Maximum number of request SGs
+ * @max_resp_sg_elems: Maximum number of response SGs
+ */
+struct pds_fwctl_ident {
+ __le64 features;
+ u8 version;
+ u8 rsvd[3];
+ __le32 max_req_sz;
+ __le32 max_resp_sz;
+ u8 max_req_sg_elems;
+ u8 max_resp_sg_elems;
+} __packed;
+
+enum pds_fwctl_query_entity {
+ PDS_FWCTL_RPC_ROOT = 0,
+ PDS_FWCTL_RPC_ENDPOINT = 1,
+ PDS_FWCTL_RPC_OPERATION = 2,
+};
+
+#define PDS_FWCTL_RPC_OPCODE_CMD_SHIFT 0
+#define PDS_FWCTL_RPC_OPCODE_CMD_MASK GENMASK(15, PDS_FWCTL_RPC_OPCODE_CMD_SHIFT)
+#define PDS_FWCTL_RPC_OPCODE_VER_SHIFT 16
+#define PDS_FWCTL_RPC_OPCODE_VER_MASK GENMASK(23, PDS_FWCTL_RPC_OPCODE_VER_SHIFT)
+
+#define PDS_FWCTL_RPC_OPCODE_GET_CMD(op) FIELD_GET(PDS_FWCTL_RPC_OPCODE_CMD_MASK, op)
+#define PDS_FWCTL_RPC_OPCODE_GET_VER(op) FIELD_GET(PDS_FWCTL_RPC_OPCODE_VER_MASK, op)
+
+#define PDS_FWCTL_RPC_OPCODE_CMP(op1, op2) \
+ (PDS_FWCTL_RPC_OPCODE_GET_CMD(op1) == PDS_FWCTL_RPC_OPCODE_GET_CMD(op2) && \
+ PDS_FWCTL_RPC_OPCODE_GET_VER(op1) <= PDS_FWCTL_RPC_OPCODE_GET_VER(op2))
+
+/*
+ * FW command attributes that map to the FWCTL scope values
+ */
+#define PDSFC_FW_CMD_ATTR_READ 0x00
+#define PDSFC_FW_CMD_ATTR_DEBUG_READ 0x02
+#define PDSFC_FW_CMD_ATTR_WRITE 0x04
+#define PDSFC_FW_CMD_ATTR_DEBUG_WRITE 0x08
+#define PDSFC_FW_CMD_ATTR_SYNC 0x10
+
+/**
+ * struct pds_fwctl_query_cmd - Firmware control query command structure
+ * @opcode: Operation code for the command
+ * @entity: Entity type to query (enum pds_fwctl_query_entity)
+ * @version: Version of the query data structure supported by the driver
+ * @rsvd: Reserved
+ * @query_data_buf_len: Length of the query data buffer
+ * @query_data_buf_pa: Physical address of the query data buffer
+ * @ep: Endpoint identifier to query (when entity is PDS_FWCTL_RPC_ENDPOINT)
+ * @op: Operation identifier to query (when entity is PDS_FWCTL_RPC_OPERATION)
+ *
+ * This structure is used to send a query command to the firmware control
+ * interface. The structure is packed to ensure there is no padding between
+ * the fields.
+ */
+struct pds_fwctl_query_cmd {
+ u8 opcode;
+ u8 entity;
+ u8 version;
+ u8 rsvd;
+ __le32 query_data_buf_len;
+ __le64 query_data_buf_pa;
+ union {
+ __le32 ep;
+ __le32 op;
+ };
+} __packed;
+
+/**
+ * struct pds_fwctl_query_comp - Firmware control query completion structure
+ * @status: Status of the query command
+ * @rsvd: Reserved
+ * @comp_index: Completion index in little-endian format
+ * @version: Version of the query data structure returned by firmware. This
+ * should be less than or equal to the version supported by the driver
+ * @rsvd2: Reserved
+ * @color: Color bit indicating the state of the completion
+ */
+struct pds_fwctl_query_comp {
+ u8 status;
+ u8 rsvd;
+ __le16 comp_index;
+ u8 version;
+ u8 rsvd2[2];
+ u8 color;
+} __packed;
+
+/**
+ * struct pds_fwctl_query_data_endpoint - query data for entity PDS_FWCTL_RPC_ROOT
+ * @id: The identifier for the data endpoint
+ */
+struct pds_fwctl_query_data_endpoint {
+ __le32 id;
+} __packed;
+
+/**
+ * struct pds_fwctl_query_data_operation - query data for entity PDS_FWCTL_RPC_ENDPOINT
+ * @id: Operation identifier
+ * @scope: Scope of the operation (enum fwctl_rpc_scope)
+ * @rsvd: Reserved
+ */
+struct pds_fwctl_query_data_operation {
+ __le32 id;
+ u8 scope;
+ u8 rsvd[3];
+} __packed;
+
+/**
+ * struct pds_fwctl_query_data - query data structure
+ * @version: Version of the query data structure
+ * @rsvd: Reserved
+ * @num_entries: Number of entries in @entries
+ * @entries: Array of query data entries, depending on the entity type
+ */
+struct pds_fwctl_query_data {
+ u8 version;
+ u8 rsvd[3];
+ __le32 num_entries;
+ u8 entries[] __counted_by_le(num_entries);
+} __packed;
+
+/**
+ * struct pds_fwctl_rpc_cmd - Firmware control RPC command
+ * @opcode: opcode PDS_FWCTL_CMD_RPC
+ * @rsvd: Reserved
+ * @flags: Indicates indirect request and/or response handling
+ * @ep: Endpoint identifier
+ * @op: Operation identifier
+ * @inline_req0: Buffer for inline request
+ * @inline_req1: Buffer for inline request
+ * @req_pa: Physical address of request data
+ * @req_sz: Size of the request
+ * @req_sg_elems: Number of request SGs
+ * @req_rsvd: Reserved
+ * @inline_req2: Buffer for inline request
+ * @resp_pa: Physical address of response data
+ * @resp_sz: Size of the response
+ * @resp_sg_elems: Number of response SGs
+ * @resp_rsvd: Reserved
+ */
+struct pds_fwctl_rpc_cmd {
+ u8 opcode;
+ u8 rsvd;
+ __le16 flags;
+#define PDS_FWCTL_RPC_IND_REQ 0x1
+#define PDS_FWCTL_RPC_IND_RESP 0x2
+ __le32 ep;
+ __le32 op;
+ u8 inline_req0[16];
+ union {
+ u8 inline_req1[16];
+ struct {
+ __le64 req_pa;
+ __le32 req_sz;
+ u8 req_sg_elems;
+ u8 req_rsvd[3];
+ };
+ };
+ union {
+ u8 inline_req2[16];
+ struct {
+ __le64 resp_pa;
+ __le32 resp_sz;
+ u8 resp_sg_elems;
+ u8 resp_rsvd[3];
+ };
+ };
+} __packed;
+
+/**
+ * struct pds_sg_elem - Transmit scatter-gather (SG) descriptor element
+ * @addr: DMA address of SG element data buffer
+ * @len: Length of SG element data buffer, in bytes
+ * @rsvd: Reserved
+ */
+struct pds_sg_elem {
+ __le64 addr;
+ __le32 len;
+ u8 rsvd[4];
+} __packed;
+
+/**
+ * struct pds_fwctl_rpc_comp - Completion of a firmware control RPC
+ * @status: Status of the command
+ * @rsvd: Reserved
+ * @comp_index: Completion index of the command
+ * @err: Error code, if any, from the RPC
+ * @resp_sz: Size of the response
+ * @rsvd2: Reserved
+ * @color: Color bit indicating the state of the completion
+ */
+struct pds_fwctl_rpc_comp {
+ u8 status;
+ u8 rsvd;
+ __le16 comp_index;
+ __le32 err;
+ __le32 resp_sz;
+ u8 rsvd2[3];
+ u8 color;
+} __packed;
+
union pds_core_adminq_cmd {
u8 opcode;
u8 bytes[64];
@@ -1216,6 +1484,11 @@ union pds_core_adminq_cmd {
struct pds_lm_dirty_enable_cmd lm_dirty_enable;
struct pds_lm_dirty_disable_cmd lm_dirty_disable;
struct pds_lm_dirty_seq_ack_cmd lm_dirty_seq_ack;
+
+ struct pds_fwctl_cmd fwctl;
+ struct pds_fwctl_ident_cmd fwctl_ident;
+ struct pds_fwctl_rpc_cmd fwctl_rpc;
+ struct pds_fwctl_query_cmd fwctl_query;
};
union pds_core_adminq_comp {
@@ -1243,6 +1516,10 @@ union pds_core_adminq_comp {
struct pds_lm_state_size_comp lm_state_size;
struct pds_lm_dirty_status_comp lm_dirty_status;
+
+ struct pds_fwctl_comp fwctl;
+ struct pds_fwctl_rpc_comp fwctl_rpc;
+ struct pds_fwctl_query_comp fwctl_query;
};
#ifndef __CHECKER__
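
For illustration, how the PDS_FWCTL_RPC_OPCODE_* field macros above compose and decompose an opcode; FIELD_PREP()/FIELD_GET() come from linux/bitfield.h and the command/version values are arbitrary.

u32 supported_op = FIELD_PREP(PDS_FWCTL_RPC_OPCODE_CMD_MASK, 0x12) |
		   FIELD_PREP(PDS_FWCTL_RPC_OPCODE_VER_MASK, 3);
u32 requested_op = FIELD_PREP(PDS_FWCTL_RPC_OPCODE_CMD_MASK, 0x12) |
		   FIELD_PREP(PDS_FWCTL_RPC_OPCODE_VER_MASK, 1);

/* The command lives in bits 15:0, the version in bits 23:16 */
WARN_ON(PDS_FWCTL_RPC_OPCODE_GET_CMD(requested_op) != 0x12);
WARN_ON(PDS_FWCTL_RPC_OPCODE_GET_VER(requested_op) != 1);

/* Compatible: commands match and requested version <= supported version */
bool ok = PDS_FWCTL_RPC_OPCODE_CMP(requested_op, supported_op);
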
diff --git a/include/linux/pds/pds_common.h b/include/linux/pds/pds_common.h
index 5802e1deef24..b193adbe7cc3 100644
--- a/include/linux/pds/pds_common.h
+++ b/include/linux/pds/pds_common.h
@@ -29,6 +29,7 @@ enum pds_core_vif_types {
PDS_DEV_TYPE_ETH = 3,
PDS_DEV_TYPE_RDMA = 4,
PDS_DEV_TYPE_LM = 5,
+ PDS_DEV_TYPE_FWCTL = 6,
/* new ones added before this line */
PDS_DEV_TYPE_MAX = 16 /* don't change - used in struct size */
@@ -40,6 +41,7 @@ enum pds_core_vif_types {
#define PDS_DEV_TYPE_ETH_STR "Eth"
#define PDS_DEV_TYPE_RDMA_STR "RDMA"
#define PDS_DEV_TYPE_LM_STR "LM"
+#define PDS_DEV_TYPE_FWCTL_STR "fwctl"
#define PDS_VDPA_DEV_NAME PDS_CORE_DRV_NAME "." PDS_DEV_TYPE_VDPA_STR
#define PDS_VFIO_LM_DEV_NAME PDS_CORE_DRV_NAME "." PDS_DEV_TYPE_LM_STR "." PDS_DEV_TYPE_VFIO_STR
diff --git a/include/uapi/cxl/features.h b/include/uapi/cxl/features.h
new file mode 100644
index 000000000000..d6db8984889f
--- /dev/null
+++ b/include/uapi/cxl/features.h
@@ -0,0 +1,170 @@
+/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
+/*
+ * Copyright (c) 2024,2025, Intel Corporation
+ *
+ * These are definitions for the mailbox command interface of the CXL
+ * subsystem.
+ */
+#ifndef _UAPI_CXL_FEATURES_H_
+#define _UAPI_CXL_FEATURES_H_
+
+#include <linux/types.h>
+#ifndef __KERNEL__
+#include <uuid/uuid.h>
+#else
+#include <linux/uuid.h>
+#endif
+
+/*
+ * struct cxl_mbox_get_sup_feats_in - Get Supported Features input
+ *
+ * @count: bytes of Feature data to return in output
+ * @start_idx: index of first requested Supported Feature Entry, 0 based.
+ * @reserved: reserved field, must be 0s.
+ *
+ * Get Supported Features (0x0500) CXL r3.2 8.2.9.6.1 command.
+ * Input payload for Get Supported Features.
+ */
+struct cxl_mbox_get_sup_feats_in {
+ __le32 count;
+ __le16 start_idx;
+ __u8 reserved[2];
+} __attribute__ ((__packed__));
+
+/* CXL spec r3.2 Table 8-87 command effects */
+#define CXL_CMD_CONFIG_CHANGE_COLD_RESET BIT(0)
+#define CXL_CMD_CONFIG_CHANGE_IMMEDIATE BIT(1)
+#define CXL_CMD_DATA_CHANGE_IMMEDIATE BIT(2)
+#define CXL_CMD_POLICY_CHANGE_IMMEDIATE BIT(3)
+#define CXL_CMD_LOG_CHANGE_IMMEDIATE BIT(4)
+#define CXL_CMD_SECURITY_STATE_CHANGE BIT(5)
+#define CXL_CMD_BACKGROUND BIT(6)
+#define CXL_CMD_BGCMD_ABORT_SUPPORTED BIT(7)
+#define CXL_CMD_EFFECTS_VALID BIT(9)
+#define CXL_CMD_CONFIG_CHANGE_CONV_RESET BIT(10)
+#define CXL_CMD_CONFIG_CHANGE_CXL_RESET BIT(11)
+#define CXL_CMD_EFFECTS_RESERVED GENMASK(15, 12)
+
+/*
+ * struct cxl_feat_entry - Supported Feature Entry
+ * @uuid: UUID of the Feature
+ * @id: id to identify the feature. 0 based
+ * @get_feat_size: max bytes required for Get Feature command for this Feature
+ * @set_feat_size: max bytes required for Set Feature command for this Feature
+ * @flags: attribute flags
+ * @get_feat_ver: Get Feature version
+ * @set_feat_ver: Set Feature version
+ * @effects: Set Feature command effects
+ * @reserved: reserved, must be 0
+ *
+ * CXL spec r3.2 Table 8-109
+ * Get Supported Features Supported Feature Entry
+ */
+struct cxl_feat_entry {
+ uuid_t uuid;
+ __le16 id;
+ __le16 get_feat_size;
+ __le16 set_feat_size;
+ __le32 flags;
+ __u8 get_feat_ver;
+ __u8 set_feat_ver;
+ __le16 effects;
+ __u8 reserved[18];
+} __attribute__ ((__packed__));
+
+/* @flags field for 'struct cxl_feat_entry' */
+#define CXL_FEATURE_F_CHANGEABLE BIT(0)
+#define CXL_FEATURE_F_PERSIST_FW_UPDATE BIT(4)
+#define CXL_FEATURE_F_DEFAULT_SEL BIT(5)
+#define CXL_FEATURE_F_SAVED_SEL BIT(6)
+
+/*
+ * struct cxl_mbox_get_sup_feats_out - Get Supported Features output
+ * @num_entries: number of Supported Feature Entries returned
+ * @supported_feats: number of supported Features
+ * @reserved: reserved, must be 0s.
+ * @ents: Supported Feature Entries array
+ *
+ * CXL spec r3.2 Table 8-108
+ * Get Supported Features Output Payload
+ */
+struct cxl_mbox_get_sup_feats_out {
+ __struct_group(cxl_mbox_get_sup_feats_out_hdr, hdr, /* no attrs */,
+ __le16 num_entries;
+ __le16 supported_feats;
+ __u8 reserved[4];
+ );
+ struct cxl_feat_entry ents[] __counted_by_le(num_entries);
+} __attribute__ ((__packed__));
+
+/*
+ * Get Feature, CXL spec r3.2 section 8.2.9.6.2
+ */
+
+/*
+ * struct cxl_mbox_get_feat_in - Get Feature input
+ * @uuid: UUID for Feature
+ * @offset: offset of the first byte in Feature data for output payload
+ * @count: count in bytes of Feature data returned
+ * @selection: 0 current value, 1 default value, 2 saved value
+ *
+ * CXL spec r3.2 section 8.2.9.6.2 Table 8-99
+ */
+struct cxl_mbox_get_feat_in {
+ uuid_t uuid;
+ __le16 offset;
+ __le16 count;
+ __u8 selection;
+} __attribute__ ((__packed__));
+
+/*
+ * enum cxl_get_feat_selection - selection field of Get Feature input
+ */
+enum cxl_get_feat_selection {
+ CXL_GET_FEAT_SEL_CURRENT_VALUE,
+ CXL_GET_FEAT_SEL_DEFAULT_VALUE,
+ CXL_GET_FEAT_SEL_SAVED_VALUE,
+ CXL_GET_FEAT_SEL_MAX
+};
+
+/*
+ * Set Feature CXL spec r3.2 8.2.9.6.3
+ */
+
+/*
+ * struct cxl_mbox_set_feat_in - Set Features input
+ * @uuid: UUID for Feature
+ * @flags: set feature flags
+ * @offset: byte offset of Feature data to update
+ * @version: Feature version of the data in Feature Data
+ * @rsvd: reserved, must be 0s.
+ * @feat_data: raw byte stream of Features data to update
+ *
+ * CXL spec r3.2 section 8.2.9.6.3 Table 8-101
+ */
+struct cxl_mbox_set_feat_in {
+ __struct_group(cxl_mbox_set_feat_hdr, hdr, /* no attrs */,
+ uuid_t uuid;
+ __le32 flags;
+ __le16 offset;
+ __u8 version;
+ __u8 rsvd[9];
+ );
+ __u8 feat_data[];
+} __packed;
+
+/*
+ * enum cxl_set_feat_flag_data_transfer - Set Feature flags field
+ */
+enum cxl_set_feat_flag_data_transfer {
+ CXL_SET_FEAT_FLAG_FULL_DATA_TRANSFER = 0,
+ CXL_SET_FEAT_FLAG_INITIATE_DATA_TRANSFER,
+ CXL_SET_FEAT_FLAG_CONTINUE_DATA_TRANSFER,
+ CXL_SET_FEAT_FLAG_FINISH_DATA_TRANSFER,
+ CXL_SET_FEAT_FLAG_ABORT_DATA_TRANSFER,
+ CXL_SET_FEAT_FLAG_DATA_TRANSFER_MAX
+};
+
+#define CXL_SET_FEAT_FLAG_DATA_TRANSFER_MASK GENMASK(2, 0)
+#define CXL_SET_FEAT_FLAG_DATA_SAVED_ACROSS_RESET BIT(3)
+
+#endif
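
For illustration, walking a Get Supported Features response with the structs above; this is a kernel-side sketch (userspace would use le16toh()/le32toh() from endian.h instead of the le*_to_cpu() helpers). The function name is hypothetical.

static void example_dump_feats(const struct cxl_mbox_get_sup_feats_out *out)
{
	int i, n = le16_to_cpu(out->num_entries);

	for (i = 0; i < n; i++) {
		const struct cxl_feat_entry *e = &out->ents[i];

		pr_info("feature %u: uuid %pUb get=%u set=%u changeable=%d\n",
			le16_to_cpu(e->id), &e->uuid,
			le16_to_cpu(e->get_feat_size),
			le16_to_cpu(e->set_feat_size),
			!!(le32_to_cpu(e->flags) & CXL_FEATURE_F_CHANGEABLE));
	}
}
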
diff --git a/include/uapi/fwctl/cxl.h b/include/uapi/fwctl/cxl.h
new file mode 100644
index 000000000000..43f522f0cdcd
--- /dev/null
+++ b/include/uapi/fwctl/cxl.h
@@ -0,0 +1,56 @@
+/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
+/*
+ * Copyright (c) 2024-2025 Intel Corporation
+ *
+ * These are definitions for the mailbox command interface of the CXL
+ * subsystem.
+ */
+#ifndef _UAPI_FWCTL_CXL_H_
+#define _UAPI_FWCTL_CXL_H_
+
+#include <linux/types.h>
+#include <linux/stddef.h>
+#include <cxl/features.h>
+
+/**
+ * struct fwctl_rpc_cxl - ioctl(FWCTL_RPC) input for CXL
+ * @opcode: CXL mailbox command opcode
+ * @flags: Flags for the command (input).
+ * @op_size: Size of input payload.
+ * @reserved1: Reserved. Must be 0s.
+ * @get_sup_feats_in: Get Supported Features input
+ * @get_feat_in: Get Feature input
+ * @set_feat_in: Set Feature input
+ */
+struct fwctl_rpc_cxl {
+ __struct_group(fwctl_rpc_cxl_hdr, hdr, /* no attrs */,
+ __u32 opcode;
+ __u32 flags;
+ __u32 op_size;
+ __u32 reserved1;
+ );
+ union {
+ struct cxl_mbox_get_sup_feats_in get_sup_feats_in;
+ struct cxl_mbox_get_feat_in get_feat_in;
+ struct cxl_mbox_set_feat_in set_feat_in;
+ };
+};
+
+/**
+ * struct fwctl_rpc_cxl_out - ioctl(FWCTL_RPC) output for CXL
+ * @size: Size of the output payload
+ * @retval: Return value from device
+ * @get_sup_feats_out: Get Supported Features output
+ * @payload: raw byte stream of payload
+ */
+struct fwctl_rpc_cxl_out {
+ __struct_group(fwctl_rpc_cxl_out_hdr, hdr, /* no attrs */,
+ __u32 size;
+ __u32 retval;
+ );
+ union {
+ struct cxl_mbox_get_sup_feats_out get_sup_feats_out;
+ __DECLARE_FLEX_ARRAY(__u8, payload);
+ };
+};
+
+#endif
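
For illustration, a userspace sketch that wraps the CXL Get Supported Features input in this envelope and delivers it via ioctl(FWCTL_RPC). The device path, 4096-byte buffer, and installed header locations are assumptions; 0x0500 is the Get Supported Features mailbox opcode.

#include <err.h>
#include <fcntl.h>
#include <stdint.h>
#include <stdlib.h>
#include <endian.h>
#include <sys/ioctl.h>
#include <fwctl/fwctl.h>
#include <fwctl/cxl.h>

int fd = open("/dev/fwctl/fwctl0", O_RDWR);
struct fwctl_rpc_cxl_out *out = calloc(1, 4096);
struct fwctl_rpc_cxl in = {
	.opcode = 0x0500,	/* Get Supported Features */
	.op_size = sizeof(in.get_sup_feats_in),
	.get_sup_feats_in = {
		.count = htole32(4096 - sizeof(out->hdr)),
	},
};
struct fwctl_rpc rpc = {
	.size = sizeof(rpc),
	.scope = FWCTL_RPC_CONFIGURATION,
	.in = (uintptr_t)&in,
	.in_len = sizeof(in),
	.out = (uintptr_t)out,
	.out_len = 4096,
};

if (fd < 0 || ioctl(fd, FWCTL_RPC, &rpc) < 0)
	err(1, "FWCTL_RPC");
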
diff --git a/include/uapi/fwctl/fwctl.h b/include/uapi/fwctl/fwctl.h
new file mode 100644
index 000000000000..716ac0eee42d
--- /dev/null
+++ b/include/uapi/fwctl/fwctl.h
@@ -0,0 +1,141 @@
+/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
+/* Copyright (c) 2024-2025, NVIDIA CORPORATION & AFFILIATES.
+ */
+#ifndef _UAPI_FWCTL_H
+#define _UAPI_FWCTL_H
+
+#include <linux/types.h>
+#include <linux/ioctl.h>
+
+#define FWCTL_TYPE 0x9A
+
+/**
+ * DOC: General ioctl format
+ *
+ * The ioctl interface follows a general format to allow for extensibility. Each
+ * ioctl is passed a structure pointer as the argument providing the size of
+ * the structure in the first u32. The kernel checks that any structure space
+ * beyond what it understands is 0. This allows userspace to use the backward
+ * compatible portion while consistently using the newer, larger structures.
+ *
+ * ioctls use a standard meaning for common errnos:
+ *
+ * - ENOTTY: The IOCTL number itself is not supported at all
+ * - E2BIG: The IOCTL number is supported, but the provided structure has
+ *   a non-zero value in a part the kernel does not understand.
+ * - EOPNOTSUPP: The IOCTL number is supported, and the structure is
+ * understood, however a known field has a value the kernel does not
+ * understand or support.
+ * - EINVAL: Everything about the IOCTL was understood, but a field is not
+ * correct.
+ * - ENOMEM: Out of memory.
+ * - ENODEV: The underlying device has been hot-unplugged and the FD is
+ * orphaned.
+ *
+ * As well as additional errnos, within specific ioctls.
+ */
+enum {
+ FWCTL_CMD_BASE = 0,
+ FWCTL_CMD_INFO = 0,
+ FWCTL_CMD_RPC = 1,
+};
+
+enum fwctl_device_type {
+ FWCTL_DEVICE_TYPE_ERROR = 0,
+ FWCTL_DEVICE_TYPE_MLX5 = 1,
+ FWCTL_DEVICE_TYPE_CXL = 2,
+ FWCTL_DEVICE_TYPE_PDS = 4,
+};
+
+/**
+ * struct fwctl_info - ioctl(FWCTL_INFO)
+ * @size: sizeof(struct fwctl_info)
+ * @flags: Must be 0
+ * @out_device_type: Returns the type of the device from enum fwctl_device_type
+ * @device_data_len: On input the length of the out_device_data memory. On
+ * output the size of the kernel's device_data which may be larger or
+ * smaller than the input. May be 0 on input.
+ * @out_device_data: Pointer to a memory of device_data_len bytes. Kernel will
+ * fill the entire memory, zeroing as required.
+ *
+ * Returns basic information about this fwctl instance, particularly what driver
+ * is being used to define the device_data format.
+ */
+struct fwctl_info {
+ __u32 size;
+ __u32 flags;
+ __u32 out_device_type;
+ __u32 device_data_len;
+ __aligned_u64 out_device_data;
+};
+#define FWCTL_INFO _IO(FWCTL_TYPE, FWCTL_CMD_INFO)
+
+/**
+ * enum fwctl_rpc_scope - Scope of access for the RPC
+ *
+ * Refer to fwctl.rst for a more detailed discussion of these scopes.
+ */
+enum fwctl_rpc_scope {
+ /**
+ * @FWCTL_RPC_CONFIGURATION: Device configuration access scope
+ *
+ * Read/write access to device configuration. When configuration
+ * is written to the device it remains in a fully supported state.
+ */
+ FWCTL_RPC_CONFIGURATION = 0,
+ /**
+ * @FWCTL_RPC_DEBUG_READ_ONLY: Read only access to debug information
+ *
+ * Readable debug information. Debug information is compatible with
+ * kernel lockdown, and does not disclose any sensitive information. For
+	 * instance, exposing any encryption secrets from this information is
+ * forbidden.
+ */
+ FWCTL_RPC_DEBUG_READ_ONLY = 1,
+ /**
+ * @FWCTL_RPC_DEBUG_WRITE: Writable access to lockdown compatible debug information
+ *
+	 * Allows write access to data in the device which may cause it to
+	 * leave a fully supported state. This is intended to permit intensive
+	 * and possibly invasive debugging. This scope will taint the kernel.
+ */
+ FWCTL_RPC_DEBUG_WRITE = 2,
+ /**
+ * @FWCTL_RPC_DEBUG_WRITE_FULL: Write access to all debug information
+ *
+ * Allows read/write access to everything. Requires CAP_SYS_RAW_IO, so
+	 * it is not required to follow lockdown principles. If in doubt,
+ * debugging should be placed in this scope. This scope will taint the
+ * kernel.
+ */
+ FWCTL_RPC_DEBUG_WRITE_FULL = 3,
+};
+
+/**
+ * struct fwctl_rpc - ioctl(FWCTL_RPC)
+ * @size: sizeof(struct fwctl_rpc)
+ * @scope: One of enum fwctl_rpc_scope, required scope for the RPC
+ * @in_len: Length of the in memory
+ * @out_len: Length of the out memory
+ * @in: Request message in device specific format
+ * @out: Response message in device specific format
+ *
+ * Deliver a Remote Procedure Call to the device FW and return the response. The
+ * call's parameters and return are marshaled into linear buffers of memory. Any
+ * errno indicates that delivery of the RPC to the device failed. Return status
+ * originating in the device during a successful delivery must be encoded into
+ * out.
+ *
+ * The format of the buffers matches the out_device_type from FWCTL_INFO.
+ */
+struct fwctl_rpc {
+ __u32 size;
+ __u32 scope;
+ __u32 in_len;
+ __u32 out_len;
+ __aligned_u64 in;
+ __aligned_u64 out;
+};
+#define FWCTL_RPC _IO(FWCTL_TYPE, FWCTL_CMD_RPC)
+
+#endif
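
For illustration, the two-call FWCTL_INFO pattern the kernel-doc above describes: query with device_data_len == 0 to learn the size, then call again with a buffer. The device path is illustrative and error handling is trimmed.

#include <err.h>
#include <fcntl.h>
#include <stdio.h>
#include <stdint.h>
#include <stdlib.h>
#include <sys/ioctl.h>
#include <fwctl/fwctl.h>

int fd = open("/dev/fwctl/fwctl0", O_RDWR);
struct fwctl_info info = { .size = sizeof(info) };

/* First call: device_data_len == 0, the kernel reports the size needed */
if (fd < 0 || ioctl(fd, FWCTL_INFO, &info) < 0)
	err(1, "FWCTL_INFO");

void *data = calloc(1, info.device_data_len);

info.out_device_data = (uintptr_t)data;
if (ioctl(fd, FWCTL_INFO, &info) < 0)
	err(1, "FWCTL_INFO");
printf("device_type %u, %u bytes of device data\n",
       info.out_device_type, info.device_data_len);
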
diff --git a/include/uapi/fwctl/mlx5.h b/include/uapi/fwctl/mlx5.h
new file mode 100644
index 000000000000..625819180ac6
--- /dev/null
+++ b/include/uapi/fwctl/mlx5.h
@@ -0,0 +1,36 @@
+/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
+/*
+ * Copyright (c) 2024-2025, NVIDIA CORPORATION & AFFILIATES
+ *
+ * These are definitions for the command interface for mlx5 HW. mlx5 FW has a
+ * User Context mechanism which allows the FW to understand a security scope.
+ * FWCTL binds each FD to a FW user context and then places the User Context ID
+ * (UID) in each command header. The created User Context has a capability set
+ * that is appropriate for FWCTL's security model.
+ *
+ * Command formation should use a copy of the structs in mlx5_ifc.h following
+ * the Programmer's Reference Manual. An open release is available here:
+ *
+ * https://network.nvidia.com/files/doc-2020/ethernet-adapters-programming-manual.pdf
+ *
+ * The device_type for this file is FWCTL_DEVICE_TYPE_MLX5.
+ */
+#ifndef _UAPI_FWCTL_MLX5_H
+#define _UAPI_FWCTL_MLX5_H
+
+#include <linux/types.h>
+
+/**
+ * struct fwctl_info_mlx5 - ioctl(FWCTL_INFO) out_device_data
+ * @uid: The FW UID this FD is bound to. Each command header will force
+ * this value.
+ * @uctx_caps: The FW capabilities that are enabled for the uid.
+ *
+ * Return basic information about the FW interface available.
+ */
+struct fwctl_info_mlx5 {
+ __u32 uid;
+ __u32 uctx_caps;
+};
+
+#endif
diff --git a/include/uapi/fwctl/pds.h b/include/uapi/fwctl/pds.h
new file mode 100644
index 000000000000..66be32411642
--- /dev/null
+++ b/include/uapi/fwctl/pds.h
@@ -0,0 +1,62 @@
+/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
+/* Copyright(c) Advanced Micro Devices, Inc */
+
+/*
+ * fwctl interface info for pds_fwctl
+ */
+
+#ifndef _UAPI_FWCTL_PDS_H_
+#define _UAPI_FWCTL_PDS_H_
+
+#include <linux/types.h>
+
+/**
+ * struct fwctl_info_pds - ioctl(FWCTL_INFO) out_device_data
+ * @uctx_caps: bitmap of firmware capabilities
+ *
+ * Return basic information about the FW interface available.
+ */
+struct fwctl_info_pds {
+ __u32 uctx_caps;
+};
+
+/**
+ * enum pds_fwctl_capabilities - firmware capability bits
+ * @PDS_FWCTL_QUERY_CAP: firmware can be queried for information
+ * @PDS_FWCTL_SEND_CAP: firmware can be sent commands
+ */
+enum pds_fwctl_capabilities {
+ PDS_FWCTL_QUERY_CAP = 0,
+ PDS_FWCTL_SEND_CAP,
+};
+
+/**
+ * struct fwctl_rpc_pds - ioctl(FWCTL_RPC) payload for pds devices
+ * @in.op: requested operation code
+ * @in.ep: firmware endpoint to operate on
+ * @in.rsvd: reserved
+ * @in.len: length of payload data
+ * @in.payload: address of payload buffer
+ * @in: rpc in parameters
+ * @out.retval: operation result value
+ * @out.rsvd: reserved
+ * @out.len: length of result data buffer
+ * @out.payload: address of payload data buffer
+ * @out: rpc out parameters
+ */
+struct fwctl_rpc_pds {
+ struct {
+ __u32 op;
+ __u32 ep;
+ __u32 rsvd;
+ __u32 len;
+ __aligned_u64 payload;
+ } in;
+ struct {
+ __u32 retval;
+ __u32 rsvd[2];
+ __u32 len;
+ __aligned_u64 payload;
+ } out;
+};
+#endif /* _UAPI_FWCTL_PDS_H_ */
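
For illustration, a sketch of the pds-specific envelope that travels through fwctl_rpc.in/out: the request and response payloads are referenced indirectly via the nested pointers rather than inlined. The op, ep, and buffers are hypothetical.

char req_buf[64] = {}, resp_buf[256];

struct fwctl_rpc_pds pds = {
	.in = {
		.op = 1,		/* hypothetical operation id */
		.ep = 0,		/* hypothetical endpoint id */
		.len = sizeof(req_buf),
		.payload = (uintptr_t)req_buf,
	},
	.out = {
		.len = sizeof(resp_buf),
		.payload = (uintptr_t)resp_buf,
	},
};

/* The envelope itself is handed to ioctl(FWCTL_RPC) as both in and out */
struct fwctl_rpc rpc = {
	.size = sizeof(rpc),
	.scope = FWCTL_RPC_CONFIGURATION,
	.in = (uintptr_t)&pds,
	.in_len = sizeof(pds),
	.out = (uintptr_t)&pds,
	.out_len = sizeof(pds),
};
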
diff --git a/kernel/panic.c b/kernel/panic.c
index d8635d5cecb2..0c55eec9e874 100644
--- a/kernel/panic.c
+++ b/kernel/panic.c
@@ -511,6 +511,7 @@ const struct taint_flag taint_flags[TAINT_FLAGS_COUNT] = {
TAINT_FLAG(AUX, 'X', ' ', true),
TAINT_FLAG(RANDSTRUCT, 'T', ' ', true),
TAINT_FLAG(TEST, 'N', ' ', true),
+ TAINT_FLAG(FWCTL, 'J', ' ', true),
};
#undef TAINT_FLAG
diff --git a/tools/debugging/kernel-chktaint b/tools/debugging/kernel-chktaint
index 279be06332be..e7da0909d097 100755
--- a/tools/debugging/kernel-chktaint
+++ b/tools/debugging/kernel-chktaint
@@ -204,6 +204,14 @@ else
echo " * an in-kernel test (such as a KUnit test) has been run (#18)"
fi
+T=`expr $T / 2`
+if [ `expr $T % 2` -eq 0 ]; then
+ addout " "
+else
+ addout "J"
+ echo " * fwctl's mutating debug interface was used (#19)"
+fi
+
echo "For a more detailed explanation of the various taint flags see"
echo " Documentation/admin-guide/tainted-kernels.rst in the Linux kernel sources"
echo " or https://kernel.org/doc/html/latest/admin-guide/tainted-kernels.html"
diff --git a/tools/testing/cxl/Kbuild b/tools/testing/cxl/Kbuild
index b1256fee3567..0a6572ab6f37 100644
--- a/tools/testing/cxl/Kbuild
+++ b/tools/testing/cxl/Kbuild
@@ -63,6 +63,7 @@ cxl_core-y += $(CXL_CORE_SRC)/pmu.o
cxl_core-y += $(CXL_CORE_SRC)/cdat.o
cxl_core-$(CONFIG_TRACING) += $(CXL_CORE_SRC)/trace.o
cxl_core-$(CONFIG_CXL_REGION) += $(CXL_CORE_SRC)/region.o
+cxl_core-$(CONFIG_CXL_FEATURES) += $(CXL_CORE_SRC)/features.o
cxl_core-y += config_check.o
cxl_core-y += cxl_core_test.o
cxl_core-y += cxl_core_exports.o
diff --git a/tools/testing/cxl/test/mem.c b/tools/testing/cxl/test/mem.c
index 8d731bd63988..9495dbcc03a7 100644
--- a/tools/testing/cxl/test/mem.c
+++ b/tools/testing/cxl/test/mem.c
@@ -45,6 +45,18 @@ static struct cxl_cel_entry mock_cel[] = {
.effect = CXL_CMD_EFFECT_NONE,
},
{
+ .opcode = cpu_to_le16(CXL_MBOX_OP_GET_SUPPORTED_FEATURES),
+ .effect = CXL_CMD_EFFECT_NONE,
+ },
+ {
+ .opcode = cpu_to_le16(CXL_MBOX_OP_GET_FEATURE),
+ .effect = CXL_CMD_EFFECT_NONE,
+ },
+ {
+ .opcode = cpu_to_le16(CXL_MBOX_OP_SET_FEATURE),
+ .effect = cpu_to_le16(EFFECT(CONF_CHANGE_IMMEDIATE)),
+ },
+ {
.opcode = cpu_to_le16(CXL_MBOX_OP_IDENTIFY),
.effect = CXL_CMD_EFFECT_NONE,
},
@@ -145,6 +157,10 @@ struct mock_event_store {
u32 ev_status;
};
+struct vendor_test_feat {
+ __le32 data;
+} __packed;
+
struct cxl_mockmem_data {
void *lsa;
void *fw;
@@ -161,6 +177,7 @@ struct cxl_mockmem_data {
u8 event_buf[SZ_4K];
u64 timestamp;
unsigned long sanitize_timeout;
+ struct vendor_test_feat test_feat;
};
static struct mock_event_log *event_find_log(struct device *dev, int log_type)
@@ -1354,6 +1371,151 @@ static int mock_activate_fw(struct cxl_mockmem_data *mdata,
return -EINVAL;
}
+#define CXL_VENDOR_FEATURE_TEST \
+ UUID_INIT(0xffffffff, 0xffff, 0xffff, 0xff, 0xff, 0xff, 0xff, 0xff, \
+ 0xff, 0xff, 0xff)
+
+static void fill_feature_vendor_test(struct cxl_feat_entry *feat)
+{
+ feat->uuid = CXL_VENDOR_FEATURE_TEST;
+ feat->id = 0;
+ feat->get_feat_size = cpu_to_le16(0x4);
+ feat->set_feat_size = cpu_to_le16(0x4);
+ feat->flags = cpu_to_le32(CXL_FEATURE_F_CHANGEABLE |
+ CXL_FEATURE_F_DEFAULT_SEL |
+ CXL_FEATURE_F_SAVED_SEL);
+ feat->get_feat_ver = 1;
+ feat->set_feat_ver = 1;
+ feat->effects = cpu_to_le16(CXL_CMD_CONFIG_CHANGE_COLD_RESET |
+ CXL_CMD_EFFECTS_VALID);
+}
+
+#define MAX_CXL_TEST_FEATS 1
+
+static int mock_get_test_feature(struct cxl_mockmem_data *mdata,
+ struct cxl_mbox_cmd *cmd)
+{
+ struct vendor_test_feat *output = cmd->payload_out;
+ struct cxl_mbox_get_feat_in *input = cmd->payload_in;
+ u16 offset = le16_to_cpu(input->offset);
+ u16 count = le16_to_cpu(input->count);
+ u8 *ptr;
+
+ if (offset > sizeof(*output)) {
+ cmd->return_code = CXL_MBOX_CMD_RC_INPUT;
+ return -EINVAL;
+ }
+
+ if (offset + count > sizeof(*output)) {
+ cmd->return_code = CXL_MBOX_CMD_RC_INPUT;
+ return -EINVAL;
+ }
+
+ ptr = (u8 *)&mdata->test_feat + offset;
+ memcpy((u8 *)output + offset, ptr, count);
+
+ return 0;
+}
+
+static int mock_get_feature(struct cxl_mockmem_data *mdata,
+ struct cxl_mbox_cmd *cmd)
+{
+ struct cxl_mbox_get_feat_in *input = cmd->payload_in;
+
+ if (uuid_equal(&input->uuid, &CXL_VENDOR_FEATURE_TEST))
+ return mock_get_test_feature(mdata, cmd);
+
+ cmd->return_code = CXL_MBOX_CMD_RC_UNSUPPORTED;
+
+ return -EOPNOTSUPP;
+}
+
+static int mock_set_test_feature(struct cxl_mockmem_data *mdata,
+ struct cxl_mbox_cmd *cmd)
+{
+ struct cxl_mbox_set_feat_in *input = cmd->payload_in;
+ struct vendor_test_feat *test =
+ (struct vendor_test_feat *)input->feat_data;
+ u32 action;
+
+ action = FIELD_GET(CXL_SET_FEAT_FLAG_DATA_TRANSFER_MASK,
+ le32_to_cpu(input->hdr.flags));
+ /*
+ * While it is spec compliant to support other set actions, it is not
+ * necessary to add the complication in the emulation currently. Reject
+ * anything besides full xfer.
+ */
+ if (action != CXL_SET_FEAT_FLAG_FULL_DATA_TRANSFER) {
+ cmd->return_code = CXL_MBOX_CMD_RC_INPUT;
+ return -EINVAL;
+ }
+
+ /* Offset should be reserved when doing full transfer */
+ if (input->hdr.offset) {
+ cmd->return_code = CXL_MBOX_CMD_RC_INPUT;
+ return -EINVAL;
+ }
+
+ memcpy(&mdata->test_feat.data, &test->data, sizeof(u32));
+
+ return 0;
+}
+
+static int mock_set_feature(struct cxl_mockmem_data *mdata,
+ struct cxl_mbox_cmd *cmd)
+{
+ struct cxl_mbox_set_feat_in *input = cmd->payload_in;
+
+ if (uuid_equal(&input->hdr.uuid, &CXL_VENDOR_FEATURE_TEST))
+ return mock_set_test_feature(mdata, cmd);
+
+ cmd->return_code = CXL_MBOX_CMD_RC_UNSUPPORTED;
+
+ return -EOPNOTSUPP;
+}
+
+static int mock_get_supported_features(struct cxl_mockmem_data *mdata,
+ struct cxl_mbox_cmd *cmd)
+{
+ struct cxl_mbox_get_sup_feats_in *in = cmd->payload_in;
+ struct cxl_mbox_get_sup_feats_out *out = cmd->payload_out;
+ struct cxl_feat_entry *feat;
+ u16 start_idx, count;
+
+ if (cmd->size_out < sizeof(*out)) {
+ cmd->return_code = CXL_MBOX_CMD_RC_PAYLOADLEN;
+ return -EINVAL;
+ }
+
+ /*
+ * Current emulation only supports 1 feature
+ */
+ start_idx = le16_to_cpu(in->start_idx);
+ if (start_idx != 0) {
+ cmd->return_code = CXL_MBOX_CMD_RC_INPUT;
+ return -EINVAL;
+ }
+
+ count = le16_to_cpu(in->count);
+ if (count < struct_size(out, ents, 0)) {
+ cmd->return_code = CXL_MBOX_CMD_RC_PAYLOADLEN;
+ return -EINVAL;
+ }
+
+ out->supported_feats = cpu_to_le16(MAX_CXL_TEST_FEATS);
+ cmd->return_code = 0;
+ if (count < struct_size(out, ents, MAX_CXL_TEST_FEATS)) {
+ out->num_entries = 0;
+ return 0;
+ }
+
+ out->num_entries = cpu_to_le16(MAX_CXL_TEST_FEATS);
+ feat = out->ents;
+ fill_feature_vendor_test(feat);
+
+ return 0;
+}
+
static int cxl_mock_mbox_send(struct cxl_mailbox *cxl_mbox,
struct cxl_mbox_cmd *cmd)
{
@@ -1439,6 +1601,15 @@ static int cxl_mock_mbox_send(struct cxl_mailbox *cxl_mbox,
case CXL_MBOX_OP_ACTIVATE_FW:
rc = mock_activate_fw(mdata, cmd);
break;
+ case CXL_MBOX_OP_GET_SUPPORTED_FEATURES:
+ rc = mock_get_supported_features(mdata, cmd);
+ break;
+ case CXL_MBOX_OP_GET_FEATURE:
+ rc = mock_get_feature(mdata, cmd);
+ break;
+ case CXL_MBOX_OP_SET_FEATURE:
+ rc = mock_set_feature(mdata, cmd);
+ break;
default:
break;
}
@@ -1486,6 +1657,11 @@ static int cxl_mock_mailbox_create(struct cxl_dev_state *cxlds)
return 0;
}
+static void cxl_mock_test_feat_init(struct cxl_mockmem_data *mdata)
+{
+ mdata->test_feat.data = cpu_to_le32(0xdeadbeef);
+}
+
static int cxl_mock_mem_probe(struct platform_device *pdev)
{
struct device *dev = &pdev->dev;
@@ -1558,6 +1734,10 @@ static int cxl_mock_mem_probe(struct platform_device *pdev)
if (rc)
return rc;
+ rc = devm_cxl_setup_features(cxlds);
+ if (rc)
+ dev_dbg(dev, "No CXL Features discovered\n");
+
cxl_mock_add_event_logs(&mdata->mes);
cxlmd = devm_cxl_add_memdev(&pdev->dev, cxlds);
@@ -1572,7 +1752,12 @@ static int cxl_mock_mem_probe(struct platform_device *pdev)
if (rc)
return rc;
+ rc = devm_cxl_setup_fwctl(cxlmd);
+ if (rc)
+ dev_dbg(dev, "No CXL FWCTL setup\n");
+
cxl_mem_get_event_records(mds, CXLDEV_EVENT_STATUS_ALL);
+ cxl_mock_test_feat_init(mdata);
return 0;
}