46 files changed, 4054 insertions, 132 deletions
diff --git a/Documentation/admin-guide/tainted-kernels.rst b/Documentation/admin-guide/tainted-kernels.rst index 700aa72eecb1..a0cc017e4424 100644 --- a/Documentation/admin-guide/tainted-kernels.rst +++ b/Documentation/admin-guide/tainted-kernels.rst @@ -101,6 +101,7 @@ Bit Log Number Reason that got the kernel tainted 16 _/X 65536 auxiliary taint, defined for and used by distros 17 _/T 131072 kernel was built with the struct randomization plugin 18 _/N 262144 an in-kernel test has been run + 19 _/J 524288 userspace used a mutating debug operation in fwctl === === ====== ======================================================== Note: The character ``_`` is representing a blank in this table to make reading @@ -184,3 +185,7 @@ More detailed explanation for tainting build time. 18) ``N`` if an in-kernel test, such as a KUnit test, has been run. + + 19) ``J`` if userspace opened /dev/fwctl/* and performed a FWCTL_RPC_DEBUG_WRITE + to use the device's debugging features. Device debugging features could + cause the device to malfunction in undefined ways. diff --git a/Documentation/userspace-api/fwctl/fwctl-cxl.rst b/Documentation/userspace-api/fwctl/fwctl-cxl.rst new file mode 100644 index 000000000000..670b43b72949 --- /dev/null +++ b/Documentation/userspace-api/fwctl/fwctl-cxl.rst @@ -0,0 +1,142 @@ +.. SPDX-License-Identifier: GPL-2.0 + +================ +fwctl cxl driver +================ + +:Author: Dave Jiang + +Overview +======== + +The CXL spec defines a set of commands that can be issued to the mailbox of a +CXL device or switch. It also leaves room for vendor specific commands to be +issued to the mailbox. fwctl provides a path to issue a set of allowed +mailbox commands from user space to the device, moderated by the kernel driver. + +The following 3 commands will be used to support CXL Features: +CXL spec r3.1 8.2.9.6.1 Get Supported Features (Opcode 0500h) +CXL spec r3.1 8.2.9.6.2 Get Feature (Opcode 0501h) +CXL spec r3.1 8.2.9.6.3 Set Feature (Opcode 0502h) + +The "Get Supported Features" return data may be filtered by the kernel driver to +drop any features that are forbidden by the kernel or are used exclusively by +the kernel. The driver will set the "Set Feature Size" of the "Get Supported +Features Supported Feature Entry" to 0 to indicate that the Feature cannot be +modified. The "Get Supported Features" and "Get Feature" commands fall +under the fwctl policy of FWCTL_RPC_CONFIGURATION. + +For the "Set Feature" command, the access policy is currently broken down into two +categories depending on the Set Feature effects reported by the device. If the +Set Feature causes an immediate change to the device, the fwctl access policy +must be FWCTL_RPC_DEBUG_WRITE_FULL. The effects for this level are +"immediate config change", "immediate data change", "immediate policy change", +or "immediate log change" for the set effects mask. If the effects are "config +change with cold reset" or "config change with conventional reset", then the +fwctl access policy must be FWCTL_RPC_DEBUG_WRITE or higher. + +fwctl cxl User API +================== + +.. kernel-doc:: include/uapi/fwctl/cxl.h + +1. Driver info query +-------------------- + +The first step for the application is to issue ioctl(FWCTL_CMD_INFO). Successful +invocation of the ioctl implies the Features capability is operational. A ``struct fwctl_info`` needs to be filled +out with the ``fwctl_info.out_device_type`` set to ``FWCTL_DEVICE_TYPE_CXL``.
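+
+Below is a minimal sketch of this first step. The ``FWCTL_INFO`` request and
+the ``struct fwctl_info`` field names follow ``include/uapi/fwctl/fwctl.h``;
+the ``/dev/fwctl/fwctl0`` path, the helper name, and the error handling are
+illustrative assumptions only (the usual libc headers plus the installed
+``fwctl/fwctl.h`` and ``fwctl/cxl.h`` uAPI headers are assumed):
+
+.. code-block:: c
+
+    /* Hypothetical helper: open a fwctl cdev and confirm it is a CXL device */
+    static int cxl_fwctl_open_and_query(const char *path)
+    {
+        struct fwctl_info_cxl out_data = {0};
+        struct fwctl_info info = {0};
+        int fd;
+
+        fd = open(path, O_RDWR);        /* e.g. "/dev/fwctl/fwctl0" */
+        if (fd < 0)
+            return -1;
+
+        info.size = sizeof(info);
+        info.device_data_len = sizeof(out_data);
+        info.out_device_data = (uint64_t)(uintptr_t)&out_data;
+
+        if (ioctl(fd, FWCTL_INFO, &info) ||
+            info.out_device_type != FWCTL_DEVICE_TYPE_CXL) {
+            close(fd);
+            return -1;
+        }
+
+        /* out_data holds the reserved field described next */
+        return fd;
+    }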
+The return data should be ``struct fwctl_info_cxl`` that contains a reserved +32bit field that should be all zeros. + +2. Send hardware commands +------------------------- + +The next step is to send the 'Get Supported Features' command to the driver from +user space via ioctl(FWCTL_RPC). A ``struct fwctl_rpc_cxl`` is pointed to +by ``fwctl_rpc.in``. ``struct fwctl_rpc_cxl.in_payload`` points to +the hardware input structure that is defined by the CXL spec. ``fwctl_rpc.out`` +points to the buffer that contains a ``struct fwctl_rpc_cxl_out`` that includes +the hardware output data inlined as ``fwctl_rpc_cxl_out.payload``. This command +is called twice: the first time to retrieve the number of features supported, +and a second time to retrieve the specific feature details as the output data. + +After getting the specific feature details, a Get/Set Feature command can be +appropriately programmed and sent. For a "Set Feature" command, the retrieved +feature info contains an effects field that details what the resulting +"Set Feature" command will trigger. That will inform the user whether +the system is configured to allow the "Set Feature" command or not. + +Code example of a Get Feature +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +.. code-block:: c + + static int cxl_fwctl_rpc_get_test_feature(int fd, struct test_feature *feat_ctx, + const uint32_t expected_data) + { + struct cxl_mbox_get_feat_in *feat_in; + struct fwctl_rpc_cxl_out *out; + struct fwctl_rpc rpc = {0}; + struct fwctl_rpc_cxl *in; + size_t out_size, in_size; + uint32_t val; + void *data; + int rc; + + in_size = sizeof(*in) + sizeof(*feat_in); + rc = posix_memalign((void **)&in, 16, in_size); + if (rc) + return -ENOMEM; + memset(in, 0, in_size); + feat_in = &in->get_feat_in; + + uuid_copy(feat_in->uuid, feat_ctx->uuid); + feat_in->count = feat_ctx->get_size; + + out_size = sizeof(*out) + feat_ctx->get_size; + rc = posix_memalign((void **)&out, 16, out_size); + if (rc) { + rc = -ENOMEM; + goto free_in; + } + memset(out, 0, out_size); + + in->opcode = CXL_MBOX_OPCODE_GET_FEATURE; + in->op_size = sizeof(*feat_in); + + rpc.size = sizeof(rpc); + rpc.scope = FWCTL_RPC_CONFIGURATION; + rpc.in_len = in_size; + rpc.out_len = out_size; + rpc.in = (uint64_t)(uintptr_t)in; + rpc.out = (uint64_t)(uintptr_t)out; + + rc = send_command(fd, &rpc, out); + if (rc) + goto free_all; + + data = out->payload; + val = le32toh(*(__le32 *)data); + if (memcmp(&val, &expected_data, sizeof(val)) != 0) { + rc = -ENXIO; + goto free_all; + } + + free_all: + free(out); + free_in: + free(in); + return rc; + } + +Take a look at the fwctl test code in the CXL CLI repository +<https://github.com/pmem/ndctl/tree/main/test/fwctl.c> for detailed examples +of user code exercising this path. + + +fwctl cxl Kernel API +==================== + +.. kernel-doc:: drivers/cxl/core/features.c + :export: +.. kernel-doc:: include/cxl/features.h diff --git a/Documentation/userspace-api/fwctl/fwctl.rst b/Documentation/userspace-api/fwctl/fwctl.rst new file mode 100644 index 000000000000..fdcfe418a83f --- /dev/null +++ b/Documentation/userspace-api/fwctl/fwctl.rst @@ -0,0 +1,286 @@ +.. SPDX-License-Identifier: GPL-2.0 + +=============== +fwctl subsystem +=============== + +:Author: Jason Gunthorpe + +Overview +======== + +Modern devices contain extensive amounts of FW, and in many cases, are largely +software-defined pieces of hardware. The evolution of this approach is in large part a +reaction to Moore's Law, where a chip tape out is now highly expensive and the +chip design is extremely large.
Replacing fixed HW logic with a flexible and +tightly coupled FW/HW combination is an effective risk mitigation against chip +respin. Problems in the HW design can be counteracted in device FW. This is +especially true for devices which present a stable and backwards compatible +interface to the operating system driver (such as NVMe). + +The FW layer in devices has grown to an incredible size and devices frequently +integrate clusters of fast processors to run it. For example, mlx5 devices have +over 30MB of FW code, and big configurations operate with over 1GB of FW managed +runtime state. + +The availability of such a flexible layer has created quite a variety in the +industry where single pieces of silicon are now configurable software-defined +devices and can operate in substantially different ways depending on the need. +Further, we often see cases where specific sites wish to operate devices in ways +that are highly specialized and require applications that have been tailored to +their unique configuration. + +Devices have also become multi-functional and integrated to the point that they +no longer fit neatly into the kernel's division of subsystems. Modern +multi-functional devices have drivers, such as bnxt/ice/mlx5/pds, that span many +subsystems while sharing the underlying hardware using the auxiliary device +system. + +Altogether this creates a challenge for the operating system, where devices +have an expansive FW environment that needs robust device-specific debugging +support, and FW-driven functionality that is not well suited to "generic" +interfaces. fwctl seeks to allow access to the full device functionality from +user space in the areas of debuggability, management, and first-boot/nth-boot +provisioning. + +fwctl is aimed at the common device design pattern where the OS and FW +communicate via an RPC message layer constructed with a queue or mailbox scheme. +In this case the driver will typically have some layer to deliver RPC messages +and collect RPC responses from device FW. The in-kernel subsystem drivers that +operate the device for its primary purposes are built on these RPCs, but devices also usually have a set of ancillary RPCs that don't really +fit into any specific subsystem. For example, a HW RAID controller is primarily +operated by the block layer but also comes with a set of RPCs to administer the +construction of drives within the HW RAID. + +In the past, when devices were more single-function, individual subsystems would +grow different approaches to solving some of these common problems. For instance, +monitoring device health, manipulating FLASH, debugging FW, and +provisioning all have their own unique interfaces across the kernel. + +fwctl's purpose is to define a common set of limited rules, described below, +that allow user space to securely construct and execute RPCs inside device FW. +The rules serve as an agreement between the operating system and FW on how to +correctly design the RPC interface. The subsystem provides a thin +layer of discovery and a generic uAPI to deliver the RPCs and collect the +responses. It supports a system of user space libraries and tools which will +use this interface to control the device using the device native protocols. + +Scope of Action +--------------- + +fwctl drivers are strictly restricted to being a way to operate the device FW. +fwctl is not an avenue to access random kernel internals or other operating system +SW state.
+ +fwctl instances must operate on a well-defined device function, and the device +should have a well-defined security model for what scope within the physical +device the function is permitted to access. For instance, the most complex PCIe +device today may broadly have several function-level scopes: + + 1. A privileged function with full access to the on-device global state and + configuration + + 2. Multiple hypervisor functions with control over themselves and the child + functions used with VMs + + 3. Multiple VM functions tightly scoped within the VM + +The device may create a logical parent/child relationship between these scopes. +For instance a child VM's FW may be within the scope of the hypervisor FW. It is +quite common in the VFIO world that the hypervisor environment has a complex +provisioning/profiling/configuration responsibility for the function VFIO +assigns to the VM. + +Further, within the function, devices often have RPC commands that fall within +some general scopes of action (see enum fwctl_rpc_scope): + + 1. Access to function & child configuration, FLASH, etc. that becomes live at a + function reset. Access to function & child runtime configuration that is + transparent or non-disruptive to any driver or VM. + + 2. Read-only access to function debug information that may report on FW objects + in the function & child, including FW objects owned by other kernel + subsystems. + + 3. Write access to function & child debug information strictly compatible with + the principles of kernel lockdown and kernel integrity protection. Triggers + a kernel Taint. + + 4. Full debug device access. Triggers a kernel Taint, requires CAP_SYS_RAWIO. + +User space will provide a scope label on each RPC and the kernel must enforce the +above CAPs and taints based on that scope. A combination of kernel and FW can +enforce that RPCs are placed in the correct scope by user space. + +Denied behavior +--------------- + +There are many things this interface must not allow user space to do (without a +Taint or CAP), broadly derived from the principles of kernel lockdown. Some +examples: + + 1. DMA to/from arbitrary memory, hang the system, compromise FW integrity with + untrusted code, or otherwise compromise device or system security and + integrity. + + 2. Provide an abnormal "back door" to kernel drivers. No manipulation of kernel + objects owned by kernel drivers. + + 3. Directly configure or otherwise control kernel drivers. A subsystem kernel + driver can react to the device configuration at function reset/driver load + time, but otherwise must not be coupled to fwctl. + + 4. Operate the HW in a way that overlaps with the core purpose of another + primary kernel subsystem, such as read/write to LBAs, send/receive of + network packets, or operate an accelerator's data plane. + +fwctl is not a replacement for device direct access subsystems like uacce or +VFIO. + +Operations exposed through fwctl's non-tainting interfaces should be fully +shareable with other users of the device. For instance exposing an RPC through +fwctl should never prevent a kernel subsystem from also concurrently using that +same RPC or hardware unit down the road. In such cases fwctl will be less +important than proper kernel subsystems that eventually emerge. Mistakes in this +area resulting in clashes will be resolved in favour of a kernel implementation. + +fwctl User API +============== + +.. kernel-doc:: include/uapi/fwctl/fwctl.h +.. kernel-doc:: include/uapi/fwctl/mlx5.h +..
kernel-doc:: include/uapi/fwctl/pds.h + +sysfs Class +----------- + +fwctl has a sysfs class (/sys/class/fwctl/fwctlNN/) and character devices +(/dev/fwctl/fwctlNN) with a simple numbered scheme. The character device +operates the ioctl uAPI described above. + +fwctl devices can be related to driver components in other subsystems through +sysfs:: + + $ ls /sys/class/fwctl/fwctl0/device/infiniband/ + ibp0s10f0 + + $ ls /sys/class/infiniband/ibp0s10f0/device/fwctl/ + fwctl0/ + + $ ls /sys/devices/pci0000:00/0000:00:0a.0/fwctl/fwctl0 + dev device power subsystem uevent + +User space Community +-------------------- + +Drawing inspiration from nvme-cli, participation on the kernel side must come +with a user space component in a common TBD git tree, at a minimum to usefully operate the +kernel driver. Providing such an implementation is a pre-condition to merging a +kernel driver. + +The goal is to build a user space community around some of the shared problems +we all have, and ideally develop some common user space programs with some +starting themes of: + + - Device in-field debugging + + - HW provisioning + + - VFIO child device profiling before VM boot + + - Confidential Compute topics (attestation, secure provisioning) + +that stretch across all subsystems in the kernel. fwupd is a great example of +how an excellent user space experience can emerge out of kernel-side diversity. + +fwctl Kernel API +================ + +.. kernel-doc:: drivers/fwctl/main.c + :export: +.. kernel-doc:: include/linux/fwctl.h + +fwctl Driver design +------------------- + +In many cases a fwctl driver is going to be part of a larger cross-subsystem +device, possibly using the auxiliary_device mechanism. In that case several +subsystems are going to be sharing the same device and FW interface layer, so the +device design must already provide for isolation and cooperation between kernel +subsystems. fwctl should fit into that same model. + +Part of the driver should include a description of how its scope restrictions +and security model work. The driver and FW together must ensure that RPCs +provided by user space are mapped to the appropriate scope. If the validation is +done in the driver, then the validation can read a 'command effects' report from +the device, or hardwire the enforcement. If the validation is done in the FW, +then the driver should pass the fwctl_rpc_scope to the FW along with the command. + +The driver and FW must cooperate to ensure that either fwctl cannot allocate +any FW resources, or that any resources it does allocate are freed on FD closure. A +driver primarily constructed around FW RPCs may find that its core PCI function +and RPC layer belong under fwctl with auxiliary devices connecting to other +subsystems. + +Each device type must be mindful of Linux's philosophy for stable ABI. The FW +RPC interface does not have to meet a strictly stable ABI, but it does need to +meet an expectation that userspace tools that are deployed and in significant +use don't needlessly break. FW upgrade and kernel upgrade should keep widely +deployed tooling working. + +Development- and debugging-focused RPCs under more permissive scopes can have +less stability if the tools using them are only run under exceptional +circumstances and not for everyday use of the device. Debugging tools may even +require exact version matching, as they may need something similar to DWARF +debug information from the FW binary. + +Security Response +================= + +The kernel remains the gatekeeper for this interface.
If violations of the +scopes, security or isolation principles are found, we have options to let +devices fix them with a FW update, to push a kernel patch that parses and blocks RPC +commands, or to push a kernel patch that blocks entire firmware versions/devices. + +While the kernel can always directly parse and restrict RPCs, the existing +kernel pattern of allowing drivers to delegate validation to +FW is expected to be a useful design. + +Existing Similar Examples +========================= + +The approach described in this document is not a new idea. Direct, or near-direct, +device access has been offered by the kernel in different areas for +decades. With more devices wanting to follow this design pattern, it is becoming +clear that it is not entirely well understood and, more importantly, the +security considerations are not well defined or agreed upon. + +Some examples: + + - HW RAID controllers. This includes RPCs to do things like compose drives into + a RAID volume, configure RAID parameters, monitor the HW and more. + + - Baseboard managers. RPCs for configuring settings in the device and more. + + - NVMe vendor command capsules. nvme-cli provides access to some monitoring + functions that different products have defined, but more exist. + + - CXL also has an NVMe-like vendor command system. + + - DRM allows user space drivers to send commands to the device via kernel + mediation. + + - RDMA allows user space drivers to directly push commands to the device + without kernel involvement. + + - Various "raw" APIs, raw HID (SDL2), raw USB, NVMe Generic Interface, etc. + +The first four are examples of areas that fwctl intends to cover. The latter three +are examples of denied behavior as they fully overlap with the primary purpose +of a kernel subsystem. + +A key lesson learned from these past efforts is the importance of having a +common user space project to use as a pre-condition for obtaining a kernel +driver. Developing good community around useful software in user space is key to +getting companies to fund participation to enable their products. diff --git a/Documentation/userspace-api/fwctl/index.rst b/Documentation/userspace-api/fwctl/index.rst new file mode 100644 index 000000000000..316ac456ad3b --- /dev/null +++ b/Documentation/userspace-api/fwctl/index.rst @@ -0,0 +1,14 @@ +.. SPDX-License-Identifier: GPL-2.0 + +Firmware Control (FWCTL) Userspace API +====================================== + +A framework that defines a common set of limited rules that allow user space +to securely construct and execute RPCs inside device firmware. + +.. toctree:: + :maxdepth: 1 + + fwctl + fwctl-cxl + pds_fwctl diff --git a/Documentation/userspace-api/fwctl/pds_fwctl.rst b/Documentation/userspace-api/fwctl/pds_fwctl.rst new file mode 100644 index 000000000000..b5a31f82c883 --- /dev/null +++ b/Documentation/userspace-api/fwctl/pds_fwctl.rst @@ -0,0 +1,46 @@ +.. SPDX-License-Identifier: GPL-2.0 + +================ +fwctl pds driver +================ + +:Author: Shannon Nelson + +Overview +======== + +The PDS Core device makes a fwctl service available through an +auxiliary_device named pds_core.fwctl.N. The pds_fwctl driver binds to +this device and registers itself with the fwctl subsystem. The resulting +userspace interface is used by an application that is part of the +AMD Pensando software package for the Distributed Service Card (DSC). + +The pds_fwctl driver has little knowledge of the firmware's internals.
+It only knows how to send commands through pds_core's message queue to the +firmware for fwctl requests. The set of fwctl operations available +depends on the firmware in the DSC, and the userspace application +version must match the firmware so that they can talk to each other. + +When a connection is created, the pds_fwctl driver requests from the +firmware a list of firmware object endpoints, and then a list of +supported operations for each endpoint. + +Each operation description includes a firmware defined command attribute +that maps to the FWCTL scope levels. The driver translates those firmware +values into FWCTL scope values, which are then used for filtering the +scoped user requests. + +pds_fwctl User API +================== + +Each RPC request includes the target endpoint, the operation id, and the in +and out buffer lengths and pointers. The driver verifies the existence +of the requested endpoint and operation, then checks the request scope +against the required scope of the operation. The request is then put +together with the request data and sent through pds_core's message queue +to the firmware, and the results are returned to the caller. + +The RPC endpoints, operations, and buffer contents are defined by the +particular firmware package in the device, which varies across the +available product configurations. The details are available in the +specific product SDK documentation. diff --git a/Documentation/userspace-api/index.rst b/Documentation/userspace-api/index.rst index 9cbe4390c872..b8c73be4fb11 100644 --- a/Documentation/userspace-api/index.rst +++ b/Documentation/userspace-api/index.rst @@ -46,6 +46,7 @@ Devices and I/O accelerators/ocxl dma-buf-heaps dma-buf-alloc-exchange + fwctl/index gpio/index iommufd media/index diff --git a/Documentation/userspace-api/ioctl/ioctl-number.rst b/Documentation/userspace-api/ioctl/ioctl-number.rst index 3cf4183b767e..3d1cd7ad9d67 100644 --- a/Documentation/userspace-api/ioctl/ioctl-number.rst +++ b/Documentation/userspace-api/ioctl/ioctl-number.rst @@ -333,6 +333,7 @@ Code Seq# Include File Comments 0x97 00-7F fs/ceph/ioctl.h Ceph file system 0x99 00-0F 537-Addinboard driver <mailto:buk@buks.ipn.de> +0x9A 00-0F include/uapi/fwctl/fwctl.h 0xA0 all linux/sdp/sdp.h Industrial Device Project <mailto:kenji@bitgate.com> 0xA1 0 linux/vtpm_proxy.h TPM Emulator Proxy Driver diff --git a/MAINTAINERS b/MAINTAINERS index 3347bd1dd615..85061326dcb0 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -5886,6 +5886,7 @@ M: Dan Williams <dan.j.williams@intel.com> L: linux-cxl@vger.kernel.org S: Maintained F: Documentation/driver-api/cxl +F: Documentation/userspace-api/fwctl/fwctl-cxl.rst F: drivers/cxl/ F: include/cxl/ F: include/uapi/linux/cxl_mem.h @@ -9686,6 +9687,31 @@ F: kernel/futex/* F: tools/perf/bench/futex* F: tools/testing/selftests/futex/ +FWCTL SUBSYSTEM +M: Dave Jiang <dave.jiang@intel.com> +M: Jason Gunthorpe <jgg@nvidia.com> +M: Saeed Mahameed <saeedm@nvidia.com> +R: Jonathan Cameron <Jonathan.Cameron@huawei.com> +S: Maintained +F: Documentation/userspace-api/fwctl/ +F: drivers/fwctl/ +F: include/linux/fwctl.h +F: include/uapi/fwctl/ + +FWCTL MLX5 DRIVER +M: Saeed Mahameed <saeedm@nvidia.com> +R: Itay Avraham <itayavr@nvidia.com> +L: linux-kernel@vger.kernel.org +S: Maintained +F: drivers/fwctl/mlx5/ + +FWCTL PDS DRIVER +M: Brett Creeley <brett.creeley@amd.com> +R: Shannon Nelson <shannon.nelson@amd.com> +L: linux-kernel@vger.kernel.org +S: Maintained +F: drivers/fwctl/pds/ + GALAXYCORE GC0308 CAMERA
SENSOR DRIVER M: Sebastian Reichel <sre@kernel.org> L: linux-media@vger.kernel.org diff --git a/drivers/Kconfig b/drivers/Kconfig index 7bdad836fc62..7c556c5ac4fd 100644 --- a/drivers/Kconfig +++ b/drivers/Kconfig @@ -21,6 +21,8 @@ source "drivers/connector/Kconfig" source "drivers/firmware/Kconfig" +source "drivers/fwctl/Kconfig" + source "drivers/gnss/Kconfig" source "drivers/mtd/Kconfig" diff --git a/drivers/Makefile b/drivers/Makefile index 45d1c3e630f7..b5749cf67044 100644 --- a/drivers/Makefile +++ b/drivers/Makefile @@ -135,6 +135,7 @@ obj-y += ufs/ obj-$(CONFIG_MEMSTICK) += memstick/ obj-$(CONFIG_INFINIBAND) += infiniband/ obj-y += firmware/ +obj-$(CONFIG_FWCTL) += fwctl/ obj-$(CONFIG_CRYPTO) += crypto/ obj-$(CONFIG_SUPERH) += sh/ obj-y += clocksource/ diff --git a/drivers/cxl/Kconfig b/drivers/cxl/Kconfig index 876469e23f7a..8ac1e9d70eeb 100644 --- a/drivers/cxl/Kconfig +++ b/drivers/cxl/Kconfig @@ -7,6 +7,7 @@ menuconfig CXL_BUS select PCI_DOE select FIRMWARE_TABLE select NUMA_KEEP_MEMINFO if NUMA_MEMBLKS + select FWCTL if CXL_FEATURES help CXL is a bus that is electrically compatible with PCI Express, but layers three protocols on that signalling (CXL.io, CXL.cache, and @@ -102,6 +103,17 @@ config CXL_MEM If unsure say 'm'. +config CXL_FEATURES + bool "CXL: Features" + depends on CXL_PCI + help + Enable support for CXL Features. A CXL device that includes a mailbox + supports commands that allow listing, getting, and setting of + optionally defined features such as memory sparing or post package + repair. Vendors may define custom features for the device. + + If unsure say 'n'. + config CXL_PORT default CXL_BUS tristate diff --git a/drivers/cxl/core/Makefile b/drivers/cxl/core/Makefile index 9259bcc6773c..b0bfbd9eac9b 100644 --- a/drivers/cxl/core/Makefile +++ b/drivers/cxl/core/Makefile @@ -16,3 +16,4 @@ cxl_core-y += pmu.o cxl_core-y += cdat.o cxl_core-$(CONFIG_TRACING) += trace.o cxl_core-$(CONFIG_CXL_REGION) += region.o +cxl_core-$(CONFIG_CXL_FEATURES) += features.o diff --git a/drivers/cxl/core/core.h b/drivers/cxl/core/core.h index 800466f96a68..17e99a25c29a 100644 --- a/drivers/cxl/core/core.h +++ b/drivers/cxl/core/core.h @@ -4,6 +4,8 @@ #ifndef __CXL_CORE_H__ #define __CXL_CORE_H__ +#include <cxl/mailbox.h> + extern const struct device_type cxl_nvdimm_bridge_type; extern const struct device_type cxl_nvdimm_type; extern const struct device_type cxl_pmu_type; @@ -65,9 +67,9 @@ static inline void cxl_region_exit(void) struct cxl_send_command; struct cxl_mem_query_commands; -int cxl_query_cmd(struct cxl_memdev *cxlmd, +int cxl_query_cmd(struct cxl_mailbox *cxl_mbox, struct cxl_mem_query_commands __user *q); -int cxl_send_cmd(struct cxl_memdev *cxlmd, struct cxl_send_command __user *s); +int cxl_send_cmd(struct cxl_mailbox *cxl_mbox, struct cxl_send_command __user *s); void __iomem *devm_cxl_iomap_block(struct device *dev, resource_size_t addr, resource_size_t length); @@ -115,4 +117,15 @@ bool cxl_need_node_perf_attrs_update(int nid); int cxl_port_get_switch_dport_bandwidth(struct cxl_port *port, struct access_coordinate *c); +#ifdef CONFIG_CXL_FEATURES +size_t cxl_get_feature(struct cxl_mailbox *cxl_mbox, const uuid_t *feat_uuid, + enum cxl_get_feat_selection selection, + void *feat_out, size_t feat_out_size, u16 offset, + u16 *return_code); +int cxl_set_feature(struct cxl_mailbox *cxl_mbox, const uuid_t *feat_uuid, + u8 feat_version, const void *feat_data, + size_t feat_data_size, u32 feat_flag, u16 offset, + u16 *return_code); +#endif + #endif /* __CXL_CORE_H__ */ diff
--git a/drivers/cxl/core/features.c b/drivers/cxl/core/features.c new file mode 100644 index 000000000000..f4daefe3180e --- /dev/null +++ b/drivers/cxl/core/features.c @@ -0,0 +1,708 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* Copyright(c) 2024-2025 Intel Corporation. All rights reserved. */ +#include <linux/fwctl.h> +#include <linux/device.h> +#include <cxl/mailbox.h> +#include <cxl/features.h> +#include <uapi/fwctl/cxl.h> +#include "cxl.h" +#include "core.h" +#include "cxlmem.h" + +/* All the features below are exclusive to the kernel */ +static const uuid_t cxl_exclusive_feats[] = { + CXL_FEAT_PATROL_SCRUB_UUID, + CXL_FEAT_ECS_UUID, + CXL_FEAT_SPPR_UUID, + CXL_FEAT_HPPR_UUID, + CXL_FEAT_CACHELINE_SPARING_UUID, + CXL_FEAT_ROW_SPARING_UUID, + CXL_FEAT_BANK_SPARING_UUID, + CXL_FEAT_RANK_SPARING_UUID, +}; + +static bool is_cxl_feature_exclusive_by_uuid(const uuid_t *uuid) +{ + for (int i = 0; i < ARRAY_SIZE(cxl_exclusive_feats); i++) { + if (uuid_equal(uuid, &cxl_exclusive_feats[i])) + return true; + } + + return false; +} + +static bool is_cxl_feature_exclusive(struct cxl_feat_entry *entry) +{ + return is_cxl_feature_exclusive_by_uuid(&entry->uuid); +} + +inline struct cxl_features_state *to_cxlfs(struct cxl_dev_state *cxlds) +{ + return cxlds->cxlfs; +} +EXPORT_SYMBOL_NS_GPL(to_cxlfs, "CXL"); + +static int cxl_get_supported_features_count(struct cxl_mailbox *cxl_mbox) +{ + struct cxl_mbox_get_sup_feats_out mbox_out; + struct cxl_mbox_get_sup_feats_in mbox_in; + struct cxl_mbox_cmd mbox_cmd; + int rc; + + memset(&mbox_in, 0, sizeof(mbox_in)); + mbox_in.count = cpu_to_le32(sizeof(mbox_out)); + memset(&mbox_out, 0, sizeof(mbox_out)); + mbox_cmd = (struct cxl_mbox_cmd) { + .opcode = CXL_MBOX_OP_GET_SUPPORTED_FEATURES, + .size_in = sizeof(mbox_in), + .payload_in = &mbox_in, + .size_out = sizeof(mbox_out), + .payload_out = &mbox_out, + .min_out = sizeof(mbox_out), + }; + rc = cxl_internal_send_cmd(cxl_mbox, &mbox_cmd); + if (rc < 0) + return rc; + + return le16_to_cpu(mbox_out.supported_feats); +} + +static struct cxl_feat_entries * +get_supported_features(struct cxl_features_state *cxlfs) +{ + int remain_feats, max_size, max_feats, start, rc, hdr_size; + struct cxl_mailbox *cxl_mbox = &cxlfs->cxlds->cxl_mbox; + int feat_size = sizeof(struct cxl_feat_entry); + struct cxl_mbox_get_sup_feats_in mbox_in; + struct cxl_feat_entry *entry; + struct cxl_mbox_cmd mbox_cmd; + int user_feats = 0; + int count; + + count = cxl_get_supported_features_count(cxl_mbox); + if (count <= 0) + return NULL; + + struct cxl_feat_entries *entries __free(kvfree) = + kvmalloc(struct_size(entries, ent, count), GFP_KERNEL); + if (!entries) + return NULL; + + struct cxl_mbox_get_sup_feats_out *mbox_out __free(kvfree) = + kvmalloc(cxl_mbox->payload_size, GFP_KERNEL); + if (!mbox_out) + return NULL; + + hdr_size = struct_size(mbox_out, ents, 0); + max_size = cxl_mbox->payload_size - hdr_size; + /* max feat entries that can fit in mailbox max payload size */ + max_feats = max_size / feat_size; + entry = entries->ent; + + start = 0; + remain_feats = count; + do { + int retrieved, alloc_size, copy_feats; + int num_entries; + + if (remain_feats > max_feats) { + alloc_size = struct_size(mbox_out, ents, max_feats); + remain_feats = remain_feats - max_feats; + copy_feats = max_feats; + } else { + alloc_size = struct_size(mbox_out, ents, remain_feats); + copy_feats = remain_feats; + remain_feats = 0; + } + + memset(&mbox_in, 0, sizeof(mbox_in)); + mbox_in.count = cpu_to_le32(alloc_size); + mbox_in.start_idx = cpu_to_le16(start); 
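+ /* Fetch the next batch of feature entries in a single mailbox round trip */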
+ memset(mbox_out, 0, alloc_size); + mbox_cmd = (struct cxl_mbox_cmd) { + .opcode = CXL_MBOX_OP_GET_SUPPORTED_FEATURES, + .size_in = sizeof(mbox_in), + .payload_in = &mbox_in, + .size_out = alloc_size, + .payload_out = mbox_out, + .min_out = hdr_size, + }; + rc = cxl_internal_send_cmd(cxl_mbox, &mbox_cmd); + if (rc < 0) + return NULL; + + if (mbox_cmd.size_out <= hdr_size) + return NULL; + + /* + * Make sure retrieved out buffer is multiple of feature + * entries. + */ + retrieved = mbox_cmd.size_out - hdr_size; + if (retrieved % feat_size) + return NULL; + + num_entries = le16_to_cpu(mbox_out->num_entries); + /* + * If the reported output entries * defined entry size != + * retrieved output bytes, then the output package is incorrect. + */ + if (num_entries * feat_size != retrieved) + return NULL; + + memcpy(entry, mbox_out->ents, retrieved); + for (int i = 0; i < num_entries; i++) { + if (!is_cxl_feature_exclusive(entry + i)) + user_feats++; + } + entry += num_entries; + /* + * If the number of output entries is less than expected, add the + * remaining entries to the next batch. + */ + remain_feats += copy_feats - num_entries; + start += num_entries; + } while (remain_feats); + + entries->num_features = count; + entries->num_user_features = user_feats; + + return no_free_ptr(entries); +} + +static void free_cxlfs(void *_cxlfs) +{ + struct cxl_features_state *cxlfs = _cxlfs; + struct cxl_dev_state *cxlds = cxlfs->cxlds; + + cxlds->cxlfs = NULL; + kvfree(cxlfs->entries); + kfree(cxlfs); +} + +/** + * devm_cxl_setup_features() - Allocate and initialize features context + * @cxlds: CXL device context + * + * Return 0 on success or -errno on failure. + */ +int devm_cxl_setup_features(struct cxl_dev_state *cxlds) +{ + struct cxl_mailbox *cxl_mbox = &cxlds->cxl_mbox; + + if (cxl_mbox->feat_cap < CXL_FEATURES_RO) + return -ENODEV; + + struct cxl_features_state *cxlfs __free(kfree) = + kzalloc(sizeof(*cxlfs), GFP_KERNEL); + if (!cxlfs) + return -ENOMEM; + + cxlfs->cxlds = cxlds; + + cxlfs->entries = get_supported_features(cxlfs); + if (!cxlfs->entries) + return -ENOMEM; + + cxlds->cxlfs = cxlfs; + + return devm_add_action_or_reset(cxlds->dev, free_cxlfs, no_free_ptr(cxlfs)); +} +EXPORT_SYMBOL_NS_GPL(devm_cxl_setup_features, "CXL"); + +size_t cxl_get_feature(struct cxl_mailbox *cxl_mbox, const uuid_t *feat_uuid, + enum cxl_get_feat_selection selection, + void *feat_out, size_t feat_out_size, u16 offset, + u16 *return_code) +{ + size_t data_to_rd_size, size_out; + struct cxl_mbox_get_feat_in pi; + struct cxl_mbox_cmd mbox_cmd; + size_t data_rcvd_size = 0; + int rc; + + if (return_code) + *return_code = CXL_MBOX_CMD_RC_INPUT; + + if (!feat_out || !feat_out_size) + return 0; + + size_out = min(feat_out_size, cxl_mbox->payload_size); + uuid_copy(&pi.uuid, feat_uuid); + pi.selection = selection; + do { + data_to_rd_size = min(feat_out_size - data_rcvd_size, + cxl_mbox->payload_size); + pi.offset = cpu_to_le16(offset + data_rcvd_size); + pi.count = cpu_to_le16(data_to_rd_size); + + mbox_cmd = (struct cxl_mbox_cmd) { + .opcode = CXL_MBOX_OP_GET_FEATURE, + .size_in = sizeof(pi), + .payload_in = &pi, + .size_out = size_out, + .payload_out = feat_out + data_rcvd_size, + .min_out = data_to_rd_size, + }; + rc = cxl_internal_send_cmd(cxl_mbox, &mbox_cmd); + if (rc < 0 || !mbox_cmd.size_out) { + if (return_code) + *return_code = mbox_cmd.return_code; + return 0; + } + data_rcvd_size += mbox_cmd.size_out; + } while (data_rcvd_size < feat_out_size); + + if (return_code) + *return_code = CXL_MBOX_CMD_RC_SUCCESS; 
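+ /* All chunks received; data_rcvd_size totals the feature data bytes now in feat_out */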
+ + return data_rcvd_size; +} + +/* + * FEAT_DATA_MIN_PAYLOAD_SIZE - min extra number of bytes that should be + * available in the mailbox for storing the actual feature data so that + * the feature data transfer works as expected. + */ +#define FEAT_DATA_MIN_PAYLOAD_SIZE 10 +int cxl_set_feature(struct cxl_mailbox *cxl_mbox, + const uuid_t *feat_uuid, u8 feat_version, + const void *feat_data, size_t feat_data_size, + u32 feat_flag, u16 offset, u16 *return_code) +{ + size_t data_in_size, data_sent_size = 0; + struct cxl_mbox_cmd mbox_cmd; + size_t hdr_size; + + if (return_code) + *return_code = CXL_MBOX_CMD_RC_INPUT; + + struct cxl_mbox_set_feat_in *pi __free(kfree) = + kzalloc(cxl_mbox->payload_size, GFP_KERNEL); + if (!pi) + return -ENOMEM; + + uuid_copy(&pi->uuid, feat_uuid); + pi->version = feat_version; + feat_flag &= ~CXL_SET_FEAT_FLAG_DATA_TRANSFER_MASK; + feat_flag |= CXL_SET_FEAT_FLAG_DATA_SAVED_ACROSS_RESET; + hdr_size = sizeof(pi->hdr); + /* + * Check minimum mbox payload size is available for + * the feature data transfer. + */ + if (hdr_size + FEAT_DATA_MIN_PAYLOAD_SIZE > cxl_mbox->payload_size) + return -ENOMEM; + + if (hdr_size + feat_data_size <= cxl_mbox->payload_size) { + pi->flags = cpu_to_le32(feat_flag | + CXL_SET_FEAT_FLAG_FULL_DATA_TRANSFER); + data_in_size = feat_data_size; + } else { + pi->flags = cpu_to_le32(feat_flag | + CXL_SET_FEAT_FLAG_INITIATE_DATA_TRANSFER); + data_in_size = cxl_mbox->payload_size - hdr_size; + } + + do { + int rc; + + pi->offset = cpu_to_le16(offset + data_sent_size); + memcpy(pi->feat_data, feat_data + data_sent_size, data_in_size); + mbox_cmd = (struct cxl_mbox_cmd) { + .opcode = CXL_MBOX_OP_SET_FEATURE, + .size_in = hdr_size + data_in_size, + .payload_in = pi, + }; + rc = cxl_internal_send_cmd(cxl_mbox, &mbox_cmd); + if (rc < 0) { + if (return_code) + *return_code = mbox_cmd.return_code; + return rc; + } + + data_sent_size += data_in_size; + if (data_sent_size >= feat_data_size) { + if (return_code) + *return_code = CXL_MBOX_CMD_RC_SUCCESS; + return 0; + } + + if ((feat_data_size - data_sent_size) <= (cxl_mbox->payload_size - hdr_size)) { + data_in_size = feat_data_size - data_sent_size; + pi->flags = cpu_to_le32(feat_flag | + CXL_SET_FEAT_FLAG_FINISH_DATA_TRANSFER); + } else { + pi->flags = cpu_to_le32(feat_flag | + CXL_SET_FEAT_FLAG_CONTINUE_DATA_TRANSFER); + } + } while (true); +} + +/* FWCTL support */ + +static inline struct cxl_memdev *fwctl_to_memdev(struct fwctl_device *fwctl_dev) +{ + return to_cxl_memdev(fwctl_dev->dev.parent); +} + +static int cxlctl_open_uctx(struct fwctl_uctx *uctx) +{ + return 0; +} + +static void cxlctl_close_uctx(struct fwctl_uctx *uctx) +{ +} + +static struct cxl_feat_entry * +get_support_feature_info(struct cxl_features_state *cxlfs, + const struct fwctl_rpc_cxl *rpc_in) +{ + struct cxl_feat_entry *feat; + const uuid_t *uuid; + + if (rpc_in->op_size < sizeof(uuid_t)) + return ERR_PTR(-EINVAL); + + uuid = &rpc_in->set_feat_in.uuid; + + for (int i = 0; i < cxlfs->entries->num_features; i++) { + feat = &cxlfs->entries->ent[i]; + if (uuid_equal(uuid, &feat->uuid)) + return feat; + } + + return ERR_PTR(-EINVAL); +} + +static void *cxlctl_get_supported_features(struct cxl_features_state *cxlfs, + const struct fwctl_rpc_cxl *rpc_in, + size_t *out_len) +{ + const struct cxl_mbox_get_sup_feats_in *feat_in; + struct cxl_mbox_get_sup_feats_out *feat_out; + struct cxl_feat_entry *pos; + size_t out_size; + int requested; + u32 count; + u16 start; + int i; + + if (rpc_in->op_size != sizeof(*feat_in)) + return
ERR_PTR(-EINVAL); + + feat_in = &rpc_in->get_sup_feats_in; + count = le32_to_cpu(feat_in->count); + start = le16_to_cpu(feat_in->start_idx); + requested = count / sizeof(*pos); + + /* + * Make sure that the total requested number of entries is not greater + * than the total number of supported features allowed for userspace. + */ + if (start >= cxlfs->entries->num_features) + return ERR_PTR(-EINVAL); + + requested = min_t(int, requested, cxlfs->entries->num_features - start); + + out_size = sizeof(struct fwctl_rpc_cxl_out) + + struct_size(feat_out, ents, requested); + + struct fwctl_rpc_cxl_out *rpc_out __free(kvfree) = + kvzalloc(out_size, GFP_KERNEL); + if (!rpc_out) + return ERR_PTR(-ENOMEM); + + rpc_out->size = struct_size(feat_out, ents, requested); + feat_out = &rpc_out->get_sup_feats_out; + if (requested == 0) { + feat_out->num_entries = cpu_to_le16(requested); + feat_out->supported_feats = + cpu_to_le16(cxlfs->entries->num_features); + rpc_out->retval = CXL_MBOX_CMD_RC_SUCCESS; + *out_len = out_size; + return no_free_ptr(rpc_out); + } + + for (i = start, pos = &feat_out->ents[0]; + i < cxlfs->entries->num_features; i++, pos++) { + if (i - start == requested) + break; + + memcpy(pos, &cxlfs->entries->ent[i], sizeof(*pos)); + /* + * If the feature is exclusive, set the set_feat_size to 0 to + * indicate that the feature is not changeable. + */ + if (is_cxl_feature_exclusive(pos)) { + u32 flags; + + pos->set_feat_size = 0; + flags = le32_to_cpu(pos->flags); + flags &= ~CXL_FEATURE_F_CHANGEABLE; + pos->flags = cpu_to_le32(flags); + } + } + + feat_out->num_entries = cpu_to_le16(requested); + feat_out->supported_feats = cpu_to_le16(cxlfs->entries->num_features); + rpc_out->retval = CXL_MBOX_CMD_RC_SUCCESS; + *out_len = out_size; + + return no_free_ptr(rpc_out); +} + +static void *cxlctl_get_feature(struct cxl_features_state *cxlfs, + const struct fwctl_rpc_cxl *rpc_in, + size_t *out_len) +{ + struct cxl_mailbox *cxl_mbox = &cxlfs->cxlds->cxl_mbox; + const struct cxl_mbox_get_feat_in *feat_in; + u16 offset, count, return_code; + size_t out_size = *out_len; + + if (rpc_in->op_size != sizeof(*feat_in)) + return ERR_PTR(-EINVAL); + + feat_in = &rpc_in->get_feat_in; + offset = le16_to_cpu(feat_in->offset); + count = le16_to_cpu(feat_in->count); + + if (!count) + return ERR_PTR(-EINVAL); + + struct fwctl_rpc_cxl_out *rpc_out __free(kvfree) = + kvzalloc(out_size, GFP_KERNEL); + if (!rpc_out) + return ERR_PTR(-ENOMEM); + + out_size = cxl_get_feature(cxl_mbox, &feat_in->uuid, + feat_in->selection, rpc_out->payload, + count, offset, &return_code); + *out_len = sizeof(struct fwctl_rpc_cxl_out); + if (!out_size) { + rpc_out->size = 0; + rpc_out->retval = return_code; + return no_free_ptr(rpc_out); + } + + rpc_out->size = out_size; + rpc_out->retval = CXL_MBOX_CMD_RC_SUCCESS; + *out_len += out_size; + + return no_free_ptr(rpc_out); +} + +static void *cxlctl_set_feature(struct cxl_features_state *cxlfs, + const struct fwctl_rpc_cxl *rpc_in, + size_t *out_len) +{ + struct cxl_mailbox *cxl_mbox = &cxlfs->cxlds->cxl_mbox; + const struct cxl_mbox_set_feat_in *feat_in; + size_t out_size, data_size; + u16 offset, return_code; + u32 flags; + int rc; + + if (rpc_in->op_size <= sizeof(feat_in->hdr)) + return ERR_PTR(-EINVAL); + + feat_in = &rpc_in->set_feat_in; + + if (is_cxl_feature_exclusive_by_uuid(&feat_in->uuid)) + return ERR_PTR(-EPERM); + + offset = le16_to_cpu(feat_in->offset); + flags = le32_to_cpu(feat_in->flags); + out_size = *out_len; + + struct fwctl_rpc_cxl_out *rpc_out __free(kvfree) = + 
kvzalloc(out_size, GFP_KERNEL); + if (!rpc_out) + return ERR_PTR(-ENOMEM); + + rpc_out->size = 0; + + data_size = rpc_in->op_size - sizeof(feat_in->hdr); + rc = cxl_set_feature(cxl_mbox, &feat_in->uuid, + feat_in->version, feat_in->feat_data, + data_size, flags, offset, &return_code); + if (rc) { + rpc_out->retval = return_code; + return no_free_ptr(rpc_out); + } + + rpc_out->retval = CXL_MBOX_CMD_RC_SUCCESS; + *out_len = sizeof(*rpc_out); + + return no_free_ptr(rpc_out); +} + +static bool cxlctl_validate_set_features(struct cxl_features_state *cxlfs, + const struct fwctl_rpc_cxl *rpc_in, + enum fwctl_rpc_scope scope) +{ + u16 effects, imm_mask, reset_mask; + struct cxl_feat_entry *feat; + u32 flags; + + feat = get_support_feature_info(cxlfs, rpc_in); + if (IS_ERR(feat)) + return false; + + /* Ensure that the attribute is changeable */ + flags = le32_to_cpu(feat->flags); + if (!(flags & CXL_FEATURE_F_CHANGEABLE)) + return false; + + effects = le16_to_cpu(feat->effects); + + /* + * Reserved bits are set, rejecting since the effects are not + * comprehended by the driver. + */ + if (effects & CXL_CMD_EFFECTS_RESERVED) { + dev_warn_once(cxlfs->cxlds->dev, + "Reserved bits set in the Feature effects field!\n"); + return false; + } + + /* Currently no user background command support */ + if (effects & CXL_CMD_BACKGROUND) + return false; + + /* Effects that cause an immediate change need the highest security scope */ + imm_mask = CXL_CMD_CONFIG_CHANGE_IMMEDIATE | + CXL_CMD_DATA_CHANGE_IMMEDIATE | + CXL_CMD_POLICY_CHANGE_IMMEDIATE | + CXL_CMD_LOG_CHANGE_IMMEDIATE; + + reset_mask = CXL_CMD_CONFIG_CHANGE_COLD_RESET | + CXL_CMD_CONFIG_CHANGE_CONV_RESET | + CXL_CMD_CONFIG_CHANGE_CXL_RESET; + + /* If no immediate or reset effect is set, the hardware has a bug */ + if (!(effects & imm_mask) && !(effects & reset_mask)) + return false; + + /* + * If the Feature setting causes an immediate configuration change, + * then we need the full write permission policy. + */ + if (effects & imm_mask && scope >= FWCTL_RPC_DEBUG_WRITE_FULL) + return true; + + /* + * If the Feature setting only causes a configuration change + * after a reset, then the lesser level of write permission + * policy is ok.
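+ * For example, a Feature whose effects report only "config change with + * cold reset" or "config change with conventional reset" is settable at + * FWCTL_RPC_DEBUG_WRITE, while any immediate effect requires + * FWCTL_RPC_DEBUG_WRITE_FULL.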
+ */ + if (!(effects & imm_mask) && scope >= FWCTL_RPC_DEBUG_WRITE) + return true; + + return false; +} + +static bool cxlctl_validate_hw_command(struct cxl_features_state *cxlfs, + const struct fwctl_rpc_cxl *rpc_in, + enum fwctl_rpc_scope scope, + u16 opcode) +{ + struct cxl_mailbox *cxl_mbox = &cxlfs->cxlds->cxl_mbox; + + switch (opcode) { + case CXL_MBOX_OP_GET_SUPPORTED_FEATURES: + case CXL_MBOX_OP_GET_FEATURE: + if (cxl_mbox->feat_cap < CXL_FEATURES_RO) + return false; + if (scope >= FWCTL_RPC_CONFIGURATION) + return true; + return false; + case CXL_MBOX_OP_SET_FEATURE: + if (cxl_mbox->feat_cap < CXL_FEATURES_RW) + return false; + return cxlctl_validate_set_features(cxlfs, rpc_in, scope); + default: + return false; + } +} + +static void *cxlctl_handle_commands(struct cxl_features_state *cxlfs, + const struct fwctl_rpc_cxl *rpc_in, + size_t *out_len, u16 opcode) +{ + switch (opcode) { + case CXL_MBOX_OP_GET_SUPPORTED_FEATURES: + return cxlctl_get_supported_features(cxlfs, rpc_in, out_len); + case CXL_MBOX_OP_GET_FEATURE: + return cxlctl_get_feature(cxlfs, rpc_in, out_len); + case CXL_MBOX_OP_SET_FEATURE: + return cxlctl_set_feature(cxlfs, rpc_in, out_len); + default: + return ERR_PTR(-EOPNOTSUPP); + } +} + +static void *cxlctl_fw_rpc(struct fwctl_uctx *uctx, enum fwctl_rpc_scope scope, + void *in, size_t in_len, size_t *out_len) +{ + struct fwctl_device *fwctl_dev = uctx->fwctl; + struct cxl_memdev *cxlmd = fwctl_to_memdev(fwctl_dev); + struct cxl_features_state *cxlfs = to_cxlfs(cxlmd->cxlds); + const struct fwctl_rpc_cxl *rpc_in = in; + u16 opcode = rpc_in->opcode; + + if (!cxlctl_validate_hw_command(cxlfs, rpc_in, scope, opcode)) + return ERR_PTR(-EINVAL); + + return cxlctl_handle_commands(cxlfs, rpc_in, out_len, opcode); +} + +static const struct fwctl_ops cxlctl_ops = { + .device_type = FWCTL_DEVICE_TYPE_CXL, + .uctx_size = sizeof(struct fwctl_uctx), + .open_uctx = cxlctl_open_uctx, + .close_uctx = cxlctl_close_uctx, + .fw_rpc = cxlctl_fw_rpc, +}; + +DEFINE_FREE(free_fwctl_dev, struct fwctl_device *, if (_T) fwctl_put(_T)) + +static void free_memdev_fwctl(void *_fwctl_dev) +{ + struct fwctl_device *fwctl_dev = _fwctl_dev; + + fwctl_unregister(fwctl_dev); + fwctl_put(fwctl_dev); +} + +int devm_cxl_setup_fwctl(struct cxl_memdev *cxlmd) +{ + struct cxl_dev_state *cxlds = cxlmd->cxlds; + struct cxl_features_state *cxlfs; + int rc; + + cxlfs = to_cxlfs(cxlds); + if (!cxlfs) + return -ENODEV; + + /* No need to setup FWCTL if there are no user allowed features found */ + if (!cxlfs->entries->num_user_features) + return -ENODEV; + + struct fwctl_device *fwctl_dev __free(free_fwctl_dev) = + _fwctl_alloc_device(&cxlmd->dev, &cxlctl_ops, sizeof(*fwctl_dev)); + if (!fwctl_dev) + return -ENOMEM; + + rc = fwctl_register(fwctl_dev); + if (rc) + return rc; + + return devm_add_action_or_reset(&cxlmd->dev, free_memdev_fwctl, + no_free_ptr(fwctl_dev)); +} +EXPORT_SYMBOL_NS_GPL(devm_cxl_setup_fwctl, "CXL"); + +MODULE_IMPORT_NS("FWCTL"); diff --git a/drivers/cxl/core/mbox.c b/drivers/cxl/core/mbox.c index 548564c770c0..78c5346e3e89 100644 --- a/drivers/cxl/core/mbox.c +++ b/drivers/cxl/core/mbox.c @@ -349,40 +349,39 @@ static bool cxl_payload_from_user_allowed(u16 opcode, void *payload_in) return true; } -static int cxl_mbox_cmd_ctor(struct cxl_mbox_cmd *mbox, - struct cxl_memdev_state *mds, u16 opcode, +static int cxl_mbox_cmd_ctor(struct cxl_mbox_cmd *mbox_cmd, + struct cxl_mailbox *cxl_mbox, u16 opcode, size_t in_size, size_t out_size, u64 in_payload) { - struct cxl_mailbox *cxl_mbox = 
&mds->cxlds.cxl_mbox; - *mbox = (struct cxl_mbox_cmd) { + *mbox_cmd = (struct cxl_mbox_cmd) { .opcode = opcode, .size_in = in_size, }; if (in_size) { - mbox->payload_in = vmemdup_user(u64_to_user_ptr(in_payload), - in_size); - if (IS_ERR(mbox->payload_in)) - return PTR_ERR(mbox->payload_in); + mbox_cmd->payload_in = vmemdup_user(u64_to_user_ptr(in_payload), + in_size); + if (IS_ERR(mbox_cmd->payload_in)) + return PTR_ERR(mbox_cmd->payload_in); - if (!cxl_payload_from_user_allowed(opcode, mbox->payload_in)) { - dev_dbg(mds->cxlds.dev, "%s: input payload not allowed\n", + if (!cxl_payload_from_user_allowed(opcode, mbox_cmd->payload_in)) { + dev_dbg(cxl_mbox->host, "%s: input payload not allowed\n", cxl_mem_opcode_to_name(opcode)); - kvfree(mbox->payload_in); + kvfree(mbox_cmd->payload_in); return -EBUSY; } } /* Prepare to handle a full payload for variable sized output */ if (out_size == CXL_VARIABLE_PAYLOAD) - mbox->size_out = cxl_mbox->payload_size; + mbox_cmd->size_out = cxl_mbox->payload_size; else - mbox->size_out = out_size; + mbox_cmd->size_out = out_size; - if (mbox->size_out) { - mbox->payload_out = kvzalloc(mbox->size_out, GFP_KERNEL); - if (!mbox->payload_out) { - kvfree(mbox->payload_in); + if (mbox_cmd->size_out) { + mbox_cmd->payload_out = kvzalloc(mbox_cmd->size_out, GFP_KERNEL); + if (!mbox_cmd->payload_out) { + kvfree(mbox_cmd->payload_in); return -ENOMEM; } } @@ -397,10 +396,8 @@ static void cxl_mbox_cmd_dtor(struct cxl_mbox_cmd *mbox) static int cxl_to_mem_cmd_raw(struct cxl_mem_command *mem_cmd, const struct cxl_send_command *send_cmd, - struct cxl_memdev_state *mds) + struct cxl_mailbox *cxl_mbox) { - struct cxl_mailbox *cxl_mbox = &mds->cxlds.cxl_mbox; - if (send_cmd->raw.rsvd) return -EINVAL; @@ -415,7 +412,7 @@ static int cxl_to_mem_cmd_raw(struct cxl_mem_command *mem_cmd, if (!cxl_mem_raw_command_allowed(send_cmd->raw.opcode)) return -EPERM; - dev_WARN_ONCE(mds->cxlds.dev, true, "raw command path used\n"); + dev_WARN_ONCE(cxl_mbox->host, true, "raw command path used\n"); *mem_cmd = (struct cxl_mem_command) { .info = { @@ -431,7 +428,7 @@ static int cxl_to_mem_cmd_raw(struct cxl_mem_command *mem_cmd, static int cxl_to_mem_cmd(struct cxl_mem_command *mem_cmd, const struct cxl_send_command *send_cmd, - struct cxl_memdev_state *mds) + struct cxl_mailbox *cxl_mbox) { struct cxl_mem_command *c = &cxl_mem_commands[send_cmd->id]; const struct cxl_command_info *info = &c->info; @@ -446,11 +443,11 @@ static int cxl_to_mem_cmd(struct cxl_mem_command *mem_cmd, return -EINVAL; /* Check that the command is enabled for hardware */ - if (!test_bit(info->id, mds->enabled_cmds)) + if (!test_bit(info->id, cxl_mbox->enabled_cmds)) return -ENOTTY; /* Check that the command is not claimed for exclusive kernel use */ - if (test_bit(info->id, mds->exclusive_cmds)) + if (test_bit(info->id, cxl_mbox->exclusive_cmds)) return -EBUSY; /* Check the input buffer is the expected size */ @@ -479,7 +476,7 @@ static int cxl_to_mem_cmd(struct cxl_mem_command *mem_cmd, /** * cxl_validate_cmd_from_user() - Check fields for CXL_MEM_SEND_COMMAND. * @mbox_cmd: Sanitized and populated &struct cxl_mbox_cmd. - * @mds: The driver data for the operation + * @cxl_mbox: CXL mailbox context * @send_cmd: &struct cxl_send_command copied in from userspace. * * Return: @@ -494,10 +491,9 @@ static int cxl_to_mem_cmd(struct cxl_mem_command *mem_cmd, * safe to send to the hardware. 
*/ static int cxl_validate_cmd_from_user(struct cxl_mbox_cmd *mbox_cmd, - struct cxl_memdev_state *mds, + struct cxl_mailbox *cxl_mbox, const struct cxl_send_command *send_cmd) { - struct cxl_mailbox *cxl_mbox = &mds->cxlds.cxl_mbox; struct cxl_mem_command mem_cmd; int rc; @@ -514,24 +510,23 @@ static int cxl_validate_cmd_from_user(struct cxl_mbox_cmd *mbox_cmd, /* Sanitize and construct a cxl_mem_command */ if (send_cmd->id == CXL_MEM_COMMAND_ID_RAW) - rc = cxl_to_mem_cmd_raw(&mem_cmd, send_cmd, mds); + rc = cxl_to_mem_cmd_raw(&mem_cmd, send_cmd, cxl_mbox); else - rc = cxl_to_mem_cmd(&mem_cmd, send_cmd, mds); + rc = cxl_to_mem_cmd(&mem_cmd, send_cmd, cxl_mbox); if (rc) return rc; /* Sanitize and construct a cxl_mbox_cmd */ - return cxl_mbox_cmd_ctor(mbox_cmd, mds, mem_cmd.opcode, + return cxl_mbox_cmd_ctor(mbox_cmd, cxl_mbox, mem_cmd.opcode, mem_cmd.info.size_in, mem_cmd.info.size_out, send_cmd->in.payload); } -int cxl_query_cmd(struct cxl_memdev *cxlmd, +int cxl_query_cmd(struct cxl_mailbox *cxl_mbox, struct cxl_mem_query_commands __user *q) { - struct cxl_memdev_state *mds = to_cxl_memdev_state(cxlmd->cxlds); - struct device *dev = &cxlmd->dev; + struct device *dev = cxl_mbox->host; struct cxl_mem_command *cmd; u32 n_commands; int j = 0; @@ -552,9 +547,9 @@ int cxl_query_cmd(struct cxl_memdev *cxlmd, cxl_for_each_cmd(cmd) { struct cxl_command_info info = cmd->info; - if (test_bit(info.id, mds->enabled_cmds)) + if (test_bit(info.id, cxl_mbox->enabled_cmds)) info.flags |= CXL_MEM_COMMAND_FLAG_ENABLED; - if (test_bit(info.id, mds->exclusive_cmds)) + if (test_bit(info.id, cxl_mbox->exclusive_cmds)) info.flags |= CXL_MEM_COMMAND_FLAG_EXCLUSIVE; if (copy_to_user(&q->commands[j++], &info, sizeof(info))) @@ -569,7 +564,7 @@ int cxl_query_cmd(struct cxl_memdev *cxlmd, /** * handle_mailbox_cmd_from_user() - Dispatch a mailbox command for userspace. - * @mds: The driver data for the operation + * @cxl_mbox: The mailbox context for the operation. * @mbox_cmd: The validated mailbox command. * @out_payload: Pointer to userspace's output payload. * @size_out: (Input) Max payload size to copy out. @@ -590,13 +585,12 @@ int cxl_query_cmd(struct cxl_memdev *cxlmd, * * See cxl_send_cmd(). 
*/ -static int handle_mailbox_cmd_from_user(struct cxl_memdev_state *mds, +static int handle_mailbox_cmd_from_user(struct cxl_mailbox *cxl_mbox, struct cxl_mbox_cmd *mbox_cmd, u64 out_payload, s32 *size_out, u32 *retval) { - struct cxl_mailbox *cxl_mbox = &mds->cxlds.cxl_mbox; - struct device *dev = mds->cxlds.dev; + struct device *dev = cxl_mbox->host; int rc; dev_dbg(dev, @@ -633,10 +627,9 @@ out: return rc; } -int cxl_send_cmd(struct cxl_memdev *cxlmd, struct cxl_send_command __user *s) +int cxl_send_cmd(struct cxl_mailbox *cxl_mbox, struct cxl_send_command __user *s) { - struct cxl_memdev_state *mds = to_cxl_memdev_state(cxlmd->cxlds); - struct device *dev = &cxlmd->dev; + struct device *dev = cxl_mbox->host; struct cxl_send_command send; struct cxl_mbox_cmd mbox_cmd; int rc; @@ -646,11 +639,11 @@ int cxl_send_cmd(struct cxl_memdev *cxlmd, struct cxl_send_command __user *s) if (copy_from_user(&send, s, sizeof(send))) return -EFAULT; - rc = cxl_validate_cmd_from_user(&mbox_cmd, mds, &send); + rc = cxl_validate_cmd_from_user(&mbox_cmd, cxl_mbox, &send); if (rc) return rc; - rc = handle_mailbox_cmd_from_user(mds, &mbox_cmd, send.out.payload, + rc = handle_mailbox_cmd_from_user(cxl_mbox, &mbox_cmd, send.out.payload, &send.out.size, &send.retval); if (rc) return rc; @@ -713,6 +706,35 @@ static int cxl_xfer_log(struct cxl_memdev_state *mds, uuid_t *uuid, return 0; } +static int check_features_opcodes(u16 opcode, int *ro_cmds, int *wr_cmds) +{ + switch (opcode) { + case CXL_MBOX_OP_GET_SUPPORTED_FEATURES: + case CXL_MBOX_OP_GET_FEATURE: + (*ro_cmds)++; + return 1; + case CXL_MBOX_OP_SET_FEATURE: + (*wr_cmds)++; + return 1; + default: + return 0; + } +} + +/* 'Get Supported Features' and 'Get Feature' */ +#define MAX_FEATURES_READ_CMDS 2 +static void set_features_cap(struct cxl_mailbox *cxl_mbox, + int ro_cmds, int wr_cmds) +{ + /* Setting up Features capability while walking the CEL */ + if (ro_cmds == MAX_FEATURES_READ_CMDS) { + if (wr_cmds) + cxl_mbox->feat_cap = CXL_FEATURES_RW; + else + cxl_mbox->feat_cap = CXL_FEATURES_RO; + } +} + /** * cxl_walk_cel() - Walk through the Command Effects Log. * @mds: The driver data for the operation @@ -724,10 +746,11 @@ static int cxl_xfer_log(struct cxl_memdev_state *mds, uuid_t *uuid, */ static void cxl_walk_cel(struct cxl_memdev_state *mds, size_t size, u8 *cel) { + struct cxl_mailbox *cxl_mbox = &mds->cxlds.cxl_mbox; struct cxl_cel_entry *cel_entry; const int cel_entries = size / sizeof(*cel_entry); struct device *dev = mds->cxlds.dev; - int i; + int i, ro_cmds = 0, wr_cmds = 0; cel_entry = (struct cxl_cel_entry *) cel; @@ -737,10 +760,13 @@ static void cxl_walk_cel(struct cxl_memdev_state *mds, size_t size, u8 *cel) int enabled = 0; if (cmd) { - set_bit(cmd->info.id, mds->enabled_cmds); + set_bit(cmd->info.id, cxl_mbox->enabled_cmds); enabled++; } + enabled += check_features_opcodes(opcode, &ro_cmds, + &wr_cmds); + if (cxl_is_poison_command(opcode)) { cxl_set_poison_cmd_enabled(&mds->poison, opcode); enabled++; @@ -754,6 +780,8 @@ static void cxl_walk_cel(struct cxl_memdev_state *mds, size_t size, u8 *cel) dev_dbg(dev, "Opcode 0x%04x %s\n", opcode, enabled ? 
"enabled" : "unsupported by driver"); } + + set_features_cap(cxl_mbox, ro_cmds, wr_cmds); } static struct cxl_mbox_get_supported_logs *cxl_get_gsl(struct cxl_memdev_state *mds) @@ -807,6 +835,7 @@ static const uuid_t log_uuid[] = { */ int cxl_enumerate_cmds(struct cxl_memdev_state *mds) { + struct cxl_mailbox *cxl_mbox = &mds->cxlds.cxl_mbox; struct cxl_mbox_get_supported_logs *gsl; struct device *dev = mds->cxlds.dev; struct cxl_mem_command *cmd; @@ -845,7 +874,7 @@ int cxl_enumerate_cmds(struct cxl_memdev_state *mds) /* In case CEL was bogus, enable some default commands. */ cxl_for_each_cmd(cmd) if (cmd->flags & CXL_CMD_FLAG_FORCE_ENABLE) - set_bit(cmd->info.id, mds->enabled_cmds); + set_bit(cmd->info.id, cxl_mbox->enabled_cmds); /* Found the required CEL */ rc = 0; @@ -1448,6 +1477,7 @@ struct cxl_memdev_state *cxl_memdev_state_create(struct device *dev) mutex_init(&mds->event.log_lock); mds->cxlds.dev = dev; mds->cxlds.reg_map.host = dev; + mds->cxlds.cxl_mbox.host = dev; mds->cxlds.reg_map.resource = CXL_RESOURCE_NONE; mds->cxlds.type = CXL_DEVTYPE_CLASSMEM; mds->ram_perf.qos_class = CXL_QOS_CLASS_INVALID; diff --git a/drivers/cxl/core/memdev.c b/drivers/cxl/core/memdev.c index ae3dfcbe8938..2e2e035abdaa 100644 --- a/drivers/cxl/core/memdev.c +++ b/drivers/cxl/core/memdev.c @@ -564,9 +564,11 @@ EXPORT_SYMBOL_NS_GPL(is_cxl_memdev, "CXL"); void set_exclusive_cxl_commands(struct cxl_memdev_state *mds, unsigned long *cmds) { + struct cxl_mailbox *cxl_mbox = &mds->cxlds.cxl_mbox; + down_write(&cxl_memdev_rwsem); - bitmap_or(mds->exclusive_cmds, mds->exclusive_cmds, cmds, - CXL_MEM_COMMAND_ID_MAX); + bitmap_or(cxl_mbox->exclusive_cmds, cxl_mbox->exclusive_cmds, + cmds, CXL_MEM_COMMAND_ID_MAX); up_write(&cxl_memdev_rwsem); } EXPORT_SYMBOL_NS_GPL(set_exclusive_cxl_commands, "CXL"); @@ -579,9 +581,11 @@ EXPORT_SYMBOL_NS_GPL(set_exclusive_cxl_commands, "CXL"); void clear_exclusive_cxl_commands(struct cxl_memdev_state *mds, unsigned long *cmds) { + struct cxl_mailbox *cxl_mbox = &mds->cxlds.cxl_mbox; + down_write(&cxl_memdev_rwsem); - bitmap_andnot(mds->exclusive_cmds, mds->exclusive_cmds, cmds, - CXL_MEM_COMMAND_ID_MAX); + bitmap_andnot(cxl_mbox->exclusive_cmds, cxl_mbox->exclusive_cmds, + cmds, CXL_MEM_COMMAND_ID_MAX); up_write(&cxl_memdev_rwsem); } EXPORT_SYMBOL_NS_GPL(clear_exclusive_cxl_commands, "CXL"); @@ -656,11 +660,14 @@ err: static long __cxl_memdev_ioctl(struct cxl_memdev *cxlmd, unsigned int cmd, unsigned long arg) { + struct cxl_memdev_state *mds = to_cxl_memdev_state(cxlmd->cxlds); + struct cxl_mailbox *cxl_mbox = &mds->cxlds.cxl_mbox; + switch (cmd) { case CXL_MEM_QUERY_COMMANDS: - return cxl_query_cmd(cxlmd, (void __user *)arg); + return cxl_query_cmd(cxl_mbox, (void __user *)arg); case CXL_MEM_SEND_COMMAND: - return cxl_send_cmd(cxlmd, (void __user *)arg); + return cxl_send_cmd(cxl_mbox, (void __user *)arg); default: return -ENOTTY; } @@ -994,10 +1001,11 @@ static void cxl_remove_fw_upload(void *fwl) int devm_cxl_setup_fw_upload(struct device *host, struct cxl_memdev_state *mds) { struct cxl_dev_state *cxlds = &mds->cxlds; + struct cxl_mailbox *cxl_mbox = &cxlds->cxl_mbox; struct device *dev = &cxlds->cxlmd->dev; struct fw_upload *fwl; - if (!test_bit(CXL_MEM_COMMAND_ID_GET_FW_INFO, mds->enabled_cmds)) + if (!test_bit(CXL_MEM_COMMAND_ID_GET_FW_INFO, cxl_mbox->enabled_cmds)) return 0; fwl = firmware_upload_register(THIS_MODULE, dev, dev_name(dev), diff --git a/drivers/cxl/cxlmem.h b/drivers/cxl/cxlmem.h index 2a25d1957ddb..dd2b7060d501 100644 --- a/drivers/cxl/cxlmem.h +++ 
b/drivers/cxl/cxlmem.h @@ -106,42 +106,6 @@ static inline struct cxl_ep *cxl_ep_load(struct cxl_port *port, return xa_load(&port->endpoints, (unsigned long)&cxlmd->dev); } -/** - * struct cxl_mbox_cmd - A command to be submitted to hardware. - * @opcode: (input) The command set and command submitted to hardware. - * @payload_in: (input) Pointer to the input payload. - * @payload_out: (output) Pointer to the output payload. Must be allocated by - * the caller. - * @size_in: (input) Number of bytes to load from @payload_in. - * @size_out: (input) Max number of bytes loaded into @payload_out. - * (output) Number of bytes generated by the device. For fixed size - * outputs commands this is always expected to be deterministic. For - * variable sized output commands, it tells the exact number of bytes - * written. - * @min_out: (input) internal command output payload size validation - * @poll_count: (input) Number of timeouts to attempt. - * @poll_interval_ms: (input) Time between mailbox background command polling - * interval timeouts. - * @return_code: (output) Error code returned from hardware. - * - * This is the primary mechanism used to send commands to the hardware. - * All the fields except @payload_* correspond exactly to the fields described in - * Command Register section of the CXL 2.0 8.2.8.4.5. @payload_in and - * @payload_out are written to, and read from the Command Payload Registers - * defined in CXL 2.0 8.2.8.4.8. - */ -struct cxl_mbox_cmd { - u16 opcode; - void *payload_in; - void *payload_out; - size_t size_in; - size_t size_out; - size_t min_out; - int poll_count; - int poll_interval_ms; - u16 return_code; -}; - /* * Per CXL 3.0 Section 8.2.8.4.5.1 */ @@ -428,6 +392,7 @@ struct cxl_dpa_perf { * @serial: PCIe Device Serial Number * @type: Generic Memory Class device or Vendor Specific Memory device * @cxl_mbox: CXL mailbox context + * @cxlfs: CXL features context */ struct cxl_dev_state { struct device *dev; @@ -443,6 +408,9 @@ struct cxl_dev_state { u64 serial; enum cxl_devtype type; struct cxl_mailbox cxl_mbox; +#ifdef CONFIG_CXL_FEATURES + struct cxl_features_state *cxlfs; +#endif }; static inline struct cxl_dev_state *mbox_to_cxlds(struct cxl_mailbox *cxl_mbox) @@ -461,8 +429,6 @@ static inline struct cxl_dev_state *mbox_to_cxlds(struct cxl_mailbox *cxl_mbox) * @lsa_size: Size of Label Storage Area * (CXL 2.0 8.2.9.5.1.1 Identify Memory Device) * @firmware_version: Firmware version for the memory device. - * @enabled_cmds: Hardware commands found enabled in CEL. 
- * @exclusive_cmds: Commands that are kernel-internal only * @total_bytes: sum of all possible capacities * @volatile_only_bytes: hard volatile capacity * @persistent_only_bytes: hard persistent capacity @@ -485,8 +451,6 @@ struct cxl_memdev_state { struct cxl_dev_state cxlds; size_t lsa_size; char firmware_version[0x10]; - DECLARE_BITMAP(enabled_cmds, CXL_MEM_COMMAND_ID_MAX); - DECLARE_BITMAP(exclusive_cmds, CXL_MEM_COMMAND_ID_MAX); u64 total_bytes; u64 volatile_only_bytes; u64 persistent_only_bytes; @@ -530,6 +494,9 @@ enum cxl_opcode { CXL_MBOX_OP_GET_LOG_CAPS = 0x0402, CXL_MBOX_OP_CLEAR_LOG = 0x0403, CXL_MBOX_OP_GET_SUP_LOG_SUBLIST = 0x0405, + CXL_MBOX_OP_GET_SUPPORTED_FEATURES = 0x0500, + CXL_MBOX_OP_GET_FEATURE = 0x0501, + CXL_MBOX_OP_SET_FEATURE = 0x0502, CXL_MBOX_OP_IDENTIFY = 0x4000, CXL_MBOX_OP_GET_PARTITION_INFO = 0x4100, CXL_MBOX_OP_SET_PARTITION_INFO = 0x4101, diff --git a/drivers/cxl/pci.c b/drivers/cxl/pci.c index a96e54c6259e..993fa60fe453 100644 --- a/drivers/cxl/pci.c +++ b/drivers/cxl/pci.c @@ -997,6 +997,10 @@ static int cxl_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id) if (rc) return rc; + rc = devm_cxl_setup_features(cxlds); + if (rc) + dev_dbg(&pdev->dev, "No CXL Features discovered\n"); + cxlmd = devm_cxl_add_memdev(&pdev->dev, cxlds); if (IS_ERR(cxlmd)) return PTR_ERR(cxlmd); @@ -1009,6 +1013,10 @@ static int cxl_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id) if (rc) return rc; + rc = devm_cxl_setup_fwctl(cxlmd); + if (rc) + dev_dbg(&pdev->dev, "No CXL FWCTL setup\n"); + pmu_count = cxl_count_regblock(pdev, CXL_REGLOC_RBI_PMU); if (pmu_count < 0) return pmu_count; diff --git a/drivers/fwctl/Kconfig b/drivers/fwctl/Kconfig new file mode 100644 index 000000000000..b5583b12a011 --- /dev/null +++ b/drivers/fwctl/Kconfig @@ -0,0 +1,33 @@ +# SPDX-License-Identifier: GPL-2.0-only +menuconfig FWCTL + tristate "fwctl device firmware access framework" + help + fwctl provides a userspace API for restricted access to communicate + with on-device firmware. The communication channel is intended to + support a wide range of lockdown-compatible device behaviors including + manipulating device FLASH, debugging, and other activities that don't + fit neatly into an existing subsystem. + +if FWCTL +config FWCTL_MLX5 + tristate "mlx5 ConnectX control fwctl driver" + depends on MLX5_CORE + help + MLX5 provides an interface for the user process to access the debug and + configuration registers of the ConnectX hardware family + (NICs, PCI switches and SmartNIC SoCs). + This will allow configuration and debug tools to work out of the box on + mainstream kernels. + + If you don't know what to do here, say N. + +config FWCTL_PDS + tristate "AMD/Pensando pds fwctl driver" + depends on PDS_CORE + help + The pds_fwctl driver provides an fwctl interface for a user process + to access the debug and configuration information of the AMD/Pensando + DSC hardware family. + + If you don't know what to do here, say N.
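+ +# A minimal sketch of a config fragment that enables the framework and both +# drivers (assuming the MLX5_CORE and PDS_CORE dependencies are already +# enabled elsewhere in the config): +# +# CONFIG_FWCTL=m +# CONFIG_FWCTL_MLX5=m +# CONFIG_FWCTL_PDS=m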
+endif diff --git a/drivers/fwctl/Makefile b/drivers/fwctl/Makefile new file mode 100644 index 000000000000..c093b5f661d6 --- /dev/null +++ b/drivers/fwctl/Makefile @@ -0,0 +1,6 @@ +# SPDX-License-Identifier: GPL-2.0 +obj-$(CONFIG_FWCTL) += fwctl.o +obj-$(CONFIG_FWCTL_MLX5) += mlx5/ +obj-$(CONFIG_FWCTL_PDS) += pds/ + +fwctl-y += main.o diff --git a/drivers/fwctl/main.c b/drivers/fwctl/main.c new file mode 100644 index 000000000000..cb1ac9c40239 --- /dev/null +++ b/drivers/fwctl/main.c @@ -0,0 +1,421 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Copyright (c) 2024-2025, NVIDIA CORPORATION & AFFILIATES + */ +#define pr_fmt(fmt) "fwctl: " fmt +#include <linux/fwctl.h> + +#include <linux/container_of.h> +#include <linux/fs.h> +#include <linux/module.h> +#include <linux/sizes.h> +#include <linux/slab.h> + +#include <uapi/fwctl/fwctl.h> + +enum { + FWCTL_MAX_DEVICES = 4096, + MAX_RPC_LEN = SZ_2M, }; +static_assert(FWCTL_MAX_DEVICES < (1U << MINORBITS)); + +static dev_t fwctl_dev; +static DEFINE_IDA(fwctl_ida); +static unsigned long fwctl_tainted; + +struct fwctl_ucmd { + struct fwctl_uctx *uctx; + void __user *ubuffer; + void *cmd; + u32 user_size; +}; + +static int ucmd_respond(struct fwctl_ucmd *ucmd, size_t cmd_len) +{ + if (copy_to_user(ucmd->ubuffer, ucmd->cmd, + min_t(size_t, ucmd->user_size, cmd_len))) + return -EFAULT; + return 0; +} + +static int copy_to_user_zero_pad(void __user *to, const void *from, + size_t from_len, size_t user_len) +{ + size_t copy_len; + + copy_len = min(from_len, user_len); + if (copy_to_user(to, from, copy_len)) + return -EFAULT; + if (copy_len < user_len) { + if (clear_user(to + copy_len, user_len - copy_len)) + return -EFAULT; + } + return 0; +} + +static int fwctl_cmd_info(struct fwctl_ucmd *ucmd) +{ + struct fwctl_device *fwctl = ucmd->uctx->fwctl; + struct fwctl_info *cmd = ucmd->cmd; + size_t driver_info_len = 0; + + if (cmd->flags) + return -EOPNOTSUPP; + + if (!fwctl->ops->info && cmd->device_data_len) { + if (clear_user(u64_to_user_ptr(cmd->out_device_data), + cmd->device_data_len)) + return -EFAULT; + } else if (cmd->device_data_len) { + void *driver_info __free(kfree) = + fwctl->ops->info(ucmd->uctx, &driver_info_len); + if (IS_ERR(driver_info)) + return PTR_ERR(driver_info); + + if (copy_to_user_zero_pad(u64_to_user_ptr(cmd->out_device_data), + driver_info, driver_info_len, + cmd->device_data_len)) + return -EFAULT; + } + + cmd->out_device_type = fwctl->ops->device_type; + cmd->device_data_len = driver_info_len; + return ucmd_respond(ucmd, sizeof(*cmd)); +} + +static int fwctl_cmd_rpc(struct fwctl_ucmd *ucmd) +{ + struct fwctl_device *fwctl = ucmd->uctx->fwctl; + struct fwctl_rpc *cmd = ucmd->cmd; + size_t out_len; + + if (cmd->in_len > MAX_RPC_LEN || cmd->out_len > MAX_RPC_LEN) + return -EMSGSIZE; + + switch (cmd->scope) { + case FWCTL_RPC_CONFIGURATION: + case FWCTL_RPC_DEBUG_READ_ONLY: + break; + + case FWCTL_RPC_DEBUG_WRITE_FULL: + if (!capable(CAP_SYS_RAWIO)) + return -EPERM; + fallthrough; + case FWCTL_RPC_DEBUG_WRITE: + if (!test_and_set_bit(0, &fwctl_tainted)) { + dev_warn( + &fwctl->dev, + "%s(%d): has requested full access to the physical device", + current->comm, task_pid_nr(current)); + add_taint(TAINT_FWCTL, LOCKDEP_STILL_OK); + } + break; + default: + return -EOPNOTSUPP; + } + + void *inbuf __free(kvfree) = kvzalloc(cmd->in_len, GFP_KERNEL_ACCOUNT); + if (!inbuf) + return -ENOMEM; + if (copy_from_user(inbuf, u64_to_user_ptr(cmd->in), cmd->in_len)) + return -EFAULT; + + out_len = cmd->out_len; + void *outbuf
__free(kvfree) = fwctl->ops->fw_rpc( + ucmd->uctx, cmd->scope, inbuf, cmd->in_len, &out_len); + if (IS_ERR(outbuf)) + return PTR_ERR(outbuf); + if (outbuf == inbuf) { + /* The driver can re-use inbuf as outbuf */ + inbuf = NULL; + } + + if (copy_to_user(u64_to_user_ptr(cmd->out), outbuf, + min(cmd->out_len, out_len))) + return -EFAULT; + + cmd->out_len = out_len; + return ucmd_respond(ucmd, sizeof(*cmd)); +} + +/* On stack memory for the ioctl structs */ +union fwctl_ucmd_buffer { + struct fwctl_info info; + struct fwctl_rpc rpc; +}; + +struct fwctl_ioctl_op { + unsigned int size; + unsigned int min_size; + unsigned int ioctl_num; + int (*execute)(struct fwctl_ucmd *ucmd); +}; + +#define IOCTL_OP(_ioctl, _fn, _struct, _last) \ + [_IOC_NR(_ioctl) - FWCTL_CMD_BASE] = { \ + .size = sizeof(_struct) + \ + BUILD_BUG_ON_ZERO(sizeof(union fwctl_ucmd_buffer) < \ + sizeof(_struct)), \ + .min_size = offsetofend(_struct, _last), \ + .ioctl_num = _ioctl, \ + .execute = _fn, \ + } +static const struct fwctl_ioctl_op fwctl_ioctl_ops[] = { + IOCTL_OP(FWCTL_INFO, fwctl_cmd_info, struct fwctl_info, out_device_data), + IOCTL_OP(FWCTL_RPC, fwctl_cmd_rpc, struct fwctl_rpc, out), +}; + +static long fwctl_fops_ioctl(struct file *filp, unsigned int cmd, + unsigned long arg) +{ + struct fwctl_uctx *uctx = filp->private_data; + const struct fwctl_ioctl_op *op; + struct fwctl_ucmd ucmd = {}; + union fwctl_ucmd_buffer buf; + unsigned int nr; + int ret; + + nr = _IOC_NR(cmd); + if ((nr - FWCTL_CMD_BASE) >= ARRAY_SIZE(fwctl_ioctl_ops)) + return -ENOIOCTLCMD; + + op = &fwctl_ioctl_ops[nr - FWCTL_CMD_BASE]; + if (op->ioctl_num != cmd) + return -ENOIOCTLCMD; + + ucmd.uctx = uctx; + ucmd.cmd = &buf; + ucmd.ubuffer = (void __user *)arg; + ret = get_user(ucmd.user_size, (u32 __user *)ucmd.ubuffer); + if (ret) + return ret; + + if (ucmd.user_size < op->min_size) + return -EINVAL; + + ret = copy_struct_from_user(ucmd.cmd, op->size, ucmd.ubuffer, + ucmd.user_size); + if (ret) + return ret; + + guard(rwsem_read)(&uctx->fwctl->registration_lock); + if (!uctx->fwctl->ops) + return -ENODEV; + return op->execute(&ucmd); +} + +static int fwctl_fops_open(struct inode *inode, struct file *filp) +{ + struct fwctl_device *fwctl = + container_of(inode->i_cdev, struct fwctl_device, cdev); + int ret; + + guard(rwsem_read)(&fwctl->registration_lock); + if (!fwctl->ops) + return -ENODEV; + + struct fwctl_uctx *uctx __free(kfree) = + kzalloc(fwctl->ops->uctx_size, GFP_KERNEL_ACCOUNT); + if (!uctx) + return -ENOMEM; + + uctx->fwctl = fwctl; + ret = fwctl->ops->open_uctx(uctx); + if (ret) + return ret; + + scoped_guard(mutex, &fwctl->uctx_list_lock) { + list_add_tail(&uctx->uctx_list_entry, &fwctl->uctx_list); + } + + get_device(&fwctl->dev); + filp->private_data = no_free_ptr(uctx); + return 0; +} + +static void fwctl_destroy_uctx(struct fwctl_uctx *uctx) +{ + lockdep_assert_held(&uctx->fwctl->uctx_list_lock); + list_del(&uctx->uctx_list_entry); + uctx->fwctl->ops->close_uctx(uctx); +} + +static int fwctl_fops_release(struct inode *inode, struct file *filp) +{ + struct fwctl_uctx *uctx = filp->private_data; + struct fwctl_device *fwctl = uctx->fwctl; + + scoped_guard(rwsem_read, &fwctl->registration_lock) { + /* + * NULL ops means fwctl_unregister() has already removed the + * driver and destroyed the uctx. 
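+ * In that case only the kfree() of the uctx and the fwctl_put() + * below are still required.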
+ */ + if (fwctl->ops) { + guard(mutex)(&fwctl->uctx_list_lock); + fwctl_destroy_uctx(uctx); + } + } + + kfree(uctx); + fwctl_put(fwctl); + return 0; +} + +static const struct file_operations fwctl_fops = { + .owner = THIS_MODULE, + .open = fwctl_fops_open, + .release = fwctl_fops_release, + .unlocked_ioctl = fwctl_fops_ioctl, +}; + +static void fwctl_device_release(struct device *device) +{ + struct fwctl_device *fwctl = + container_of(device, struct fwctl_device, dev); + + ida_free(&fwctl_ida, fwctl->dev.devt - fwctl_dev); + mutex_destroy(&fwctl->uctx_list_lock); + kfree(fwctl); +} + +static char *fwctl_devnode(const struct device *dev, umode_t *mode) +{ + return kasprintf(GFP_KERNEL, "fwctl/%s", dev_name(dev)); +} + +static struct class fwctl_class = { + .name = "fwctl", + .dev_release = fwctl_device_release, + .devnode = fwctl_devnode, +}; + +static struct fwctl_device * +_alloc_device(struct device *parent, const struct fwctl_ops *ops, size_t size) +{ + struct fwctl_device *fwctl __free(kfree) = kzalloc(size, GFP_KERNEL); + int devnum; + + if (!fwctl) + return NULL; + + devnum = ida_alloc_max(&fwctl_ida, FWCTL_MAX_DEVICES - 1, GFP_KERNEL); + if (devnum < 0) + return NULL; + + fwctl->dev.devt = fwctl_dev + devnum; + fwctl->dev.class = &fwctl_class; + fwctl->dev.parent = parent; + + init_rwsem(&fwctl->registration_lock); + mutex_init(&fwctl->uctx_list_lock); + INIT_LIST_HEAD(&fwctl->uctx_list); + + device_initialize(&fwctl->dev); + return_ptr(fwctl); +} + +/* Drivers use the fwctl_alloc_device() wrapper */ +struct fwctl_device *_fwctl_alloc_device(struct device *parent, + const struct fwctl_ops *ops, + size_t size) +{ + struct fwctl_device *fwctl __free(fwctl) = + _alloc_device(parent, ops, size); + + if (!fwctl) + return NULL; + + cdev_init(&fwctl->cdev, &fwctl_fops); + /* + * The driver module is protected by fwctl_register/unregister(); + * unregister won't complete until we are done with the driver's module. + */ + fwctl->cdev.owner = THIS_MODULE; + + if (dev_set_name(&fwctl->dev, "fwctl%d", fwctl->dev.devt - fwctl_dev)) + return NULL; + + fwctl->ops = ops; + return_ptr(fwctl); +} +EXPORT_SYMBOL_NS_GPL(_fwctl_alloc_device, "FWCTL"); + +/** + * fwctl_register - Register a new device to the subsystem + * @fwctl: Previously allocated fwctl_device + * + * On return the device is visible through sysfs and /dev, and driver ops may + * be called. + */ +int fwctl_register(struct fwctl_device *fwctl) +{ + return cdev_device_add(&fwctl->cdev, &fwctl->dev); +} +EXPORT_SYMBOL_NS_GPL(fwctl_register, "FWCTL"); + +/** + * fwctl_unregister - Unregister a device from the subsystem + * @fwctl: Previously allocated and registered fwctl_device + * + * Undoes fwctl_register(). On return no driver ops will be called. The + * caller must still call fwctl_put() to free the fwctl. + * + * Unregister will return even if userspace still has file descriptors open. + * This will call ops->close_uctx() on any open FDs and after return no driver + * op will be called. The FDs remain open but all fops will return -ENODEV. + * + * The design of fwctl allows this sort of disassociation of the driver from the + * subsystem primarily by keeping memory allocations owned by the core subsystem. + * The fwctl_device and fwctl_uctx can both be freed without requiring a driver + * callback. This allows the module to remain unlocked while FDs are open.
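+ * + * A minimal removal sketch, mirroring the mlx5 and pds drivers below (mydev + * stands in for the driver's private wrapper struct): + * + *	fwctl_unregister(&mydev->fwctl); + *	fwctl_put(&mydev->fwctl);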
+ */ +void fwctl_unregister(struct fwctl_device *fwctl) +{ + struct fwctl_uctx *uctx; + + cdev_device_del(&fwctl->cdev, &fwctl->dev); + + /* Disable and free the driver's resources for any still open FDs. */ + guard(rwsem_write)(&fwctl->registration_lock); + guard(mutex)(&fwctl->uctx_list_lock); + while ((uctx = list_first_entry_or_null(&fwctl->uctx_list, + struct fwctl_uctx, + uctx_list_entry))) + fwctl_destroy_uctx(uctx); + + /* + * The driver module may unload after this returns; the op pointer will + * not be valid. + */ + fwctl->ops = NULL; +} +EXPORT_SYMBOL_NS_GPL(fwctl_unregister, "FWCTL"); + +static int __init fwctl_init(void) +{ + int ret; + + ret = alloc_chrdev_region(&fwctl_dev, 0, FWCTL_MAX_DEVICES, "fwctl"); + if (ret) + return ret; + + ret = class_register(&fwctl_class); + if (ret) + goto err_chrdev; + return 0; + +err_chrdev: + unregister_chrdev_region(fwctl_dev, FWCTL_MAX_DEVICES); + return ret; +} + +static void __exit fwctl_exit(void) +{ + class_unregister(&fwctl_class); + unregister_chrdev_region(fwctl_dev, FWCTL_MAX_DEVICES); +} + +module_init(fwctl_init); +module_exit(fwctl_exit); +MODULE_DESCRIPTION("fwctl device firmware access framework"); +MODULE_LICENSE("GPL"); diff --git a/drivers/fwctl/mlx5/Makefile b/drivers/fwctl/mlx5/Makefile new file mode 100644 index 000000000000..139a23e3c7c5 --- /dev/null +++ b/drivers/fwctl/mlx5/Makefile @@ -0,0 +1,4 @@ +# SPDX-License-Identifier: GPL-2.0 +obj-$(CONFIG_FWCTL_MLX5) += mlx5_fwctl.o + +mlx5_fwctl-y += main.o diff --git a/drivers/fwctl/mlx5/main.c b/drivers/fwctl/mlx5/main.c new file mode 100644 index 000000000000..f93aa0cecdb9 --- /dev/null +++ b/drivers/fwctl/mlx5/main.c @@ -0,0 +1,411 @@ +// SPDX-License-Identifier: BSD-3-Clause OR GPL-2.0 +/* + * Copyright (c) 2024-2025, NVIDIA CORPORATION & AFFILIATES + */ +#include <linux/fwctl.h> +#include <linux/auxiliary_bus.h> +#include <linux/mlx5/device.h> +#include <linux/mlx5/driver.h> +#include <uapi/fwctl/mlx5.h> + +#define mlx5ctl_err(mcdev, format, ...) \ + dev_err(&mcdev->fwctl.dev, format, ##__VA_ARGS__) + +#define mlx5ctl_dbg(mcdev, format, ...)
\ + dev_dbg(&mcdev->fwctl.dev, "PID %u: " format, current->pid, \ + ##__VA_ARGS__) + +struct mlx5ctl_uctx { + struct fwctl_uctx uctx; + u32 uctx_caps; + u32 uctx_uid; +}; + +struct mlx5ctl_dev { + struct fwctl_device fwctl; + struct mlx5_core_dev *mdev; +}; +DEFINE_FREE(mlx5ctl, struct mlx5ctl_dev *, if (_T) fwctl_put(&_T->fwctl)); + +struct mlx5_ifc_mbox_in_hdr_bits { + u8 opcode[0x10]; + u8 uid[0x10]; + + u8 reserved_at_20[0x10]; + u8 op_mod[0x10]; + + u8 reserved_at_40[0x40]; +}; + +struct mlx5_ifc_mbox_out_hdr_bits { + u8 status[0x8]; + u8 reserved_at_8[0x18]; + + u8 syndrome[0x20]; + + u8 reserved_at_40[0x40]; +}; + +enum { + MLX5_UCTX_OBJECT_CAP_TOOLS_RESOURCES = 0x4, +}; + +enum { + MLX5_CMD_OP_QUERY_DRIVER_VERSION = 0x10c, + MLX5_CMD_OP_QUERY_OTHER_HCA_CAP = 0x10e, + MLX5_CMD_OP_QUERY_RDB = 0x512, + MLX5_CMD_OP_QUERY_PSV = 0x602, + MLX5_CMD_OP_QUERY_DC_CNAK_TRACE = 0x716, + MLX5_CMD_OP_QUERY_NVMF_BACKEND_CONTROLLER = 0x722, + MLX5_CMD_OP_QUERY_NVMF_NAMESPACE_CONTEXT = 0x728, + MLX5_CMD_OP_QUERY_BURST_SIZE = 0x813, + MLX5_CMD_OP_QUERY_DIAGNOSTIC_PARAMS = 0x819, + MLX5_CMD_OP_SET_DIAGNOSTIC_PARAMS = 0x820, + MLX5_CMD_OP_QUERY_DIAGNOSTIC_COUNTERS = 0x821, + MLX5_CMD_OP_QUERY_DELAY_DROP_PARAMS = 0x911, + MLX5_CMD_OP_QUERY_AFU = 0x971, + MLX5_CMD_OP_QUERY_CAPI_PEC = 0x981, + MLX5_CMD_OP_QUERY_UCTX = 0xa05, + MLX5_CMD_OP_QUERY_UMEM = 0xa09, + MLX5_CMD_OP_QUERY_NVMF_CC_RESPONSE = 0xb02, + MLX5_CMD_OP_QUERY_EMULATED_FUNCTIONS_INFO = 0xb03, + MLX5_CMD_OP_QUERY_REGEXP_PARAMS = 0xb05, + MLX5_CMD_OP_QUERY_REGEXP_REGISTER = 0xb07, + MLX5_CMD_OP_USER_QUERY_XRQ_DC_PARAMS_ENTRY = 0xb08, + MLX5_CMD_OP_USER_QUERY_XRQ_ERROR_PARAMS = 0xb0a, + MLX5_CMD_OP_ACCESS_REGISTER_USER = 0xb0c, + MLX5_CMD_OP_QUERY_EMULATION_DEVICE_EQ_MSIX_MAPPING = 0xb0f, + MLX5_CMD_OP_QUERY_MATCH_SAMPLE_INFO = 0xb13, + MLX5_CMD_OP_QUERY_CRYPTO_STATE = 0xb14, + MLX5_CMD_OP_QUERY_VUID = 0xb22, + MLX5_CMD_OP_QUERY_DPA_PARTITION = 0xb28, + MLX5_CMD_OP_QUERY_DPA_PARTITIONS = 0xb2a, + MLX5_CMD_OP_POSTPONE_CONNECTED_QP_TIMEOUT = 0xb2e, + MLX5_CMD_OP_QUERY_EMULATED_RESOURCES_INFO = 0xb2f, + MLX5_CMD_OP_QUERY_RSV_RESOURCES = 0x8000, + MLX5_CMD_OP_QUERY_MTT = 0x8001, + MLX5_CMD_OP_QUERY_SCHED_QUEUE = 0x8006, +}; + +static int mlx5ctl_alloc_uid(struct mlx5ctl_dev *mcdev, u32 cap) +{ + u32 out[MLX5_ST_SZ_DW(create_uctx_out)] = {}; + u32 in[MLX5_ST_SZ_DW(create_uctx_in)] = {}; + void *uctx; + int ret; + u16 uid; + + uctx = MLX5_ADDR_OF(create_uctx_in, in, uctx); + + mlx5ctl_dbg(mcdev, "%s: caps 0x%x\n", __func__, cap); + MLX5_SET(create_uctx_in, in, opcode, MLX5_CMD_OP_CREATE_UCTX); + MLX5_SET(uctx, uctx, cap, cap); + + ret = mlx5_cmd_exec(mcdev->mdev, in, sizeof(in), out, sizeof(out)); + if (ret) + return ret; + + uid = MLX5_GET(create_uctx_out, out, uid); + mlx5ctl_dbg(mcdev, "allocated uid %u with caps 0x%x\n", uid, cap); + return uid; +} + +static void mlx5ctl_release_uid(struct mlx5ctl_dev *mcdev, u16 uid) +{ + u32 in[MLX5_ST_SZ_DW(destroy_uctx_in)] = {}; + struct mlx5_core_dev *mdev = mcdev->mdev; + int ret; + + MLX5_SET(destroy_uctx_in, in, opcode, MLX5_CMD_OP_DESTROY_UCTX); + MLX5_SET(destroy_uctx_in, in, uid, uid); + + ret = mlx5_cmd_exec_in(mdev, destroy_uctx, in); + mlx5ctl_dbg(mcdev, "released uid %u %pe\n", uid, ERR_PTR(ret)); +} + +static int mlx5ctl_open_uctx(struct fwctl_uctx *uctx) +{ + struct mlx5ctl_uctx *mfd = + container_of(uctx, struct mlx5ctl_uctx, uctx); + struct mlx5ctl_dev *mcdev = + container_of(uctx->fwctl, struct mlx5ctl_dev, fwctl); + int uid; + + /* + * New FW supports the TOOLS_RESOURCES uid security label + 
* which allows commands to manipulate the global device state. + * Otherwise only the basic existing RDMA devx privileges are allowed. + */ + if (MLX5_CAP_GEN(mcdev->mdev, uctx_cap) & + MLX5_UCTX_OBJECT_CAP_TOOLS_RESOURCES) + mfd->uctx_caps |= MLX5_UCTX_OBJECT_CAP_TOOLS_RESOURCES; + + uid = mlx5ctl_alloc_uid(mcdev, mfd->uctx_caps); + if (uid < 0) + return uid; + + mfd->uctx_uid = uid; + return 0; +} + +static void mlx5ctl_close_uctx(struct fwctl_uctx *uctx) +{ + struct mlx5ctl_dev *mcdev = + container_of(uctx->fwctl, struct mlx5ctl_dev, fwctl); + struct mlx5ctl_uctx *mfd = + container_of(uctx, struct mlx5ctl_uctx, uctx); + + mlx5ctl_release_uid(mcdev, mfd->uctx_uid); +} + +static void *mlx5ctl_info(struct fwctl_uctx *uctx, size_t *length) +{ + struct mlx5ctl_uctx *mfd = + container_of(uctx, struct mlx5ctl_uctx, uctx); + struct fwctl_info_mlx5 *info; + + info = kzalloc(sizeof(*info), GFP_KERNEL); + if (!info) + return ERR_PTR(-ENOMEM); + + info->uid = mfd->uctx_uid; + info->uctx_caps = mfd->uctx_caps; + *length = sizeof(*info); + return info; +} + +static bool mlx5ctl_validate_rpc(const void *in, enum fwctl_rpc_scope scope) +{ + u16 opcode = MLX5_GET(mbox_in_hdr, in, opcode); + u16 op_mod = MLX5_GET(mbox_in_hdr, in, op_mod); + + /* + * Currently the driver can't keep track of commands that allocate + * objects in the FW; these commands are safe from a security + * perspective but nothing will free the memory when the FD is closed. + * For now permit only query commands and set commands that don't alter + * objects. Also, the caps for the scope have not been defined yet, + * so filter commands manually for now. + */ + switch (opcode) { + case MLX5_CMD_OP_POSTPONE_CONNECTED_QP_TIMEOUT: + case MLX5_CMD_OP_QUERY_ADAPTER: + case MLX5_CMD_OP_QUERY_ESW_FUNCTIONS: + case MLX5_CMD_OP_QUERY_HCA_CAP: + case MLX5_CMD_OP_QUERY_HCA_VPORT_CONTEXT: + case MLX5_CMD_OP_QUERY_OTHER_HCA_CAP: + case MLX5_CMD_OP_QUERY_ROCE_ADDRESS: + case MLX5_CMD_OPCODE_QUERY_VUID: + /* + * FW limits SET_HCA_CAP on the tools UID to only the other function + * mode, which is used for function pre-configuration + */ + case MLX5_CMD_OP_SET_HCA_CAP: + return true; /* scope >= FWCTL_RPC_CONFIGURATION; */ + + case MLX5_CMD_OP_FPGA_QUERY_QP_COUNTERS: + case MLX5_CMD_OP_FPGA_QUERY_QP: + case MLX5_CMD_OP_NOP: + case MLX5_CMD_OP_QUERY_AFU: + case MLX5_CMD_OP_QUERY_BURST_SIZE: + case MLX5_CMD_OP_QUERY_CAPI_PEC: + case MLX5_CMD_OP_QUERY_CONG_PARAMS: + case MLX5_CMD_OP_QUERY_CONG_STATISTICS: + case MLX5_CMD_OP_QUERY_CONG_STATUS: + case MLX5_CMD_OP_QUERY_CQ: + case MLX5_CMD_OP_QUERY_CRYPTO_STATE: + case MLX5_CMD_OP_QUERY_DC_CNAK_TRACE: + case MLX5_CMD_OP_QUERY_DCT: + case MLX5_CMD_OP_QUERY_DELAY_DROP_PARAMS: + case MLX5_CMD_OP_QUERY_DIAGNOSTIC_COUNTERS: + case MLX5_CMD_OP_QUERY_DIAGNOSTIC_PARAMS: + case MLX5_CMD_OP_QUERY_DPA_PARTITION: + case MLX5_CMD_OP_QUERY_DPA_PARTITIONS: + case MLX5_CMD_OP_QUERY_DRIVER_VERSION: + case MLX5_CMD_OP_QUERY_EMULATED_FUNCTIONS_INFO: + case MLX5_CMD_OP_QUERY_EMULATED_RESOURCES_INFO: + case MLX5_CMD_OP_QUERY_EMULATION_DEVICE_EQ_MSIX_MAPPING: + case MLX5_CMD_OP_QUERY_EQ: + case MLX5_CMD_OP_QUERY_ESW_VPORT_CONTEXT: + case MLX5_CMD_OP_QUERY_FLOW_COUNTER: + case MLX5_CMD_OP_QUERY_FLOW_GROUP: + case MLX5_CMD_OP_QUERY_FLOW_TABLE_ENTRY: + case MLX5_CMD_OP_QUERY_FLOW_TABLE: + case MLX5_CMD_OP_QUERY_GENERAL_OBJECT: + case MLX5_CMD_OP_QUERY_HCA_VPORT_GID: + case MLX5_CMD_OP_QUERY_HCA_VPORT_PKEY: + case MLX5_CMD_OP_QUERY_ISSI: + case MLX5_CMD_OP_QUERY_L2_TABLE_ENTRY: + case MLX5_CMD_OP_QUERY_LAG: + case MLX5_CMD_OP_QUERY_MAD_DEMUX:
+ case MLX5_CMD_OP_QUERY_MATCH_SAMPLE_INFO: + case MLX5_CMD_OP_QUERY_MKEY: + case MLX5_CMD_OP_QUERY_MODIFY_HEADER_CONTEXT: + case MLX5_CMD_OP_QUERY_MTT: + case MLX5_CMD_OP_QUERY_NIC_VPORT_CONTEXT: + case MLX5_CMD_OP_QUERY_NVMF_BACKEND_CONTROLLER: + case MLX5_CMD_OP_QUERY_NVMF_CC_RESPONSE: + case MLX5_CMD_OP_QUERY_NVMF_NAMESPACE_CONTEXT: + case MLX5_CMD_OP_QUERY_PACKET_REFORMAT_CONTEXT: + case MLX5_CMD_OP_QUERY_PAGES: + case MLX5_CMD_OP_QUERY_PSV: + case MLX5_CMD_OP_QUERY_Q_COUNTER: + case MLX5_CMD_OP_QUERY_QP: + case MLX5_CMD_OP_QUERY_RATE_LIMIT: + case MLX5_CMD_OP_QUERY_RDB: + case MLX5_CMD_OP_QUERY_REGEXP_PARAMS: + case MLX5_CMD_OP_QUERY_REGEXP_REGISTER: + case MLX5_CMD_OP_QUERY_RMP: + case MLX5_CMD_OP_QUERY_RQ: + case MLX5_CMD_OP_QUERY_RQT: + case MLX5_CMD_OP_QUERY_RSV_RESOURCES: + case MLX5_CMD_OP_QUERY_SCHED_QUEUE: + case MLX5_CMD_OP_QUERY_SCHEDULING_ELEMENT: + case MLX5_CMD_OP_QUERY_SF_PARTITION: + case MLX5_CMD_OP_QUERY_SPECIAL_CONTEXTS: + case MLX5_CMD_OP_QUERY_SQ: + case MLX5_CMD_OP_QUERY_SRQ: + case MLX5_CMD_OP_QUERY_TIR: + case MLX5_CMD_OP_QUERY_TIS: + case MLX5_CMD_OP_QUERY_UCTX: + case MLX5_CMD_OP_QUERY_UMEM: + case MLX5_CMD_OP_QUERY_VHCA_MIGRATION_STATE: + case MLX5_CMD_OP_QUERY_VHCA_STATE: + case MLX5_CMD_OP_QUERY_VNIC_ENV: + case MLX5_CMD_OP_QUERY_VPORT_COUNTER: + case MLX5_CMD_OP_QUERY_VPORT_STATE: + case MLX5_CMD_OP_QUERY_WOL_ROL: + case MLX5_CMD_OP_QUERY_XRC_SRQ: + case MLX5_CMD_OP_QUERY_XRQ_DC_PARAMS_ENTRY: + case MLX5_CMD_OP_QUERY_XRQ_ERROR_PARAMS: + case MLX5_CMD_OP_QUERY_XRQ: + case MLX5_CMD_OP_USER_QUERY_XRQ_DC_PARAMS_ENTRY: + case MLX5_CMD_OP_USER_QUERY_XRQ_ERROR_PARAMS: + return scope >= FWCTL_RPC_DEBUG_READ_ONLY; + + case MLX5_CMD_OP_SET_DIAGNOSTIC_PARAMS: + return scope >= FWCTL_RPC_DEBUG_WRITE; + + case MLX5_CMD_OP_ACCESS_REG: + case MLX5_CMD_OP_ACCESS_REGISTER_USER: + if (op_mod == 0) /* write */ + return true; /* scope >= FWCTL_RPC_CONFIGURATION; */ + return scope >= FWCTL_RPC_DEBUG_READ_ONLY; + default: + return false; + } +} + +static void *mlx5ctl_fw_rpc(struct fwctl_uctx *uctx, enum fwctl_rpc_scope scope, + void *rpc_in, size_t in_len, size_t *out_len) +{ + struct mlx5ctl_dev *mcdev = + container_of(uctx->fwctl, struct mlx5ctl_dev, fwctl); + struct mlx5ctl_uctx *mfd = + container_of(uctx, struct mlx5ctl_uctx, uctx); + void *rpc_out; + int ret; + + if (in_len < MLX5_ST_SZ_BYTES(mbox_in_hdr) || + *out_len < MLX5_ST_SZ_BYTES(mbox_out_hdr)) + return ERR_PTR(-EMSGSIZE); + + mlx5ctl_dbg(mcdev, "[UID %d] cmdif: opcode 0x%x inlen %zu outlen %zu\n", + mfd->uctx_uid, MLX5_GET(mbox_in_hdr, rpc_in, opcode), + in_len, *out_len); + + if (!mlx5ctl_validate_rpc(rpc_in, scope)) + return ERR_PTR(-EBADMSG); + + /* + * mlx5_cmd_do() copies the input message to its own buffer before + * executing it, so we can reuse the allocation for the output. + */ + if (*out_len <= in_len) { + rpc_out = rpc_in; + } else { + rpc_out = kvzalloc(*out_len, GFP_KERNEL); + if (!rpc_out) + return ERR_PTR(-ENOMEM); + } + + /* Enforce the user context for the command */ + MLX5_SET(mbox_in_hdr, rpc_in, uid, mfd->uctx_uid); + ret = mlx5_cmd_do(mcdev->mdev, rpc_in, in_len, rpc_out, *out_len); + + mlx5ctl_dbg(mcdev, + "[UID %d] cmdif: opcode 0x%x status 0x%x retval %pe\n", + mfd->uctx_uid, MLX5_GET(mbox_in_hdr, rpc_in, opcode), + MLX5_GET(mbox_out_hdr, rpc_out, status), ERR_PTR(ret)); + + /* + * -EREMOTEIO means execution succeeded and the out is valid, + * but an error code was returned inside out. Everything else + * means the RPC did not make it to the device. 
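+ * A non-ERR return therefore always carries a complete mbox_out_hdr whose + * status and syndrome fields the caller can inspect.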
+ */ + if (ret && ret != -EREMOTEIO) { + if (rpc_out != rpc_in) + kvfree(rpc_out); + return ERR_PTR(ret); + } + return rpc_out; +} + +static const struct fwctl_ops mlx5ctl_ops = { + .device_type = FWCTL_DEVICE_TYPE_MLX5, + .uctx_size = sizeof(struct mlx5ctl_uctx), + .open_uctx = mlx5ctl_open_uctx, + .close_uctx = mlx5ctl_close_uctx, + .info = mlx5ctl_info, + .fw_rpc = mlx5ctl_fw_rpc, +}; + +static int mlx5ctl_probe(struct auxiliary_device *adev, + const struct auxiliary_device_id *id) + +{ + struct mlx5_adev *madev = container_of(adev, struct mlx5_adev, adev); + struct mlx5_core_dev *mdev = madev->mdev; + struct mlx5ctl_dev *mcdev __free(mlx5ctl) = fwctl_alloc_device( + &mdev->pdev->dev, &mlx5ctl_ops, struct mlx5ctl_dev, fwctl); + int ret; + + if (!mcdev) + return -ENOMEM; + + mcdev->mdev = mdev; + + ret = fwctl_register(&mcdev->fwctl); + if (ret) + return ret; + auxiliary_set_drvdata(adev, no_free_ptr(mcdev)); + return 0; +} + +static void mlx5ctl_remove(struct auxiliary_device *adev) +{ + struct mlx5ctl_dev *mcdev = auxiliary_get_drvdata(adev); + + fwctl_unregister(&mcdev->fwctl); + fwctl_put(&mcdev->fwctl); +} + +static const struct auxiliary_device_id mlx5ctl_id_table[] = { + {.name = MLX5_ADEV_NAME ".fwctl",}, + {} +}; +MODULE_DEVICE_TABLE(auxiliary, mlx5ctl_id_table); + +static struct auxiliary_driver mlx5ctl_driver = { + .name = "mlx5_fwctl", + .probe = mlx5ctl_probe, + .remove = mlx5ctl_remove, + .id_table = mlx5ctl_id_table, +}; + +module_auxiliary_driver(mlx5ctl_driver); + +MODULE_IMPORT_NS("FWCTL"); +MODULE_DESCRIPTION("mlx5 ConnectX fwctl driver"); +MODULE_AUTHOR("Saeed Mahameed <saeedm@nvidia.com>"); +MODULE_LICENSE("Dual BSD/GPL"); diff --git a/drivers/fwctl/pds/Makefile b/drivers/fwctl/pds/Makefile new file mode 100644 index 000000000000..cc2317c07be1 --- /dev/null +++ b/drivers/fwctl/pds/Makefile @@ -0,0 +1,4 @@ +# SPDX-License-Identifier: GPL-2.0 +obj-$(CONFIG_FWCTL_PDS) += pds_fwctl.o + +pds_fwctl-y += main.o diff --git a/drivers/fwctl/pds/main.c b/drivers/fwctl/pds/main.c new file mode 100644 index 000000000000..284c4165fdd4 --- /dev/null +++ b/drivers/fwctl/pds/main.c @@ -0,0 +1,536 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright(c) Advanced Micro Devices, Inc */ + +#include <linux/module.h> +#include <linux/auxiliary_bus.h> +#include <linux/pci.h> +#include <linux/vmalloc.h> +#include <linux/bitfield.h> + +#include <uapi/fwctl/fwctl.h> +#include <uapi/fwctl/pds.h> +#include <linux/fwctl.h> + +#include <linux/pds/pds_common.h> +#include <linux/pds/pds_core_if.h> +#include <linux/pds/pds_adminq.h> +#include <linux/pds/pds_auxbus.h> + +struct pdsfc_uctx { + struct fwctl_uctx uctx; + u32 uctx_caps; +}; + +struct pdsfc_rpc_endpoint_info { + u32 endpoint; + dma_addr_t operations_pa; + struct pds_fwctl_query_data *operations; + struct mutex lock; /* lock for endpoint info management */ +}; + +struct pdsfc_dev { + struct fwctl_device fwctl; + struct pds_auxiliary_dev *padev; + u32 caps; + struct pds_fwctl_ident ident; + dma_addr_t endpoints_pa; + struct pds_fwctl_query_data *endpoints; + struct pdsfc_rpc_endpoint_info *endpoint_info; +}; + +static int pdsfc_open_uctx(struct fwctl_uctx *uctx) +{ + struct pdsfc_dev *pdsfc = container_of(uctx->fwctl, struct pdsfc_dev, fwctl); + struct pdsfc_uctx *pdsfc_uctx = container_of(uctx, struct pdsfc_uctx, uctx); + + pdsfc_uctx->uctx_caps = pdsfc->caps; + + return 0; +} + +static void pdsfc_close_uctx(struct fwctl_uctx *uctx) +{ +} + +static void *pdsfc_info(struct fwctl_uctx *uctx, size_t *length) +{ + struct pdsfc_uctx *pdsfc_uctx =
container_of(uctx, struct pdsfc_uctx, uctx); + struct fwctl_info_pds *info; + + info = kzalloc(sizeof(*info), GFP_KERNEL); + if (!info) + return ERR_PTR(-ENOMEM); + + info->uctx_caps = pdsfc_uctx->uctx_caps; + *length = sizeof(*info); + + return info; +} + +static int pdsfc_identify(struct pdsfc_dev *pdsfc) +{ + struct device *dev = &pdsfc->fwctl.dev; + union pds_core_adminq_comp comp = {0}; + union pds_core_adminq_cmd cmd; + struct pds_fwctl_ident *ident; + dma_addr_t ident_pa; + int err; + + ident = dma_alloc_coherent(dev->parent, sizeof(*ident), &ident_pa, GFP_KERNEL); + if (!ident) { + dev_err(dev, "Failed to map ident buffer\n"); + return -ENOMEM; + } + + cmd = (union pds_core_adminq_cmd) { + .fwctl_ident = { + .opcode = PDS_FWCTL_CMD_IDENT, + .version = 0, + .len = cpu_to_le32(sizeof(*ident)), + .ident_pa = cpu_to_le64(ident_pa), + } + }; + + err = pds_client_adminq_cmd(pdsfc->padev, &cmd, sizeof(cmd), &comp, 0); + if (err) + dev_err(dev, "Failed to send adminq cmd opcode: %u err: %d\n", + cmd.fwctl_ident.opcode, err); + else + pdsfc->ident = *ident; + + dma_free_coherent(dev->parent, sizeof(*ident), ident, ident_pa); + + return err; +} + +static void pdsfc_free_endpoints(struct pdsfc_dev *pdsfc) +{ + struct device *dev = &pdsfc->fwctl.dev; + int i; + + if (!pdsfc->endpoints) + return; + + for (i = 0; pdsfc->endpoint_info && i < pdsfc->endpoints->num_entries; i++) + mutex_destroy(&pdsfc->endpoint_info[i].lock); + vfree(pdsfc->endpoint_info); + pdsfc->endpoint_info = NULL; + dma_free_coherent(dev->parent, PAGE_SIZE, + pdsfc->endpoints, pdsfc->endpoints_pa); + pdsfc->endpoints = NULL; + pdsfc->endpoints_pa = DMA_MAPPING_ERROR; +} + +static void pdsfc_free_operations(struct pdsfc_dev *pdsfc) +{ + struct device *dev = &pdsfc->fwctl.dev; + u32 num_endpoints; + int i; + + num_endpoints = le32_to_cpu(pdsfc->endpoints->num_entries); + for (i = 0; i < num_endpoints; i++) { + struct pdsfc_rpc_endpoint_info *ei = &pdsfc->endpoint_info[i]; + + if (ei->operations) { + dma_free_coherent(dev->parent, PAGE_SIZE, + ei->operations, ei->operations_pa); + ei->operations = NULL; + ei->operations_pa = DMA_MAPPING_ERROR; + } + } +} + +static struct pds_fwctl_query_data *pdsfc_get_endpoints(struct pdsfc_dev *pdsfc, + dma_addr_t *pa) +{ + struct device *dev = &pdsfc->fwctl.dev; + union pds_core_adminq_comp comp = {0}; + struct pds_fwctl_query_data *data; + union pds_core_adminq_cmd cmd; + dma_addr_t data_pa; + int err; + + data = dma_alloc_coherent(dev->parent, PAGE_SIZE, &data_pa, GFP_KERNEL); + if (!data) { + dev_err(dev, "Failed to map endpoint list\n"); + return ERR_PTR(-ENOMEM); + } + + cmd = (union pds_core_adminq_cmd) { + .fwctl_query = { + .opcode = PDS_FWCTL_CMD_QUERY, + .entity = PDS_FWCTL_RPC_ROOT, + .version = 0, + .query_data_buf_len = cpu_to_le32(PAGE_SIZE), + .query_data_buf_pa = cpu_to_le64(data_pa), + } + }; + + err = pds_client_adminq_cmd(pdsfc->padev, &cmd, sizeof(cmd), &comp, 0); + if (err) { + dev_err(dev, "Failed to send adminq cmd opcode: %u entity: %u err: %d\n", + cmd.fwctl_query.opcode, cmd.fwctl_query.entity, err); + dma_free_coherent(dev->parent, PAGE_SIZE, data, data_pa); + return ERR_PTR(err); + } + + *pa = data_pa; + + return data; +} + +static int pdsfc_init_endpoints(struct pdsfc_dev *pdsfc) +{ + struct pds_fwctl_query_data_endpoint *ep_entry; + u32 num_endpoints; + int i; + + pdsfc->endpoints = pdsfc_get_endpoints(pdsfc, &pdsfc->endpoints_pa); + if (IS_ERR(pdsfc->endpoints)) + return PTR_ERR(pdsfc->endpoints); + + num_endpoints = le32_to_cpu(pdsfc->endpoints->num_entries); + pdsfc->endpoint_info =
vcalloc(num_endpoints, + sizeof(*pdsfc->endpoint_info)); + if (!pdsfc->endpoint_info) { + pdsfc_free_endpoints(pdsfc); + return -ENOMEM; + } + + ep_entry = (struct pds_fwctl_query_data_endpoint *)pdsfc->endpoints->entries; + for (i = 0; i < num_endpoints; i++) { + mutex_init(&pdsfc->endpoint_info[i].lock); + pdsfc->endpoint_info[i].endpoint = ep_entry[i].id; + } + + return 0; +} + +static struct pds_fwctl_query_data *pdsfc_get_operations(struct pdsfc_dev *pdsfc, + dma_addr_t *pa, u32 ep) +{ + struct pds_fwctl_query_data_operation *entries; + struct device *dev = &pdsfc->fwctl.dev; + union pds_core_adminq_comp comp = {0}; + struct pds_fwctl_query_data *data; + union pds_core_adminq_cmd cmd; + dma_addr_t data_pa; + int err; + int i; + + /* Query the operations list for the given endpoint */ + data = dma_alloc_coherent(dev->parent, PAGE_SIZE, &data_pa, GFP_KERNEL); + if (!data) { + dev_err(dev, "Failed to map operations list\n"); + return ERR_PTR(-ENOMEM); + } + + cmd = (union pds_core_adminq_cmd) { + .fwctl_query = { + .opcode = PDS_FWCTL_CMD_QUERY, + .entity = PDS_FWCTL_RPC_ENDPOINT, + .version = 0, + .query_data_buf_len = cpu_to_le32(PAGE_SIZE), + .query_data_buf_pa = cpu_to_le64(data_pa), + .ep = cpu_to_le32(ep), + } + }; + + err = pds_client_adminq_cmd(pdsfc->padev, &cmd, sizeof(cmd), &comp, 0); + if (err) { + dev_err(dev, "Failed to send adminq cmd opcode: %u entity: %u err: %d\n", + cmd.fwctl_query.opcode, cmd.fwctl_query.entity, err); + dma_free_coherent(dev->parent, PAGE_SIZE, data, data_pa); + return ERR_PTR(err); + } + + *pa = data_pa; + + entries = (struct pds_fwctl_query_data_operation *)data->entries; + dev_dbg(dev, "num_entries %d\n", data->num_entries); + for (i = 0; i < data->num_entries; i++) { + + /* Translate FW command attribute to fwctl scope */ + switch (entries[i].scope) { + case PDSFC_FW_CMD_ATTR_READ: + case PDSFC_FW_CMD_ATTR_WRITE: + case PDSFC_FW_CMD_ATTR_SYNC: + entries[i].scope = FWCTL_RPC_CONFIGURATION; + break; + case PDSFC_FW_CMD_ATTR_DEBUG_READ: + entries[i].scope = FWCTL_RPC_DEBUG_READ_ONLY; + break; + case PDSFC_FW_CMD_ATTR_DEBUG_WRITE: + entries[i].scope = FWCTL_RPC_DEBUG_WRITE; + break; + default: + entries[i].scope = FWCTL_RPC_DEBUG_WRITE_FULL; + break; + } + dev_dbg(dev, "endpoint %d operation: id %x scope %d\n", + ep, entries[i].id, entries[i].scope); + } + + return data; +} + +static int pdsfc_validate_rpc(struct pdsfc_dev *pdsfc, + struct fwctl_rpc_pds *rpc, + enum fwctl_rpc_scope scope) +{ + struct pds_fwctl_query_data_operation *op_entry; + struct pdsfc_rpc_endpoint_info *ep_info = NULL; + struct device *dev = &pdsfc->fwctl.dev; + int i; + + /* validate rpc in_len & out_len based + * on ident.max_req_sz & max_resp_sz + */ + if (rpc->in.len > pdsfc->ident.max_req_sz) { + dev_dbg(dev, "Invalid request size %u, max %u\n", + rpc->in.len, pdsfc->ident.max_req_sz); + return -EINVAL; + } + + if (rpc->out.len > pdsfc->ident.max_resp_sz) { + dev_dbg(dev, "Invalid response size %u, max %u\n", + rpc->out.len, pdsfc->ident.max_resp_sz); + return -EINVAL; + } + + for (i = 0; i < pdsfc->endpoints->num_entries; i++) { + if (pdsfc->endpoint_info[i].endpoint == rpc->in.ep) { + ep_info = &pdsfc->endpoint_info[i]; + break; + } + } + if (!ep_info) { + dev_dbg(dev, "Invalid endpoint %d\n", rpc->in.ep); + return -EINVAL; + } + + /* query and cache this endpoint's operations */ + mutex_lock(&ep_info->lock); + if (!ep_info->operations) { + struct pds_fwctl_query_data *operations; + + operations = pdsfc_get_operations(pdsfc, + &ep_info->operations_pa, + rpc->in.ep); + if 
(IS_ERR(operations)) { + mutex_unlock(&ep_info->lock); + return PTR_ERR(operations); + } + ep_info->operations = operations; + } + mutex_unlock(&ep_info->lock); + + /* reject unsupported and/or out of scope commands */ + op_entry = (struct pds_fwctl_query_data_operation *)ep_info->operations->entries; + for (i = 0; i < ep_info->operations->num_entries; i++) { + if (PDS_FWCTL_RPC_OPCODE_CMP(rpc->in.op, op_entry[i].id)) { + if (scope < op_entry[i].scope) + return -EPERM; + return 0; + } + } + + dev_dbg(dev, "Invalid operation %d for endpoint %d\n", rpc->in.op, rpc->in.ep); + + return -EINVAL; +} + +static void *pdsfc_fw_rpc(struct fwctl_uctx *uctx, enum fwctl_rpc_scope scope, + void *in, size_t in_len, size_t *out_len) +{ + struct pdsfc_dev *pdsfc = container_of(uctx->fwctl, struct pdsfc_dev, fwctl); + struct device *dev = &uctx->fwctl->dev; + union pds_core_adminq_comp comp = {0}; + dma_addr_t out_payload_dma_addr = 0; + dma_addr_t in_payload_dma_addr = 0; + struct fwctl_rpc_pds *rpc = in; + union pds_core_adminq_cmd cmd; + void *out_payload = NULL; + void *in_payload = NULL; + void *out = NULL; + int err; + + err = pdsfc_validate_rpc(pdsfc, rpc, scope); + if (err) + return ERR_PTR(err); + + if (rpc->in.len > 0) { + in_payload = kzalloc(rpc->in.len, GFP_KERNEL); + if (!in_payload) { + dev_err(dev, "Failed to allocate in_payload\n"); + err = -ENOMEM; + goto err_out; + } + + if (copy_from_user(in_payload, u64_to_user_ptr(rpc->in.payload), + rpc->in.len)) { + dev_dbg(dev, "Failed to copy in_payload from user\n"); + err = -EFAULT; + goto err_in_payload; + } + + in_payload_dma_addr = dma_map_single(dev->parent, in_payload, + rpc->in.len, DMA_TO_DEVICE); + err = dma_mapping_error(dev->parent, in_payload_dma_addr); + if (err) { + dev_dbg(dev, "Failed to map in_payload\n"); + goto err_in_payload; + } + } + + if (rpc->out.len > 0) { + out_payload = kzalloc(rpc->out.len, GFP_KERNEL); + if (!out_payload) { + dev_dbg(dev, "Failed to allocate out_payload\n"); + err = -ENOMEM; + goto err_out_payload; + } + + out_payload_dma_addr = dma_map_single(dev->parent, out_payload, + rpc->out.len, DMA_FROM_DEVICE); + err = dma_mapping_error(dev->parent, out_payload_dma_addr); + if (err) { + dev_dbg(dev, "Failed to map out_payload\n"); + goto err_out_payload; + } + } + + cmd = (union pds_core_adminq_cmd) { + .fwctl_rpc = { + .opcode = PDS_FWCTL_CMD_RPC, + .flags = PDS_FWCTL_RPC_IND_REQ | PDS_FWCTL_RPC_IND_RESP, + .ep = cpu_to_le32(rpc->in.ep), + .op = cpu_to_le32(rpc->in.op), + .req_pa = cpu_to_le64(in_payload_dma_addr), + .req_sz = cpu_to_le32(rpc->in.len), + .resp_pa = cpu_to_le64(out_payload_dma_addr), + .resp_sz = cpu_to_le32(rpc->out.len), + } + }; + + err = pds_client_adminq_cmd(pdsfc->padev, &cmd, sizeof(cmd), &comp, 0); + if (err) { + dev_dbg(dev, "%s: ep %d op %x req_pa %llx req_sz %d req_sg %d resp_pa %llx resp_sz %d resp_sg %d err %d\n", + __func__, rpc->in.ep, rpc->in.op, + cmd.fwctl_rpc.req_pa, cmd.fwctl_rpc.req_sz, cmd.fwctl_rpc.req_sg_elems, + cmd.fwctl_rpc.resp_pa, cmd.fwctl_rpc.resp_sz, cmd.fwctl_rpc.resp_sg_elems, + err); + goto done; + } + + dynamic_hex_dump("out ", DUMP_PREFIX_OFFSET, 16, 1, out_payload, rpc->out.len, true); + + if (copy_to_user(u64_to_user_ptr(rpc->out.payload), out_payload, rpc->out.len)) { + dev_dbg(dev, "Failed to copy out_payload to user\n"); + out = ERR_PTR(-EFAULT); + goto done; + } + + rpc->out.retval = le32_to_cpu(comp.fwctl_rpc.err); + *out_len = in_len; + out = in; + +done: + if (out_payload_dma_addr) + dma_unmap_single(dev->parent, out_payload_dma_addr, + rpc->out.len,
DMA_FROM_DEVICE); +err_out_payload: + kfree(out_payload); + + if (in_payload_dma_addr) + dma_unmap_single(dev->parent, in_payload_dma_addr, + rpc->in.len, DMA_TO_DEVICE); +err_in_payload: + kfree(in_payload); +err_out: + if (err) + return ERR_PTR(err); + + return out; +} + +static const struct fwctl_ops pdsfc_ops = { + .device_type = FWCTL_DEVICE_TYPE_PDS, + .uctx_size = sizeof(struct pdsfc_uctx), + .open_uctx = pdsfc_open_uctx, + .close_uctx = pdsfc_close_uctx, + .info = pdsfc_info, + .fw_rpc = pdsfc_fw_rpc, +}; + +static int pdsfc_probe(struct auxiliary_device *adev, + const struct auxiliary_device_id *id) +{ + struct pds_auxiliary_dev *padev = + container_of(adev, struct pds_auxiliary_dev, aux_dev); + struct device *dev = &adev->dev; + struct pdsfc_dev *pdsfc; + int err; + + pdsfc = fwctl_alloc_device(&padev->vf_pdev->dev, &pdsfc_ops, + struct pdsfc_dev, fwctl); + if (!pdsfc) + return dev_err_probe(dev, -ENOMEM, "Failed to allocate fwctl device struct\n"); + pdsfc->padev = padev; + + err = pdsfc_identify(pdsfc); + if (err) { + fwctl_put(&pdsfc->fwctl); + return dev_err_probe(dev, err, "Failed to identify device\n"); + } + + err = pdsfc_init_endpoints(pdsfc); + if (err) { + fwctl_put(&pdsfc->fwctl); + return dev_err_probe(dev, err, "Failed to init endpoints\n"); + } + + pdsfc->caps = PDS_FWCTL_QUERY_CAP | PDS_FWCTL_SEND_CAP; + + err = fwctl_register(&pdsfc->fwctl); + if (err) { + pdsfc_free_endpoints(pdsfc); + fwctl_put(&pdsfc->fwctl); + return dev_err_probe(dev, err, "Failed to register device\n"); + } + + auxiliary_set_drvdata(adev, pdsfc); + + return 0; +} + +static void pdsfc_remove(struct auxiliary_device *adev) +{ + struct pdsfc_dev *pdsfc = auxiliary_get_drvdata(adev); + + fwctl_unregister(&pdsfc->fwctl); + pdsfc_free_operations(pdsfc); + pdsfc_free_endpoints(pdsfc); + + fwctl_put(&pdsfc->fwctl); +} + +static const struct auxiliary_device_id pdsfc_id_table[] = { + {.name = PDS_CORE_DRV_NAME "." 
PDS_DEV_TYPE_FWCTL_STR }, + {} +}; +MODULE_DEVICE_TABLE(auxiliary, pdsfc_id_table); + +static struct auxiliary_driver pdsfc_driver = { + .name = "pds_fwctl", + .probe = pdsfc_probe, + .remove = pdsfc_remove, + .id_table = pdsfc_id_table, +}; + +module_auxiliary_driver(pdsfc_driver); + +MODULE_IMPORT_NS("FWCTL"); +MODULE_DESCRIPTION("pds fwctl driver"); +MODULE_AUTHOR("Shannon Nelson <shannon.nelson@amd.com>"); +MODULE_AUTHOR("Brett Creeley <brett.creeley@amd.com>"); +MODULE_LICENSE("GPL"); diff --git a/drivers/net/ethernet/amd/pds_core/auxbus.c b/drivers/net/ethernet/amd/pds_core/auxbus.c index 2babea110991..eeb72b1809ea 100644 --- a/drivers/net/ethernet/amd/pds_core/auxbus.c +++ b/drivers/net/ethernet/amd/pds_core/auxbus.c @@ -175,34 +175,32 @@ static struct pds_auxiliary_dev *pdsc_auxbus_dev_register(struct pdsc *cf, return padev; } -int pdsc_auxbus_dev_del(struct pdsc *cf, struct pdsc *pf) +void pdsc_auxbus_dev_del(struct pdsc *cf, struct pdsc *pf, + struct pds_auxiliary_dev **pd_ptr) { struct pds_auxiliary_dev *padev; - int err = 0; - if (!cf) - return -ENODEV; + if (!*pd_ptr) + return; mutex_lock(&pf->config_lock); - padev = pf->vfs[cf->vf_id].padev; - if (padev) { - pds_client_unregister(pf, padev->client_id); - auxiliary_device_delete(&padev->aux_dev); - auxiliary_device_uninit(&padev->aux_dev); - padev->client_id = 0; - } - pf->vfs[cf->vf_id].padev = NULL; + padev = *pd_ptr; + pds_client_unregister(pf, padev->client_id); + auxiliary_device_delete(&padev->aux_dev); + auxiliary_device_uninit(&padev->aux_dev); + padev->client_id = 0; + *pd_ptr = NULL; mutex_unlock(&pf->config_lock); - return err; } -int pdsc_auxbus_dev_add(struct pdsc *cf, struct pdsc *pf) +int pdsc_auxbus_dev_add(struct pdsc *cf, struct pdsc *pf, + enum pds_core_vif_types vt, + struct pds_auxiliary_dev **pd_ptr) { struct pds_auxiliary_dev *padev; char devname[PDS_DEVNAME_LEN]; - enum pds_core_vif_types vt; unsigned long mask; u16 vt_support; int client_id; @@ -211,6 +209,9 @@ int pdsc_auxbus_dev_add(struct pdsc *cf, struct pdsc *pf) if (!cf) return -ENODEV; + if (vt >= PDS_DEV_TYPE_MAX) + return -EINVAL; + mutex_lock(&pf->config_lock); mask = BIT_ULL(PDSC_S_FW_DEAD) | @@ -222,17 +223,10 @@ int pdsc_auxbus_dev_add(struct pdsc *cf, struct pdsc *pf) goto out_unlock; } - /* We only support vDPA so far, so it is the only one to - * be verified that it is available in the Core device and - * enabled in the devlink param. In the future this might - * become a loop for several VIF types. - */ - /* Verify that the type is supported and enabled. It is not - * an error if there is no auxbus device support for this - * VF, it just means something else needs to happen with it. + * an error if the firmware doesn't support the feature; the + * driver just won't set up an auxiliary_device for it.
*/ - vt = PDS_DEV_TYPE_VDPA; vt_support = !!le16_to_cpu(pf->dev_ident.vif_types[vt]); if (!(vt_support && pf->viftype_status[vt].supported && @@ -258,7 +252,7 @@ int pdsc_auxbus_dev_add(struct pdsc *cf, struct pdsc *pf) err = PTR_ERR(padev); goto out_unlock; } - pf->vfs[cf->vf_id].padev = padev; + *pd_ptr = padev; out_unlock: mutex_unlock(&pf->config_lock); diff --git a/drivers/net/ethernet/amd/pds_core/core.c b/drivers/net/ethernet/amd/pds_core/core.c index 536635e57727..1eb0d92786f7 100644 --- a/drivers/net/ethernet/amd/pds_core/core.c +++ b/drivers/net/ethernet/amd/pds_core/core.c @@ -402,6 +402,9 @@ err_out_uninit: } static struct pdsc_viftype pdsc_viftype_defaults[] = { + [PDS_DEV_TYPE_FWCTL] = { .name = PDS_DEV_TYPE_FWCTL_STR, + .vif_id = PDS_DEV_TYPE_FWCTL, + .dl_id = -1 }, [PDS_DEV_TYPE_VDPA] = { .name = PDS_DEV_TYPE_VDPA_STR, .vif_id = PDS_DEV_TYPE_VDPA, .dl_id = DEVLINK_PARAM_GENERIC_ID_ENABLE_VNET }, @@ -428,6 +431,10 @@ static int pdsc_viftypes_init(struct pdsc *pdsc) /* See what the Core device has for support */ vt_support = !!le16_to_cpu(pdsc->dev_ident.vif_types[vt]); + + if (vt == PDS_DEV_TYPE_FWCTL) + pdsc->viftype_status[vt].enabled = true; + dev_dbg(pdsc->dev, "VIF %s is %ssupported\n", pdsc->viftype_status[vt].name, vt_support ? "" : "not "); diff --git a/drivers/net/ethernet/amd/pds_core/core.h b/drivers/net/ethernet/amd/pds_core/core.h index 14522d6d5f86..0bf320c43083 100644 --- a/drivers/net/ethernet/amd/pds_core/core.h +++ b/drivers/net/ethernet/amd/pds_core/core.h @@ -156,6 +156,7 @@ struct pdsc { struct dentry *dentry; struct device *dev; struct pdsc_dev_bar bars[PDS_CORE_BARS_MAX]; + struct pds_auxiliary_dev *padev; struct pdsc_vf *vfs; int num_vfs; int vf_id; @@ -303,8 +304,11 @@ void pdsc_health_thread(struct work_struct *work); int pdsc_register_notify(struct notifier_block *nb); void pdsc_unregister_notify(struct notifier_block *nb); void pdsc_notify(unsigned long event, void *data); -int pdsc_auxbus_dev_add(struct pdsc *cf, struct pdsc *pf); -int pdsc_auxbus_dev_del(struct pdsc *cf, struct pdsc *pf); +int pdsc_auxbus_dev_add(struct pdsc *cf, struct pdsc *pf, + enum pds_core_vif_types vt, + struct pds_auxiliary_dev **pd_ptr); +void pdsc_auxbus_dev_del(struct pdsc *cf, struct pdsc *pf, + struct pds_auxiliary_dev **pd_ptr); void pdsc_process_adminq(struct pdsc_qcq *qcq); void pdsc_work_thread(struct work_struct *work); diff --git a/drivers/net/ethernet/amd/pds_core/devlink.c b/drivers/net/ethernet/amd/pds_core/devlink.c index 44971e71991f..c5c787df61a4 100644 --- a/drivers/net/ethernet/amd/pds_core/devlink.c +++ b/drivers/net/ethernet/amd/pds_core/devlink.c @@ -56,8 +56,11 @@ int pdsc_dl_enable_set(struct devlink *dl, u32 id, for (vf_id = 0; vf_id < pdsc->num_vfs; vf_id++) { struct pdsc *vf = pdsc->vfs[vf_id].vf; - err = ctx->val.vbool ? 
pdsc_auxbus_dev_add(vf, pdsc) : - pdsc_auxbus_dev_del(vf, pdsc); + if (ctx->val.vbool) + err = pdsc_auxbus_dev_add(vf, pdsc, vt_entry->vif_id, + &pdsc->vfs[vf_id].padev); + else + pdsc_auxbus_dev_del(vf, pdsc, &pdsc->vfs[vf_id].padev); } return err; diff --git a/drivers/net/ethernet/amd/pds_core/main.c b/drivers/net/ethernet/amd/pds_core/main.c index 660268ff9562..4843f9249a31 100644 --- a/drivers/net/ethernet/amd/pds_core/main.c +++ b/drivers/net/ethernet/amd/pds_core/main.c @@ -190,7 +190,8 @@ static int pdsc_init_vf(struct pdsc *vf) devl_unlock(dl); pf->vfs[vf->vf_id].vf = vf; - err = pdsc_auxbus_dev_add(vf, pf); + err = pdsc_auxbus_dev_add(vf, pf, PDS_DEV_TYPE_VDPA, + &pf->vfs[vf->vf_id].padev); if (err) { devl_lock(dl); devl_unregister(dl); @@ -264,6 +265,10 @@ static int pdsc_init_pf(struct pdsc *pdsc) mutex_unlock(&pdsc->config_lock); + err = pdsc_auxbus_dev_add(pdsc, pdsc, PDS_DEV_TYPE_FWCTL, &pdsc->padev); + if (err) + goto err_out_stop; + dl = priv_to_devlink(pdsc); devl_lock(dl); err = devl_params_register(dl, pdsc_dl_params, @@ -272,7 +277,7 @@ static int pdsc_init_pf(struct pdsc *pdsc) devl_unlock(dl); dev_warn(pdsc->dev, "Failed to register devlink params: %pe\n", ERR_PTR(err)); - goto err_out_stop; + goto err_out_del_dev; } hr = devl_health_reporter_create(dl, &pdsc_fw_reporter_ops, 0, pdsc); @@ -295,6 +300,8 @@ static int pdsc_init_pf(struct pdsc *pdsc) err_out_unreg_params: devlink_params_unregister(dl, pdsc_dl_params, ARRAY_SIZE(pdsc_dl_params)); +err_out_del_dev: + pdsc_auxbus_dev_del(pdsc, pdsc, &pdsc->padev); err_out_stop: pdsc_stop(pdsc); err_out_teardown: @@ -417,7 +424,7 @@ static void pdsc_remove(struct pci_dev *pdev) pf = pdsc_get_pf_struct(pdsc->pdev); if (!IS_ERR(pf)) { - pdsc_auxbus_dev_del(pdsc, pf); + pdsc_auxbus_dev_del(pdsc, pf, &pf->vfs[pdsc->vf_id].padev); pf->vfs[pdsc->vf_id].vf = NULL; } } else { @@ -426,6 +433,7 @@ static void pdsc_remove(struct pci_dev *pdev) * shut themselves down. 
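+ * The PF's own fwctl auxiliary device is then removed just below.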
*/ pdsc_sriov_configure(pdev, 0); + pdsc_auxbus_dev_del(pdsc, pdsc, &pdsc->padev); timer_shutdown_sync(&pdsc->wdtimer); if (pdsc->wq) @@ -482,7 +490,10 @@ static void pdsc_reset_prepare(struct pci_dev *pdev) pf = pdsc_get_pf_struct(pdsc->pdev); if (!IS_ERR(pf)) - pdsc_auxbus_dev_del(pdsc, pf); + pdsc_auxbus_dev_del(pdsc, pf, + &pf->vfs[pdsc->vf_id].padev); + } else { + pdsc_auxbus_dev_del(pdsc, pdsc, &pdsc->padev); } pdsc_unmap_bars(pdsc); @@ -527,7 +538,11 @@ static void pdsc_reset_done(struct pci_dev *pdev) pf = pdsc_get_pf_struct(pdsc->pdev); if (!IS_ERR(pf)) - pdsc_auxbus_dev_add(pdsc, pf); + pdsc_auxbus_dev_add(pdsc, pf, PDS_DEV_TYPE_VDPA, + &pf->vfs[pdsc->vf_id].padev); + } else { + pdsc_auxbus_dev_add(pdsc, pdsc, PDS_DEV_TYPE_FWCTL, + &pdsc->padev); } } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/dev.c b/drivers/net/ethernet/mellanox/mlx5/core/dev.c index 9a79674d27f1..891bbbbfbbf1 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/dev.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/dev.c @@ -228,8 +228,15 @@ enum { MLX5_INTERFACE_PROTOCOL_VNET, MLX5_INTERFACE_PROTOCOL_DPLL, + MLX5_INTERFACE_PROTOCOL_FWCTL, }; +static bool is_fwctl_supported(struct mlx5_core_dev *dev) +{ + /* fwctl is most useful on PFs; prevent fwctl on SFs for now */ + return MLX5_CAP_GEN(dev, uctx_cap) && !mlx5_core_is_sf(dev); +} + static const struct mlx5_adev_device { const char *suffix; bool (*is_supported)(struct mlx5_core_dev *dev); @@ -252,6 +259,8 @@ static const struct mlx5_adev_device { .is_supported = &is_mp_supported }, [MLX5_INTERFACE_PROTOCOL_DPLL] = { .suffix = "dpll", .is_supported = &is_dpll_supported }, + [MLX5_INTERFACE_PROTOCOL_FWCTL] = { .suffix = "fwctl", + .is_supported = &is_fwctl_supported }, }; int mlx5_adev_idx_alloc(void) diff --git a/include/cxl/features.h b/include/cxl/features.h new file mode 100644 index 000000000000..a3bb34694c06 --- /dev/null +++ b/include/cxl/features.h @@ -0,0 +1,87 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* Copyright(c) 2024-2025 Intel Corporation.
*/ +#ifndef __CXL_FEATURES_H__ +#define __CXL_FEATURES_H__ + +#include <linux/uuid.h> +#include <linux/fwctl.h> +#include <uapi/cxl/features.h> + +/* Feature UUIDs used by the kernel */ +#define CXL_FEAT_PATROL_SCRUB_UUID \ + UUID_INIT(0x96dad7d6, 0xfde8, 0x482b, 0xa7, 0x33, 0x75, 0x77, 0x4e, \ + 0x06, 0xdb, 0x8a) + +#define CXL_FEAT_ECS_UUID \ + UUID_INIT(0xe5b13f22, 0x2328, 0x4a14, 0xb8, 0xba, 0xb9, 0x69, 0x1e, \ + 0x89, 0x33, 0x86) + +#define CXL_FEAT_SPPR_UUID \ + UUID_INIT(0x892ba475, 0xfad8, 0x474e, 0x9d, 0x3e, 0x69, 0x2c, 0x91, \ + 0x75, 0x68, 0xbb) + +#define CXL_FEAT_HPPR_UUID \ + UUID_INIT(0x80ea4521, 0x786f, 0x4127, 0xaf, 0xb1, 0xec, 0x74, 0x59, \ + 0xfb, 0x0e, 0x24) + +#define CXL_FEAT_CACHELINE_SPARING_UUID \ + UUID_INIT(0x96C33386, 0x91dd, 0x44c7, 0x9e, 0xcb, 0xfd, 0xaf, 0x65, \ + 0x03, 0xba, 0xc4) + +#define CXL_FEAT_ROW_SPARING_UUID \ + UUID_INIT(0x450ebf67, 0xb135, 0x4f97, 0xa4, 0x98, 0xc2, 0xd5, 0x7f, \ + 0x27, 0x9b, 0xed) + +#define CXL_FEAT_BANK_SPARING_UUID \ + UUID_INIT(0x78b79636, 0x90ac, 0x4b64, 0xa4, 0xef, 0xfa, 0xac, 0x5d, \ + 0x18, 0xa8, 0x63) + +#define CXL_FEAT_RANK_SPARING_UUID \ + UUID_INIT(0x34dbaff5, 0x0552, 0x4281, 0x8f, 0x76, 0xda, 0x0b, 0x5e, \ + 0x7a, 0x76, 0xa7) + +/* Feature commands capability supported by a device */ +enum cxl_features_capability { + CXL_FEATURES_NONE = 0, + CXL_FEATURES_RO, + CXL_FEATURES_RW, +}; + +/** + * struct cxl_features_state - The Features state for the device + * @cxlds: Pointer to CXL device state + * @entries: CXL feature entry context + */ +struct cxl_features_state { + struct cxl_dev_state *cxlds; + struct cxl_feat_entries { + int num_features; + int num_user_features; + struct cxl_feat_entry ent[] __counted_by(num_features); + } *entries; +}; + +struct cxl_mailbox; +struct cxl_memdev; +#ifdef CONFIG_CXL_FEATURES +struct cxl_features_state *to_cxlfs(struct cxl_dev_state *cxlds); +int devm_cxl_setup_features(struct cxl_dev_state *cxlds); +int devm_cxl_setup_fwctl(struct cxl_memdev *cxlmd); +#else +static inline struct cxl_features_state *to_cxlfs(struct cxl_dev_state *cxlds) +{ + return NULL; +} + +static inline int devm_cxl_setup_features(struct cxl_dev_state *cxlds) +{ + return -EOPNOTSUPP; +} + +static inline int devm_cxl_setup_fwctl(struct cxl_memdev *cxlmd) +{ + return -EOPNOTSUPP; +} +#endif + +#endif diff --git a/include/cxl/mailbox.h b/include/cxl/mailbox.h index bacd111e75f1..c4e99e2e3a9d 100644 --- a/include/cxl/mailbox.h +++ b/include/cxl/mailbox.h @@ -3,24 +3,66 @@ #ifndef __CXL_MBOX_H__ #define __CXL_MBOX_H__ #include <linux/rcuwait.h> +#include <cxl/features.h> +#include <uapi/linux/cxl_mem.h> -struct cxl_mbox_cmd; +/** + * struct cxl_mbox_cmd - A command to be submitted to hardware. + * @opcode: (input) The command set and command submitted to hardware. + * @payload_in: (input) Pointer to the input payload. + * @payload_out: (output) Pointer to the output payload. Must be allocated by + * the caller. + * @size_in: (input) Number of bytes to load from @payload_in. + * @size_out: (input) Max number of bytes loaded into @payload_out. + * (output) Number of bytes generated by the device. For fixed size + * output commands this is always expected to be deterministic. For + * variable sized output commands, it tells the exact number of bytes + * written. + * @min_out: (input) internal command output payload size validation + * @poll_count: (input) Number of timeouts to attempt. + * @poll_interval_ms: (input) Time between mailbox background command polling + * interval timeouts.
+ * @return_code: (output) Error code returned from hardware. + * + * This is the primary mechanism used to send commands to the hardware. + * All the fields except @payload_* correspond exactly to the fields described in + * the Command Register section of CXL 2.0 8.2.8.4.5. @payload_in and + * @payload_out are written to, and read from, the Command Payload Registers + * defined in CXL 2.0 8.2.8.4.8. + */ +struct cxl_mbox_cmd { + u16 opcode; + void *payload_in; + void *payload_out; + size_t size_in; + size_t size_out; + size_t min_out; + int poll_count; + int poll_interval_ms; + u16 return_code; +}; /** * struct cxl_mailbox - context for CXL mailbox operations * @host: device that hosts the mailbox + * @enabled_cmds: mailbox commands that are enabled by the driver + * @exclusive_cmds: mailbox commands that are exclusive to the kernel * @payload_size: Size of space for payload * (CXL 3.1 8.2.8.4.3 Mailbox Capabilities Register) * @mbox_mutex: mutex protects device mailbox and firmware * @mbox_wait: rcuwait for mailbox * @mbox_send: @dev specific transport for transmitting mailbox commands + * @feat_cap: Features capability */ struct cxl_mailbox { struct device *host; + DECLARE_BITMAP(enabled_cmds, CXL_MEM_COMMAND_ID_MAX); + DECLARE_BITMAP(exclusive_cmds, CXL_MEM_COMMAND_ID_MAX); size_t payload_size; struct mutex mbox_mutex; /* lock to protect mailbox context */ struct rcuwait mbox_wait; int (*mbox_send)(struct cxl_mailbox *cxl_mbox, struct cxl_mbox_cmd *cmd); + enum cxl_features_capability feat_cap; }; int cxl_mailbox_init(struct cxl_mailbox *cxl_mbox, struct device *host); diff --git a/include/linux/fwctl.h b/include/linux/fwctl.h new file mode 100644 index 000000000000..5d61fc8a6871 --- /dev/null +++ b/include/linux/fwctl.h @@ -0,0 +1,135 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * Copyright (c) 2024-2025, NVIDIA CORPORATION & AFFILIATES + */ +#ifndef __LINUX_FWCTL_H +#define __LINUX_FWCTL_H +#include <linux/device.h> +#include <linux/cdev.h> +#include <linux/cleanup.h> +#include <uapi/fwctl/fwctl.h> + +struct fwctl_device; +struct fwctl_uctx; + +/** + * struct fwctl_ops - Driver provided operations + * + * fwctl_unregister() will wait until all executing ops are completed before it + * returns. Drivers should be mindful to not let their ops run for too long as + * it will block device hot unplug and module unloading. + */ +struct fwctl_ops { + /** + * @device_type: The driver's assigned device_type number. This is uABI. + */ + enum fwctl_device_type device_type; + /** + * @uctx_size: The size of the fwctl_uctx struct to allocate. The first + * bytes of this memory will be a fwctl_uctx. The driver can use the + * remaining bytes as its private memory. + */ + size_t uctx_size; + /** + * @open_uctx: Called when a file descriptor is opened before the uctx + * is ever used. + */ + int (*open_uctx)(struct fwctl_uctx *uctx); + /** + * @close_uctx: Called when the uctx is destroyed, usually when the FD + * is closed. + */ + void (*close_uctx)(struct fwctl_uctx *uctx); + /** + * @info: Implement FWCTL_INFO. Return kmalloc()'d memory that is copied + * to out_device_data. On input, length indicates the size of the user + * buffer; on output, it indicates the size of the memory. The driver can + * ignore length on input; the core code will handle everything. + */ + void *(*info)(struct fwctl_uctx *uctx, size_t *length); + /** + * @fw_rpc: Implement FWCTL_RPC. Deliver rpc_in/in_len to the FW and + * return the response and set out_len. rpc_in can be returned as the + * response pointer.
Otherwise the returned pointer is freed with + * kvfree(). + */ + void *(*fw_rpc)(struct fwctl_uctx *uctx, enum fwctl_rpc_scope scope, + void *rpc_in, size_t in_len, size_t *out_len); +}; + +/** + * struct fwctl_device - Per-driver registration struct + * @dev: The sysfs (class/fwctl/fwctlXX) device + * + * Each driver instance will have one of these structs with the driver private + * data following immediately after. This struct is refcounted, it is freed by + * calling fwctl_put(). + */ +struct fwctl_device { + struct device dev; + /* private: */ + struct cdev cdev; + + /* Protect uctx_list */ + struct mutex uctx_list_lock; + struct list_head uctx_list; + /* + * Protect ops, held for write when ops becomes NULL during unregister, + * held for read whenever ops is loaded or an ops function is running. + */ + struct rw_semaphore registration_lock; + const struct fwctl_ops *ops; +}; + +struct fwctl_device *_fwctl_alloc_device(struct device *parent, + const struct fwctl_ops *ops, + size_t size); +/** + * fwctl_alloc_device - Allocate a fwctl + * @parent: Physical device that provides the FW interface + * @ops: Driver ops to register + * @drv_struct: 'struct driver_fwctl' that holds the struct fwctl_device + * @member: Name of the struct fwctl_device in @drv_struct + * + * This allocates and initializes the fwctl_device embedded in the drv_struct. + * Upon success the pointer must be freed via fwctl_put(). Returns a 'drv_struct + * \*' on success, NULL on error. + */ +#define fwctl_alloc_device(parent, ops, drv_struct, member) \ + ({ \ + static_assert(__same_type(struct fwctl_device, \ + ((drv_struct *)NULL)->member)); \ + static_assert(offsetof(drv_struct, member) == 0); \ + (drv_struct *)_fwctl_alloc_device(parent, ops, \ + sizeof(drv_struct)); \ + }) + +static inline struct fwctl_device *fwctl_get(struct fwctl_device *fwctl) +{ + get_device(&fwctl->dev); + return fwctl; +} +static inline void fwctl_put(struct fwctl_device *fwctl) +{ + put_device(&fwctl->dev); +} +DEFINE_FREE(fwctl, struct fwctl_device *, if (_T) fwctl_put(_T)); + +int fwctl_register(struct fwctl_device *fwctl); +void fwctl_unregister(struct fwctl_device *fwctl); + +/** + * struct fwctl_uctx - Per user FD context + * @fwctl: fwctl instance that owns the context + * + * Every FD opened by userspace will get a unique context allocation. Any driver + * private data will follow immediately after. 
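+ * + * A minimal embedding sketch (the name my_uctx and its field are illustrative, not part of this API; such a driver would set fwctl_ops.uctx_size = sizeof(struct my_uctx)): + * + *	struct my_uctx { + *		struct fwctl_uctx uctx; + *		u32 fw_context_id; + *	}; + * + * The fwctl_uctx member occupies the first bytes of the allocation, so the core and the driver can convert between the two views of the same memory.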
+ */ +struct fwctl_uctx { + struct fwctl_device *fwctl; + /* private: */ + /* Head at fwctl_device::uctx_list */ + struct list_head uctx_list_entry; +}; + +#endif diff --git a/include/linux/panic.h b/include/linux/panic.h index 54d90b6c5f47..2494d51707ef 100644 --- a/include/linux/panic.h +++ b/include/linux/panic.h @@ -74,7 +74,8 @@ static inline void set_arch_panic_timeout(int timeout, int arch_default_timeout) #define TAINT_AUX 16 #define TAINT_RANDSTRUCT 17 #define TAINT_TEST 18 -#define TAINT_FLAGS_COUNT 19 +#define TAINT_FWCTL 19 +#define TAINT_FLAGS_COUNT 20 #define TAINT_FLAGS_MAX ((1UL << TAINT_FLAGS_COUNT) - 1) struct taint_flag { diff --git a/include/linux/pds/pds_adminq.h b/include/linux/pds/pds_adminq.h index 4b4e9a98b37b..ddd111f04ca0 100644 --- a/include/linux/pds/pds_adminq.h +++ b/include/linux/pds/pds_adminq.h @@ -1179,6 +1179,274 @@ struct pds_lm_host_vf_status_cmd { u8 status; }; +enum pds_fwctl_cmd_opcode { + PDS_FWCTL_CMD_IDENT = 70, + PDS_FWCTL_CMD_RPC = 71, + PDS_FWCTL_CMD_QUERY = 72, +}; + +/** + * struct pds_fwctl_cmd - Firmware control command structure + * @opcode: Opcode + * @rsvd: Reserved + * @ep: Endpoint identifier + * @op: Operation identifier + */ +struct pds_fwctl_cmd { + u8 opcode; + u8 rsvd[3]; + __le32 ep; + __le32 op; +} __packed; + +/** + * struct pds_fwctl_comp - Firmware control completion structure + * @status: Status of the firmware control operation + * @rsvd: Reserved + * @comp_index: Completion index in little-endian format + * @rsvd2: Reserved + * @color: Color bit indicating the state of the completion + */ +struct pds_fwctl_comp { + u8 status; + u8 rsvd; + __le16 comp_index; + u8 rsvd2[11]; + u8 color; +} __packed; + +/** + * struct pds_fwctl_ident_cmd - Firmware control identification command structure + * @opcode: Operation code for the command + * @rsvd: Reserved + * @version: Interface version + * @rsvd2: Reserved + * @len: Length of the identification data + * @ident_pa: Physical address of the identification data + */ +struct pds_fwctl_ident_cmd { + u8 opcode; + u8 rsvd; + u8 version; + u8 rsvd2; + __le32 len; + __le64 ident_pa; +} __packed; + +/* future feature bits here + * enum pds_fwctl_features { + * }; + * (compilers don't like empty enums) + */ + +/** + * struct pds_fwctl_ident - Firmware control identification structure + * @features: Supported features (enum pds_fwctl_features) + * @version: Interface version + * @rsvd: Reserved + * @max_req_sz: Maximum request size + * @max_resp_sz: Maximum response size + * @max_req_sg_elems: Maximum number of request SGs + * @max_resp_sg_elems: Maximum number of response SGs + */ +struct pds_fwctl_ident { + __le64 features; + u8 version; + u8 rsvd[3]; + __le32 max_req_sz; + __le32 max_resp_sz; + u8 max_req_sg_elems; + u8 max_resp_sg_elems; +} __packed; + +enum pds_fwctl_query_entity { + PDS_FWCTL_RPC_ROOT = 0, + PDS_FWCTL_RPC_ENDPOINT = 1, + PDS_FWCTL_RPC_OPERATION = 2, +}; + +#define PDS_FWCTL_RPC_OPCODE_CMD_SHIFT 0 +#define PDS_FWCTL_RPC_OPCODE_CMD_MASK GENMASK(15, PDS_FWCTL_RPC_OPCODE_CMD_SHIFT) +#define PDS_FWCTL_RPC_OPCODE_VER_SHIFT 16 +#define PDS_FWCTL_RPC_OPCODE_VER_MASK GENMASK(23, PDS_FWCTL_RPC_OPCODE_VER_SHIFT) + +#define PDS_FWCTL_RPC_OPCODE_GET_CMD(op) FIELD_GET(PDS_FWCTL_RPC_OPCODE_CMD_MASK, op) +#define PDS_FWCTL_RPC_OPCODE_GET_VER(op) FIELD_GET(PDS_FWCTL_RPC_OPCODE_VER_MASK, op) + +#define PDS_FWCTL_RPC_OPCODE_CMP(op1, op2) \ + (PDS_FWCTL_RPC_OPCODE_GET_CMD(op1) == PDS_FWCTL_RPC_OPCODE_GET_CMD(op2) && \ + PDS_FWCTL_RPC_OPCODE_GET_VER(op1) <= 
PDS_FWCTL_RPC_OPCODE_GET_VER(op2)) + +/* + * FW command attributes that map to the FWCTL scope values + */ +#define PDSFC_FW_CMD_ATTR_READ 0x00 +#define PDSFC_FW_CMD_ATTR_DEBUG_READ 0x02 +#define PDSFC_FW_CMD_ATTR_WRITE 0x04 +#define PDSFC_FW_CMD_ATTR_DEBUG_WRITE 0x08 +#define PDSFC_FW_CMD_ATTR_SYNC 0x10 + +/** + * struct pds_fwctl_query_cmd - Firmware control query command structure + * @opcode: Operation code for the command + * @entity: Entity type to query (enum pds_fwctl_query_entity) + * @version: Version of the query data structure supported by the driver + * @rsvd: Reserved + * @query_data_buf_len: Length of the query data buffer + * @query_data_buf_pa: Physical address of the query data buffer + * @ep: Endpoint identifier to query (when entity is PDS_FWCTL_RPC_ENDPOINT) + * @op: Operation identifier to query (when entity is PDS_FWCTL_RPC_OPERATION) + * + * This structure is used to send a query command to the firmware control + * interface. The structure is packed to ensure there is no padding between + * the fields. + */ +struct pds_fwctl_query_cmd { + u8 opcode; + u8 entity; + u8 version; + u8 rsvd; + __le32 query_data_buf_len; + __le64 query_data_buf_pa; + union { + __le32 ep; + __le32 op; + }; +} __packed; + +/** + * struct pds_fwctl_query_comp - Firmware control query completion structure + * @status: Status of the query command + * @rsvd: Reserved + * @comp_index: Completion index in little-endian format + * @version: Version of the query data structure returned by firmware. This + * should be less than or equal to the version supported by the driver + * @rsvd2: Reserved + * @color: Color bit indicating the state of the completion + */ +struct pds_fwctl_query_comp { + u8 status; + u8 rsvd; + __le16 comp_index; + u8 version; + u8 rsvd2[2]; + u8 color; +} __packed; + +/** + * struct pds_fwctl_query_data_endpoint - query data for entity PDS_FWCTL_RPC_ROOT + * @id: The identifier for the data endpoint + */ +struct pds_fwctl_query_data_endpoint { + __le32 id; +} __packed; + +/** + * struct pds_fwctl_query_data_operation - query data for entity PDS_FWCTL_RPC_ENDPOINT + * @id: Operation identifier + * @scope: Scope of the operation (enum fwctl_rpc_scope) + * @rsvd: Reserved + */ +struct pds_fwctl_query_data_operation { + __le32 id; + u8 scope; + u8 rsvd[3]; +} __packed; + +/** + * struct pds_fwctl_query_data - query data structure + * @version: Version of the query data structure + * @rsvd: Reserved + * @num_entries: Number of entries in the union + * @entries: Array of query data entries, depending on the entity type + */ +struct pds_fwctl_query_data { + u8 version; + u8 rsvd[3]; + __le32 num_entries; + u8 entries[] __counted_by_le(num_entries); +} __packed; + +/** + * struct pds_fwctl_rpc_cmd - Firmware control RPC command + * @opcode: opcode PDS_FWCTL_CMD_RPC + * @rsvd: Reserved + * @flags: Indicates indirect request and/or response handling + * @ep: Endpoint identifier + * @op: Operation identifier + * @inline_req0: Buffer for inline request + * @inline_req1: Buffer for inline request + * @req_pa: Physical address of request data + * @req_sz: Size of the request + * @req_sg_elems: Number of request SGs + * @req_rsvd: Reserved + * @inline_req2: Buffer for inline request + * @resp_pa: Physical address of response data + * @resp_sz: Size of the response + * @resp_sg_elems: Number of response SGs + * @resp_rsvd: Reserved + */ +struct pds_fwctl_rpc_cmd { + u8 opcode; + u8 rsvd; + __le16 flags; +#define PDS_FWCTL_RPC_IND_REQ 0x1 +#define PDS_FWCTL_RPC_IND_RESP 0x2 + __le32 ep; + 
__le32 op; + u8 inline_req0[16]; + union { + u8 inline_req1[16]; + struct { + __le64 req_pa; + __le32 req_sz; + u8 req_sg_elems; + u8 req_rsvd[3]; + }; + }; + union { + u8 inline_req2[16]; + struct { + __le64 resp_pa; + __le32 resp_sz; + u8 resp_sg_elems; + u8 resp_rsvd[3]; + }; + }; +} __packed; + +/** + * struct pds_sg_elem - Transmit scatter-gather (SG) descriptor element + * @addr: DMA address of SG element data buffer + * @len: Length of SG element data buffer, in bytes + * @rsvd: Reserved + */ +struct pds_sg_elem { + __le64 addr; + __le32 len; + u8 rsvd[4]; +} __packed; + +/** + * struct pds_fwctl_rpc_comp - Completion of a firmware control RPC + * @status: Status of the command + * @rsvd: Reserved + * @comp_index: Completion index of the command + * @err: Error code, if any, from the RPC + * @resp_sz: Size of the response + * @rsvd2: Reserved + * @color: Color bit indicating the state of the completion + */ +struct pds_fwctl_rpc_comp { + u8 status; + u8 rsvd; + __le16 comp_index; + __le32 err; + __le32 resp_sz; + u8 rsvd2[3]; + u8 color; +} __packed; + union pds_core_adminq_cmd { u8 opcode; u8 bytes[64]; @@ -1216,6 +1484,11 @@ union pds_core_adminq_cmd { struct pds_lm_dirty_enable_cmd lm_dirty_enable; struct pds_lm_dirty_disable_cmd lm_dirty_disable; struct pds_lm_dirty_seq_ack_cmd lm_dirty_seq_ack; + + struct pds_fwctl_cmd fwctl; + struct pds_fwctl_ident_cmd fwctl_ident; + struct pds_fwctl_rpc_cmd fwctl_rpc; + struct pds_fwctl_query_cmd fwctl_query; }; union pds_core_adminq_comp { @@ -1243,6 +1516,10 @@ union pds_core_adminq_comp { struct pds_lm_state_size_comp lm_state_size; struct pds_lm_dirty_status_comp lm_dirty_status; + + struct pds_fwctl_comp fwctl; + struct pds_fwctl_rpc_comp fwctl_rpc; + struct pds_fwctl_query_comp fwctl_query; }; #ifndef __CHECKER__ diff --git a/include/linux/pds/pds_common.h b/include/linux/pds/pds_common.h index 5802e1deef24..b193adbe7cc3 100644 --- a/include/linux/pds/pds_common.h +++ b/include/linux/pds/pds_common.h @@ -29,6 +29,7 @@ enum pds_core_vif_types { PDS_DEV_TYPE_ETH = 3, PDS_DEV_TYPE_RDMA = 4, PDS_DEV_TYPE_LM = 5, + PDS_DEV_TYPE_FWCTL = 6, /* new ones added before this line */ PDS_DEV_TYPE_MAX = 16 /* don't change - used in struct size */ @@ -40,6 +41,7 @@ enum pds_core_vif_types { #define PDS_DEV_TYPE_ETH_STR "Eth" #define PDS_DEV_TYPE_RDMA_STR "RDMA" #define PDS_DEV_TYPE_LM_STR "LM" +#define PDS_DEV_TYPE_FWCTL_STR "fwctl" #define PDS_VDPA_DEV_NAME PDS_CORE_DRV_NAME "." PDS_DEV_TYPE_VDPA_STR #define PDS_VFIO_LM_DEV_NAME PDS_CORE_DRV_NAME "." PDS_DEV_TYPE_LM_STR "." PDS_DEV_TYPE_VFIO_STR diff --git a/include/uapi/cxl/features.h b/include/uapi/cxl/features.h new file mode 100644 index 000000000000..d6db8984889f --- /dev/null +++ b/include/uapi/cxl/features.h @@ -0,0 +1,170 @@ +/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ +/* + * Copyright (c) 2024,2025, Intel Corporation + * + * These are definitions for the mailbox command interface of the CXL subsystem. + */ +#ifndef _UAPI_CXL_FEATURES_H_ +#define _UAPI_CXL_FEATURES_H_ + +#include <linux/types.h> +#ifndef __KERNEL__ +#include <uuid/uuid.h> +#else +#include <linux/uuid.h> +#endif + +/* + * struct cxl_mbox_get_sup_feats_in - Get Supported Features input + * + * @count: bytes of Feature data to return in output + * @start_idx: index of first requested Supported Feature Entry, 0 based. + * @reserved: reserved field, must be 0s. + * + * Get Supported Features (Opcode 0500h), CXL spec r3.2 8.2.9.6.1.
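+ * + * A hedged calling sketch (illustrative; the mock device in tools/testing/cxl/test/mem.c below behaves this way): a first call whose @count covers only the output header returns @supported_feats with zero @num_entries, letting the caller size a second request: + * + *	struct cxl_mbox_get_sup_feats_in in = { + *		.count = cpu_to_le32(sizeof(struct cxl_mbox_get_sup_feats_out)), + *		.start_idx = cpu_to_le16(0), + *	}; + * + * (cpu_to_le32()/cpu_to_le16() stand in for whatever endian helpers the caller has available, e.g. htole32() in userspace.)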
+ * Input block for Get Supported Features */ +struct cxl_mbox_get_sup_feats_in { + __le32 count; + __le16 start_idx; + __u8 reserved[2]; +} __attribute__ ((__packed__)); + +/* CXL spec r3.2 Table 8-87 command effects */ +#define CXL_CMD_CONFIG_CHANGE_COLD_RESET BIT(0) +#define CXL_CMD_CONFIG_CHANGE_IMMEDIATE BIT(1) +#define CXL_CMD_DATA_CHANGE_IMMEDIATE BIT(2) +#define CXL_CMD_POLICY_CHANGE_IMMEDIATE BIT(3) +#define CXL_CMD_LOG_CHANGE_IMMEDIATE BIT(4) +#define CXL_CMD_SECURITY_STATE_CHANGE BIT(5) +#define CXL_CMD_BACKGROUND BIT(6) +#define CXL_CMD_BGCMD_ABORT_SUPPORTED BIT(7) +#define CXL_CMD_EFFECTS_VALID BIT(9) +#define CXL_CMD_CONFIG_CHANGE_CONV_RESET BIT(10) +#define CXL_CMD_CONFIG_CHANGE_CXL_RESET BIT(11) +#define CXL_CMD_EFFECTS_RESERVED GENMASK(15, 12) + +/* + * struct cxl_feat_entry - Supported Feature Entry + * @uuid: UUID of the Feature + * @id: ID to identify the feature, 0 based + * @get_feat_size: max bytes required for Get Feature command for this Feature + * @set_feat_size: max bytes required for Set Feature command for this Feature + * @flags: attribute flags + * @get_feat_ver: Get Feature version + * @set_feat_ver: Set Feature version + * @effects: Set Feature command effects + * @reserved: reserved, must be 0 + * + * CXL spec r3.2 Table 8-109 + * Get Supported Features Supported Feature Entry + */ +struct cxl_feat_entry { + uuid_t uuid; + __le16 id; + __le16 get_feat_size; + __le16 set_feat_size; + __le32 flags; + __u8 get_feat_ver; + __u8 set_feat_ver; + __le16 effects; + __u8 reserved[18]; +} __attribute__ ((__packed__)); + +/* @flags field for 'struct cxl_feat_entry' */ +#define CXL_FEATURE_F_CHANGEABLE BIT(0) +#define CXL_FEATURE_F_PERSIST_FW_UPDATE BIT(4) +#define CXL_FEATURE_F_DEFAULT_SEL BIT(5) +#define CXL_FEATURE_F_SAVED_SEL BIT(6) + +/* + * struct cxl_mbox_get_sup_feats_out - Get Supported Features output + * @num_entries: number of Supported Feature Entries returned + * @supported_feats: number of supported Features + * @reserved: reserved, must be 0s. + * @ents: Supported Feature Entries array + * + * CXL spec r3.2 Table 8-108 + * Get Supported Features Output Payload + */ +struct cxl_mbox_get_sup_feats_out { + __struct_group(cxl_mbox_get_sup_feats_out_hdr, hdr, /* no attrs */, + __le16 num_entries; + __le16 supported_feats; + __u8 reserved[4]; + ); + struct cxl_feat_entry ents[] __counted_by_le(num_entries); +} __attribute__ ((__packed__)); + +/* + * Get Feature, CXL spec r3.2 8.2.9.6.2 + */ + +/* + * struct cxl_mbox_get_feat_in - Get Feature input + * @uuid: UUID for Feature + * @offset: offset of the first byte in Feature data for output payload + * @count: count in bytes of Feature data returned + * @selection: 0 current value, 1 default value, 2 saved value + * + * CXL spec r3.2 section 8.2.9.6.2 Table 8-99 + */ +struct cxl_mbox_get_feat_in { + uuid_t uuid; + __le16 offset; + __le16 count; + __u8 selection; +} __attribute__ ((__packed__)); + +/* + * enum cxl_get_feat_selection - selection field of Get Feature input + */ +enum cxl_get_feat_selection { + CXL_GET_FEAT_SEL_CURRENT_VALUE, + CXL_GET_FEAT_SEL_DEFAULT_VALUE, + CXL_GET_FEAT_SEL_SAVED_VALUE, + CXL_GET_FEAT_SEL_MAX +}; + +/* + * Set Feature, CXL spec r3.2 8.2.9.6.3 + */ + +/* + * struct cxl_mbox_set_feat_in - Set Feature input + * @uuid: UUID for Feature + * @flags: set feature flags + * @offset: byte offset of Feature data to update + * @version: Feature version of the data in Feature Data + * @rsvd: reserved, must be 0s.
+ * @feat_data: raw byte stream of Features data to update + * + * CXL spec r3.2 section 8.2.9.6.3 Table 8-101 + */ +struct cxl_mbox_set_feat_in { + __struct_group(cxl_mbox_set_feat_hdr, hdr, /* no attrs */, + uuid_t uuid; + __le32 flags; + __le16 offset; + __u8 version; + __u8 rsvd[9]; + ); + __u8 feat_data[]; +} __attribute__ ((__packed__)); + +/* + * enum cxl_set_feat_flag_data_transfer - Set Feature flags field + */ +enum cxl_set_feat_flag_data_transfer { + CXL_SET_FEAT_FLAG_FULL_DATA_TRANSFER = 0, + CXL_SET_FEAT_FLAG_INITIATE_DATA_TRANSFER, + CXL_SET_FEAT_FLAG_CONTINUE_DATA_TRANSFER, + CXL_SET_FEAT_FLAG_FINISH_DATA_TRANSFER, + CXL_SET_FEAT_FLAG_ABORT_DATA_TRANSFER, + CXL_SET_FEAT_FLAG_DATA_TRANSFER_MAX +}; + +#define CXL_SET_FEAT_FLAG_DATA_TRANSFER_MASK GENMASK(2, 0) +#define CXL_SET_FEAT_FLAG_DATA_SAVED_ACROSS_RESET BIT(3) + +#endif diff --git a/include/uapi/fwctl/cxl.h b/include/uapi/fwctl/cxl.h new file mode 100644 index 000000000000..43f522f0cdcd --- /dev/null +++ b/include/uapi/fwctl/cxl.h @@ -0,0 +1,56 @@ +/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ +/* + * Copyright (c) 2024-2025 Intel Corporation + * + * These are definitions for the mailbox command interface of the CXL subsystem. + */ +#ifndef _UAPI_FWCTL_CXL_H_ +#define _UAPI_FWCTL_CXL_H_ + +#include <linux/types.h> +#include <linux/stddef.h> +#include <cxl/features.h> + +/** + * struct fwctl_rpc_cxl - ioctl(FWCTL_RPC) input for CXL + * @opcode: CXL mailbox command opcode + * @flags: Flags for the command (input). + * @op_size: Size of input payload. + * @reserved1: Reserved. Must be 0s. + * @get_sup_feats_in: Get Supported Features input + * @get_feat_in: Get Feature input + * @set_feat_in: Set Feature input + */ +struct fwctl_rpc_cxl { + __struct_group(fwctl_rpc_cxl_hdr, hdr, /* no attrs */, + __u32 opcode; + __u32 flags; + __u32 op_size; + __u32 reserved1; + ); + union { + struct cxl_mbox_get_sup_feats_in get_sup_feats_in; + struct cxl_mbox_get_feat_in get_feat_in; + struct cxl_mbox_set_feat_in set_feat_in; + }; +}; + +/** + * struct fwctl_rpc_cxl_out - ioctl(FWCTL_RPC) output for CXL + * @size: Size of the output payload + * @retval: Return value from device + * @get_sup_feats_out: Get Supported Features output + * @payload: raw byte stream of payload + */ +struct fwctl_rpc_cxl_out { + __struct_group(fwctl_rpc_cxl_out_hdr, hdr, /* no attrs */, + __u32 size; + __u32 retval; + ); + union { + struct cxl_mbox_get_sup_feats_out get_sup_feats_out; + __DECLARE_FLEX_ARRAY(__u8, payload); + }; +}; + +#endif diff --git a/include/uapi/fwctl/fwctl.h b/include/uapi/fwctl/fwctl.h new file mode 100644 index 000000000000..716ac0eee42d --- /dev/null +++ b/include/uapi/fwctl/fwctl.h @@ -0,0 +1,141 @@ +/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ +/* Copyright (c) 2024-2025, NVIDIA CORPORATION & AFFILIATES. + */ +#ifndef _UAPI_FWCTL_H +#define _UAPI_FWCTL_H + +#include <linux/types.h> +#include <linux/ioctl.h> + +#define FWCTL_TYPE 0x9A + +/** + * DOC: General ioctl format + * + * The ioctl interface follows a general format to allow for extensibility. Each + * ioctl is passed a structure pointer as the argument providing the size of + * the structure in the first u32. The kernel checks that any structure space + * beyond what it understands is 0. This allows userspace to use the backward + * compatible portion while consistently using the newer, larger structures.
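+ * + * A hedged sketch of the resulting calling pattern (struct fwctl_info and FWCTL_INFO are defined below; fd is assumed to be an open /dev/fwctl/fwctlNN descriptor): + * + *	struct fwctl_info info = {}; + * + *	info.size = sizeof(info); + *	if (ioctl(fd, FWCTL_INFO, &info) == 0) + *		printf("device type %u\n", info.out_device_type); + * + * Zero-initializing the whole structure keeps any trailing space the caller does not know about at 0, which is exactly what the kernel checks for.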
+ * + * ioctls use a standard meaning for common errnos: + * + * - ENOTTY: The IOCTL number itself is not supported at all + * - E2BIG: The IOCTL number is supported, but the provided structure has a + * non-zero value in a part the kernel does not understand. + * - EOPNOTSUPP: The IOCTL number is supported, and the structure is + * understood, however a known field has a value the kernel does not + * understand or support. + * - EINVAL: Everything about the IOCTL was understood, but a field is not + * correct. + * - ENOMEM: Out of memory. + * - ENODEV: The underlying device has been hot-unplugged and the FD is + * orphaned. + * + * Specific ioctls may return additional errnos as well. + */ +enum { + FWCTL_CMD_BASE = 0, + FWCTL_CMD_INFO = 0, + FWCTL_CMD_RPC = 1, +}; + +enum fwctl_device_type { + FWCTL_DEVICE_TYPE_ERROR = 0, + FWCTL_DEVICE_TYPE_MLX5 = 1, + FWCTL_DEVICE_TYPE_CXL = 2, + FWCTL_DEVICE_TYPE_PDS = 4, +}; + +/** + * struct fwctl_info - ioctl(FWCTL_INFO) + * @size: sizeof(struct fwctl_info) + * @flags: Must be 0 + * @out_device_type: Returns the type of the device from enum fwctl_device_type + * @device_data_len: On input the length of the out_device_data memory. On + * output the size of the kernel's device_data which may be larger or + * smaller than the input. May be 0 on input. + * @out_device_data: Pointer to a memory of device_data_len bytes. Kernel will + * fill the entire memory, zeroing as required. + * + * Returns basic information about this fwctl instance, particularly what driver + * is being used to define the device_data format. + */ +struct fwctl_info { + __u32 size; + __u32 flags; + __u32 out_device_type; + __u32 device_data_len; + __aligned_u64 out_device_data; +}; +#define FWCTL_INFO _IO(FWCTL_TYPE, FWCTL_CMD_INFO) + +/** + * enum fwctl_rpc_scope - Scope of access for the RPC + * + * Refer to fwctl.rst for a more detailed discussion of these scopes. + */ +enum fwctl_rpc_scope { + /** + * @FWCTL_RPC_CONFIGURATION: Device configuration access scope + * + * Read/write access to device configuration. When configuration + * is written to the device it remains in a fully supported state. + */ + FWCTL_RPC_CONFIGURATION = 0, + /** + * @FWCTL_RPC_DEBUG_READ_ONLY: Read only access to debug information + * + * Readable debug information. Debug information is compatible with + * kernel lockdown, and does not disclose any sensitive information. For + * instance, exposing any encryption secrets from this information is + * forbidden. + */ + FWCTL_RPC_DEBUG_READ_ONLY = 1, + /** + * @FWCTL_RPC_DEBUG_WRITE: Writable access to lockdown compatible debug information + * + * Allows write access to data in the device which may cause the device + * to leave a fully supported state. This is intended to permit intensive + * and possibly invasive debugging. This scope will taint the kernel. + */ + FWCTL_RPC_DEBUG_WRITE = 2, + /** + * @FWCTL_RPC_DEBUG_WRITE_FULL: Write access to all debug information + * + * Allows read/write access to everything. Requires CAP_SYS_RAW_IO, so + * it is not required to follow lockdown principles. If in doubt, + * debugging should be placed in this scope. This scope will taint the + * kernel.
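+ * + * A hedged sketch of requesting this scope (struct fwctl_rpc and FWCTL_RPC are defined below; fd, in_buf and out_buf are illustrative and hold a device specific message): + * + *	struct fwctl_rpc rpc = { + *		.size = sizeof(rpc), + *		.scope = FWCTL_RPC_DEBUG_WRITE_FULL, + *		.in_len = in_len, + *		.out_len = out_len, + *		.in = (__u64)(uintptr_t)in_buf, + *		.out = (__u64)(uintptr_t)out_buf, + *	}; + * + *	ioctl(fd, FWCTL_RPC, &rpc);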
+ */ + FWCTL_RPC_DEBUG_WRITE_FULL = 3, +}; + +/** + * struct fwctl_rpc - ioctl(FWCTL_RPC) + * @size: sizeof(struct fwctl_rpc) + * @scope: One of enum fwctl_rpc_scope, required scope for the RPC + * @in_len: Length of the in memory + * @out_len: Length of the out memory + * @in: Request message in device specific format + * @out: Response message in device specific format + * + * Deliver a Remote Procedure Call to the device FW and return the response. The + * call's parameters and return are marshaled into linear buffers of memory. Any + * errno indicates that delivery of the RPC to the device failed. Return status + * originating in the device during a successful delivery must be encoded into + * out. + * + * The format of the buffers matches the out_device_type from FWCTL_INFO. + */ +struct fwctl_rpc { + __u32 size; + __u32 scope; + __u32 in_len; + __u32 out_len; + __aligned_u64 in; + __aligned_u64 out; +}; +#define FWCTL_RPC _IO(FWCTL_TYPE, FWCTL_CMD_RPC) + +#endif diff --git a/include/uapi/fwctl/mlx5.h b/include/uapi/fwctl/mlx5.h new file mode 100644 index 000000000000..625819180ac6 --- /dev/null +++ b/include/uapi/fwctl/mlx5.h @@ -0,0 +1,36 @@ +/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ +/* + * Copyright (c) 2024-2025, NVIDIA CORPORATION & AFFILIATES + * + * These are definitions for the command interface for mlx5 HW. mlx5 FW has a + * User Context mechanism which allows the FW to understand a security scope. + * FWCTL binds each FD to a FW user context and then places the User Context ID + * (UID) in each command header. The created User Context has a capability set + * that is appropriate for FWCTL's security model. + * + * Command formation should use a copy of the structs in mlx5_ifc.h following + * the Programmer's Reference Manual. An open release is available here: + * + * https://network.nvidia.com/files/doc-2020/ethernet-adapters-programming-manual.pdf + * + * The device_type for this file is FWCTL_DEVICE_TYPE_MLX5. + */ +#ifndef _UAPI_FWCTL_MLX5_H +#define _UAPI_FWCTL_MLX5_H + +#include <linux/types.h> + +/** + * struct fwctl_info_mlx5 - ioctl(FWCTL_INFO) out_device_data + * @uid: The FW UID this FD is bound to. Each command header will force + * this value. + * @uctx_caps: The FW capabilities that are enabled for the UID. + * + * Return basic information about the FW interface available. + */ +struct fwctl_info_mlx5 { + __u32 uid; + __u32 uctx_caps; +}; + +#endif diff --git a/include/uapi/fwctl/pds.h b/include/uapi/fwctl/pds.h new file mode 100644 index 000000000000..66be32411642 --- /dev/null +++ b/include/uapi/fwctl/pds.h @@ -0,0 +1,62 @@ +/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ +/* Copyright(c) Advanced Micro Devices, Inc */ + +/* + * fwctl interface info for pds_fwctl + */ + +#ifndef _UAPI_FWCTL_PDS_H_ +#define _UAPI_FWCTL_PDS_H_ + +#include <linux/types.h> + +/** + * struct fwctl_info_pds + * @uctx_caps: bitmap of firmware capabilities + * + * Return basic information about the FW interface available.
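+ * + * A hedged retrieval sketch (fd is an open fwctl character device; error handling elided): + * + *	struct fwctl_info_pds pds_caps = {}; + *	struct fwctl_info info = { + *		.size = sizeof(info), + *		.device_data_len = sizeof(pds_caps), + *		.out_device_data = (__u64)(uintptr_t)&pds_caps, + *	}; + * + *	ioctl(fd, FWCTL_INFO, &info);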
+ */ +struct fwctl_info_pds { + __u32 uctx_caps; +}; + +/** + * enum pds_fwctl_capabilities + * @PDS_FWCTL_QUERY_CAP: firmware can be queried for information + * @PDS_FWCTL_SEND_CAP: firmware can be sent commands + */ +enum pds_fwctl_capabilities { + PDS_FWCTL_QUERY_CAP = 0, + PDS_FWCTL_SEND_CAP, +}; + +/** + * struct fwctl_rpc_pds + * @in.op: requested operation code + * @in.ep: firmware endpoint to operate on + * @in.rsvd: reserved + * @in.len: length of payload data + * @in.payload: address of payload buffer + * @in: rpc in parameters + * @out.retval: operation result value + * @out.rsvd: reserved + * @out.len: length of result data buffer + * @out.payload: address of payload data buffer + * @out: rpc out parameters + */ +struct fwctl_rpc_pds { + struct { + __u32 op; + __u32 ep; + __u32 rsvd; + __u32 len; + __aligned_u64 payload; + } in; + struct { + __u32 retval; + __u32 rsvd[2]; + __u32 len; + __aligned_u64 payload; + } out; +}; +#endif /* _UAPI_FWCTL_PDS_H_ */ diff --git a/kernel/panic.c b/kernel/panic.c index d8635d5cecb2..0c55eec9e874 100644 --- a/kernel/panic.c +++ b/kernel/panic.c @@ -511,6 +511,7 @@ const struct taint_flag taint_flags[TAINT_FLAGS_COUNT] = { TAINT_FLAG(AUX, 'X', ' ', true), TAINT_FLAG(RANDSTRUCT, 'T', ' ', true), TAINT_FLAG(TEST, 'N', ' ', true), + TAINT_FLAG(FWCTL, 'J', ' ', true), }; #undef TAINT_FLAG diff --git a/tools/debugging/kernel-chktaint b/tools/debugging/kernel-chktaint index 279be06332be..e7da0909d097 100755 --- a/tools/debugging/kernel-chktaint +++ b/tools/debugging/kernel-chktaint @@ -204,6 +204,14 @@ else echo " * an in-kernel test (such as a KUnit test) has been run (#18)" fi +T=`expr $T / 2` +if [ `expr $T % 2` -eq 0 ]; then + addout " " +else + addout "J" + echo " * fwctl's mutating debug interface was used (#19)" +fi + echo "For a more detailed explanation of the various taint flags see" echo " Documentation/admin-guide/tainted-kernels.rst in the Linux kernel sources" echo " or https://kernel.org/doc/html/latest/admin-guide/tainted-kernels.html" diff --git a/tools/testing/cxl/Kbuild b/tools/testing/cxl/Kbuild index b1256fee3567..0a6572ab6f37 100644 --- a/tools/testing/cxl/Kbuild +++ b/tools/testing/cxl/Kbuild @@ -63,6 +63,7 @@ cxl_core-y += $(CXL_CORE_SRC)/pmu.o cxl_core-y += $(CXL_CORE_SRC)/cdat.o cxl_core-$(CONFIG_TRACING) += $(CXL_CORE_SRC)/trace.o cxl_core-$(CONFIG_CXL_REGION) += $(CXL_CORE_SRC)/region.o +cxl_core-$(CONFIG_CXL_FEATURES) += $(CXL_CORE_SRC)/features.o cxl_core-y += config_check.o cxl_core-y += cxl_core_test.o cxl_core-y += cxl_core_exports.o diff --git a/tools/testing/cxl/test/mem.c b/tools/testing/cxl/test/mem.c index 8d731bd63988..9495dbcc03a7 100644 --- a/tools/testing/cxl/test/mem.c +++ b/tools/testing/cxl/test/mem.c @@ -45,6 +45,18 @@ static struct cxl_cel_entry mock_cel[] = { .effect = CXL_CMD_EFFECT_NONE, }, { + .opcode = cpu_to_le16(CXL_MBOX_OP_GET_SUPPORTED_FEATURES), + .effect = CXL_CMD_EFFECT_NONE, + }, + { + .opcode = cpu_to_le16(CXL_MBOX_OP_GET_FEATURE), + .effect = CXL_CMD_EFFECT_NONE, + }, + { + .opcode = cpu_to_le16(CXL_MBOX_OP_SET_FEATURE), + .effect = cpu_to_le16(EFFECT(CONF_CHANGE_IMMEDIATE)), + }, + { .opcode = cpu_to_le16(CXL_MBOX_OP_IDENTIFY), .effect = CXL_CMD_EFFECT_NONE, }, @@ -145,6 +157,10 @@ struct mock_event_store { u32 ev_status; }; +struct vendor_test_feat { + __le32 data; +} __packed; + struct cxl_mockmem_data { void *lsa; void *fw; @@ -161,6 +177,7 @@ struct cxl_mockmem_data { u8 event_buf[SZ_4K]; u64 timestamp; unsigned long sanitize_timeout; + struct vendor_test_feat test_feat; }; static 
struct mock_event_log *event_find_log(struct device *dev, int log_type) @@ -1354,6 +1371,151 @@ static int mock_activate_fw(struct cxl_mockmem_data *mdata, return -EINVAL; } +#define CXL_VENDOR_FEATURE_TEST \ + UUID_INIT(0xffffffff, 0xffff, 0xffff, 0xff, 0xff, 0xff, 0xff, 0xff, \ + 0xff, 0xff, 0xff) + +static void fill_feature_vendor_test(struct cxl_feat_entry *feat) +{ + feat->uuid = CXL_VENDOR_FEATURE_TEST; + feat->id = 0; + feat->get_feat_size = cpu_to_le16(0x4); + feat->set_feat_size = cpu_to_le16(0x4); + feat->flags = cpu_to_le32(CXL_FEATURE_F_CHANGEABLE | + CXL_FEATURE_F_DEFAULT_SEL | + CXL_FEATURE_F_SAVED_SEL); + feat->get_feat_ver = 1; + feat->set_feat_ver = 1; + feat->effects = cpu_to_le16(CXL_CMD_CONFIG_CHANGE_COLD_RESET | + CXL_CMD_EFFECTS_VALID); +} + +#define MAX_CXL_TEST_FEATS 1 + +static int mock_get_test_feature(struct cxl_mockmem_data *mdata, + struct cxl_mbox_cmd *cmd) +{ + struct vendor_test_feat *output = cmd->payload_out; + struct cxl_mbox_get_feat_in *input = cmd->payload_in; + u16 offset = le16_to_cpu(input->offset); + u16 count = le16_to_cpu(input->count); + u8 *ptr; + + if (offset > sizeof(*output)) { + cmd->return_code = CXL_MBOX_CMD_RC_INPUT; + return -EINVAL; + } + + if (offset + count > sizeof(*output)) { + cmd->return_code = CXL_MBOX_CMD_RC_INPUT; + return -EINVAL; + } + + ptr = (u8 *)&mdata->test_feat + offset; + memcpy((u8 *)output + offset, ptr, count); + + return 0; +} + +static int mock_get_feature(struct cxl_mockmem_data *mdata, + struct cxl_mbox_cmd *cmd) +{ + struct cxl_mbox_get_feat_in *input = cmd->payload_in; + + if (uuid_equal(&input->uuid, &CXL_VENDOR_FEATURE_TEST)) + return mock_get_test_feature(mdata, cmd); + + cmd->return_code = CXL_MBOX_CMD_RC_UNSUPPORTED; + + return -EOPNOTSUPP; +} + +static int mock_set_test_feature(struct cxl_mockmem_data *mdata, + struct cxl_mbox_cmd *cmd) +{ + struct cxl_mbox_set_feat_in *input = cmd->payload_in; + struct vendor_test_feat *test = + (struct vendor_test_feat *)input->feat_data; + u32 action; + + action = FIELD_GET(CXL_SET_FEAT_FLAG_DATA_TRANSFER_MASK, + le32_to_cpu(input->hdr.flags)); + /* + * While it is spec compliant to support other set actions, it is not + * necessary to add the complication in the emulation currently. Reject + * anything besides full xfer. 
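+ * + * A hedged sketch of the only input shape this mock accepts (illustrative; it simply restates the checks below): + * + *	input->hdr.flags = cpu_to_le32(CXL_SET_FEAT_FLAG_FULL_DATA_TRANSFER); + *	input->hdr.offset = cpu_to_le16(0);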
+ */ + if (action != CXL_SET_FEAT_FLAG_FULL_DATA_TRANSFER) { + cmd->return_code = CXL_MBOX_CMD_RC_INPUT; + return -EINVAL; + } + + /* Offset should be reserved when doing full transfer */ + if (input->hdr.offset) { + cmd->return_code = CXL_MBOX_CMD_RC_INPUT; + return -EINVAL; + } + + memcpy(&mdata->test_feat.data, &test->data, sizeof(u32)); + + return 0; +} + +static int mock_set_feature(struct cxl_mockmem_data *mdata, + struct cxl_mbox_cmd *cmd) +{ + struct cxl_mbox_set_feat_in *input = cmd->payload_in; + + if (uuid_equal(&input->hdr.uuid, &CXL_VENDOR_FEATURE_TEST)) + return mock_set_test_feature(mdata, cmd); + + cmd->return_code = CXL_MBOX_CMD_RC_UNSUPPORTED; + + return -EOPNOTSUPP; +} + +static int mock_get_supported_features(struct cxl_mockmem_data *mdata, + struct cxl_mbox_cmd *cmd) +{ + struct cxl_mbox_get_sup_feats_in *in = cmd->payload_in; + struct cxl_mbox_get_sup_feats_out *out = cmd->payload_out; + struct cxl_feat_entry *feat; + u16 start_idx, count; + + if (cmd->size_out < sizeof(*out)) { + cmd->return_code = CXL_MBOX_CMD_RC_PAYLOADLEN; + return -EINVAL; + } + + /* + * Current emulation only supports 1 feature + */ + start_idx = le16_to_cpu(in->start_idx); + if (start_idx != 0) { + cmd->return_code = CXL_MBOX_CMD_RC_INPUT; + return -EINVAL; + } + + count = le16_to_cpu(in->count); + if (count < struct_size(out, ents, 0)) { + cmd->return_code = CXL_MBOX_CMD_RC_PAYLOADLEN; + return -EINVAL; + } + + out->supported_feats = cpu_to_le16(MAX_CXL_TEST_FEATS); + cmd->return_code = 0; + if (count < struct_size(out, ents, MAX_CXL_TEST_FEATS)) { + out->num_entries = 0; + return 0; + } + + out->num_entries = cpu_to_le16(MAX_CXL_TEST_FEATS); + feat = out->ents; + fill_feature_vendor_test(feat); + + return 0; +} + static int cxl_mock_mbox_send(struct cxl_mailbox *cxl_mbox, struct cxl_mbox_cmd *cmd) { @@ -1439,6 +1601,15 @@ static int cxl_mock_mbox_send(struct cxl_mailbox *cxl_mbox, case CXL_MBOX_OP_ACTIVATE_FW: rc = mock_activate_fw(mdata, cmd); break; + case CXL_MBOX_OP_GET_SUPPORTED_FEATURES: + rc = mock_get_supported_features(mdata, cmd); + break; + case CXL_MBOX_OP_GET_FEATURE: + rc = mock_get_feature(mdata, cmd); + break; + case CXL_MBOX_OP_SET_FEATURE: + rc = mock_set_feature(mdata, cmd); + break; default: break; } @@ -1486,6 +1657,11 @@ static int cxl_mock_mbox_send(struct cxl_mailbox *cxl_mbox, return 0; } +static void cxl_mock_test_feat_init(struct cxl_mockmem_data *mdata) +{ + mdata->test_feat.data = cpu_to_le32(0xdeadbeef); +} + static int cxl_mock_mem_probe(struct platform_device *pdev) { struct device *dev = &pdev->dev; @@ -1558,6 +1734,10 @@ static int cxl_mock_mem_probe(struct platform_device *pdev) if (rc) return rc; + rc = devm_cxl_setup_features(cxlds); + if (rc) + dev_dbg(dev, "No CXL Features discovered\n"); + cxl_mock_add_event_logs(&mdata->mes); cxlmd = devm_cxl_add_memdev(&pdev->dev, cxlds); @@ -1572,7 +1752,12 @@ static int cxl_mock_mem_probe(struct platform_device *pdev) if (rc) return rc; + rc = devm_cxl_setup_fwctl(cxlmd); + if (rc) + dev_dbg(dev, "No CXL FWCTL setup\n"); + cxl_mem_get_event_records(mds, CXLDEV_EVENT_STATUS_ALL); + cxl_mock_test_feat_init(mdata); return 0; }