aboutsummaryrefslogtreecommitdiffstats
path: root/arch/powerpc/platforms/pseries/vas.c
diff options
context:
space:
mode:
Diffstat (limited to 'arch/powerpc/platforms/pseries/vas.c')
-rw-r--r--arch/powerpc/platforms/pseries/vas.c595
1 files changed, 595 insertions, 0 deletions
diff --git a/arch/powerpc/platforms/pseries/vas.c b/arch/powerpc/platforms/pseries/vas.c
new file mode 100644
index 000000000000..b5c1cf1bc64d
--- /dev/null
+++ b/arch/powerpc/platforms/pseries/vas.c
@@ -0,0 +1,595 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * Copyright 2020-21 IBM Corp.
+ */
+
+#define pr_fmt(fmt) "vas: " fmt
+
+#include <linux/module.h>
+#include <linux/kernel.h>
+#include <linux/export.h>
+#include <linux/types.h>
+#include <linux/delay.h>
+#include <linux/slab.h>
+#include <linux/interrupt.h>
+#include <linux/irqdomain.h>
+#include <asm/machdep.h>
+#include <asm/hvcall.h>
+#include <asm/plpar_wrappers.h>
+#include <asm/vas.h>
+#include "vas.h"
+
+#define VAS_INVALID_WIN_ADDRESS 0xFFFFFFFFFFFFFFFFul
+#define VAS_DEFAULT_DOMAIN_ID 0xFFFFFFFFFFFFFFFFul
+/* The hypervisor allows one credit per window right now */
+#define DEF_WIN_CREDS 1
+
+static struct vas_all_caps caps_all;
+static bool copypaste_feat;
+
+static struct vas_caps vascaps[VAS_MAX_FEAT_TYPE];
+static DEFINE_MUTEX(vas_pseries_mutex);
+
+static long hcall_return_busy_check(long rc)
+{
+ /* Check if we are stalled for some time */
+ if (H_IS_LONG_BUSY(rc)) {
+ msleep(get_longbusy_msecs(rc));
+ rc = H_BUSY;
+ } else if (rc == H_BUSY) {
+ cond_resched();
+ }
+
+ return rc;
+}
+
+/*
+ * Allocate VAS window hcall
+ */
+static int h_allocate_vas_window(struct pseries_vas_window *win, u64 *domain,
+ u8 wintype, u16 credits)
+{
+ long retbuf[PLPAR_HCALL9_BUFSIZE] = {0};
+ long rc;
+
+ do {
+ rc = plpar_hcall9(H_ALLOCATE_VAS_WINDOW, retbuf, wintype,
+ credits, domain[0], domain[1], domain[2],
+ domain[3], domain[4], domain[5]);
+
+ rc = hcall_return_busy_check(rc);
+ } while (rc == H_BUSY);
+
+ if (rc == H_SUCCESS) {
+ if (win->win_addr == VAS_INVALID_WIN_ADDRESS) {
+ pr_err("H_ALLOCATE_VAS_WINDOW: COPY/PASTE is not supported\n");
+ return -ENOTSUPP;
+ }
+ win->vas_win.winid = retbuf[0];
+ win->win_addr = retbuf[1];
+ win->complete_irq = retbuf[2];
+ win->fault_irq = retbuf[3];
+ return 0;
+ }
+
+ pr_err("H_ALLOCATE_VAS_WINDOW error: %ld, wintype: %u, credits: %u\n",
+ rc, wintype, credits);
+
+ return -EIO;
+}
+
+/*
+ * Deallocate VAS window hcall.
+ */
+static int h_deallocate_vas_window(u64 winid)
+{
+ long rc;
+
+ do {
+ rc = plpar_hcall_norets(H_DEALLOCATE_VAS_WINDOW, winid);
+
+ rc = hcall_return_busy_check(rc);
+ } while (rc == H_BUSY);
+
+ if (rc == H_SUCCESS)
+ return 0;
+
+ pr_err("H_DEALLOCATE_VAS_WINDOW error: %ld, winid: %llu\n",
+ rc, winid);
+ return -EIO;
+}
+
+/*
+ * Modify VAS window.
+ * After the window is opened with allocate window hcall, configure it
+ * with flags and LPAR PID before using.
+ */
+static int h_modify_vas_window(struct pseries_vas_window *win)
+{
+ long rc;
+ u32 lpid = mfspr(SPRN_PID);
+
+ /*
+ * AMR value is not supported in Linux VAS implementation.
+ * The hypervisor ignores it if 0 is passed.
+ */
+ do {
+ rc = plpar_hcall_norets(H_MODIFY_VAS_WINDOW,
+ win->vas_win.winid, lpid, 0,
+ VAS_MOD_WIN_FLAGS, 0);
+
+ rc = hcall_return_busy_check(rc);
+ } while (rc == H_BUSY);
+
+ if (rc == H_SUCCESS)
+ return 0;
+
+ pr_err("H_MODIFY_VAS_WINDOW error: %ld, winid %u lpid %u\n",
+ rc, win->vas_win.winid, lpid);
+ return -EIO;
+}
+
+/*
+ * This hcall is used to determine the capabilities from the hypervisor.
+ * @hcall: H_QUERY_VAS_CAPABILITIES or H_QUERY_NX_CAPABILITIES
+ * @query_type: If 0 is passed, the hypervisor returns the overall
+ * capabilities which provides all feature(s) that are
+ * available. Then query the hypervisor to get the
+ * corresponding capabilities for the specific feature.
+ * Example: H_QUERY_VAS_CAPABILITIES provides VAS GZIP QoS
+ * and VAS GZIP Default capabilities.
+ * H_QUERY_NX_CAPABILITIES provides NX GZIP
+ * capabilities.
+ * @result: Return buffer to save capabilities.
+ */
+int h_query_vas_capabilities(const u64 hcall, u8 query_type, u64 result)
+{
+ long rc;
+
+ rc = plpar_hcall_norets(hcall, query_type, result);
+
+ if (rc == H_SUCCESS)
+ return 0;
+
+ pr_err("HCALL(%llx) error %ld, query_type %u, result buffer 0x%llx\n",
+ hcall, rc, query_type, result);
+ return -EIO;
+}
+EXPORT_SYMBOL_GPL(h_query_vas_capabilities);
+
+/*
+ * hcall to get fault CRB from the hypervisor.
+ */
+static int h_get_nx_fault(u32 winid, u64 buffer)
+{
+ long rc;
+
+ rc = plpar_hcall_norets(H_GET_NX_FAULT, winid, buffer);
+
+ if (rc == H_SUCCESS)
+ return 0;
+
+ pr_err("H_GET_NX_FAULT error: %ld, winid %u, buffer 0x%llx\n",
+ rc, winid, buffer);
+ return -EIO;
+
+}
+
+/*
+ * Handle the fault interrupt.
+ * When the fault interrupt is received for each window, query the
+ * hypervisor to get the fault CRB on the specific fault. Then
+ * process the CRB by updating CSB or send signal if the user space
+ * CSB is invalid.
+ * Note: The hypervisor forwards an interrupt for each fault request.
+ * So one fault CRB to process for each H_GET_NX_FAULT hcall.
+ */
+irqreturn_t pseries_vas_fault_thread_fn(int irq, void *data)
+{
+ struct pseries_vas_window *txwin = data;
+ struct coprocessor_request_block crb;
+ struct vas_user_win_ref *tsk_ref;
+ int rc;
+
+ rc = h_get_nx_fault(txwin->vas_win.winid, (u64)virt_to_phys(&crb));
+ if (!rc) {
+ tsk_ref = &txwin->vas_win.task_ref;
+ vas_dump_crb(&crb);
+ vas_update_csb(&crb, tsk_ref);
+ }
+
+ return IRQ_HANDLED;
+}
+
+/*
+ * Allocate window and setup IRQ mapping.
+ */
+static int allocate_setup_window(struct pseries_vas_window *txwin,
+ u64 *domain, u8 wintype)
+{
+ int rc;
+
+ rc = h_allocate_vas_window(txwin, domain, wintype, DEF_WIN_CREDS);
+ if (rc)
+ return rc;
+ /*
+ * On PowerVM, the hypervisor setup and forwards the fault
+ * interrupt per window. So the IRQ setup and fault handling
+ * will be done for each open window separately.
+ */
+ txwin->fault_virq = irq_create_mapping(NULL, txwin->fault_irq);
+ if (!txwin->fault_virq) {
+ pr_err("Failed irq mapping %d\n", txwin->fault_irq);
+ rc = -EINVAL;
+ goto out_win;
+ }
+
+ txwin->name = kasprintf(GFP_KERNEL, "vas-win-%d",
+ txwin->vas_win.winid);
+ if (!txwin->name) {
+ rc = -ENOMEM;
+ goto out_irq;
+ }
+
+ rc = request_threaded_irq(txwin->fault_virq, NULL,
+ pseries_vas_fault_thread_fn, IRQF_ONESHOT,
+ txwin->name, txwin);
+ if (rc) {
+ pr_err("VAS-Window[%d]: Request IRQ(%u) failed with %d\n",
+ txwin->vas_win.winid, txwin->fault_virq, rc);
+ goto out_free;
+ }
+
+ txwin->vas_win.wcreds_max = DEF_WIN_CREDS;
+
+ return 0;
+out_free:
+ kfree(txwin->name);
+out_irq:
+ irq_dispose_mapping(txwin->fault_virq);
+out_win:
+ h_deallocate_vas_window(txwin->vas_win.winid);
+ return rc;
+}
+
+static inline void free_irq_setup(struct pseries_vas_window *txwin)
+{
+ free_irq(txwin->fault_virq, txwin);
+ kfree(txwin->name);
+ irq_dispose_mapping(txwin->fault_virq);
+}
+
+static struct vas_window *vas_allocate_window(int vas_id, u64 flags,
+ enum vas_cop_type cop_type)
+{
+ long domain[PLPAR_HCALL9_BUFSIZE] = {VAS_DEFAULT_DOMAIN_ID};
+ struct vas_cop_feat_caps *cop_feat_caps;
+ struct vas_caps *caps;
+ struct pseries_vas_window *txwin;
+ int rc;
+
+ txwin = kzalloc(sizeof(*txwin), GFP_KERNEL);
+ if (!txwin)
+ return ERR_PTR(-ENOMEM);
+
+ /*
+ * A VAS window can have many credits which means that many
+ * requests can be issued simultaneously. But the hypervisor
+ * restricts one credit per window.
+ * The hypervisor introduces 2 different types of credits:
+ * Default credit type (Uses normal priority FIFO):
+ * A limited number of credits are assigned to partitions
+ * based on processor entitlement. But these credits may be
+ * over-committed on a system depends on whether the CPUs
+ * are in shared or dedicated modes - that is, more requests
+ * may be issued across the system than NX can service at
+ * once which can result in paste command failure (RMA_busy).
+ * Then the process has to resend requests or fall-back to
+ * SW compression.
+ * Quality of Service (QoS) credit type (Uses high priority FIFO):
+ * To avoid NX HW contention, the system admins can assign
+ * QoS credits for each LPAR so that this partition is
+ * guaranteed access to NX resources. These credits are
+ * assigned to partitions via the HMC.
+ * Refer PAPR for more information.
+ *
+ * Allocate window with QoS credits if user requested. Otherwise
+ * default credits are used.
+ */
+ if (flags & VAS_TX_WIN_FLAG_QOS_CREDIT)
+ caps = &vascaps[VAS_GZIP_QOS_FEAT_TYPE];
+ else
+ caps = &vascaps[VAS_GZIP_DEF_FEAT_TYPE];
+
+ cop_feat_caps = &caps->caps;
+
+ if (atomic_inc_return(&cop_feat_caps->used_lpar_creds) >
+ atomic_read(&cop_feat_caps->target_lpar_creds)) {
+ pr_err("Credits are not available to allocate window\n");
+ rc = -EINVAL;
+ goto out;
+ }
+
+ if (vas_id == -1) {
+ /*
+ * The user space is requesting to allocate a window on
+ * a VAS instance where the process is executing.
+ * On PowerVM, domain values are passed to the hypervisor
+ * to select VAS instance. Useful if the process is
+ * affinity to NUMA node.
+ * The hypervisor selects VAS instance if
+ * VAS_DEFAULT_DOMAIN_ID (-1) is passed for domain values.
+ * The h_allocate_vas_window hcall is defined to take a
+ * domain values as specified by h_home_node_associativity,
+ * So no unpacking needs to be done.
+ */
+ rc = plpar_hcall9(H_HOME_NODE_ASSOCIATIVITY, domain,
+ VPHN_FLAG_VCPU, smp_processor_id());
+ if (rc != H_SUCCESS) {
+ pr_err("H_HOME_NODE_ASSOCIATIVITY error: %d\n", rc);
+ goto out;
+ }
+ }
+
+ /*
+ * Allocate / Deallocate window hcalls and setup / free IRQs
+ * have to be protected with mutex.
+ * Open VAS window: Allocate window hcall and setup IRQ
+ * Close VAS window: Deallocate window hcall and free IRQ
+ * The hypervisor waits until all NX requests are
+ * completed before closing the window. So expects OS
+ * to handle NX faults, means IRQ can be freed only
+ * after the deallocate window hcall is returned.
+ * So once the window is closed with deallocate hcall before
+ * the IRQ is freed, it can be assigned to new allocate
+ * hcall with the same fault IRQ by the hypervisor. It can
+ * result in setup IRQ fail for the new window since the
+ * same fault IRQ is not freed by the OS before.
+ */
+ mutex_lock(&vas_pseries_mutex);
+ rc = allocate_setup_window(txwin, (u64 *)&domain[0],
+ cop_feat_caps->win_type);
+ mutex_unlock(&vas_pseries_mutex);
+ if (rc)
+ goto out;
+
+ /*
+ * Modify window and it is ready to use.
+ */
+ rc = h_modify_vas_window(txwin);
+ if (!rc)
+ rc = get_vas_user_win_ref(&txwin->vas_win.task_ref);
+ if (rc)
+ goto out_free;
+
+ vas_user_win_add_mm_context(&txwin->vas_win.task_ref);
+ txwin->win_type = cop_feat_caps->win_type;
+ mutex_lock(&vas_pseries_mutex);
+ list_add(&txwin->win_list, &caps->list);
+ mutex_unlock(&vas_pseries_mutex);
+
+ return &txwin->vas_win;
+
+out_free:
+ /*
+ * Window is not operational. Free IRQ before closing
+ * window so that do not have to hold mutex.
+ */
+ free_irq_setup(txwin);
+ h_deallocate_vas_window(txwin->vas_win.winid);
+out:
+ atomic_dec(&cop_feat_caps->used_lpar_creds);
+ kfree(txwin);
+ return ERR_PTR(rc);
+}
+
+static u64 vas_paste_address(struct vas_window *vwin)
+{
+ struct pseries_vas_window *win;
+
+ win = container_of(vwin, struct pseries_vas_window, vas_win);
+ return win->win_addr;
+}
+
+static int deallocate_free_window(struct pseries_vas_window *win)
+{
+ int rc = 0;
+
+ /*
+ * The hypervisor waits for all requests including faults
+ * are processed before closing the window - Means all
+ * credits have to be returned. In the case of fault
+ * request, a credit is returned after OS issues
+ * H_GET_NX_FAULT hcall.
+ * So free IRQ after executing H_DEALLOCATE_VAS_WINDOW
+ * hcall.
+ */
+ rc = h_deallocate_vas_window(win->vas_win.winid);
+ if (!rc)
+ free_irq_setup(win);
+
+ return rc;
+}
+
+static int vas_deallocate_window(struct vas_window *vwin)
+{
+ struct pseries_vas_window *win;
+ struct vas_cop_feat_caps *caps;
+ int rc = 0;
+
+ if (!vwin)
+ return -EINVAL;
+
+ win = container_of(vwin, struct pseries_vas_window, vas_win);
+
+ /* Should not happen */
+ if (win->win_type >= VAS_MAX_FEAT_TYPE) {
+ pr_err("Window (%u): Invalid window type %u\n",
+ vwin->winid, win->win_type);
+ return -EINVAL;
+ }
+
+ caps = &vascaps[win->win_type].caps;
+ mutex_lock(&vas_pseries_mutex);
+ rc = deallocate_free_window(win);
+ if (rc) {
+ mutex_unlock(&vas_pseries_mutex);
+ return rc;
+ }
+
+ list_del(&win->win_list);
+ atomic_dec(&caps->used_lpar_creds);
+ mutex_unlock(&vas_pseries_mutex);
+
+ put_vas_user_win_ref(&vwin->task_ref);
+ mm_context_remove_vas_window(vwin->task_ref.mm);
+
+ kfree(win);
+ return 0;
+}
+
+static const struct vas_user_win_ops vops_pseries = {
+ .open_win = vas_allocate_window, /* Open and configure window */
+ .paste_addr = vas_paste_address, /* To do copy/paste */
+ .close_win = vas_deallocate_window, /* Close window */
+};
+
+/*
+ * Supporting only nx-gzip coprocessor type now, but this API code
+ * extended to other coprocessor types later.
+ */
+int vas_register_api_pseries(struct module *mod, enum vas_cop_type cop_type,
+ const char *name)
+{
+ int rc;
+
+ if (!copypaste_feat)
+ return -ENOTSUPP;
+
+ rc = vas_register_coproc_api(mod, cop_type, name, &vops_pseries);
+
+ return rc;
+}
+EXPORT_SYMBOL_GPL(vas_register_api_pseries);
+
+void vas_unregister_api_pseries(void)
+{
+ vas_unregister_coproc_api();
+}
+EXPORT_SYMBOL_GPL(vas_unregister_api_pseries);
+
+/*
+ * Get the specific capabilities based on the feature type.
+ * Right now supports GZIP default and GZIP QoS capabilities.
+ */
+static int get_vas_capabilities(u8 feat, enum vas_cop_feat_type type,
+ struct hv_vas_cop_feat_caps *hv_caps)
+{
+ struct vas_cop_feat_caps *caps;
+ struct vas_caps *vcaps;
+ int rc = 0;
+
+ vcaps = &vascaps[type];
+ memset(vcaps, 0, sizeof(*vcaps));
+ INIT_LIST_HEAD(&vcaps->list);
+
+ caps = &vcaps->caps;
+
+ rc = h_query_vas_capabilities(H_QUERY_VAS_CAPABILITIES, feat,
+ (u64)virt_to_phys(hv_caps));
+ if (rc)
+ return rc;
+
+ caps->user_mode = hv_caps->user_mode;
+ if (!(caps->user_mode & VAS_COPY_PASTE_USER_MODE)) {
+ pr_err("User space COPY/PASTE is not supported\n");
+ return -ENOTSUPP;
+ }
+
+ caps->descriptor = be64_to_cpu(hv_caps->descriptor);
+ caps->win_type = hv_caps->win_type;
+ if (caps->win_type >= VAS_MAX_FEAT_TYPE) {
+ pr_err("Unsupported window type %u\n", caps->win_type);
+ return -EINVAL;
+ }
+ caps->max_lpar_creds = be16_to_cpu(hv_caps->max_lpar_creds);
+ caps->max_win_creds = be16_to_cpu(hv_caps->max_win_creds);
+ atomic_set(&caps->target_lpar_creds,
+ be16_to_cpu(hv_caps->target_lpar_creds));
+ if (feat == VAS_GZIP_DEF_FEAT) {
+ caps->def_lpar_creds = be16_to_cpu(hv_caps->def_lpar_creds);
+
+ if (caps->max_win_creds < DEF_WIN_CREDS) {
+ pr_err("Window creds(%u) > max allowed window creds(%u)\n",
+ DEF_WIN_CREDS, caps->max_win_creds);
+ return -EINVAL;
+ }
+ }
+
+ copypaste_feat = true;
+
+ return 0;
+}
+
+static int __init pseries_vas_init(void)
+{
+ struct hv_vas_cop_feat_caps *hv_cop_caps;
+ struct hv_vas_all_caps *hv_caps;
+ int rc;
+
+ /*
+ * Linux supports user space COPY/PASTE only with Radix
+ */
+ if (!radix_enabled()) {
+ pr_err("API is supported only with radix page tables\n");
+ return -ENOTSUPP;
+ }
+
+ hv_caps = kmalloc(sizeof(*hv_caps), GFP_KERNEL);
+ if (!hv_caps)
+ return -ENOMEM;
+ /*
+ * Get VAS overall capabilities by passing 0 to feature type.
+ */
+ rc = h_query_vas_capabilities(H_QUERY_VAS_CAPABILITIES, 0,
+ (u64)virt_to_phys(hv_caps));
+ if (rc)
+ goto out;
+
+ caps_all.descriptor = be64_to_cpu(hv_caps->descriptor);
+ caps_all.feat_type = be64_to_cpu(hv_caps->feat_type);
+
+ hv_cop_caps = kmalloc(sizeof(*hv_cop_caps), GFP_KERNEL);
+ if (!hv_cop_caps) {
+ rc = -ENOMEM;
+ goto out;
+ }
+ /*
+ * QOS capabilities available
+ */
+ if (caps_all.feat_type & VAS_GZIP_QOS_FEAT_BIT) {
+ rc = get_vas_capabilities(VAS_GZIP_QOS_FEAT,
+ VAS_GZIP_QOS_FEAT_TYPE, hv_cop_caps);
+
+ if (rc)
+ goto out_cop;
+ }
+ /*
+ * Default capabilities available
+ */
+ if (caps_all.feat_type & VAS_GZIP_DEF_FEAT_BIT) {
+ rc = get_vas_capabilities(VAS_GZIP_DEF_FEAT,
+ VAS_GZIP_DEF_FEAT_TYPE, hv_cop_caps);
+ if (rc)
+ goto out_cop;
+ }
+
+ pr_info("GZIP feature is available\n");
+
+out_cop:
+ kfree(hv_cop_caps);
+out:
+ kfree(hv_caps);
+ return rc;
+}
+machine_device_initcall(pseries, pseries_vas_init);