author		Alex Williamson <alex.williamson@redhat.com>	2022-05-11 13:08:49 -0600
committer	Alex Williamson <alex.williamson@redhat.com>	2022-05-11 13:08:49 -0600
commit		920df8d6ef122a4129960d410209ee92614667ca (patch)
tree		b4e158d96df0c04af5e80d5f4db53c85c6c9a129 /drivers/vfio
parent		Linux 5.18-rc6 (diff)
parent		vfio/mlx5: Run the SAVE state command in an async mode (diff)
Merge tag 'mlx5-lm-parallel' of https://git.kernel.org/pub/scm/linux/kernel/git/mellanox/linux into v5.19/vfio/next
Improve mlx5 live migration driver

From Yishai:

This series improves the mlx5 live migration driver in a few aspects,
described below.

Refactor to enable running migration commands in parallel over the PF
command interface. To achieve that, mlx5_core now exposes an API that
lets the VF be notified before the PF command interface goes down/up
(e.g. on a PF reload upon health recovery).

With that functionality in place, mlx5 vfio no longer needs to take the
global PF lock when using the command interface; it can rely on the
notification mechanism to stay in sync with the PF. From the kernel
driver's point of view, this enables migrating multiple VFs in parallel
over the PF command interface.

In addition, the SAVE state command now runs in the PF's async command
mode. This lets the driver return to user space as soon as the command
has been issued successfully, improving latency by letting things run
in parallel.

Alex, as this series touches mlx5_core, we may need to send it as a
pull request to VFIO to avoid conflicts before acceptance.

Link: https://lore.kernel.org/all/20220510090206.90374-1-yishaih@nvidia.com
Signed-off-by: Leon Romanovsky <leonro@nvidia.com>
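For readers skimming the diff, a condensed sketch of the command-path change
described above. The identifiers are all taken from the diff below, but the
composite function itself is illustrative, not code from the series:

/*
 * Illustrative composite: the shape of a migration command once the
 * PF-notifier scheme is in place. The blocking notifier (mlx5fv_vf_event
 * in the diff) flips mdev_detach under state_mutex, so a plain flag
 * check replaces the old mlx5_vf_get_core_dev()/mlx5_vf_put_core_dev()
 * pattern that serialized all VFs on the PF.
 */
static int mlx5vf_example_cmd(struct mlx5vf_pci_core_device *mvdev)
{
	u32 out[MLX5_ST_SZ_DW(suspend_vhca_out)] = {};
	u32 in[MLX5_ST_SZ_DW(suspend_vhca_in)] = {};

	lockdep_assert_held(&mvdev->state_mutex);
	if (mvdev->mdev_detach)		/* PF command interface is down */
		return -ENOTCONN;

	MLX5_SET(suspend_vhca_in, in, opcode, MLX5_CMD_OP_SUSPEND_VHCA);
	MLX5_SET(suspend_vhca_in, in, vhca_id, mvdev->vhca_id);
	return mlx5_cmd_exec_inout(mvdev->mdev, suspend_vhca, in, out);
}

For the async part, SAVE is issued with mlx5_cmd_exec_cb() and completion is
reported through mlx5vf_save_callback(), which wakes readers of the migration
file via the .poll handler added in main.c.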
Diffstat (limited to 'drivers/vfio')
-rw-r--r--	drivers/vfio/pci/mlx5/cmd.c	| 236
-rw-r--r--	drivers/vfio/pci/mlx5/cmd.h	|  52
-rw-r--r--	drivers/vfio/pci/mlx5/main.c	| 122
3 files changed, 275 insertions(+), 135 deletions(-)
diff --git a/drivers/vfio/pci/mlx5/cmd.c b/drivers/vfio/pci/mlx5/cmd.c
index 5c9f9218cc1d..9b9f33ca270a 100644
--- a/drivers/vfio/pci/mlx5/cmd.c
+++ b/drivers/vfio/pci/mlx5/cmd.c
@@ -5,89 +5,157 @@
#include "cmd.h"
-int mlx5vf_cmd_suspend_vhca(struct pci_dev *pdev, u16 vhca_id, u16 op_mod)
+static int mlx5vf_cmd_get_vhca_id(struct mlx5_core_dev *mdev, u16 function_id,
+ u16 *vhca_id);
+
+int mlx5vf_cmd_suspend_vhca(struct mlx5vf_pci_core_device *mvdev, u16 op_mod)
{
- struct mlx5_core_dev *mdev = mlx5_vf_get_core_dev(pdev);
u32 out[MLX5_ST_SZ_DW(suspend_vhca_out)] = {};
u32 in[MLX5_ST_SZ_DW(suspend_vhca_in)] = {};
- int ret;
- if (!mdev)
+ lockdep_assert_held(&mvdev->state_mutex);
+ if (mvdev->mdev_detach)
return -ENOTCONN;
MLX5_SET(suspend_vhca_in, in, opcode, MLX5_CMD_OP_SUSPEND_VHCA);
- MLX5_SET(suspend_vhca_in, in, vhca_id, vhca_id);
+ MLX5_SET(suspend_vhca_in, in, vhca_id, mvdev->vhca_id);
MLX5_SET(suspend_vhca_in, in, op_mod, op_mod);
- ret = mlx5_cmd_exec_inout(mdev, suspend_vhca, in, out);
- mlx5_vf_put_core_dev(mdev);
- return ret;
+ return mlx5_cmd_exec_inout(mvdev->mdev, suspend_vhca, in, out);
}
-int mlx5vf_cmd_resume_vhca(struct pci_dev *pdev, u16 vhca_id, u16 op_mod)
+int mlx5vf_cmd_resume_vhca(struct mlx5vf_pci_core_device *mvdev, u16 op_mod)
{
- struct mlx5_core_dev *mdev = mlx5_vf_get_core_dev(pdev);
u32 out[MLX5_ST_SZ_DW(resume_vhca_out)] = {};
u32 in[MLX5_ST_SZ_DW(resume_vhca_in)] = {};
- int ret;
- if (!mdev)
+ lockdep_assert_held(&mvdev->state_mutex);
+ if (mvdev->mdev_detach)
return -ENOTCONN;
MLX5_SET(resume_vhca_in, in, opcode, MLX5_CMD_OP_RESUME_VHCA);
- MLX5_SET(resume_vhca_in, in, vhca_id, vhca_id);
+ MLX5_SET(resume_vhca_in, in, vhca_id, mvdev->vhca_id);
MLX5_SET(resume_vhca_in, in, op_mod, op_mod);
- ret = mlx5_cmd_exec_inout(mdev, resume_vhca, in, out);
- mlx5_vf_put_core_dev(mdev);
- return ret;
+ return mlx5_cmd_exec_inout(mvdev->mdev, resume_vhca, in, out);
}
-int mlx5vf_cmd_query_vhca_migration_state(struct pci_dev *pdev, u16 vhca_id,
+int mlx5vf_cmd_query_vhca_migration_state(struct mlx5vf_pci_core_device *mvdev,
size_t *state_size)
{
- struct mlx5_core_dev *mdev = mlx5_vf_get_core_dev(pdev);
u32 out[MLX5_ST_SZ_DW(query_vhca_migration_state_out)] = {};
u32 in[MLX5_ST_SZ_DW(query_vhca_migration_state_in)] = {};
int ret;
- if (!mdev)
+ lockdep_assert_held(&mvdev->state_mutex);
+ if (mvdev->mdev_detach)
return -ENOTCONN;
MLX5_SET(query_vhca_migration_state_in, in, opcode,
MLX5_CMD_OP_QUERY_VHCA_MIGRATION_STATE);
- MLX5_SET(query_vhca_migration_state_in, in, vhca_id, vhca_id);
+ MLX5_SET(query_vhca_migration_state_in, in, vhca_id, mvdev->vhca_id);
MLX5_SET(query_vhca_migration_state_in, in, op_mod, 0);
- ret = mlx5_cmd_exec_inout(mdev, query_vhca_migration_state, in, out);
+ ret = mlx5_cmd_exec_inout(mvdev->mdev, query_vhca_migration_state, in,
+ out);
if (ret)
- goto end;
+ return ret;
*state_size = MLX5_GET(query_vhca_migration_state_out, out,
required_umem_size);
+ return 0;
+}
+
+static int mlx5fv_vf_event(struct notifier_block *nb,
+ unsigned long event, void *data)
+{
+ struct mlx5vf_pci_core_device *mvdev =
+ container_of(nb, struct mlx5vf_pci_core_device, nb);
+
+ mutex_lock(&mvdev->state_mutex);
+ switch (event) {
+ case MLX5_PF_NOTIFY_ENABLE_VF:
+ mvdev->mdev_detach = false;
+ break;
+ case MLX5_PF_NOTIFY_DISABLE_VF:
+ mlx5vf_disable_fds(mvdev);
+ mvdev->mdev_detach = true;
+ break;
+ default:
+ break;
+ }
+ mlx5vf_state_mutex_unlock(mvdev);
+ return 0;
+}
+
+void mlx5vf_cmd_remove_migratable(struct mlx5vf_pci_core_device *mvdev)
+{
+ if (!mvdev->migrate_cap)
+ return;
+
+ mlx5_sriov_blocking_notifier_unregister(mvdev->mdev, mvdev->vf_id,
+ &mvdev->nb);
+ destroy_workqueue(mvdev->cb_wq);
+}
+
+void mlx5vf_cmd_set_migratable(struct mlx5vf_pci_core_device *mvdev)
+{
+ struct pci_dev *pdev = mvdev->core_device.pdev;
+ int ret;
+
+ if (!pdev->is_virtfn)
+ return;
+
+ mvdev->mdev = mlx5_vf_get_core_dev(pdev);
+ if (!mvdev->mdev)
+ return;
+
+ if (!MLX5_CAP_GEN(mvdev->mdev, migration))
+ goto end;
+
+ mvdev->vf_id = pci_iov_vf_id(pdev);
+ if (mvdev->vf_id < 0)
+ goto end;
+
+ if (mlx5vf_cmd_get_vhca_id(mvdev->mdev, mvdev->vf_id + 1,
+ &mvdev->vhca_id))
+ goto end;
+
+ mvdev->cb_wq = alloc_ordered_workqueue("mlx5vf_wq", 0);
+ if (!mvdev->cb_wq)
+ goto end;
+
+ mutex_init(&mvdev->state_mutex);
+ spin_lock_init(&mvdev->reset_lock);
+ mvdev->nb.notifier_call = mlx5fv_vf_event;
+ ret = mlx5_sriov_blocking_notifier_register(mvdev->mdev, mvdev->vf_id,
+ &mvdev->nb);
+ if (ret) {
+ destroy_workqueue(mvdev->cb_wq);
+ goto end;
+ }
+
+ mvdev->migrate_cap = 1;
+ mvdev->core_device.vdev.migration_flags =
+ VFIO_MIGRATION_STOP_COPY |
+ VFIO_MIGRATION_P2P;
end:
- mlx5_vf_put_core_dev(mdev);
- return ret;
+ mlx5_vf_put_core_dev(mvdev->mdev);
}
-int mlx5vf_cmd_get_vhca_id(struct pci_dev *pdev, u16 function_id, u16 *vhca_id)
+static int mlx5vf_cmd_get_vhca_id(struct mlx5_core_dev *mdev, u16 function_id,
+ u16 *vhca_id)
{
- struct mlx5_core_dev *mdev = mlx5_vf_get_core_dev(pdev);
u32 in[MLX5_ST_SZ_DW(query_hca_cap_in)] = {};
int out_size;
void *out;
int ret;
- if (!mdev)
- return -ENOTCONN;
-
out_size = MLX5_ST_SZ_BYTES(query_hca_cap_out);
out = kzalloc(out_size, GFP_KERNEL);
- if (!out) {
- ret = -ENOMEM;
- goto end;
- }
+ if (!out)
+ return -ENOMEM;
MLX5_SET(query_hca_cap_in, in, opcode, MLX5_CMD_OP_QUERY_HCA_CAP);
MLX5_SET(query_hca_cap_in, in, other_function, 1);
@@ -105,8 +173,6 @@ int mlx5vf_cmd_get_vhca_id(struct pci_dev *pdev, u16 function_id, u16 *vhca_id)
err_exec:
kfree(out);
-end:
- mlx5_vf_put_core_dev(mdev);
return ret;
}
@@ -151,21 +217,68 @@ static int _create_state_mkey(struct mlx5_core_dev *mdev, u32 pdn,
return err;
}
-int mlx5vf_cmd_save_vhca_state(struct pci_dev *pdev, u16 vhca_id,
+void mlx5vf_mig_file_cleanup_cb(struct work_struct *_work)
+{
+ struct mlx5vf_async_data *async_data = container_of(_work,
+ struct mlx5vf_async_data, work);
+ struct mlx5_vf_migration_file *migf = container_of(async_data,
+ struct mlx5_vf_migration_file, async_data);
+ struct mlx5_core_dev *mdev = migf->mvdev->mdev;
+
+ mutex_lock(&migf->lock);
+ if (async_data->status) {
+ migf->is_err = true;
+ wake_up_interruptible(&migf->poll_wait);
+ }
+ mutex_unlock(&migf->lock);
+
+ mlx5_core_destroy_mkey(mdev, async_data->mkey);
+ dma_unmap_sgtable(mdev->device, &migf->table.sgt, DMA_FROM_DEVICE, 0);
+ mlx5_core_dealloc_pd(mdev, async_data->pdn);
+ kvfree(async_data->out);
+ fput(migf->filp);
+}
+
+static void mlx5vf_save_callback(int status, struct mlx5_async_work *context)
+{
+ struct mlx5vf_async_data *async_data = container_of(context,
+ struct mlx5vf_async_data, cb_work);
+ struct mlx5_vf_migration_file *migf = container_of(async_data,
+ struct mlx5_vf_migration_file, async_data);
+
+ if (!status) {
+ WRITE_ONCE(migf->total_length,
+ MLX5_GET(save_vhca_state_out, async_data->out,
+ actual_image_size));
+ wake_up_interruptible(&migf->poll_wait);
+ }
+
+ /*
+ * The error and the cleanup flows can't run from an
+ * interrupt context
+ */
+ async_data->status = status;
+ queue_work(migf->mvdev->cb_wq, &async_data->work);
+}
+
+int mlx5vf_cmd_save_vhca_state(struct mlx5vf_pci_core_device *mvdev,
struct mlx5_vf_migration_file *migf)
{
- struct mlx5_core_dev *mdev = mlx5_vf_get_core_dev(pdev);
- u32 out[MLX5_ST_SZ_DW(save_vhca_state_out)] = {};
+ u32 out_size = MLX5_ST_SZ_BYTES(save_vhca_state_out);
u32 in[MLX5_ST_SZ_DW(save_vhca_state_in)] = {};
+ struct mlx5vf_async_data *async_data;
+ struct mlx5_core_dev *mdev;
u32 pdn, mkey;
int err;
- if (!mdev)
+ lockdep_assert_held(&mvdev->state_mutex);
+ if (mvdev->mdev_detach)
return -ENOTCONN;
+ mdev = mvdev->mdev;
err = mlx5_core_alloc_pd(mdev, &pdn);
if (err)
- goto end;
+ return err;
err = dma_map_sgtable(mdev->device, &migf->table.sgt, DMA_FROM_DEVICE,
0);
@@ -179,45 +292,54 @@ int mlx5vf_cmd_save_vhca_state(struct pci_dev *pdev, u16 vhca_id,
MLX5_SET(save_vhca_state_in, in, opcode,
MLX5_CMD_OP_SAVE_VHCA_STATE);
MLX5_SET(save_vhca_state_in, in, op_mod, 0);
- MLX5_SET(save_vhca_state_in, in, vhca_id, vhca_id);
+ MLX5_SET(save_vhca_state_in, in, vhca_id, mvdev->vhca_id);
MLX5_SET(save_vhca_state_in, in, mkey, mkey);
MLX5_SET(save_vhca_state_in, in, size, migf->total_length);
- err = mlx5_cmd_exec_inout(mdev, save_vhca_state, in, out);
+ async_data = &migf->async_data;
+ async_data->out = kvzalloc(out_size, GFP_KERNEL);
+ if (!async_data->out) {
+ err = -ENOMEM;
+ goto err_out;
+ }
+
+ /* no data exists till the callback comes back */
+ migf->total_length = 0;
+ get_file(migf->filp);
+ async_data->mkey = mkey;
+ async_data->pdn = pdn;
+ err = mlx5_cmd_exec_cb(&migf->async_ctx, in, sizeof(in),
+ async_data->out,
+ out_size, mlx5vf_save_callback,
+ &async_data->cb_work);
if (err)
goto err_exec;
- migf->total_length =
- MLX5_GET(save_vhca_state_out, out, actual_image_size);
-
- mlx5_core_destroy_mkey(mdev, mkey);
- mlx5_core_dealloc_pd(mdev, pdn);
- dma_unmap_sgtable(mdev->device, &migf->table.sgt, DMA_FROM_DEVICE, 0);
- mlx5_vf_put_core_dev(mdev);
-
return 0;
err_exec:
+ fput(migf->filp);
+ kvfree(async_data->out);
+err_out:
mlx5_core_destroy_mkey(mdev, mkey);
err_create_mkey:
dma_unmap_sgtable(mdev->device, &migf->table.sgt, DMA_FROM_DEVICE, 0);
err_dma_map:
mlx5_core_dealloc_pd(mdev, pdn);
-end:
- mlx5_vf_put_core_dev(mdev);
return err;
}
-int mlx5vf_cmd_load_vhca_state(struct pci_dev *pdev, u16 vhca_id,
+int mlx5vf_cmd_load_vhca_state(struct mlx5vf_pci_core_device *mvdev,
struct mlx5_vf_migration_file *migf)
{
- struct mlx5_core_dev *mdev = mlx5_vf_get_core_dev(pdev);
+ struct mlx5_core_dev *mdev;
u32 out[MLX5_ST_SZ_DW(save_vhca_state_out)] = {};
u32 in[MLX5_ST_SZ_DW(save_vhca_state_in)] = {};
u32 pdn, mkey;
int err;
- if (!mdev)
+ lockdep_assert_held(&mvdev->state_mutex);
+ if (mvdev->mdev_detach)
return -ENOTCONN;
mutex_lock(&migf->lock);
@@ -226,6 +348,7 @@ int mlx5vf_cmd_load_vhca_state(struct pci_dev *pdev, u16 vhca_id,
goto end;
}
+ mdev = mvdev->mdev;
err = mlx5_core_alloc_pd(mdev, &pdn);
if (err)
goto end;
@@ -241,7 +364,7 @@ int mlx5vf_cmd_load_vhca_state(struct pci_dev *pdev, u16 vhca_id,
MLX5_SET(load_vhca_state_in, in, opcode,
MLX5_CMD_OP_LOAD_VHCA_STATE);
MLX5_SET(load_vhca_state_in, in, op_mod, 0);
- MLX5_SET(load_vhca_state_in, in, vhca_id, vhca_id);
+ MLX5_SET(load_vhca_state_in, in, vhca_id, mvdev->vhca_id);
MLX5_SET(load_vhca_state_in, in, mkey, mkey);
MLX5_SET(load_vhca_state_in, in, size, migf->total_length);
@@ -253,7 +376,6 @@ err_mkey:
err_reg:
mlx5_core_dealloc_pd(mdev, pdn);
end:
- mlx5_vf_put_core_dev(mdev);
mutex_unlock(&migf->lock);
return err;
}
diff --git a/drivers/vfio/pci/mlx5/cmd.h b/drivers/vfio/pci/mlx5/cmd.h
index 1392a11a9cc0..6c3112fdd8b1 100644
--- a/drivers/vfio/pci/mlx5/cmd.h
+++ b/drivers/vfio/pci/mlx5/cmd.h
@@ -7,12 +7,23 @@
#define MLX5_VFIO_CMD_H
#include <linux/kernel.h>
+#include <linux/vfio_pci_core.h>
#include <linux/mlx5/driver.h>
+struct mlx5vf_async_data {
+ struct mlx5_async_work cb_work;
+ struct work_struct work;
+ int status;
+ u32 pdn;
+ u32 mkey;
+ void *out;
+};
+
struct mlx5_vf_migration_file {
struct file *filp;
struct mutex lock;
- bool disabled;
+ u8 disabled:1;
+ u8 is_err:1;
struct sg_append_table table;
size_t total_length;
@@ -22,15 +33,42 @@ struct mlx5_vf_migration_file {
struct scatterlist *last_offset_sg;
unsigned int sg_last_entry;
unsigned long last_offset;
+ struct mlx5vf_pci_core_device *mvdev;
+ wait_queue_head_t poll_wait;
+ struct mlx5_async_ctx async_ctx;
+ struct mlx5vf_async_data async_data;
+};
+
+struct mlx5vf_pci_core_device {
+ struct vfio_pci_core_device core_device;
+ int vf_id;
+ u16 vhca_id;
+ u8 migrate_cap:1;
+ u8 deferred_reset:1;
+ u8 mdev_detach:1;
+ /* protect migration state */
+ struct mutex state_mutex;
+ enum vfio_device_mig_state mig_state;
+ /* protect the reset_done flow */
+ spinlock_t reset_lock;
+ struct mlx5_vf_migration_file *resuming_migf;
+ struct mlx5_vf_migration_file *saving_migf;
+ struct workqueue_struct *cb_wq;
+ struct notifier_block nb;
+ struct mlx5_core_dev *mdev;
};
-int mlx5vf_cmd_suspend_vhca(struct pci_dev *pdev, u16 vhca_id, u16 op_mod);
-int mlx5vf_cmd_resume_vhca(struct pci_dev *pdev, u16 vhca_id, u16 op_mod);
-int mlx5vf_cmd_query_vhca_migration_state(struct pci_dev *pdev, u16 vhca_id,
+int mlx5vf_cmd_suspend_vhca(struct mlx5vf_pci_core_device *mvdev, u16 op_mod);
+int mlx5vf_cmd_resume_vhca(struct mlx5vf_pci_core_device *mvdev, u16 op_mod);
+int mlx5vf_cmd_query_vhca_migration_state(struct mlx5vf_pci_core_device *mvdev,
size_t *state_size);
-int mlx5vf_cmd_get_vhca_id(struct pci_dev *pdev, u16 function_id, u16 *vhca_id);
-int mlx5vf_cmd_save_vhca_state(struct pci_dev *pdev, u16 vhca_id,
+void mlx5vf_cmd_set_migratable(struct mlx5vf_pci_core_device *mvdev);
+void mlx5vf_cmd_remove_migratable(struct mlx5vf_pci_core_device *mvdev);
+int mlx5vf_cmd_save_vhca_state(struct mlx5vf_pci_core_device *mvdev,
struct mlx5_vf_migration_file *migf);
-int mlx5vf_cmd_load_vhca_state(struct pci_dev *pdev, u16 vhca_id,
+int mlx5vf_cmd_load_vhca_state(struct mlx5vf_pci_core_device *mvdev,
struct mlx5_vf_migration_file *migf);
+void mlx5vf_state_mutex_unlock(struct mlx5vf_pci_core_device *mvdev);
+void mlx5vf_disable_fds(struct mlx5vf_pci_core_device *mvdev);
+void mlx5vf_mig_file_cleanup_cb(struct work_struct *_work);
#endif /* MLX5_VFIO_CMD_H */
diff --git a/drivers/vfio/pci/mlx5/main.c b/drivers/vfio/pci/mlx5/main.c
index bbec5d288fee..df8b572977da 100644
--- a/drivers/vfio/pci/mlx5/main.c
+++ b/drivers/vfio/pci/mlx5/main.c
@@ -17,7 +17,6 @@
#include <linux/uaccess.h>
#include <linux/vfio.h>
#include <linux/sched/mm.h>
-#include <linux/vfio_pci_core.h>
#include <linux/anon_inodes.h>
#include "cmd.h"
@@ -25,20 +24,6 @@
/* Arbitrary to prevent userspace from consuming endless memory */
#define MAX_MIGRATION_SIZE (512*1024*1024)
-struct mlx5vf_pci_core_device {
- struct vfio_pci_core_device core_device;
- u16 vhca_id;
- u8 migrate_cap:1;
- u8 deferred_reset:1;
- /* protect migration state */
- struct mutex state_mutex;
- enum vfio_device_mig_state mig_state;
- /* protect the reset_done flow */
- spinlock_t reset_lock;
- struct mlx5_vf_migration_file *resuming_migf;
- struct mlx5_vf_migration_file *saving_migf;
-};
-
static struct page *
mlx5vf_get_migration_page(struct mlx5_vf_migration_file *migf,
unsigned long offset)
@@ -149,12 +134,22 @@ static ssize_t mlx5vf_save_read(struct file *filp, char __user *buf, size_t len,
return -ESPIPE;
pos = &filp->f_pos;
+ if (!(filp->f_flags & O_NONBLOCK)) {
+ if (wait_event_interruptible(migf->poll_wait,
+ READ_ONCE(migf->total_length) || migf->is_err))
+ return -ERESTARTSYS;
+ }
+
mutex_lock(&migf->lock);
+ if ((filp->f_flags & O_NONBLOCK) && !READ_ONCE(migf->total_length)) {
+ done = -EAGAIN;
+ goto out_unlock;
+ }
if (*pos > migf->total_length) {
done = -EINVAL;
goto out_unlock;
}
- if (migf->disabled) {
+ if (migf->disabled || migf->is_err) {
done = -ENODEV;
goto out_unlock;
}
@@ -194,9 +189,28 @@ out_unlock:
return done;
}
+static __poll_t mlx5vf_save_poll(struct file *filp,
+ struct poll_table_struct *wait)
+{
+ struct mlx5_vf_migration_file *migf = filp->private_data;
+ __poll_t pollflags = 0;
+
+ poll_wait(filp, &migf->poll_wait, wait);
+
+ mutex_lock(&migf->lock);
+ if (migf->disabled || migf->is_err)
+ pollflags = EPOLLIN | EPOLLRDNORM | EPOLLRDHUP;
+ else if (READ_ONCE(migf->total_length))
+ pollflags = EPOLLIN | EPOLLRDNORM;
+ mutex_unlock(&migf->lock);
+
+ return pollflags;
+}
+
static const struct file_operations mlx5vf_save_fops = {
.owner = THIS_MODULE,
.read = mlx5vf_save_read,
+ .poll = mlx5vf_save_poll,
.release = mlx5vf_release_file,
.llseek = no_llseek,
};
@@ -222,9 +236,11 @@ mlx5vf_pci_save_device_data(struct mlx5vf_pci_core_device *mvdev)
stream_open(migf->filp->f_inode, migf->filp);
mutex_init(&migf->lock);
-
- ret = mlx5vf_cmd_query_vhca_migration_state(
- mvdev->core_device.pdev, mvdev->vhca_id, &migf->total_length);
+ init_waitqueue_head(&migf->poll_wait);
+ mlx5_cmd_init_async_ctx(mvdev->mdev, &migf->async_ctx);
+ INIT_WORK(&migf->async_data.work, mlx5vf_mig_file_cleanup_cb);
+ ret = mlx5vf_cmd_query_vhca_migration_state(mvdev,
+ &migf->total_length);
if (ret)
goto out_free;
@@ -233,8 +249,8 @@ mlx5vf_pci_save_device_data(struct mlx5vf_pci_core_device *mvdev)
if (ret)
goto out_free;
- ret = mlx5vf_cmd_save_vhca_state(mvdev->core_device.pdev,
- mvdev->vhca_id, migf);
+ migf->mvdev = mvdev;
+ ret = mlx5vf_cmd_save_vhca_state(mvdev, migf);
if (ret)
goto out_free;
return migf;
@@ -339,7 +355,7 @@ mlx5vf_pci_resume_device_data(struct mlx5vf_pci_core_device *mvdev)
return migf;
}
-static void mlx5vf_disable_fds(struct mlx5vf_pci_core_device *mvdev)
+void mlx5vf_disable_fds(struct mlx5vf_pci_core_device *mvdev)
{
if (mvdev->resuming_migf) {
mlx5vf_disable_fd(mvdev->resuming_migf);
@@ -347,6 +363,8 @@ static void mlx5vf_disable_fds(struct mlx5vf_pci_core_device *mvdev)
mvdev->resuming_migf = NULL;
}
if (mvdev->saving_migf) {
+ mlx5_cmd_cleanup_async_ctx(&mvdev->saving_migf->async_ctx);
+ cancel_work_sync(&mvdev->saving_migf->async_data.work);
mlx5vf_disable_fd(mvdev->saving_migf);
fput(mvdev->saving_migf->filp);
mvdev->saving_migf = NULL;
@@ -361,8 +379,7 @@ mlx5vf_pci_step_device_state_locked(struct mlx5vf_pci_core_device *mvdev,
int ret;
if (cur == VFIO_DEVICE_STATE_RUNNING_P2P && new == VFIO_DEVICE_STATE_STOP) {
- ret = mlx5vf_cmd_suspend_vhca(
- mvdev->core_device.pdev, mvdev->vhca_id,
+ ret = mlx5vf_cmd_suspend_vhca(mvdev,
MLX5_SUSPEND_VHCA_IN_OP_MOD_SUSPEND_RESPONDER);
if (ret)
return ERR_PTR(ret);
@@ -370,8 +387,7 @@ mlx5vf_pci_step_device_state_locked(struct mlx5vf_pci_core_device *mvdev,
}
if (cur == VFIO_DEVICE_STATE_STOP && new == VFIO_DEVICE_STATE_RUNNING_P2P) {
- ret = mlx5vf_cmd_resume_vhca(
- mvdev->core_device.pdev, mvdev->vhca_id,
+ ret = mlx5vf_cmd_resume_vhca(mvdev,
MLX5_RESUME_VHCA_IN_OP_MOD_RESUME_RESPONDER);
if (ret)
return ERR_PTR(ret);
@@ -379,8 +395,7 @@ mlx5vf_pci_step_device_state_locked(struct mlx5vf_pci_core_device *mvdev,
}
if (cur == VFIO_DEVICE_STATE_RUNNING && new == VFIO_DEVICE_STATE_RUNNING_P2P) {
- ret = mlx5vf_cmd_suspend_vhca(
- mvdev->core_device.pdev, mvdev->vhca_id,
+ ret = mlx5vf_cmd_suspend_vhca(mvdev,
MLX5_SUSPEND_VHCA_IN_OP_MOD_SUSPEND_INITIATOR);
if (ret)
return ERR_PTR(ret);
@@ -388,8 +403,7 @@ mlx5vf_pci_step_device_state_locked(struct mlx5vf_pci_core_device *mvdev,
}
if (cur == VFIO_DEVICE_STATE_RUNNING_P2P && new == VFIO_DEVICE_STATE_RUNNING) {
- ret = mlx5vf_cmd_resume_vhca(
- mvdev->core_device.pdev, mvdev->vhca_id,
+ ret = mlx5vf_cmd_resume_vhca(mvdev,
MLX5_RESUME_VHCA_IN_OP_MOD_RESUME_INITIATOR);
if (ret)
return ERR_PTR(ret);
@@ -424,8 +438,7 @@ mlx5vf_pci_step_device_state_locked(struct mlx5vf_pci_core_device *mvdev,
}
if (cur == VFIO_DEVICE_STATE_RESUMING && new == VFIO_DEVICE_STATE_STOP) {
- ret = mlx5vf_cmd_load_vhca_state(mvdev->core_device.pdev,
- mvdev->vhca_id,
+ ret = mlx5vf_cmd_load_vhca_state(mvdev,
mvdev->resuming_migf);
if (ret)
return ERR_PTR(ret);
@@ -444,7 +457,7 @@ mlx5vf_pci_step_device_state_locked(struct mlx5vf_pci_core_device *mvdev,
* This function is called in all state_mutex unlock cases to
* handle a 'deferred_reset' if exists.
*/
-static void mlx5vf_state_mutex_unlock(struct mlx5vf_pci_core_device *mvdev)
+void mlx5vf_state_mutex_unlock(struct mlx5vf_pci_core_device *mvdev)
{
again:
spin_lock(&mvdev->reset_lock);
@@ -532,34 +545,16 @@ static int mlx5vf_pci_open_device(struct vfio_device *core_vdev)
struct mlx5vf_pci_core_device *mvdev = container_of(
core_vdev, struct mlx5vf_pci_core_device, core_device.vdev);
struct vfio_pci_core_device *vdev = &mvdev->core_device;
- int vf_id;
int ret;
ret = vfio_pci_core_enable(vdev);
if (ret)
return ret;
- if (!mvdev->migrate_cap) {
- vfio_pci_core_finish_enable(vdev);
- return 0;
- }
-
- vf_id = pci_iov_vf_id(vdev->pdev);
- if (vf_id < 0) {
- ret = vf_id;
- goto out_disable;
- }
-
- ret = mlx5vf_cmd_get_vhca_id(vdev->pdev, vf_id + 1, &mvdev->vhca_id);
- if (ret)
- goto out_disable;
-
- mvdev->mig_state = VFIO_DEVICE_STATE_RUNNING;
+ if (mvdev->migrate_cap)
+ mvdev->mig_state = VFIO_DEVICE_STATE_RUNNING;
vfio_pci_core_finish_enable(vdev);
return 0;
-out_disable:
- vfio_pci_core_disable(vdev);
- return ret;
}
static void mlx5vf_pci_close_device(struct vfio_device *core_vdev)
@@ -596,24 +591,7 @@ static int mlx5vf_pci_probe(struct pci_dev *pdev,
if (!mvdev)
return -ENOMEM;
vfio_pci_core_init_device(&mvdev->core_device, pdev, &mlx5vf_pci_ops);
-
- if (pdev->is_virtfn) {
- struct mlx5_core_dev *mdev =
- mlx5_vf_get_core_dev(pdev);
-
- if (mdev) {
- if (MLX5_CAP_GEN(mdev, migration)) {
- mvdev->migrate_cap = 1;
- mvdev->core_device.vdev.migration_flags =
- VFIO_MIGRATION_STOP_COPY |
- VFIO_MIGRATION_P2P;
- mutex_init(&mvdev->state_mutex);
- spin_lock_init(&mvdev->reset_lock);
- }
- mlx5_vf_put_core_dev(mdev);
- }
- }
-
+ mlx5vf_cmd_set_migratable(mvdev);
ret = vfio_pci_core_register_device(&mvdev->core_device);
if (ret)
goto out_free;
@@ -622,6 +600,7 @@ static int mlx5vf_pci_probe(struct pci_dev *pdev,
return 0;
out_free:
+ mlx5vf_cmd_remove_migratable(mvdev);
vfio_pci_core_uninit_device(&mvdev->core_device);
kfree(mvdev);
return ret;
@@ -632,6 +611,7 @@ static void mlx5vf_pci_remove(struct pci_dev *pdev)
struct mlx5vf_pci_core_device *mvdev = dev_get_drvdata(&pdev->dev);
vfio_pci_core_unregister_device(&mvdev->core_device);
+ mlx5vf_cmd_remove_migratable(mvdev);
vfio_pci_core_uninit_device(&mvdev->core_device);
kfree(mvdev);
}
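As a usage note (illustrative, not part of the series): with the async SAVE
and the new .poll handler on the saving migration file, user space can drive
the fd like any pollable stream. A minimal sketch, assuming migf_fd is a
saving-side data fd obtained through the VFIO migration uAPI:

/*
 * Illustrative user-space sketch: POLLIN fires once mlx5vf_save_callback()
 * has published total_length; O_NONBLOCK readers get -EAGAIN until then.
 */
#include <poll.h>
#include <unistd.h>

static ssize_t read_saved_state(int migf_fd, void *buf, size_t len)
{
	struct pollfd pfd = { .fd = migf_fd, .events = POLLIN };

	if (poll(&pfd, 1, -1) < 0)	/* block until data or error */
		return -1;
	/* read() fails with errno ENODEV if the save errored out */
	return read(migf_fd, buf, len);
}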