diff options
author | David S. Miller <davem@davemloft.net> | 2019-08-22 00:25:12 -0700 |
---|---|---|
committer | David S. Miller <davem@davemloft.net> | 2019-08-22 00:25:12 -0700 |
commit | 8da3803d1ac49a9e47e9059fecd07167d4139389 (patch) | |
tree | b352f82d59d203d1f0f1daf6d36a8dbde496b41d | |
parent | Merge tag 'mlx5-updates-2019-08-21' of git://git.kernel.org/pub/scm/linux/kernel/git/saeed/linux (diff) | |
parent | net/mlx5e: Add mlx5e HV VHCA stats agent (diff) | |
download | linux-dev-8da3803d1ac49a9e47e9059fecd07167d4139389.tar.xz linux-dev-8da3803d1ac49a9e47e9059fecd07167d4139389.zip |
Merge branch 'mlx5-hyperv'
Haiyang Zhang says:
====================
Add software backchannel and mlx5e HV VHCA stats
This patch set adds a paravirtual software backchannel to pci_hyperv,
which is required by the mlx5e driver HV VHCA stats agent.
The stats agent is responsible for running a periodic rx/tx packets/bytes
stats update.
====================
Signed-off-by: David S. Miller <davem@davemloft.net>
-rw-r--r-- | MAINTAINERS | 1 | ||||
-rw-r--r-- | drivers/net/ethernet/mellanox/mlx5/core/Makefile | 2 | ||||
-rw-r--r-- | drivers/net/ethernet/mellanox/mlx5/core/en.h | 13 | ||||
-rw-r--r-- | drivers/net/ethernet/mellanox/mlx5/core/en/hv_vhca_stats.c | 162 | ||||
-rw-r--r-- | drivers/net/ethernet/mellanox/mlx5/core/en/hv_vhca_stats.h | 25 | ||||
-rw-r--r-- | drivers/net/ethernet/mellanox/mlx5/core/en_main.c | 3 | ||||
-rw-r--r-- | drivers/net/ethernet/mellanox/mlx5/core/lib/hv.c | 64 | ||||
-rw-r--r-- | drivers/net/ethernet/mellanox/mlx5/core/lib/hv.h | 22 | ||||
-rw-r--r-- | drivers/net/ethernet/mellanox/mlx5/core/lib/hv_vhca.c | 371 | ||||
-rw-r--r-- | drivers/net/ethernet/mellanox/mlx5/core/lib/hv_vhca.h | 104 | ||||
-rw-r--r-- | drivers/net/ethernet/mellanox/mlx5/core/main.c | 7 | ||||
-rw-r--r-- | drivers/pci/Kconfig | 1 | ||||
-rw-r--r-- | drivers/pci/controller/Kconfig | 7 | ||||
-rw-r--r-- | drivers/pci/controller/Makefile | 1 | ||||
-rw-r--r-- | drivers/pci/controller/pci-hyperv-intf.c | 67 | ||||
-rw-r--r-- | drivers/pci/controller/pci-hyperv.c | 308 | ||||
-rw-r--r-- | include/linux/hyperv.h | 29 | ||||
-rw-r--r-- | include/linux/mlx5/driver.h | 2 |
18 files changed, 1189 insertions, 0 deletions
diff --git a/MAINTAINERS b/MAINTAINERS index a406947b369e..986085351d79 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -7469,6 +7469,7 @@ F: drivers/hid/hid-hyperv.c F: drivers/hv/ F: drivers/input/serio/hyperv-keyboard.c F: drivers/pci/controller/pci-hyperv.c +F: drivers/pci/controller/pci-hyperv-intf.c F: drivers/net/hyperv/ F: drivers/scsi/storvsc_drv.c F: drivers/uio/uio_hv_generic.c diff --git a/drivers/net/ethernet/mellanox/mlx5/core/Makefile b/drivers/net/ethernet/mellanox/mlx5/core/Makefile index bcf36552f069..f4de9ccb5df1 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/Makefile +++ b/drivers/net/ethernet/mellanox/mlx5/core/Makefile @@ -36,6 +36,7 @@ mlx5_core-$(CONFIG_MLX5_CORE_EN_DCB) += en_dcbnl.o en/port_buffer.o mlx5_core-$(CONFIG_MLX5_ESWITCH) += en_rep.o en_tc.o en/tc_tun.o lib/port_tun.o lag_mp.o \ lib/geneve.o en/tc_tun_vxlan.o en/tc_tun_gre.o \ en/tc_tun_geneve.o diag/en_tc_tracepoint.o +mlx5_core-$(CONFIG_PCI_HYPERV_INTERFACE) += en/hv_vhca_stats.o # # Core extra @@ -45,6 +46,7 @@ mlx5_core-$(CONFIG_MLX5_ESWITCH) += eswitch.o eswitch_offloads.o eswitch_offlo mlx5_core-$(CONFIG_MLX5_MPFS) += lib/mpfs.o mlx5_core-$(CONFIG_VXLAN) += lib/vxlan.o mlx5_core-$(CONFIG_PTP_1588_CLOCK) += lib/clock.o +mlx5_core-$(CONFIG_PCI_HYPERV_INTERFACE) += lib/hv.o lib/hv_vhca.o # # Ipoib netdev diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en.h b/drivers/net/ethernet/mellanox/mlx5/core/en.h index 7316571a4df5..446792799125 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en.h @@ -54,6 +54,7 @@ #include "mlx5_core.h" #include "en_stats.h" #include "en/fs.h" +#include "lib/hv_vhca.h" extern const struct net_device_ops mlx5e_netdev_ops; struct page_pool; @@ -782,6 +783,15 @@ struct mlx5e_modify_sq_param { int rl_index; }; +#if IS_ENABLED(CONFIG_PCI_HYPERV_INTERFACE) +struct mlx5e_hv_vhca_stats_agent { + struct mlx5_hv_vhca_agent *agent; + struct delayed_work work; + u16 delay; + void *buf; +}; +#endif + 
struct mlx5e_xsk { /* UMEMs are stored separately from channels, because we don't want to * lose them when channels are recreated. The kernel also stores UMEMs, @@ -853,6 +863,9 @@ struct mlx5e_priv { struct devlink_health_reporter *tx_reporter; struct devlink_health_reporter *rx_reporter; struct mlx5e_xsk xsk; +#if IS_ENABLED(CONFIG_PCI_HYPERV_INTERFACE) + struct mlx5e_hv_vhca_stats_agent stats_agent; +#endif }; struct mlx5e_profile { diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/hv_vhca_stats.c b/drivers/net/ethernet/mellanox/mlx5/core/en/hv_vhca_stats.c new file mode 100644 index 000000000000..c37b4acd9bd5 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/hv_vhca_stats.c @@ -0,0 +1,162 @@ +// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB +// Copyright (c) 2018 Mellanox Technologies + +#include "en.h" +#include "en/hv_vhca_stats.h" +#include "lib/hv_vhca.h" +#include "lib/hv.h" + +struct mlx5e_hv_vhca_per_ring_stats { + u64 rx_packets; + u64 rx_bytes; + u64 tx_packets; + u64 tx_bytes; +}; + +static void +mlx5e_hv_vhca_fill_ring_stats(struct mlx5e_priv *priv, int ch, + struct mlx5e_hv_vhca_per_ring_stats *data) +{ + struct mlx5e_channel_stats *stats; + int tc; + + stats = &priv->channel_stats[ch]; + data->rx_packets = stats->rq.packets; + data->rx_bytes = stats->rq.bytes; + + for (tc = 0; tc < priv->max_opened_tc; tc++) { + data->tx_packets += stats->sq[tc].packets; + data->tx_bytes += stats->sq[tc].bytes; + } +} + +static void mlx5e_hv_vhca_fill_stats(struct mlx5e_priv *priv, u64 *data, + int buf_len) +{ + int ch, i = 0; + + for (ch = 0; ch < priv->max_nch; ch++) { + u64 *buf = data + i; + + if (WARN_ON_ONCE(buf + + sizeof(struct mlx5e_hv_vhca_per_ring_stats) > + data + buf_len)) + return; + + mlx5e_hv_vhca_fill_ring_stats(priv, ch, + (struct mlx5e_hv_vhca_per_ring_stats *)buf); + i += sizeof(struct mlx5e_hv_vhca_per_ring_stats) / sizeof(u64); + } +} + +static int mlx5e_hv_vhca_stats_buf_size(struct mlx5e_priv *priv) +{ + return 
(sizeof(struct mlx5e_hv_vhca_per_ring_stats) * + priv->max_nch); +} + +static void mlx5e_hv_vhca_stats_work(struct work_struct *work) +{ + struct mlx5e_hv_vhca_stats_agent *sagent; + struct mlx5_hv_vhca_agent *agent; + struct delayed_work *dwork; + struct mlx5e_priv *priv; + int buf_len, rc; + void *buf; + + dwork = to_delayed_work(work); + sagent = container_of(dwork, struct mlx5e_hv_vhca_stats_agent, work); + priv = container_of(sagent, struct mlx5e_priv, stats_agent); + buf_len = mlx5e_hv_vhca_stats_buf_size(priv); + agent = sagent->agent; + buf = sagent->buf; + + memset(buf, 0, buf_len); + mlx5e_hv_vhca_fill_stats(priv, buf, buf_len); + + rc = mlx5_hv_vhca_agent_write(agent, buf, buf_len); + if (rc) { + mlx5_core_err(priv->mdev, + "%s: Failed to write stats, err = %d\n", + __func__, rc); + return; + } + + if (sagent->delay) + queue_delayed_work(priv->wq, &sagent->work, sagent->delay); +} + +enum { + MLX5_HV_VHCA_STATS_VERSION = 1, + MLX5_HV_VHCA_STATS_UPDATE_ONCE = 0xFFFF, +}; + +static void mlx5e_hv_vhca_stats_control(struct mlx5_hv_vhca_agent *agent, + struct mlx5_hv_vhca_control_block *block) +{ + struct mlx5e_hv_vhca_stats_agent *sagent; + struct mlx5e_priv *priv; + + priv = mlx5_hv_vhca_agent_priv(agent); + sagent = &priv->stats_agent; + + block->version = MLX5_HV_VHCA_STATS_VERSION; + block->rings = priv->max_nch; + + if (!block->command) { + cancel_delayed_work_sync(&priv->stats_agent.work); + return; + } + + sagent->delay = block->command == MLX5_HV_VHCA_STATS_UPDATE_ONCE ? 
0 : + msecs_to_jiffies(block->command * 100); + + queue_delayed_work(priv->wq, &sagent->work, sagent->delay); +} + +static void mlx5e_hv_vhca_stats_cleanup(struct mlx5_hv_vhca_agent *agent) +{ + struct mlx5e_priv *priv = mlx5_hv_vhca_agent_priv(agent); + + cancel_delayed_work_sync(&priv->stats_agent.work); +} + +int mlx5e_hv_vhca_stats_create(struct mlx5e_priv *priv) +{ + int buf_len = mlx5e_hv_vhca_stats_buf_size(priv); + struct mlx5_hv_vhca_agent *agent; + + priv->stats_agent.buf = kvzalloc(buf_len, GFP_KERNEL); + if (!priv->stats_agent.buf) + return -ENOMEM; + + agent = mlx5_hv_vhca_agent_create(priv->mdev->hv_vhca, + MLX5_HV_VHCA_AGENT_STATS, + mlx5e_hv_vhca_stats_control, NULL, + mlx5e_hv_vhca_stats_cleanup, + priv); + + if (IS_ERR_OR_NULL(agent)) { + if (IS_ERR(agent)) + netdev_warn(priv->netdev, + "Failed to create hv vhca stats agent, err = %ld\n", + PTR_ERR(agent)); + + kfree(priv->stats_agent.buf); + return IS_ERR_OR_NULL(agent); + } + + priv->stats_agent.agent = agent; + INIT_DELAYED_WORK(&priv->stats_agent.work, mlx5e_hv_vhca_stats_work); + + return 0; +} + +void mlx5e_hv_vhca_stats_destroy(struct mlx5e_priv *priv) +{ + if (IS_ERR_OR_NULL(priv->stats_agent.agent)) + return; + + mlx5_hv_vhca_agent_destroy(priv->stats_agent.agent); + kfree(priv->stats_agent.buf); +} diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/hv_vhca_stats.h b/drivers/net/ethernet/mellanox/mlx5/core/en/hv_vhca_stats.h new file mode 100644 index 000000000000..664463faf77b --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/hv_vhca_stats.h @@ -0,0 +1,25 @@ +/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */ +/* Copyright (c) 2019 Mellanox Technologies. 
*/ + +#ifndef __MLX5_EN_STATS_VHCA_H__ +#define __MLX5_EN_STATS_VHCA_H__ +#include "en.h" + +#if IS_ENABLED(CONFIG_PCI_HYPERV_INTERFACE) + +int mlx5e_hv_vhca_stats_create(struct mlx5e_priv *priv); +void mlx5e_hv_vhca_stats_destroy(struct mlx5e_priv *priv); + +#else + +static inline int mlx5e_hv_vhca_stats_create(struct mlx5e_priv *priv) +{ + return 0; +} + +static inline void mlx5e_hv_vhca_stats_destroy(struct mlx5e_priv *priv) +{ +} +#endif + +#endif /* __MLX5_EN_STATS_VHCA_H__ */ diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c index 7fdea6479ff6..fa4bf2d4bcd4 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c @@ -62,6 +62,7 @@ #include "en/xsk/setup.h" #include "en/xsk/rx.h" #include "en/xsk/tx.h" +#include "en/hv_vhca_stats.h" bool mlx5e_check_fragmented_striding_rq_cap(struct mlx5_core_dev *mdev) @@ -5109,6 +5110,7 @@ static void mlx5e_nic_enable(struct mlx5e_priv *priv) if (mlx5e_monitor_counter_supported(priv)) mlx5e_monitor_counter_init(priv); + mlx5e_hv_vhca_stats_create(priv); if (netdev->reg_state != NETREG_REGISTERED) return; #ifdef CONFIG_MLX5_CORE_EN_DCB @@ -5141,6 +5143,7 @@ static void mlx5e_nic_disable(struct mlx5e_priv *priv) queue_work(priv->wq, &priv->set_rx_mode_work); + mlx5e_hv_vhca_stats_destroy(priv); if (mlx5e_monitor_counter_supported(priv)) mlx5e_monitor_counter_cleanup(priv); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lib/hv.c b/drivers/net/ethernet/mellanox/mlx5/core/lib/hv.c new file mode 100644 index 000000000000..cf08d02703fb --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/lib/hv.c @@ -0,0 +1,64 @@ +// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB +// Copyright (c) 2018 Mellanox Technologies + +#include <linux/hyperv.h> +#include "mlx5_core.h" +#include "lib/hv.h" + +static int mlx5_hv_config_common(struct mlx5_core_dev *dev, void *buf, int len, + int offset, bool read) +{ + 
int rc = -EOPNOTSUPP; + int bytes_returned; + int block_id; + + if (offset % HV_CONFIG_BLOCK_SIZE_MAX || len % HV_CONFIG_BLOCK_SIZE_MAX) + return -EINVAL; + + block_id = offset / HV_CONFIG_BLOCK_SIZE_MAX; + + rc = read ? + hyperv_read_cfg_blk(dev->pdev, buf, + HV_CONFIG_BLOCK_SIZE_MAX, block_id, + &bytes_returned) : + hyperv_write_cfg_blk(dev->pdev, buf, + HV_CONFIG_BLOCK_SIZE_MAX, block_id); + + /* Make sure len bytes were read successfully */ + if (read) + rc |= !(len == bytes_returned); + + if (rc) { + mlx5_core_err(dev, "Failed to %s hv config, err = %d, len = %d, offset = %d\n", + read ? "read" : "write", rc, len, + offset); + return rc; + } + + return 0; +} + +int mlx5_hv_read_config(struct mlx5_core_dev *dev, void *buf, int len, + int offset) +{ + return mlx5_hv_config_common(dev, buf, len, offset, true); +} + +int mlx5_hv_write_config(struct mlx5_core_dev *dev, void *buf, int len, + int offset) +{ + return mlx5_hv_config_common(dev, buf, len, offset, false); +} + +int mlx5_hv_register_invalidate(struct mlx5_core_dev *dev, void *context, + void (*block_invalidate)(void *context, + u64 block_mask)) +{ + return hyperv_reg_block_invalidate(dev->pdev, context, + block_invalidate); +} + +void mlx5_hv_unregister_invalidate(struct mlx5_core_dev *dev) +{ + hyperv_reg_block_invalidate(dev->pdev, NULL, NULL); +} diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lib/hv.h b/drivers/net/ethernet/mellanox/mlx5/core/lib/hv.h new file mode 100644 index 000000000000..f9a45573f459 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/lib/hv.h @@ -0,0 +1,22 @@ +/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */ +/* Copyright (c) 2019 Mellanox Technologies. 
*/ + +#ifndef __LIB_HV_H__ +#define __LIB_HV_H__ + +#if IS_ENABLED(CONFIG_PCI_HYPERV_INTERFACE) + +#include <linux/hyperv.h> +#include <linux/mlx5/driver.h> + +int mlx5_hv_read_config(struct mlx5_core_dev *dev, void *buf, int len, + int offset); +int mlx5_hv_write_config(struct mlx5_core_dev *dev, void *buf, int len, + int offset); +int mlx5_hv_register_invalidate(struct mlx5_core_dev *dev, void *context, + void (*block_invalidate)(void *context, + u64 block_mask)); +void mlx5_hv_unregister_invalidate(struct mlx5_core_dev *dev); +#endif + +#endif /* __LIB_HV_H__ */ diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lib/hv_vhca.c b/drivers/net/ethernet/mellanox/mlx5/core/lib/hv_vhca.c new file mode 100644 index 000000000000..4047629a876b --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/lib/hv_vhca.c @@ -0,0 +1,371 @@ +// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB +// Copyright (c) 2018 Mellanox Technologies + +#include <linux/hyperv.h> +#include "mlx5_core.h" +#include "lib/hv.h" +#include "lib/hv_vhca.h" + +struct mlx5_hv_vhca { + struct mlx5_core_dev *dev; + struct workqueue_struct *work_queue; + struct mlx5_hv_vhca_agent *agents[MLX5_HV_VHCA_AGENT_MAX]; + struct mutex agents_lock; /* Protect agents array */ +}; + +struct mlx5_hv_vhca_work { + struct work_struct invalidate_work; + struct mlx5_hv_vhca *hv_vhca; + u64 block_mask; +}; + +struct mlx5_hv_vhca_data_block { + u16 sequence; + u16 offset; + u8 reserved[4]; + u64 data[15]; +}; + +struct mlx5_hv_vhca_agent { + enum mlx5_hv_vhca_agent_type type; + struct mlx5_hv_vhca *hv_vhca; + void *priv; + u16 seq; + void (*control)(struct mlx5_hv_vhca_agent *agent, + struct mlx5_hv_vhca_control_block *block); + void (*invalidate)(struct mlx5_hv_vhca_agent *agent, + u64 block_mask); + void (*cleanup)(struct mlx5_hv_vhca_agent *agent); +}; + +struct mlx5_hv_vhca *mlx5_hv_vhca_create(struct mlx5_core_dev *dev) +{ + struct mlx5_hv_vhca *hv_vhca = NULL; + + hv_vhca = kzalloc(sizeof(*hv_vhca), GFP_KERNEL); + 
if (!hv_vhca) + return ERR_PTR(-ENOMEM); + + hv_vhca->work_queue = create_singlethread_workqueue("mlx5_hv_vhca"); + if (!hv_vhca->work_queue) { + kfree(hv_vhca); + return ERR_PTR(-ENOMEM); + } + + hv_vhca->dev = dev; + mutex_init(&hv_vhca->agents_lock); + + return hv_vhca; +} + +void mlx5_hv_vhca_destroy(struct mlx5_hv_vhca *hv_vhca) +{ + if (IS_ERR_OR_NULL(hv_vhca)) + return; + + destroy_workqueue(hv_vhca->work_queue); + kfree(hv_vhca); +} + +static void mlx5_hv_vhca_invalidate_work(struct work_struct *work) +{ + struct mlx5_hv_vhca_work *hwork; + struct mlx5_hv_vhca *hv_vhca; + int i; + + hwork = container_of(work, struct mlx5_hv_vhca_work, invalidate_work); + hv_vhca = hwork->hv_vhca; + + mutex_lock(&hv_vhca->agents_lock); + for (i = 0; i < MLX5_HV_VHCA_AGENT_MAX; i++) { + struct mlx5_hv_vhca_agent *agent = hv_vhca->agents[i]; + + if (!agent || !agent->invalidate) + continue; + + if (!(BIT(agent->type) & hwork->block_mask)) + continue; + + agent->invalidate(agent, hwork->block_mask); + } + mutex_unlock(&hv_vhca->agents_lock); + + kfree(hwork); +} + +void mlx5_hv_vhca_invalidate(void *context, u64 block_mask) +{ + struct mlx5_hv_vhca *hv_vhca = (struct mlx5_hv_vhca *)context; + struct mlx5_hv_vhca_work *work; + + work = kzalloc(sizeof(*work), GFP_ATOMIC); + if (!work) + return; + + INIT_WORK(&work->invalidate_work, mlx5_hv_vhca_invalidate_work); + work->hv_vhca = hv_vhca; + work->block_mask = block_mask; + + queue_work(hv_vhca->work_queue, &work->invalidate_work); +} + +#define AGENT_MASK(type) (type ? 
BIT(type - 1) : 0 /* control */) + +static void mlx5_hv_vhca_agents_control(struct mlx5_hv_vhca *hv_vhca, + struct mlx5_hv_vhca_control_block *block) +{ + int i; + + for (i = 0; i < MLX5_HV_VHCA_AGENT_MAX; i++) { + struct mlx5_hv_vhca_agent *agent = hv_vhca->agents[i]; + + if (!agent || !agent->control) + continue; + + if (!(AGENT_MASK(agent->type) & block->control)) + continue; + + agent->control(agent, block); + } +} + +static void mlx5_hv_vhca_capabilities(struct mlx5_hv_vhca *hv_vhca, + u32 *capabilities) +{ + int i; + + for (i = 0; i < MLX5_HV_VHCA_AGENT_MAX; i++) { + struct mlx5_hv_vhca_agent *agent = hv_vhca->agents[i]; + + if (agent) + *capabilities |= AGENT_MASK(agent->type); + } +} + +static void +mlx5_hv_vhca_control_agent_invalidate(struct mlx5_hv_vhca_agent *agent, + u64 block_mask) +{ + struct mlx5_hv_vhca *hv_vhca = agent->hv_vhca; + struct mlx5_core_dev *dev = hv_vhca->dev; + struct mlx5_hv_vhca_control_block *block; + u32 capabilities = 0; + int err; + + block = kzalloc(sizeof(*block), GFP_KERNEL); + if (!block) + return; + + err = mlx5_hv_read_config(dev, block, sizeof(*block), 0); + if (err) + goto free_block; + + mlx5_hv_vhca_capabilities(hv_vhca, &capabilities); + + /* In case no capabilities, send empty block in return */ + if (!capabilities) { + memset(block, 0, sizeof(*block)); + goto write; + } + + if (block->capabilities != capabilities) + block->capabilities = capabilities; + + if (block->control & ~capabilities) + goto free_block; + + mlx5_hv_vhca_agents_control(hv_vhca, block); + block->command_ack = block->command; + +write: + mlx5_hv_write_config(dev, block, sizeof(*block), 0); + +free_block: + kfree(block); +} + +static struct mlx5_hv_vhca_agent * +mlx5_hv_vhca_control_agent_create(struct mlx5_hv_vhca *hv_vhca) +{ + return mlx5_hv_vhca_agent_create(hv_vhca, MLX5_HV_VHCA_AGENT_CONTROL, + NULL, + mlx5_hv_vhca_control_agent_invalidate, + NULL, NULL); +} + +static void mlx5_hv_vhca_control_agent_destroy(struct mlx5_hv_vhca_agent *agent) 
+{ + mlx5_hv_vhca_agent_destroy(agent); +} + +int mlx5_hv_vhca_init(struct mlx5_hv_vhca *hv_vhca) +{ + struct mlx5_hv_vhca_agent *agent; + int err; + + if (IS_ERR_OR_NULL(hv_vhca)) + return IS_ERR_OR_NULL(hv_vhca); + + err = mlx5_hv_register_invalidate(hv_vhca->dev, hv_vhca, + mlx5_hv_vhca_invalidate); + if (err) + return err; + + agent = mlx5_hv_vhca_control_agent_create(hv_vhca); + if (IS_ERR_OR_NULL(agent)) { + mlx5_hv_unregister_invalidate(hv_vhca->dev); + return IS_ERR_OR_NULL(agent); + } + + hv_vhca->agents[MLX5_HV_VHCA_AGENT_CONTROL] = agent; + + return 0; +} + +void mlx5_hv_vhca_cleanup(struct mlx5_hv_vhca *hv_vhca) +{ + struct mlx5_hv_vhca_agent *agent; + int i; + + if (IS_ERR_OR_NULL(hv_vhca)) + return; + + agent = hv_vhca->agents[MLX5_HV_VHCA_AGENT_CONTROL]; + if (agent) + mlx5_hv_vhca_control_agent_destroy(agent); + + mutex_lock(&hv_vhca->agents_lock); + for (i = 0; i < MLX5_HV_VHCA_AGENT_MAX; i++) + WARN_ON(hv_vhca->agents[i]); + + mutex_unlock(&hv_vhca->agents_lock); + + mlx5_hv_unregister_invalidate(hv_vhca->dev); +} + +static void mlx5_hv_vhca_agents_update(struct mlx5_hv_vhca *hv_vhca) +{ + mlx5_hv_vhca_invalidate(hv_vhca, BIT(MLX5_HV_VHCA_AGENT_CONTROL)); +} + +struct mlx5_hv_vhca_agent * +mlx5_hv_vhca_agent_create(struct mlx5_hv_vhca *hv_vhca, + enum mlx5_hv_vhca_agent_type type, + void (*control)(struct mlx5_hv_vhca_agent*, + struct mlx5_hv_vhca_control_block *block), + void (*invalidate)(struct mlx5_hv_vhca_agent*, + u64 block_mask), + void (*cleaup)(struct mlx5_hv_vhca_agent *agent), + void *priv) +{ + struct mlx5_hv_vhca_agent *agent; + + if (IS_ERR_OR_NULL(hv_vhca)) + return ERR_PTR(-ENOMEM); + + if (type >= MLX5_HV_VHCA_AGENT_MAX) + return ERR_PTR(-EINVAL); + + mutex_lock(&hv_vhca->agents_lock); + if (hv_vhca->agents[type]) { + mutex_unlock(&hv_vhca->agents_lock); + return ERR_PTR(-EINVAL); + } + mutex_unlock(&hv_vhca->agents_lock); + + agent = kzalloc(sizeof(*agent), GFP_KERNEL); + if (!agent) + return ERR_PTR(-ENOMEM); + + agent->type = 
type; + agent->hv_vhca = hv_vhca; + agent->priv = priv; + agent->control = control; + agent->invalidate = invalidate; + agent->cleanup = cleaup; + + mutex_lock(&hv_vhca->agents_lock); + hv_vhca->agents[type] = agent; + mutex_unlock(&hv_vhca->agents_lock); + + mlx5_hv_vhca_agents_update(hv_vhca); + + return agent; +} + +void mlx5_hv_vhca_agent_destroy(struct mlx5_hv_vhca_agent *agent) +{ + struct mlx5_hv_vhca *hv_vhca = agent->hv_vhca; + + mutex_lock(&hv_vhca->agents_lock); + + if (WARN_ON(agent != hv_vhca->agents[agent->type])) { + mutex_unlock(&hv_vhca->agents_lock); + return; + } + + hv_vhca->agents[agent->type] = NULL; + mutex_unlock(&hv_vhca->agents_lock); + + if (agent->cleanup) + agent->cleanup(agent); + + kfree(agent); + + mlx5_hv_vhca_agents_update(hv_vhca); +} + +static int mlx5_hv_vhca_data_block_prepare(struct mlx5_hv_vhca_agent *agent, + struct mlx5_hv_vhca_data_block *data_block, + void *src, int len, int *offset) +{ + int bytes = min_t(int, (int)sizeof(data_block->data), len); + + data_block->sequence = agent->seq; + data_block->offset = (*offset)++; + memcpy(data_block->data, src, bytes); + + return bytes; +} + +static void mlx5_hv_vhca_agent_seq_update(struct mlx5_hv_vhca_agent *agent) +{ + agent->seq++; +} + +int mlx5_hv_vhca_agent_write(struct mlx5_hv_vhca_agent *agent, + void *buf, int len) +{ + int offset = agent->type * HV_CONFIG_BLOCK_SIZE_MAX; + int block_offset = 0; + int total = 0; + int err; + + while (len) { + struct mlx5_hv_vhca_data_block data_block = {0}; + int bytes; + + bytes = mlx5_hv_vhca_data_block_prepare(agent, &data_block, + buf + total, + len, &block_offset); + if (!bytes) + return -ENOMEM; + + err = mlx5_hv_write_config(agent->hv_vhca->dev, &data_block, + sizeof(data_block), offset); + if (err) + return err; + + total += bytes; + len -= bytes; + } + + mlx5_hv_vhca_agent_seq_update(agent); + + return 0; +} + +void *mlx5_hv_vhca_agent_priv(struct mlx5_hv_vhca_agent *agent) +{ + return agent->priv; +} diff --git 
a/drivers/net/ethernet/mellanox/mlx5/core/lib/hv_vhca.h b/drivers/net/ethernet/mellanox/mlx5/core/lib/hv_vhca.h new file mode 100644 index 000000000000..4bad6a5fde56 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/lib/hv_vhca.h @@ -0,0 +1,104 @@ +/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */ +/* Copyright (c) 2019 Mellanox Technologies. */ + +#ifndef __LIB_HV_VHCA_H__ +#define __LIB_HV_VHCA_H__ + +#include "en.h" +#include "lib/hv.h" + +struct mlx5_hv_vhca_agent; +struct mlx5_hv_vhca; +struct mlx5_hv_vhca_control_block; + +enum mlx5_hv_vhca_agent_type { + MLX5_HV_VHCA_AGENT_CONTROL = 0, + MLX5_HV_VHCA_AGENT_STATS = 1, + MLX5_HV_VHCA_AGENT_MAX = 32, +}; + +#if IS_ENABLED(CONFIG_PCI_HYPERV_INTERFACE) + +struct mlx5_hv_vhca_control_block { + u32 capabilities; + u32 control; + u16 command; + u16 command_ack; + u16 version; + u16 rings; + u32 reserved1[28]; +}; + +struct mlx5_hv_vhca *mlx5_hv_vhca_create(struct mlx5_core_dev *dev); +void mlx5_hv_vhca_destroy(struct mlx5_hv_vhca *hv_vhca); +int mlx5_hv_vhca_init(struct mlx5_hv_vhca *hv_vhca); +void mlx5_hv_vhca_cleanup(struct mlx5_hv_vhca *hv_vhca); +void mlx5_hv_vhca_invalidate(void *context, u64 block_mask); + +struct mlx5_hv_vhca_agent * +mlx5_hv_vhca_agent_create(struct mlx5_hv_vhca *hv_vhca, + enum mlx5_hv_vhca_agent_type type, + void (*control)(struct mlx5_hv_vhca_agent*, + struct mlx5_hv_vhca_control_block *block), + void (*invalidate)(struct mlx5_hv_vhca_agent*, + u64 block_mask), + void (*cleanup)(struct mlx5_hv_vhca_agent *agent), + void *context); + +void mlx5_hv_vhca_agent_destroy(struct mlx5_hv_vhca_agent *agent); +int mlx5_hv_vhca_agent_write(struct mlx5_hv_vhca_agent *agent, + void *buf, int len); +void *mlx5_hv_vhca_agent_priv(struct mlx5_hv_vhca_agent *agent); + +#else + +static inline struct mlx5_hv_vhca * +mlx5_hv_vhca_create(struct mlx5_core_dev *dev) +{ + return NULL; +} + +static inline void mlx5_hv_vhca_destroy(struct mlx5_hv_vhca *hv_vhca) +{ +} + +static inline int 
mlx5_hv_vhca_init(struct mlx5_hv_vhca *hv_vhca) +{ + return 0; +} + +static inline void mlx5_hv_vhca_cleanup(struct mlx5_hv_vhca *hv_vhca) +{ +} + +static inline void mlx5_hv_vhca_invalidate(void *context, + u64 block_mask) +{ +} + +static inline struct mlx5_hv_vhca_agent * +mlx5_hv_vhca_agent_create(struct mlx5_hv_vhca *hv_vhca, + enum mlx5_hv_vhca_agent_type type, + void (*control)(struct mlx5_hv_vhca_agent*, + struct mlx5_hv_vhca_control_block *block), + void (*invalidate)(struct mlx5_hv_vhca_agent*, + u64 block_mask), + void (*cleanup)(struct mlx5_hv_vhca_agent *agent), + void *context) +{ + return NULL; +} + +static inline void mlx5_hv_vhca_agent_destroy(struct mlx5_hv_vhca_agent *agent) +{ +} + +static inline int +mlx5_hv_vhca_write_agent(struct mlx5_hv_vhca_agent *agent, + void *buf, int len) +{ + return 0; +} +#endif + +#endif /* __LIB_HV_VHCA_H__ */ diff --git a/drivers/net/ethernet/mellanox/mlx5/core/main.c b/drivers/net/ethernet/mellanox/mlx5/core/main.c index 0b70b1d6338d..61388ca7233b 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/main.c @@ -69,6 +69,7 @@ #include "lib/pci_vsc.h" #include "diag/fw_tracer.h" #include "ecpf.h" +#include "lib/hv_vhca.h" MODULE_AUTHOR("Eli Cohen <eli@mellanox.com>"); MODULE_DESCRIPTION("Mellanox 5th generation network adapters (ConnectX series) core driver"); @@ -870,6 +871,7 @@ static int mlx5_init_once(struct mlx5_core_dev *dev) } dev->tracer = mlx5_fw_tracer_create(dev); + dev->hv_vhca = mlx5_hv_vhca_create(dev); return 0; @@ -900,6 +902,7 @@ err_devcom: static void mlx5_cleanup_once(struct mlx5_core_dev *dev) { + mlx5_hv_vhca_destroy(dev->hv_vhca); mlx5_fw_tracer_destroy(dev->tracer); mlx5_fpga_cleanup(dev); mlx5_eswitch_cleanup(dev->priv.eswitch); @@ -1067,6 +1070,8 @@ static int mlx5_load(struct mlx5_core_dev *dev) goto err_fw_tracer; } + mlx5_hv_vhca_init(dev->hv_vhca); + err = mlx5_fpga_device_start(dev); if (err) { mlx5_core_err(dev, "fpga device start 
failed %d\n", err); @@ -1122,6 +1127,7 @@ err_tls_start: err_ipsec_start: mlx5_fpga_device_stop(dev); err_fpga_start: + mlx5_hv_vhca_cleanup(dev->hv_vhca); mlx5_fw_tracer_cleanup(dev->tracer); err_fw_tracer: mlx5_eq_table_destroy(dev); @@ -1142,6 +1148,7 @@ static void mlx5_unload(struct mlx5_core_dev *dev) mlx5_accel_ipsec_cleanup(dev); mlx5_accel_tls_cleanup(dev); mlx5_fpga_device_stop(dev); + mlx5_hv_vhca_cleanup(dev->hv_vhca); mlx5_fw_tracer_cleanup(dev->tracer); mlx5_eq_table_destroy(dev); mlx5_irq_table_destroy(dev); diff --git a/drivers/pci/Kconfig b/drivers/pci/Kconfig index 2ab92409210a..c313de96a357 100644 --- a/drivers/pci/Kconfig +++ b/drivers/pci/Kconfig @@ -182,6 +182,7 @@ config PCI_LABEL config PCI_HYPERV tristate "Hyper-V PCI Frontend" depends on X86 && HYPERV && PCI_MSI && PCI_MSI_IRQ_DOMAIN && X86_64 + select PCI_HYPERV_INTERFACE help The PCI device frontend driver allows the kernel to import arbitrary PCI devices from a PCI backend to support PCI driver domains. diff --git a/drivers/pci/controller/Kconfig b/drivers/pci/controller/Kconfig index fe9f9f13ce11..70e078238899 100644 --- a/drivers/pci/controller/Kconfig +++ b/drivers/pci/controller/Kconfig @@ -281,5 +281,12 @@ config VMD To compile this driver as a module, choose M here: the module will be called vmd. +config PCI_HYPERV_INTERFACE + tristate "Hyper-V PCI Interface" + depends on X86 && HYPERV && PCI_MSI && PCI_MSI_IRQ_DOMAIN && X86_64 + help + The Hyper-V PCI Interface is a helper driver allows other drivers to + have a common interface with the Hyper-V PCI frontend driver. 
+ source "drivers/pci/controller/dwc/Kconfig" endmenu diff --git a/drivers/pci/controller/Makefile b/drivers/pci/controller/Makefile index d56a507495c5..a2a22c9d91af 100644 --- a/drivers/pci/controller/Makefile +++ b/drivers/pci/controller/Makefile @@ -4,6 +4,7 @@ obj-$(CONFIG_PCIE_CADENCE_HOST) += pcie-cadence-host.o obj-$(CONFIG_PCIE_CADENCE_EP) += pcie-cadence-ep.o obj-$(CONFIG_PCI_FTPCI100) += pci-ftpci100.o obj-$(CONFIG_PCI_HYPERV) += pci-hyperv.o +obj-$(CONFIG_PCI_HYPERV_INTERFACE) += pci-hyperv-intf.o obj-$(CONFIG_PCI_MVEBU) += pci-mvebu.o obj-$(CONFIG_PCI_AARDVARK) += pci-aardvark.o obj-$(CONFIG_PCI_TEGRA) += pci-tegra.o diff --git a/drivers/pci/controller/pci-hyperv-intf.c b/drivers/pci/controller/pci-hyperv-intf.c new file mode 100644 index 000000000000..cc96be450360 --- /dev/null +++ b/drivers/pci/controller/pci-hyperv-intf.c @@ -0,0 +1,67 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright (c) Microsoft Corporation. + * + * Author: + * Haiyang Zhang <haiyangz@microsoft.com> + * + * This small module is a helper driver allows other drivers to + * have a common interface with the Hyper-V PCI frontend driver. 
+ */ + +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt + +#include <linux/kernel.h> +#include <linux/module.h> +#include <linux/hyperv.h> + +struct hyperv_pci_block_ops hvpci_block_ops; +EXPORT_SYMBOL_GPL(hvpci_block_ops); + +int hyperv_read_cfg_blk(struct pci_dev *dev, void *buf, unsigned int buf_len, + unsigned int block_id, unsigned int *bytes_returned) +{ + if (!hvpci_block_ops.read_block) + return -EOPNOTSUPP; + + return hvpci_block_ops.read_block(dev, buf, buf_len, block_id, + bytes_returned); +} +EXPORT_SYMBOL_GPL(hyperv_read_cfg_blk); + +int hyperv_write_cfg_blk(struct pci_dev *dev, void *buf, unsigned int len, + unsigned int block_id) +{ + if (!hvpci_block_ops.write_block) + return -EOPNOTSUPP; + + return hvpci_block_ops.write_block(dev, buf, len, block_id); +} +EXPORT_SYMBOL_GPL(hyperv_write_cfg_blk); + +int hyperv_reg_block_invalidate(struct pci_dev *dev, void *context, + void (*block_invalidate)(void *context, + u64 block_mask)) +{ + if (!hvpci_block_ops.reg_blk_invalidate) + return -EOPNOTSUPP; + + return hvpci_block_ops.reg_blk_invalidate(dev, context, + block_invalidate); +} +EXPORT_SYMBOL_GPL(hyperv_reg_block_invalidate); + +static void __exit exit_hv_pci_intf(void) +{ +} + +static int __init init_hv_pci_intf(void) +{ + return 0; +} + +module_init(init_hv_pci_intf); +module_exit(exit_hv_pci_intf); + +MODULE_DESCRIPTION("Hyper-V PCI Interface"); +MODULE_LICENSE("GPL v2"); diff --git a/drivers/pci/controller/pci-hyperv.c b/drivers/pci/controller/pci-hyperv.c index 40b625458afa..9c93ac2215b7 100644 --- a/drivers/pci/controller/pci-hyperv.c +++ b/drivers/pci/controller/pci-hyperv.c @@ -365,6 +365,39 @@ struct pci_delete_interrupt { struct tran_int_desc int_desc; } __packed; +/* + * Note: the VM must pass a valid block id, wslot and bytes_requested. 
+ */ +struct pci_read_block { + struct pci_message message_type; + u32 block_id; + union win_slot_encoding wslot; + u32 bytes_requested; +} __packed; + +struct pci_read_block_response { + struct vmpacket_descriptor hdr; + u32 status; + u8 bytes[HV_CONFIG_BLOCK_SIZE_MAX]; +} __packed; + +/* + * Note: the VM must pass a valid block id, wslot and byte_count. + */ +struct pci_write_block { + struct pci_message message_type; + u32 block_id; + union win_slot_encoding wslot; + u32 byte_count; + u8 bytes[HV_CONFIG_BLOCK_SIZE_MAX]; +} __packed; + +struct pci_dev_inval_block { + struct pci_incoming_message incoming; + union win_slot_encoding wslot; + u64 block_mask; +} __packed; + struct pci_dev_incoming { struct pci_incoming_message incoming; union win_slot_encoding wslot; @@ -499,6 +532,9 @@ struct hv_pci_dev { struct hv_pcibus_device *hbus; struct work_struct wrk; + void (*block_invalidate)(void *context, u64 block_mask); + void *invalidate_context; + /* * What would be observed if one wrote 0xFFFFFFFF to a BAR and then * read it back, for each of the BAR offsets within config space. @@ -817,6 +853,253 @@ static struct pci_ops hv_pcifront_ops = { .write = hv_pcifront_write_config, }; +/* + * Paravirtual backchannel + * + * Hyper-V SR-IOV provides a backchannel mechanism in software for + * communication between a VF driver and a PF driver. These + * "configuration blocks" are similar in concept to PCI configuration space, + * but instead of doing reads and writes in 32-bit chunks through a very slow + * path, packets of up to 128 bytes can be sent or received asynchronously. + * + * Nearly every SR-IOV device contains just such a communications channel in + * hardware, so using this one in software is usually optional. Using the + * software channel, however, allows driver implementers to leverage software + * tools that fuzz the communications channel looking for vulnerabilities. 
+ * + * The usage model for these packets puts the responsibility for reading or + * writing on the VF driver. The VF driver sends a read or a write packet, + * indicating which "block" is being referred to by number. + * + * If the PF driver wishes to initiate communication, it can "invalidate" one or + * more of the first 64 blocks. This invalidation is delivered by this driver + * via a callback supplied by the VF driver. + * + * No protocol is implied, except that supplied by the PF and VF drivers. + */ + +struct hv_read_config_compl { + struct hv_pci_compl comp_pkt; + void *buf; + unsigned int len; + unsigned int bytes_returned; +}; + +/** + * hv_pci_read_config_compl() - Invoked when a response packet + * for a read config block operation arrives. + * @context: Identifies the read config operation + * @resp: The response packet itself + * @resp_packet_size: Size in bytes of the response packet + */ +static void hv_pci_read_config_compl(void *context, struct pci_response *resp, + int resp_packet_size) +{ + struct hv_read_config_compl *comp = context; + struct pci_read_block_response *read_resp = + (struct pci_read_block_response *)resp; + unsigned int data_len, hdr_len; + + hdr_len = offsetof(struct pci_read_block_response, bytes); + if (resp_packet_size < hdr_len) { + comp->comp_pkt.completion_status = -1; + goto out; + } + + data_len = resp_packet_size - hdr_len; + if (data_len > 0 && read_resp->status == 0) { + comp->bytes_returned = min(comp->len, data_len); + memcpy(comp->buf, read_resp->bytes, comp->bytes_returned); + } else { + comp->bytes_returned = 0; + } + + comp->comp_pkt.completion_status = read_resp->status; +out: + complete(&comp->comp_pkt.host_event); +} + +/** + * hv_read_config_block() - Sends a read config block request to + * the back-end driver running in the Hyper-V parent partition. + * @pdev: The PCI driver's representation for this device. + * @buf: Buffer into which the config block will be copied. + * @len: Size in bytes of buf. 
+ * @block_id: Identifies the config block which has been requested. + * @bytes_returned: Size which came back from the back-end driver. + * + * Return: 0 on success, -errno on failure + */ +int hv_read_config_block(struct pci_dev *pdev, void *buf, unsigned int len, + unsigned int block_id, unsigned int *bytes_returned) +{ + struct hv_pcibus_device *hbus = + container_of(pdev->bus->sysdata, struct hv_pcibus_device, + sysdata); + struct { + struct pci_packet pkt; + char buf[sizeof(struct pci_read_block)]; + } pkt; + struct hv_read_config_compl comp_pkt; + struct pci_read_block *read_blk; + int ret; + + if (len == 0 || len > HV_CONFIG_BLOCK_SIZE_MAX) + return -EINVAL; + + init_completion(&comp_pkt.comp_pkt.host_event); + comp_pkt.buf = buf; + comp_pkt.len = len; + + memset(&pkt, 0, sizeof(pkt)); + pkt.pkt.completion_func = hv_pci_read_config_compl; + pkt.pkt.compl_ctxt = &comp_pkt; + read_blk = (struct pci_read_block *)&pkt.pkt.message; + read_blk->message_type.type = PCI_READ_BLOCK; + read_blk->wslot.slot = devfn_to_wslot(pdev->devfn); + read_blk->block_id = block_id; + read_blk->bytes_requested = len; + + ret = vmbus_sendpacket(hbus->hdev->channel, read_blk, + sizeof(*read_blk), (unsigned long)&pkt.pkt, + VM_PKT_DATA_INBAND, + VMBUS_DATA_PACKET_FLAG_COMPLETION_REQUESTED); + if (ret) + return ret; + + ret = wait_for_response(hbus->hdev, &comp_pkt.comp_pkt.host_event); + if (ret) + return ret; + + if (comp_pkt.comp_pkt.completion_status != 0 || + comp_pkt.bytes_returned == 0) { + dev_err(&hbus->hdev->device, + "Read Config Block failed: 0x%x, bytes_returned=%d\n", + comp_pkt.comp_pkt.completion_status, + comp_pkt.bytes_returned); + return -EIO; + } + + *bytes_returned = comp_pkt.bytes_returned; + return 0; +} + +/** + * hv_pci_write_config_compl() - Invoked when a response packet for a write + * config block operation arrives. 
+ * @context: Identifies the write config operation + * @resp: The response packet itself + * @resp_packet_size: Size in bytes of the response packet + */ +static void hv_pci_write_config_compl(void *context, struct pci_response *resp, + int resp_packet_size) +{ + struct hv_pci_compl *comp_pkt = context; + + comp_pkt->completion_status = resp->status; + complete(&comp_pkt->host_event); +} + +/** + * hv_write_config_block() - Sends a write config block request to the + * back-end driver running in the Hyper-V parent partition. + * @pdev: The PCI driver's representation for this device. + * @buf: Buffer from which the config block will be copied. + * @len: Size in bytes of buf. + * @block_id: Identifies the config block which is being written. + * + * Return: 0 on success, -errno on failure + */ +int hv_write_config_block(struct pci_dev *pdev, void *buf, unsigned int len, + unsigned int block_id) +{ + struct hv_pcibus_device *hbus = + container_of(pdev->bus->sysdata, struct hv_pcibus_device, + sysdata); + struct { + struct pci_packet pkt; + char buf[sizeof(struct pci_write_block)]; + u32 reserved; + } pkt; + struct hv_pci_compl comp_pkt; + struct pci_write_block *write_blk; + u32 pkt_size; + int ret; + + if (len == 0 || len > HV_CONFIG_BLOCK_SIZE_MAX) + return -EINVAL; + + init_completion(&comp_pkt.host_event); + + memset(&pkt, 0, sizeof(pkt)); + pkt.pkt.completion_func = hv_pci_write_config_compl; + pkt.pkt.compl_ctxt = &comp_pkt; + write_blk = (struct pci_write_block *)&pkt.pkt.message; + write_blk->message_type.type = PCI_WRITE_BLOCK; + write_blk->wslot.slot = devfn_to_wslot(pdev->devfn); + write_blk->block_id = block_id; + write_blk->byte_count = len; + memcpy(write_blk->bytes, buf, len); + pkt_size = offsetof(struct pci_write_block, bytes) + len; + /* + * This quirk is required on some hosts shipped around 2018, because + * these hosts don't check the pkt_size correctly (new hosts have been + * fixed since early 2019). 
The quirk is also safe on very old hosts + * and new hosts, because, on them, what really matters is the length + * specified in write_blk->byte_count. + */ + pkt_size += sizeof(pkt.reserved); + + ret = vmbus_sendpacket(hbus->hdev->channel, write_blk, pkt_size, + (unsigned long)&pkt.pkt, VM_PKT_DATA_INBAND, + VMBUS_DATA_PACKET_FLAG_COMPLETION_REQUESTED); + if (ret) + return ret; + + ret = wait_for_response(hbus->hdev, &comp_pkt.host_event); + if (ret) + return ret; + + if (comp_pkt.completion_status != 0) { + dev_err(&hbus->hdev->device, + "Write Config Block failed: 0x%x\n", + comp_pkt.completion_status); + return -EIO; + } + + return 0; +} + +/** + * hv_register_block_invalidate() - Registers a callback to be invoked when + * a config block invalidation arrives from the back-end driver. + * @pdev: The PCI driver's representation for this device. + * @context: Opaque pointer handed back to the callback. + * @block_invalidate: Callback invoked with @context and a mask of the + * blocks being invalidated. + * + * Return: 0 on success, -errno on failure + */ +int hv_register_block_invalidate(struct pci_dev *pdev, void *context, + void (*block_invalidate)(void *context, + u64 block_mask)) +{ + struct hv_pcibus_device *hbus = + container_of(pdev->bus->sysdata, struct hv_pcibus_device, + sysdata); + struct hv_pci_dev *hpdev; + + hpdev = get_pcichild_wslot(hbus, devfn_to_wslot(pdev->devfn)); + if (!hpdev) + return -ENODEV; + + hpdev->block_invalidate = block_invalidate; + hpdev->invalidate_context = context; + + put_pcichild(hpdev); + return 0; + +} + /* Interrupt management hooks */ static void hv_int_desc_free(struct hv_pci_dev *hpdev, struct tran_int_desc *int_desc) @@ -1968,6 +2251,7 @@ static void hv_pci_onchannelcallback(void *context) struct pci_response *response; struct pci_incoming_message *new_message; struct pci_bus_relations *bus_rel; + struct pci_dev_inval_block *inval; struct pci_dev_incoming *dev_message; struct hv_pci_dev *hpdev; @@ -2045,6 +2329,21 @@ static void hv_pci_onchannelcallback(void *context) } break; + case 
PCI_INVALIDATE_BLOCK: + + inval = (struct pci_dev_inval_block *)buffer; + hpdev = get_pcichild_wslot(hbus, + inval->wslot.slot); + if (hpdev) { + if (hpdev->block_invalidate) { + hpdev->block_invalidate( + hpdev->invalidate_context, + inval->block_mask); + } + put_pcichild(hpdev); + } + break; + default: dev_warn(&hbus->hdev->device, "Unimplemented protocol message %x\n", @@ -2743,10 +3042,19 @@ static struct hv_driver hv_pci_drv = { static void __exit exit_hv_pci_drv(void) { vmbus_driver_unregister(&hv_pci_drv); + + hvpci_block_ops.read_block = NULL; + hvpci_block_ops.write_block = NULL; + hvpci_block_ops.reg_blk_invalidate = NULL; } static int __init init_hv_pci_drv(void) { + /* Initialize PCI block r/w interface */ + hvpci_block_ops.read_block = hv_read_config_block; + hvpci_block_ops.write_block = hv_write_config_block; + hvpci_block_ops.reg_blk_invalidate = hv_register_block_invalidate; + return vmbus_driver_register(&hv_pci_drv); } diff --git a/include/linux/hyperv.h b/include/linux/hyperv.h index 6256cc34c4a6..2afe6fdc1dda 100644 --- a/include/linux/hyperv.h +++ b/include/linux/hyperv.h @@ -1578,4 +1578,33 @@ hv_pkt_iter_next(struct vmbus_channel *channel, for (pkt = hv_pkt_iter_first(channel); pkt; \ pkt = hv_pkt_iter_next(channel, pkt)) +/* + * Interface for passing data between SR-IOV PF and VF drivers. The VF driver + * sends requests to read and write blocks. Each block must be 128 bytes or + * smaller. Optionally, the VF driver can register a callback function which + * will be invoked when the host says that one or more of the first 64 block + * IDs is "invalid" which means that the VF driver should reread them. 
+ */ +#define HV_CONFIG_BLOCK_SIZE_MAX 128 + +int hyperv_read_cfg_blk(struct pci_dev *dev, void *buf, unsigned int buf_len, + unsigned int block_id, unsigned int *bytes_returned); +int hyperv_write_cfg_blk(struct pci_dev *dev, void *buf, unsigned int len, + unsigned int block_id); +int hyperv_reg_block_invalidate(struct pci_dev *dev, void *context, + void (*block_invalidate)(void *context, + u64 block_mask)); + +struct hyperv_pci_block_ops { + int (*read_block)(struct pci_dev *dev, void *buf, unsigned int buf_len, + unsigned int block_id, unsigned int *bytes_returned); + int (*write_block)(struct pci_dev *dev, void *buf, unsigned int len, + unsigned int block_id); + int (*reg_blk_invalidate)(struct pci_dev *dev, void *context, + void (*block_invalidate)(void *context, + u64 block_mask)); +}; + +extern struct hyperv_pci_block_ops hvpci_block_ops; + #endif /* _HYPERV_H */ diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h index df23f17eed64..13b4cf22f3ab 100644 --- a/include/linux/mlx5/driver.h +++ b/include/linux/mlx5/driver.h @@ -659,6 +659,7 @@ struct mlx5_clock { struct mlx5_fw_tracer; struct mlx5_vxlan; struct mlx5_geneve; +struct mlx5_hv_vhca; struct mlx5_core_dev { struct device *device; @@ -706,6 +707,7 @@ struct mlx5_core_dev { struct mlx5_ib_clock_info *clock_info; struct mlx5_fw_tracer *tracer; u32 vsc_addr; + struct mlx5_hv_vhca *hv_vhca; }; struct mlx5_db { |