diff options
Diffstat (limited to 'drivers/virtio')
-rw-r--r-- | drivers/virtio/Kconfig | 13 | ||||
-rw-r--r-- | drivers/virtio/Makefile | 1 | ||||
-rw-r--r-- | drivers/virtio/virtio.c | 18 | ||||
-rw-r--r-- | drivers/virtio/virtio_dma_buf.c | 1 | ||||
-rw-r--r-- | drivers/virtio/virtio_mem.c | 302 | ||||
-rw-r--r-- | drivers/virtio/virtio_pci_common.c | 58 | ||||
-rw-r--r-- | drivers/virtio/virtio_pci_common.h | 16 | ||||
-rw-r--r-- | drivers/virtio/virtio_pci_legacy.c | 106 | ||||
-rw-r--r-- | drivers/virtio/virtio_pci_legacy_dev.c | 220 | ||||
-rw-r--r-- | drivers/virtio/virtio_pci_modern.c | 6 | ||||
-rw-r--r-- | drivers/virtio/virtio_ring.c | 92 | ||||
-rw-r--r-- | drivers/virtio/virtio_vdpa.c | 19 |
12 files changed, 650 insertions, 202 deletions
diff --git a/drivers/virtio/Kconfig b/drivers/virtio/Kconfig index ce1b3f6ec325..34f80b7a8a64 100644 --- a/drivers/virtio/Kconfig +++ b/drivers/virtio/Kconfig @@ -20,6 +20,15 @@ config VIRTIO_PCI_LIB PCI device with possible vendor specific extensions. Any module that selects this module must depend on PCI. +config VIRTIO_PCI_LIB_LEGACY + tristate + help + Legacy PCI device (Virtio PCI Card 0.9.x Draft and older device) + implementation. + This module implements the basic probe and control for devices + which are based on legacy PCI device. Any module that selects this + module must depend on PCI. + menuconfig VIRTIO_MENU bool "Virtio drivers" default y @@ -43,6 +52,7 @@ config VIRTIO_PCI_LEGACY bool "Support for legacy virtio draft 0.9.X and older devices" default y depends on VIRTIO_PCI + select VIRTIO_PCI_LIB_LEGACY help Virtio PCI Card 0.9.X Draft (circa 2014) and older device support. @@ -98,9 +108,10 @@ config VIRTIO_MEM default m depends on X86_64 depends on VIRTIO - depends on MEMORY_HOTPLUG_SPARSE + depends on MEMORY_HOTPLUG depends on MEMORY_HOTREMOVE depends on CONTIG_ALLOC + depends on EXCLUSIVE_SYSTEM_RAM help This driver provides access to virtio-mem paravirtualized memory devices, allowing to hotplug and hotunplug memory. diff --git a/drivers/virtio/Makefile b/drivers/virtio/Makefile index 699bbea0465f..0a82d0873248 100644 --- a/drivers/virtio/Makefile +++ b/drivers/virtio/Makefile @@ -1,6 +1,7 @@ # SPDX-License-Identifier: GPL-2.0 obj-$(CONFIG_VIRTIO) += virtio.o virtio_ring.o obj-$(CONFIG_VIRTIO_PCI_LIB) += virtio_pci_modern_dev.o +obj-$(CONFIG_VIRTIO_PCI_LIB_LEGACY) += virtio_pci_legacy_dev.o obj-$(CONFIG_VIRTIO_MMIO) += virtio_mmio.o obj-$(CONFIG_VIRTIO_PCI) += virtio_pci.o virtio_pci-y := virtio_pci_modern.o virtio_pci_common.o diff --git a/drivers/virtio/virtio.c b/drivers/virtio/virtio.c index 588e02fb91d3..236081afe9a2 100644 --- a/drivers/virtio/virtio.c +++ b/drivers/virtio/virtio.c @@ -239,6 +239,17 @@ static int virtio_dev_probe(struct device *_d) driver_features_legacy = driver_features; } + /* + * Some devices detect legacy solely via F_VERSION_1. Write + * F_VERSION_1 to force LE config space accesses before FEATURES_OK for + * these when needed. + */ + if (drv->validate && !virtio_legacy_is_little_endian() + && device_features & BIT_ULL(VIRTIO_F_VERSION_1)) { + dev->features = BIT_ULL(VIRTIO_F_VERSION_1); + dev->config->finalize_features(dev); + } + if (device_features & (1ULL << VIRTIO_F_VERSION_1)) dev->features = driver_features & device_features; else @@ -345,8 +356,13 @@ static int virtio_device_of_init(struct virtio_device *dev) ret = snprintf(compat, sizeof(compat), "virtio,device%x", dev->id.device); BUG_ON(ret >= sizeof(compat)); + /* + * On powerpc/pseries virtio devices are PCI devices so PCI + * vendor/device ids play the role of the "compatible" property. + * Simply don't init of_node in this case. + */ if (!of_device_is_compatible(np, compat)) { - ret = -EINVAL; + ret = 0; goto out; } diff --git a/drivers/virtio/virtio_dma_buf.c b/drivers/virtio/virtio_dma_buf.c index 5127a2f0c986..2521a75009c3 100644 --- a/drivers/virtio/virtio_dma_buf.c +++ b/drivers/virtio/virtio_dma_buf.c @@ -86,3 +86,4 @@ int virtio_dma_buf_get_uuid(struct dma_buf *dma_buf, EXPORT_SYMBOL(virtio_dma_buf_get_uuid); MODULE_LICENSE("GPL"); +MODULE_IMPORT_NS(DMA_BUF); diff --git a/drivers/virtio/virtio_mem.c b/drivers/virtio/virtio_mem.c index bef8ad6bf466..96e5a8782769 100644 --- a/drivers/virtio/virtio_mem.c +++ b/drivers/virtio/virtio_mem.c @@ -223,6 +223,9 @@ struct virtio_mem { * When this lock is held the pointers can't change, ONLINE and * OFFLINE blocks can't change the state and no subblocks will get * plugged/unplugged. + * + * In kdump mode, used to serialize requests, last_block_addr and + * last_block_plugged. */ struct mutex hotplug_mutex; bool hotplug_active; @@ -230,6 +233,9 @@ struct virtio_mem { /* An error occurred we cannot handle - stop processing requests. */ bool broken; + /* Cached valued of is_kdump_kernel() when the device was probed. */ + bool in_kdump; + /* The driver is being removed. */ spinlock_t removal_lock; bool removing; @@ -243,6 +249,13 @@ struct virtio_mem { /* Memory notifier (online/offline events). */ struct notifier_block memory_notifier; +#ifdef CONFIG_PROC_VMCORE + /* vmcore callback for /proc/vmcore handling in kdump mode */ + struct vmcore_cb vmcore_cb; + uint64_t last_block_addr; + bool last_block_plugged; +#endif /* CONFIG_PROC_VMCORE */ + /* Next device in the list of virtio-mem devices. */ struct list_head next; }; @@ -260,6 +273,8 @@ static void virtio_mem_fake_offline_going_offline(unsigned long pfn, static void virtio_mem_fake_offline_cancel_offline(unsigned long pfn, unsigned long nr_pages); static void virtio_mem_retry(struct virtio_mem *vm); +static int virtio_mem_create_resource(struct virtio_mem *vm); +static void virtio_mem_delete_resource(struct virtio_mem *vm); /* * Register a virtio-mem device so it will be considered for the online_page @@ -2291,6 +2306,12 @@ static void virtio_mem_run_wq(struct work_struct *work) uint64_t diff; int rc; + if (unlikely(vm->in_kdump)) { + dev_warn_once(&vm->vdev->dev, + "unexpected workqueue run in kdump kernel\n"); + return; + } + hrtimer_cancel(&vm->retry_timer); if (vm->broken) @@ -2392,41 +2413,11 @@ static int virtio_mem_init_vq(struct virtio_mem *vm) return 0; } -static int virtio_mem_init(struct virtio_mem *vm) +static int virtio_mem_init_hotplug(struct virtio_mem *vm) { const struct range pluggable_range = mhp_get_pluggable_range(true); - uint64_t sb_size, addr; - uint16_t node_id; - - if (!vm->vdev->config->get) { - dev_err(&vm->vdev->dev, "config access disabled\n"); - return -EINVAL; - } - - /* - * We don't want to (un)plug or reuse any memory when in kdump. The - * memory is still accessible (but not mapped). - */ - if (is_kdump_kernel()) { - dev_warn(&vm->vdev->dev, "disabled in kdump kernel\n"); - return -EBUSY; - } - - /* Fetch all properties that can't change. */ - virtio_cread_le(vm->vdev, struct virtio_mem_config, plugged_size, - &vm->plugged_size); - virtio_cread_le(vm->vdev, struct virtio_mem_config, block_size, - &vm->device_block_size); - virtio_cread_le(vm->vdev, struct virtio_mem_config, node_id, - &node_id); - vm->nid = virtio_mem_translate_node_id(vm, node_id); - virtio_cread_le(vm->vdev, struct virtio_mem_config, addr, &vm->addr); - virtio_cread_le(vm->vdev, struct virtio_mem_config, region_size, - &vm->region_size); - - /* Determine the nid for the device based on the lowest address. */ - if (vm->nid == NUMA_NO_NODE) - vm->nid = memory_add_physaddr_to_nid(vm->addr); + uint64_t unit_pages, sb_size, addr; + int rc; /* bad device setup - warn only */ if (!IS_ALIGNED(vm->addr, memory_block_size_bytes())) @@ -2496,10 +2487,6 @@ static int virtio_mem_init(struct virtio_mem *vm) vm->offline_threshold); } - dev_info(&vm->vdev->dev, "start address: 0x%llx", vm->addr); - dev_info(&vm->vdev->dev, "region size: 0x%llx", vm->region_size); - dev_info(&vm->vdev->dev, "device block size: 0x%llx", - (unsigned long long)vm->device_block_size); dev_info(&vm->vdev->dev, "memory block size: 0x%lx", memory_block_size_bytes()); if (vm->in_sbm) @@ -2508,10 +2495,170 @@ static int virtio_mem_init(struct virtio_mem *vm) else dev_info(&vm->vdev->dev, "big block size: 0x%llx", (unsigned long long)vm->bbm.bb_size); + + /* create the parent resource for all memory */ + rc = virtio_mem_create_resource(vm); + if (rc) + return rc; + + /* use a single dynamic memory group to cover the whole memory device */ + if (vm->in_sbm) + unit_pages = PHYS_PFN(memory_block_size_bytes()); + else + unit_pages = PHYS_PFN(vm->bbm.bb_size); + rc = memory_group_register_dynamic(vm->nid, unit_pages); + if (rc < 0) + goto out_del_resource; + vm->mgid = rc; + + /* + * If we still have memory plugged, we have to unplug all memory first. + * Registering our parent resource makes sure that this memory isn't + * actually in use (e.g., trying to reload the driver). + */ + if (vm->plugged_size) { + vm->unplug_all_required = true; + dev_info(&vm->vdev->dev, "unplugging all memory is required\n"); + } + + /* register callbacks */ + vm->memory_notifier.notifier_call = virtio_mem_memory_notifier_cb; + rc = register_memory_notifier(&vm->memory_notifier); + if (rc) + goto out_unreg_group; + rc = register_virtio_mem_device(vm); + if (rc) + goto out_unreg_mem; + + return 0; +out_unreg_mem: + unregister_memory_notifier(&vm->memory_notifier); +out_unreg_group: + memory_group_unregister(vm->mgid); +out_del_resource: + virtio_mem_delete_resource(vm); + return rc; +} + +#ifdef CONFIG_PROC_VMCORE +static int virtio_mem_send_state_request(struct virtio_mem *vm, uint64_t addr, + uint64_t size) +{ + const uint64_t nb_vm_blocks = size / vm->device_block_size; + const struct virtio_mem_req req = { + .type = cpu_to_virtio16(vm->vdev, VIRTIO_MEM_REQ_STATE), + .u.state.addr = cpu_to_virtio64(vm->vdev, addr), + .u.state.nb_blocks = cpu_to_virtio16(vm->vdev, nb_vm_blocks), + }; + int rc = -ENOMEM; + + dev_dbg(&vm->vdev->dev, "requesting state: 0x%llx - 0x%llx\n", addr, + addr + size - 1); + + switch (virtio_mem_send_request(vm, &req)) { + case VIRTIO_MEM_RESP_ACK: + return virtio16_to_cpu(vm->vdev, vm->resp.u.state.state); + case VIRTIO_MEM_RESP_ERROR: + rc = -EINVAL; + break; + default: + break; + } + + dev_dbg(&vm->vdev->dev, "requesting state failed: %d\n", rc); + return rc; +} + +static bool virtio_mem_vmcore_pfn_is_ram(struct vmcore_cb *cb, + unsigned long pfn) +{ + struct virtio_mem *vm = container_of(cb, struct virtio_mem, + vmcore_cb); + uint64_t addr = PFN_PHYS(pfn); + bool is_ram; + int rc; + + if (!virtio_mem_contains_range(vm, addr, PAGE_SIZE)) + return true; + if (!vm->plugged_size) + return false; + + /* + * We have to serialize device requests and access to the information + * about the block queried last. + */ + mutex_lock(&vm->hotplug_mutex); + + addr = ALIGN_DOWN(addr, vm->device_block_size); + if (addr != vm->last_block_addr) { + rc = virtio_mem_send_state_request(vm, addr, + vm->device_block_size); + /* On any kind of error, we're going to signal !ram. */ + if (rc == VIRTIO_MEM_STATE_PLUGGED) + vm->last_block_plugged = true; + else + vm->last_block_plugged = false; + vm->last_block_addr = addr; + } + + is_ram = vm->last_block_plugged; + mutex_unlock(&vm->hotplug_mutex); + return is_ram; +} +#endif /* CONFIG_PROC_VMCORE */ + +static int virtio_mem_init_kdump(struct virtio_mem *vm) +{ +#ifdef CONFIG_PROC_VMCORE + dev_info(&vm->vdev->dev, "memory hot(un)plug disabled in kdump kernel\n"); + vm->vmcore_cb.pfn_is_ram = virtio_mem_vmcore_pfn_is_ram; + register_vmcore_cb(&vm->vmcore_cb); + return 0; +#else /* CONFIG_PROC_VMCORE */ + dev_warn(&vm->vdev->dev, "disabled in kdump kernel without vmcore\n"); + return -EBUSY; +#endif /* CONFIG_PROC_VMCORE */ +} + +static int virtio_mem_init(struct virtio_mem *vm) +{ + uint16_t node_id; + + if (!vm->vdev->config->get) { + dev_err(&vm->vdev->dev, "config access disabled\n"); + return -EINVAL; + } + + /* Fetch all properties that can't change. */ + virtio_cread_le(vm->vdev, struct virtio_mem_config, plugged_size, + &vm->plugged_size); + virtio_cread_le(vm->vdev, struct virtio_mem_config, block_size, + &vm->device_block_size); + virtio_cread_le(vm->vdev, struct virtio_mem_config, node_id, + &node_id); + vm->nid = virtio_mem_translate_node_id(vm, node_id); + virtio_cread_le(vm->vdev, struct virtio_mem_config, addr, &vm->addr); + virtio_cread_le(vm->vdev, struct virtio_mem_config, region_size, + &vm->region_size); + + /* Determine the nid for the device based on the lowest address. */ + if (vm->nid == NUMA_NO_NODE) + vm->nid = memory_add_physaddr_to_nid(vm->addr); + + dev_info(&vm->vdev->dev, "start address: 0x%llx", vm->addr); + dev_info(&vm->vdev->dev, "region size: 0x%llx", vm->region_size); + dev_info(&vm->vdev->dev, "device block size: 0x%llx", + (unsigned long long)vm->device_block_size); if (vm->nid != NUMA_NO_NODE && IS_ENABLED(CONFIG_NUMA)) dev_info(&vm->vdev->dev, "nid: %d", vm->nid); - return 0; + /* + * We don't want to (un)plug or reuse any memory when in kdump. The + * memory is still accessible (but not exposed to Linux). + */ + if (vm->in_kdump) + return virtio_mem_init_kdump(vm); + return virtio_mem_init_hotplug(vm); } static int virtio_mem_create_resource(struct virtio_mem *vm) @@ -2525,8 +2672,10 @@ static int virtio_mem_create_resource(struct virtio_mem *vm) if (!name) return -ENOMEM; + /* Disallow mapping device memory via /dev/mem completely. */ vm->parent_resource = __request_mem_region(vm->addr, vm->region_size, - name, IORESOURCE_SYSTEM_RAM); + name, IORESOURCE_SYSTEM_RAM | + IORESOURCE_EXCLUSIVE); if (!vm->parent_resource) { kfree(name); dev_warn(&vm->vdev->dev, "could not reserve device region\n"); @@ -2571,7 +2720,6 @@ static bool virtio_mem_has_memory_added(struct virtio_mem *vm) static int virtio_mem_probe(struct virtio_device *vdev) { struct virtio_mem *vm; - uint64_t unit_pages; int rc; BUILD_BUG_ON(sizeof(struct virtio_mem_req) != 24); @@ -2590,6 +2738,7 @@ static int virtio_mem_probe(struct virtio_device *vdev) hrtimer_init(&vm->retry_timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL); vm->retry_timer.function = virtio_mem_timer_expired; vm->retry_timer_ms = VIRTIO_MEM_RETRY_TIMER_MIN_MS; + vm->in_kdump = is_kdump_kernel(); /* register the virtqueue */ rc = virtio_mem_init_vq(vm); @@ -2601,53 +2750,15 @@ static int virtio_mem_probe(struct virtio_device *vdev) if (rc) goto out_del_vq; - /* create the parent resource for all memory */ - rc = virtio_mem_create_resource(vm); - if (rc) - goto out_del_vq; - - /* use a single dynamic memory group to cover the whole memory device */ - if (vm->in_sbm) - unit_pages = PHYS_PFN(memory_block_size_bytes()); - else - unit_pages = PHYS_PFN(vm->bbm.bb_size); - rc = memory_group_register_dynamic(vm->nid, unit_pages); - if (rc < 0) - goto out_del_resource; - vm->mgid = rc; - - /* - * If we still have memory plugged, we have to unplug all memory first. - * Registering our parent resource makes sure that this memory isn't - * actually in use (e.g., trying to reload the driver). - */ - if (vm->plugged_size) { - vm->unplug_all_required = true; - dev_info(&vm->vdev->dev, "unplugging all memory is required\n"); - } - - /* register callbacks */ - vm->memory_notifier.notifier_call = virtio_mem_memory_notifier_cb; - rc = register_memory_notifier(&vm->memory_notifier); - if (rc) - goto out_unreg_group; - rc = register_virtio_mem_device(vm); - if (rc) - goto out_unreg_mem; - virtio_device_ready(vdev); /* trigger a config update to start processing the requested_size */ - atomic_set(&vm->config_changed, 1); - queue_work(system_freezable_wq, &vm->wq); + if (!vm->in_kdump) { + atomic_set(&vm->config_changed, 1); + queue_work(system_freezable_wq, &vm->wq); + } return 0; -out_unreg_mem: - unregister_memory_notifier(&vm->memory_notifier); -out_unreg_group: - memory_group_unregister(vm->mgid); -out_del_resource: - virtio_mem_delete_resource(vm); out_del_vq: vdev->config->del_vqs(vdev); out_free_vm: @@ -2657,9 +2768,8 @@ out_free_vm: return rc; } -static void virtio_mem_remove(struct virtio_device *vdev) +static void virtio_mem_deinit_hotplug(struct virtio_mem *vm) { - struct virtio_mem *vm = vdev->priv; unsigned long mb_id; int rc; @@ -2706,7 +2816,8 @@ static void virtio_mem_remove(struct virtio_device *vdev) * away. Warn at least. */ if (virtio_mem_has_memory_added(vm)) { - dev_warn(&vdev->dev, "device still has system memory added\n"); + dev_warn(&vm->vdev->dev, + "device still has system memory added\n"); } else { virtio_mem_delete_resource(vm); kfree_const(vm->resource_name); @@ -2720,6 +2831,23 @@ static void virtio_mem_remove(struct virtio_device *vdev) } else { vfree(vm->bbm.bb_states); } +} + +static void virtio_mem_deinit_kdump(struct virtio_mem *vm) +{ +#ifdef CONFIG_PROC_VMCORE + unregister_vmcore_cb(&vm->vmcore_cb); +#endif /* CONFIG_PROC_VMCORE */ +} + +static void virtio_mem_remove(struct virtio_device *vdev) +{ + struct virtio_mem *vm = vdev->priv; + + if (vm->in_kdump) + virtio_mem_deinit_kdump(vm); + else + virtio_mem_deinit_hotplug(vm); /* reset the device and cleanup the queues */ vdev->config->reset(vdev); @@ -2733,6 +2861,9 @@ static void virtio_mem_config_changed(struct virtio_device *vdev) { struct virtio_mem *vm = vdev->priv; + if (unlikely(vm->in_kdump)) + return; + atomic_set(&vm->config_changed, 1); virtio_mem_retry(vm); } @@ -2758,6 +2889,7 @@ static unsigned int virtio_mem_features[] = { #if defined(CONFIG_NUMA) && defined(CONFIG_ACPI_NUMA) VIRTIO_MEM_F_ACPI_PXM, #endif + VIRTIO_MEM_F_UNPLUGGED_INACCESSIBLE, }; static const struct virtio_device_id virtio_mem_id_table[] = { diff --git a/drivers/virtio/virtio_pci_common.c b/drivers/virtio/virtio_pci_common.c index b35bb2d57f62..fdbde1db5ec5 100644 --- a/drivers/virtio/virtio_pci_common.c +++ b/drivers/virtio/virtio_pci_common.c @@ -24,17 +24,46 @@ MODULE_PARM_DESC(force_legacy, "Force legacy mode for transitional virtio 1 devices"); #endif -/* wait for pending irq handlers */ -void vp_synchronize_vectors(struct virtio_device *vdev) +/* disable irq handlers */ +void vp_disable_cbs(struct virtio_device *vdev) { struct virtio_pci_device *vp_dev = to_vp_device(vdev); int i; - if (vp_dev->intx_enabled) + if (vp_dev->intx_enabled) { + /* + * The below synchronize() guarantees that any + * interrupt for this line arriving after + * synchronize_irq() has completed is guaranteed to see + * intx_soft_enabled == false. + */ + WRITE_ONCE(vp_dev->intx_soft_enabled, false); synchronize_irq(vp_dev->pci_dev->irq); + } for (i = 0; i < vp_dev->msix_vectors; ++i) - synchronize_irq(pci_irq_vector(vp_dev->pci_dev, i)); + disable_irq(pci_irq_vector(vp_dev->pci_dev, i)); +} + +/* enable irq handlers */ +void vp_enable_cbs(struct virtio_device *vdev) +{ + struct virtio_pci_device *vp_dev = to_vp_device(vdev); + int i; + + if (vp_dev->intx_enabled) { + disable_irq(vp_dev->pci_dev->irq); + /* + * The above disable_irq() provides TSO ordering and + * as such promotes the below store to store-release. + */ + WRITE_ONCE(vp_dev->intx_soft_enabled, true); + enable_irq(vp_dev->pci_dev->irq); + return; + } + + for (i = 0; i < vp_dev->msix_vectors; ++i) + enable_irq(pci_irq_vector(vp_dev->pci_dev, i)); } /* the notify function used when creating a virt queue */ @@ -84,6 +113,9 @@ static irqreturn_t vp_interrupt(int irq, void *opaque) struct virtio_pci_device *vp_dev = opaque; u8 isr; + if (!READ_ONCE(vp_dev->intx_soft_enabled)) + return IRQ_NONE; + /* reading the ISR has the effect of also clearing it so it's very * important to save off the value. */ isr = ioread8(vp_dev->isr); @@ -141,7 +173,8 @@ static int vp_request_msix_vectors(struct virtio_device *vdev, int nvectors, snprintf(vp_dev->msix_names[v], sizeof *vp_dev->msix_names, "%s-config", name); err = request_irq(pci_irq_vector(vp_dev->pci_dev, v), - vp_config_changed, 0, vp_dev->msix_names[v], + vp_config_changed, IRQF_NO_AUTOEN, + vp_dev->msix_names[v], vp_dev); if (err) goto error; @@ -160,7 +193,8 @@ static int vp_request_msix_vectors(struct virtio_device *vdev, int nvectors, snprintf(vp_dev->msix_names[v], sizeof *vp_dev->msix_names, "%s-virtqueues", name); err = request_irq(pci_irq_vector(vp_dev->pci_dev, v), - vp_vring_interrupt, 0, vp_dev->msix_names[v], + vp_vring_interrupt, IRQF_NO_AUTOEN, + vp_dev->msix_names[v], vp_dev); if (err) goto error; @@ -337,7 +371,7 @@ static int vp_find_vqs_msix(struct virtio_device *vdev, unsigned nvqs, "%s-%s", dev_name(&vp_dev->vdev.dev), names[i]); err = request_irq(pci_irq_vector(vp_dev->pci_dev, msix_vec), - vring_interrupt, 0, + vring_interrupt, IRQF_NO_AUTOEN, vp_dev->msix_names[msix_vec], vqs[i]); if (err) @@ -549,6 +583,8 @@ static int virtio_pci_probe(struct pci_dev *pci_dev, pci_set_master(pci_dev); + vp_dev->is_legacy = vp_dev->ldev.ioaddr ? true : false; + rc = register_virtio_device(&vp_dev->vdev); reg_dev = vp_dev; if (rc) @@ -557,10 +593,10 @@ static int virtio_pci_probe(struct pci_dev *pci_dev, return 0; err_register: - if (vp_dev->ioaddr) - virtio_pci_legacy_remove(vp_dev); + if (vp_dev->is_legacy) + virtio_pci_legacy_remove(vp_dev); else - virtio_pci_modern_remove(vp_dev); + virtio_pci_modern_remove(vp_dev); err_probe: pci_disable_device(pci_dev); err_enable_device: @@ -587,7 +623,7 @@ static void virtio_pci_remove(struct pci_dev *pci_dev) unregister_virtio_device(&vp_dev->vdev); - if (vp_dev->ioaddr) + if (vp_dev->is_legacy) virtio_pci_legacy_remove(vp_dev); else virtio_pci_modern_remove(vp_dev); diff --git a/drivers/virtio/virtio_pci_common.h b/drivers/virtio/virtio_pci_common.h index beec047a8f8d..23f6c5c678d5 100644 --- a/drivers/virtio/virtio_pci_common.h +++ b/drivers/virtio/virtio_pci_common.h @@ -25,6 +25,7 @@ #include <linux/virtio_config.h> #include <linux/virtio_ring.h> #include <linux/virtio_pci.h> +#include <linux/virtio_pci_legacy.h> #include <linux/virtio_pci_modern.h> #include <linux/highmem.h> #include <linux/spinlock.h> @@ -44,16 +45,14 @@ struct virtio_pci_vq_info { struct virtio_pci_device { struct virtio_device vdev; struct pci_dev *pci_dev; + struct virtio_pci_legacy_device ldev; struct virtio_pci_modern_device mdev; - /* In legacy mode, these two point to within ->legacy. */ + bool is_legacy; + /* Where to read and clear interrupt */ u8 __iomem *isr; - /* Legacy only field */ - /* the IO mapping for the PCI config space */ - void __iomem *ioaddr; - /* a list of queues so we can dispatch IRQs */ spinlock_t lock; struct list_head virtqueues; @@ -64,6 +63,7 @@ struct virtio_pci_device { /* MSI-X support */ int msix_enabled; int intx_enabled; + bool intx_soft_enabled; cpumask_var_t *msix_affinity_masks; /* Name strings for interrupts. This size should be enough, * and I'm too lazy to allocate each name separately. */ @@ -102,8 +102,10 @@ static struct virtio_pci_device *to_vp_device(struct virtio_device *vdev) return container_of(vdev, struct virtio_pci_device, vdev); } -/* wait for pending irq handlers */ -void vp_synchronize_vectors(struct virtio_device *vdev); +/* disable irq handlers */ +void vp_disable_cbs(struct virtio_device *vdev); +/* enable irq handlers */ +void vp_enable_cbs(struct virtio_device *vdev); /* the notify function used when creating a virt queue */ bool vp_notify(struct virtqueue *vq); /* the config->del_vqs() implementation */ diff --git a/drivers/virtio/virtio_pci_legacy.c b/drivers/virtio/virtio_pci_legacy.c index d62e9835aeec..b3f8128b7983 100644 --- a/drivers/virtio/virtio_pci_legacy.c +++ b/drivers/virtio/virtio_pci_legacy.c @@ -14,6 +14,7 @@ * Michael S. Tsirkin <mst@redhat.com> */ +#include "linux/virtio_pci_legacy.h" #include "virtio_pci_common.h" /* virtio config->get_features() implementation */ @@ -23,7 +24,7 @@ static u64 vp_get_features(struct virtio_device *vdev) /* When someone needs more than 32 feature bits, we'll need to * steal a bit to indicate that the rest are somewhere else. */ - return ioread32(vp_dev->ioaddr + VIRTIO_PCI_HOST_FEATURES); + return vp_legacy_get_features(&vp_dev->ldev); } /* virtio config->finalize_features() implementation */ @@ -38,7 +39,7 @@ static int vp_finalize_features(struct virtio_device *vdev) BUG_ON((u32)vdev->features != vdev->features); /* We only support 32 feature bits. */ - iowrite32(vdev->features, vp_dev->ioaddr + VIRTIO_PCI_GUEST_FEATURES); + vp_legacy_set_features(&vp_dev->ldev, vdev->features); return 0; } @@ -48,7 +49,7 @@ static void vp_get(struct virtio_device *vdev, unsigned offset, void *buf, unsigned len) { struct virtio_pci_device *vp_dev = to_vp_device(vdev); - void __iomem *ioaddr = vp_dev->ioaddr + + void __iomem *ioaddr = vp_dev->ldev.ioaddr + VIRTIO_PCI_CONFIG_OFF(vp_dev->msix_enabled) + offset; u8 *ptr = buf; @@ -64,7 +65,7 @@ static void vp_set(struct virtio_device *vdev, unsigned offset, const void *buf, unsigned len) { struct virtio_pci_device *vp_dev = to_vp_device(vdev); - void __iomem *ioaddr = vp_dev->ioaddr + + void __iomem *ioaddr = vp_dev->ldev.ioaddr + VIRTIO_PCI_CONFIG_OFF(vp_dev->msix_enabled) + offset; const u8 *ptr = buf; @@ -78,7 +79,7 @@ static void vp_set(struct virtio_device *vdev, unsigned offset, static u8 vp_get_status(struct virtio_device *vdev) { struct virtio_pci_device *vp_dev = to_vp_device(vdev); - return ioread8(vp_dev->ioaddr + VIRTIO_PCI_STATUS); + return vp_legacy_get_status(&vp_dev->ldev); } static void vp_set_status(struct virtio_device *vdev, u8 status) @@ -86,28 +87,24 @@ static void vp_set_status(struct virtio_device *vdev, u8 status) struct virtio_pci_device *vp_dev = to_vp_device(vdev); /* We should never be setting status to 0. */ BUG_ON(status == 0); - iowrite8(status, vp_dev->ioaddr + VIRTIO_PCI_STATUS); + vp_legacy_set_status(&vp_dev->ldev, status); } static void vp_reset(struct virtio_device *vdev) { struct virtio_pci_device *vp_dev = to_vp_device(vdev); /* 0 status means a reset. */ - iowrite8(0, vp_dev->ioaddr + VIRTIO_PCI_STATUS); + vp_legacy_set_status(&vp_dev->ldev, 0); /* Flush out the status write, and flush in device writes, * including MSi-X interrupts, if any. */ - ioread8(vp_dev->ioaddr + VIRTIO_PCI_STATUS); - /* Flush pending VQ/configuration callbacks. */ - vp_synchronize_vectors(vdev); + vp_legacy_get_status(&vp_dev->ldev); + /* Disable VQ/configuration callbacks. */ + vp_disable_cbs(vdev); } static u16 vp_config_vector(struct virtio_pci_device *vp_dev, u16 vector) { - /* Setup the vector used for configuration events */ - iowrite16(vector, vp_dev->ioaddr + VIRTIO_MSI_CONFIG_VECTOR); - /* Verify we had enough resources to assign the vector */ - /* Will also flush the write out to device */ - return ioread16(vp_dev->ioaddr + VIRTIO_MSI_CONFIG_VECTOR); + return vp_legacy_config_vector(&vp_dev->ldev, vector); } static struct virtqueue *setup_vq(struct virtio_pci_device *vp_dev, @@ -123,12 +120,9 @@ static struct virtqueue *setup_vq(struct virtio_pci_device *vp_dev, int err; u64 q_pfn; - /* Select the queue we're interested in */ - iowrite16(index, vp_dev->ioaddr + VIRTIO_PCI_QUEUE_SEL); - /* Check if queue is either not available or already active. */ - num = ioread16(vp_dev->ioaddr + VIRTIO_PCI_QUEUE_NUM); - if (!num || ioread32(vp_dev->ioaddr + VIRTIO_PCI_QUEUE_PFN)) + num = vp_legacy_get_queue_size(&vp_dev->ldev, index); + if (!num || vp_legacy_get_queue_enable(&vp_dev->ldev, index)) return ERR_PTR(-ENOENT); info->msix_vector = msix_vec; @@ -151,13 +145,12 @@ static struct virtqueue *setup_vq(struct virtio_pci_device *vp_dev, } /* activate the queue */ - iowrite32(q_pfn, vp_dev->ioaddr + VIRTIO_PCI_QUEUE_PFN); + vp_legacy_set_queue_address(&vp_dev->ldev, index, q_pfn); - vq->priv = (void __force *)vp_dev->ioaddr + VIRTIO_PCI_QUEUE_NOTIFY; + vq->priv = (void __force *)vp_dev->ldev.ioaddr + VIRTIO_PCI_QUEUE_NOTIFY; if (msix_vec != VIRTIO_MSI_NO_VECTOR) { - iowrite16(msix_vec, vp_dev->ioaddr + VIRTIO_MSI_QUEUE_VECTOR); - msix_vec = ioread16(vp_dev->ioaddr + VIRTIO_MSI_QUEUE_VECTOR); + msix_vec = vp_legacy_queue_vector(&vp_dev->ldev, index, msix_vec); if (msix_vec == VIRTIO_MSI_NO_VECTOR) { err = -EBUSY; goto out_deactivate; @@ -167,7 +160,7 @@ static struct virtqueue *setup_vq(struct virtio_pci_device *vp_dev, return vq; out_deactivate: - iowrite32(0, vp_dev->ioaddr + VIRTIO_PCI_QUEUE_PFN); + vp_legacy_set_queue_address(&vp_dev->ldev, index, 0); out_del_vq: vring_del_virtqueue(vq); return ERR_PTR(err); @@ -178,22 +171,21 @@ static void del_vq(struct virtio_pci_vq_info *info) struct virtqueue *vq = info->vq; struct virtio_pci_device *vp_dev = to_vp_device(vq->vdev); - iowrite16(vq->index, vp_dev->ioaddr + VIRTIO_PCI_QUEUE_SEL); - if (vp_dev->msix_enabled) { - iowrite16(VIRTIO_MSI_NO_VECTOR, - vp_dev->ioaddr + VIRTIO_MSI_QUEUE_VECTOR); + vp_legacy_queue_vector(&vp_dev->ldev, vq->index, + VIRTIO_MSI_NO_VECTOR); /* Flush the write out to device */ - ioread8(vp_dev->ioaddr + VIRTIO_PCI_ISR); + ioread8(vp_dev->ldev.ioaddr + VIRTIO_PCI_ISR); } /* Select and deactivate the queue */ - iowrite32(0, vp_dev->ioaddr + VIRTIO_PCI_QUEUE_PFN); + vp_legacy_set_queue_address(&vp_dev->ldev, vq->index, 0); vring_del_virtqueue(vq); } static const struct virtio_config_ops virtio_pci_config_ops = { + .enable_cbs = vp_enable_cbs, .get = vp_get, .set = vp_set, .get_status = vp_get_status, @@ -211,51 +203,18 @@ static const struct virtio_config_ops virtio_pci_config_ops = { /* the PCI probing function */ int virtio_pci_legacy_probe(struct virtio_pci_device *vp_dev) { + struct virtio_pci_legacy_device *ldev = &vp_dev->ldev; struct pci_dev *pci_dev = vp_dev->pci_dev; int rc; - /* We only own devices >= 0x1000 and <= 0x103f: leave the rest. */ - if (pci_dev->device < 0x1000 || pci_dev->device > 0x103f) - return -ENODEV; - - if (pci_dev->revision != VIRTIO_PCI_ABI_VERSION) { - printk(KERN_ERR "virtio_pci: expected ABI version %d, got %d\n", - VIRTIO_PCI_ABI_VERSION, pci_dev->revision); - return -ENODEV; - } - - rc = dma_set_mask(&pci_dev->dev, DMA_BIT_MASK(64)); - if (rc) { - rc = dma_set_mask_and_coherent(&pci_dev->dev, DMA_BIT_MASK(32)); - } else { - /* - * The virtio ring base address is expressed as a 32-bit PFN, - * with a page size of 1 << VIRTIO_PCI_QUEUE_ADDR_SHIFT. - */ - dma_set_coherent_mask(&pci_dev->dev, - DMA_BIT_MASK(32 + VIRTIO_PCI_QUEUE_ADDR_SHIFT)); - } - - if (rc) - dev_warn(&pci_dev->dev, "Failed to enable 64-bit or 32-bit DMA. Trying to continue, but this might not work.\n"); + ldev->pci_dev = pci_dev; - rc = pci_request_region(pci_dev, 0, "virtio-pci-legacy"); + rc = vp_legacy_probe(ldev); if (rc) return rc; - rc = -ENOMEM; - vp_dev->ioaddr = pci_iomap(pci_dev, 0, 0); - if (!vp_dev->ioaddr) - goto err_iomap; - - vp_dev->isr = vp_dev->ioaddr + VIRTIO_PCI_ISR; - - /* we use the subsystem vendor/device id as the virtio vendor/device - * id. this allows us to use the same PCI vendor/device id for all - * virtio devices and to identify the particular virtio driver by - * the subsystem ids */ - vp_dev->vdev.id.vendor = pci_dev->subsystem_vendor; - vp_dev->vdev.id.device = pci_dev->subsystem_device; + vp_dev->isr = ldev->isr; + vp_dev->vdev.id = ldev->id; vp_dev->vdev.config = &virtio_pci_config_ops; @@ -264,16 +223,11 @@ int virtio_pci_legacy_probe(struct virtio_pci_device *vp_dev) vp_dev->del_vq = del_vq; return 0; - -err_iomap: - pci_release_region(pci_dev, 0); - return rc; } void virtio_pci_legacy_remove(struct virtio_pci_device *vp_dev) { - struct pci_dev *pci_dev = vp_dev->pci_dev; + struct virtio_pci_legacy_device *ldev = &vp_dev->ldev; - pci_iounmap(pci_dev, vp_dev->ioaddr); - pci_release_region(pci_dev, 0); + vp_legacy_remove(ldev); } diff --git a/drivers/virtio/virtio_pci_legacy_dev.c b/drivers/virtio/virtio_pci_legacy_dev.c new file mode 100644 index 000000000000..9b97680dd02b --- /dev/null +++ b/drivers/virtio/virtio_pci_legacy_dev.c @@ -0,0 +1,220 @@ +// SPDX-License-Identifier: GPL-2.0-or-later + +#include "linux/virtio_pci.h" +#include <linux/virtio_pci_legacy.h> +#include <linux/module.h> +#include <linux/pci.h> + + +/* + * vp_legacy_probe: probe the legacy virtio pci device, note that the + * caller is required to enable PCI device before calling this function. + * @ldev: the legacy virtio-pci device + * + * Return 0 on succeed otherwise fail + */ +int vp_legacy_probe(struct virtio_pci_legacy_device *ldev) +{ + struct pci_dev *pci_dev = ldev->pci_dev; + int rc; + + /* We only own devices >= 0x1000 and <= 0x103f: leave the rest. */ + if (pci_dev->device < 0x1000 || pci_dev->device > 0x103f) + return -ENODEV; + + if (pci_dev->revision != VIRTIO_PCI_ABI_VERSION) + return -ENODEV; + + rc = dma_set_mask(&pci_dev->dev, DMA_BIT_MASK(64)); + if (rc) { + rc = dma_set_mask_and_coherent(&pci_dev->dev, DMA_BIT_MASK(32)); + } else { + /* + * The virtio ring base address is expressed as a 32-bit PFN, + * with a page size of 1 << VIRTIO_PCI_QUEUE_ADDR_SHIFT. + */ + dma_set_coherent_mask(&pci_dev->dev, + DMA_BIT_MASK(32 + VIRTIO_PCI_QUEUE_ADDR_SHIFT)); + } + + if (rc) + dev_warn(&pci_dev->dev, "Failed to enable 64-bit or 32-bit DMA. Trying to continue, but this might not work.\n"); + + rc = pci_request_region(pci_dev, 0, "virtio-pci-legacy"); + if (rc) + return rc; + + ldev->ioaddr = pci_iomap(pci_dev, 0, 0); + if (!ldev->ioaddr) + goto err_iomap; + + ldev->isr = ldev->ioaddr + VIRTIO_PCI_ISR; + + ldev->id.vendor = pci_dev->subsystem_vendor; + ldev->id.device = pci_dev->subsystem_device; + + return 0; +err_iomap: + pci_release_region(pci_dev, 0); + return rc; +} +EXPORT_SYMBOL_GPL(vp_legacy_probe); + +/* + * vp_legacy_probe: remove and cleanup the legacy virtio pci device + * @ldev: the legacy virtio-pci device + */ +void vp_legacy_remove(struct virtio_pci_legacy_device *ldev) +{ + struct pci_dev *pci_dev = ldev->pci_dev; + + pci_iounmap(pci_dev, ldev->ioaddr); + pci_release_region(pci_dev, 0); +} +EXPORT_SYMBOL_GPL(vp_legacy_remove); + +/* + * vp_legacy_get_features - get features from device + * @ldev: the legacy virtio-pci device + * + * Returns the features read from the device + */ +u64 vp_legacy_get_features(struct virtio_pci_legacy_device *ldev) +{ + + return ioread32(ldev->ioaddr + VIRTIO_PCI_HOST_FEATURES); +} +EXPORT_SYMBOL_GPL(vp_legacy_get_features); + +/* + * vp_legacy_get_driver_features - get driver features from device + * @ldev: the legacy virtio-pci device + * + * Returns the driver features read from the device + */ +u64 vp_legacy_get_driver_features(struct virtio_pci_legacy_device *ldev) +{ + return ioread32(ldev->ioaddr + VIRTIO_PCI_GUEST_FEATURES); +} +EXPORT_SYMBOL_GPL(vp_legacy_get_driver_features); + +/* + * vp_legacy_set_features - set features to device + * @ldev: the legacy virtio-pci device + * @features: the features set to device + */ +void vp_legacy_set_features(struct virtio_pci_legacy_device *ldev, + u32 features) +{ + iowrite32(features, ldev->ioaddr + VIRTIO_PCI_GUEST_FEATURES); +} +EXPORT_SYMBOL_GPL(vp_legacy_set_features); + +/* + * vp_legacy_get_status - get the device status + * @ldev: the legacy virtio-pci device + * + * Returns the status read from device + */ +u8 vp_legacy_get_status(struct virtio_pci_legacy_device *ldev) +{ + return ioread8(ldev->ioaddr + VIRTIO_PCI_STATUS); +} +EXPORT_SYMBOL_GPL(vp_legacy_get_status); + +/* + * vp_legacy_set_status - set status to device + * @ldev: the legacy virtio-pci device + * @status: the status set to device + */ +void vp_legacy_set_status(struct virtio_pci_legacy_device *ldev, + u8 status) +{ + iowrite8(status, ldev->ioaddr + VIRTIO_PCI_STATUS); +} +EXPORT_SYMBOL_GPL(vp_legacy_set_status); + +/* + * vp_legacy_queue_vector - set the MSIX vector for a specific virtqueue + * @ldev: the legacy virtio-pci device + * @index: queue index + * @vector: the config vector + * + * Returns the config vector read from the device + */ +u16 vp_legacy_queue_vector(struct virtio_pci_legacy_device *ldev, + u16 index, u16 vector) +{ + iowrite16(index, ldev->ioaddr + VIRTIO_PCI_QUEUE_SEL); + iowrite16(vector, ldev->ioaddr + VIRTIO_MSI_QUEUE_VECTOR); + /* Flush the write out to device */ + return ioread16(ldev->ioaddr + VIRTIO_MSI_QUEUE_VECTOR); +} +EXPORT_SYMBOL_GPL(vp_legacy_queue_vector); + +/* + * vp_legacy_config_vector - set the vector for config interrupt + * @ldev: the legacy virtio-pci device + * @vector: the config vector + * + * Returns the config vector read from the device + */ +u16 vp_legacy_config_vector(struct virtio_pci_legacy_device *ldev, + u16 vector) +{ + /* Setup the vector used for configuration events */ + iowrite16(vector, ldev->ioaddr + VIRTIO_MSI_CONFIG_VECTOR); + /* Verify we had enough resources to assign the vector */ + /* Will also flush the write out to device */ + return ioread16(ldev->ioaddr + VIRTIO_MSI_CONFIG_VECTOR); +} +EXPORT_SYMBOL_GPL(vp_legacy_config_vector); + +/* + * vp_legacy_set_queue_address - set the virtqueue address + * @ldev: the legacy virtio-pci device + * @index: the queue index + * @queue_pfn: pfn of the virtqueue + */ +void vp_legacy_set_queue_address(struct virtio_pci_legacy_device *ldev, + u16 index, u32 queue_pfn) +{ + iowrite16(index, ldev->ioaddr + VIRTIO_PCI_QUEUE_SEL); + iowrite32(queue_pfn, ldev->ioaddr + VIRTIO_PCI_QUEUE_PFN); +} +EXPORT_SYMBOL_GPL(vp_legacy_set_queue_address); + +/* + * vp_legacy_get_queue_enable - enable a virtqueue + * @ldev: the legacy virtio-pci device + * @index: the queue index + * + * Returns whether a virtqueue is enabled or not + */ +bool vp_legacy_get_queue_enable(struct virtio_pci_legacy_device *ldev, + u16 index) +{ + iowrite16(index, ldev->ioaddr + VIRTIO_PCI_QUEUE_SEL); + return ioread32(ldev->ioaddr + VIRTIO_PCI_QUEUE_PFN); +} +EXPORT_SYMBOL_GPL(vp_legacy_get_queue_enable); + +/* + * vp_legacy_get_queue_size - get size for a virtqueue + * @ldev: the legacy virtio-pci device + * @index: the queue index + * + * Returns the size of the virtqueue + */ +u16 vp_legacy_get_queue_size(struct virtio_pci_legacy_device *ldev, + u16 index) +{ + iowrite16(index, ldev->ioaddr + VIRTIO_PCI_QUEUE_SEL); + return ioread16(ldev->ioaddr + VIRTIO_PCI_QUEUE_NUM); +} +EXPORT_SYMBOL_GPL(vp_legacy_get_queue_size); + +MODULE_VERSION("0.1"); +MODULE_DESCRIPTION("Legacy Virtio PCI Device"); +MODULE_AUTHOR("Wu Zongyong <wuzongyong@linux.alibaba.com>"); +MODULE_LICENSE("GPL"); diff --git a/drivers/virtio/virtio_pci_modern.c b/drivers/virtio/virtio_pci_modern.c index 30654d3a0b41..5455bc041fb6 100644 --- a/drivers/virtio/virtio_pci_modern.c +++ b/drivers/virtio/virtio_pci_modern.c @@ -172,8 +172,8 @@ static void vp_reset(struct virtio_device *vdev) */ while (vp_modern_get_status(mdev)) msleep(1); - /* Flush pending VQ/configuration callbacks. */ - vp_synchronize_vectors(vdev); + /* Disable VQ/configuration callbacks. */ + vp_disable_cbs(vdev); } static u16 vp_config_vector(struct virtio_pci_device *vp_dev, u16 vector) @@ -380,6 +380,7 @@ static bool vp_get_shm_region(struct virtio_device *vdev, } static const struct virtio_config_ops virtio_pci_config_nodev_ops = { + .enable_cbs = vp_enable_cbs, .get = NULL, .set = NULL, .generation = vp_generation, @@ -397,6 +398,7 @@ static const struct virtio_config_ops virtio_pci_config_nodev_ops = { }; static const struct virtio_config_ops virtio_pci_config_ops = { + .enable_cbs = vp_enable_cbs, .get = vp_get, .set = vp_set, .generation = vp_generation, diff --git a/drivers/virtio/virtio_ring.c b/drivers/virtio/virtio_ring.c index dd95dfd85e98..00f64f2f8b72 100644 --- a/drivers/virtio/virtio_ring.c +++ b/drivers/virtio/virtio_ring.c @@ -14,6 +14,9 @@ #include <linux/spinlock.h> #include <xen/xen.h> +static bool force_used_validation = false; +module_param(force_used_validation, bool, 0444); + #ifdef DEBUG /* For development, we want to crash whenever the ring is screwed. */ #define BAD_RING(_vq, fmt, args...) \ @@ -79,8 +82,8 @@ struct vring_desc_state_packed { }; struct vring_desc_extra { - dma_addr_t addr; /* Buffer DMA addr. */ - u32 len; /* Buffer length. */ + dma_addr_t addr; /* Descriptor DMA addr. */ + u32 len; /* Descriptor length. */ u16 flags; /* Descriptor flags. */ u16 next; /* The next desc state in a list. */ }; @@ -182,6 +185,9 @@ struct vring_virtqueue { } packed; }; + /* Per-descriptor in buffer length */ + u32 *buflen; + /* How to notify other side. FIXME: commonalize hcalls! */ bool (*notify)(struct virtqueue *vq); @@ -490,6 +496,7 @@ static inline int virtqueue_add_split(struct virtqueue *_vq, unsigned int i, n, avail, descs_used, prev, err_idx; int head; bool indirect; + u32 buflen = 0; START_USE(vq); @@ -571,12 +578,13 @@ static inline int virtqueue_add_split(struct virtqueue *_vq, VRING_DESC_F_NEXT | VRING_DESC_F_WRITE, indirect); + buflen += sg->length; } } /* Last one doesn't continue. */ desc[prev].flags &= cpu_to_virtio16(_vq->vdev, ~VRING_DESC_F_NEXT); if (!indirect && vq->use_dma_api) - vq->split.desc_extra[prev & (vq->split.vring.num - 1)].flags = + vq->split.desc_extra[prev & (vq->split.vring.num - 1)].flags &= ~VRING_DESC_F_NEXT; if (indirect) { @@ -610,6 +618,10 @@ static inline int virtqueue_add_split(struct virtqueue *_vq, else vq->split.desc_state[head].indir_desc = ctx; + /* Store in buffer length if necessary */ + if (vq->buflen) + vq->buflen[head] = buflen; + /* Put entry in available array (but don't update avail->idx until they * do sync). */ avail = vq->split.avail_idx_shadow & (vq->split.vring.num - 1); @@ -784,6 +796,11 @@ static void *virtqueue_get_buf_ctx_split(struct virtqueue *_vq, BAD_RING(vq, "id %u is not a head!\n", i); return NULL; } + if (vq->buflen && unlikely(*len > vq->buflen[i])) { + BAD_RING(vq, "used len %d is larger than in buflen %u\n", + *len, vq->buflen[i]); + return NULL; + } /* detach_buf_split clears data, so grab it now. */ ret = vq->split.desc_state[i].data; @@ -1050,21 +1067,24 @@ static struct vring_packed_desc *alloc_indirect_packed(unsigned int total_sg, } static int virtqueue_add_indirect_packed(struct vring_virtqueue *vq, - struct scatterlist *sgs[], - unsigned int total_sg, - unsigned int out_sgs, - unsigned int in_sgs, - void *data, - gfp_t gfp) + struct scatterlist *sgs[], + unsigned int total_sg, + unsigned int out_sgs, + unsigned int in_sgs, + void *data, + gfp_t gfp) { struct vring_packed_desc *desc; struct scatterlist *sg; unsigned int i, n, err_idx; u16 head, id; dma_addr_t addr; + u32 buflen = 0; head = vq->packed.next_avail_idx; desc = alloc_indirect_packed(total_sg, gfp); + if (!desc) + return -ENOMEM; if (unlikely(vq->vq.num_free < 1)) { pr_debug("Can't add buf len 1 - avail = 0\n"); @@ -1089,6 +1109,8 @@ static int virtqueue_add_indirect_packed(struct vring_virtqueue *vq, desc[i].addr = cpu_to_le64(addr); desc[i].len = cpu_to_le32(sg->length); i++; + if (n >= out_sgs) + buflen += sg->length; } } @@ -1142,6 +1164,10 @@ static int virtqueue_add_indirect_packed(struct vring_virtqueue *vq, vq->packed.desc_state[id].indir_desc = desc; vq->packed.desc_state[id].last = id; + /* Store in buffer length if necessary */ + if (vq->buflen) + vq->buflen[id] = buflen; + vq->num_added += 1; pr_debug("Added buffer head %i to %p\n", head, vq); @@ -1176,6 +1202,8 @@ static inline int virtqueue_add_packed(struct virtqueue *_vq, unsigned int i, n, c, descs_used, err_idx; __le16 head_flags, flags; u16 head, id, prev, curr, avail_used_flags; + int err; + u32 buflen = 0; START_USE(vq); @@ -1191,9 +1219,14 @@ static inline int virtqueue_add_packed(struct virtqueue *_vq, BUG_ON(total_sg == 0); - if (virtqueue_use_indirect(_vq, total_sg)) - return virtqueue_add_indirect_packed(vq, sgs, total_sg, - out_sgs, in_sgs, data, gfp); + if (virtqueue_use_indirect(_vq, total_sg)) { + err = virtqueue_add_indirect_packed(vq, sgs, total_sg, out_sgs, + in_sgs, data, gfp); + if (err != -ENOMEM) + return err; + + /* fall back on direct */ + } head = vq->packed.next_avail_idx; avail_used_flags = vq->packed.avail_used_flags; @@ -1250,6 +1283,8 @@ static inline int virtqueue_add_packed(struct virtqueue *_vq, 1 << VRING_PACKED_DESC_F_AVAIL | 1 << VRING_PACKED_DESC_F_USED; } + if (n >= out_sgs) + buflen += sg->length; } } @@ -1269,6 +1304,10 @@ static inline int virtqueue_add_packed(struct virtqueue *_vq, vq->packed.desc_state[id].indir_desc = ctx; vq->packed.desc_state[id].last = prev; + /* Store in buffer length if necessary */ + if (vq->buflen) + vq->buflen[id] = buflen; + /* * A driver MUST NOT make the first descriptor in the list * available before all subsequent descriptors comprising @@ -1455,6 +1494,11 @@ static void *virtqueue_get_buf_ctx_packed(struct virtqueue *_vq, BAD_RING(vq, "id %u is not a head!\n", id); return NULL; } + if (vq->buflen && unlikely(*len > vq->buflen[id])) { + BAD_RING(vq, "used len %d is larger than in buflen %u\n", + *len, vq->buflen[id]); + return NULL; + } /* detach_buf_packed clears data, so grab it now. */ ret = vq->packed.desc_state[id].data; @@ -1660,6 +1704,7 @@ static struct virtqueue *vring_create_virtqueue_packed( struct vring_virtqueue *vq; struct vring_packed_desc *ring; struct vring_packed_desc_event *driver, *device; + struct virtio_driver *drv = drv_to_virtio(vdev->dev.driver); dma_addr_t ring_dma_addr, driver_event_dma_addr, device_event_dma_addr; size_t ring_size_in_bytes, event_size_in_bytes; @@ -1749,6 +1794,15 @@ static struct virtqueue *vring_create_virtqueue_packed( if (!vq->packed.desc_extra) goto err_desc_extra; + if (!drv->suppress_used_validation || force_used_validation) { + vq->buflen = kmalloc_array(num, sizeof(*vq->buflen), + GFP_KERNEL); + if (!vq->buflen) + goto err_buflen; + } else { + vq->buflen = NULL; + } + /* No callback? Tell other side not to bother us. */ if (!callback) { vq->packed.event_flags_shadow = VRING_PACKED_EVENT_FLAG_DISABLE; @@ -1761,6 +1815,8 @@ static struct virtqueue *vring_create_virtqueue_packed( spin_unlock(&vdev->vqs_list_lock); return &vq->vq; +err_buflen: + kfree(vq->packed.desc_extra); err_desc_extra: kfree(vq->packed.desc_state); err_desc_state: @@ -2168,6 +2224,7 @@ struct virtqueue *__vring_new_virtqueue(unsigned int index, void (*callback)(struct virtqueue *), const char *name) { + struct virtio_driver *drv = drv_to_virtio(vdev->dev.driver); struct vring_virtqueue *vq; if (virtio_has_feature(vdev, VIRTIO_F_RING_PACKED)) @@ -2227,6 +2284,15 @@ struct virtqueue *__vring_new_virtqueue(unsigned int index, if (!vq->split.desc_extra) goto err_extra; + if (!drv->suppress_used_validation || force_used_validation) { + vq->buflen = kmalloc_array(vring.num, sizeof(*vq->buflen), + GFP_KERNEL); + if (!vq->buflen) + goto err_buflen; + } else { + vq->buflen = NULL; + } + /* Put everything in free lists. */ vq->free_head = 0; memset(vq->split.desc_state, 0, vring.num * @@ -2237,6 +2303,8 @@ struct virtqueue *__vring_new_virtqueue(unsigned int index, spin_unlock(&vdev->vqs_list_lock); return &vq->vq; +err_buflen: + kfree(vq->split.desc_extra); err_extra: kfree(vq->split.desc_state); err_state: diff --git a/drivers/virtio/virtio_vdpa.c b/drivers/virtio/virtio_vdpa.c index 72eaef2caeb1..f85f860bc10b 100644 --- a/drivers/virtio/virtio_vdpa.c +++ b/drivers/virtio/virtio_vdpa.c @@ -65,9 +65,8 @@ static void virtio_vdpa_set(struct virtio_device *vdev, unsigned offset, const void *buf, unsigned len) { struct vdpa_device *vdpa = vd_get_vdpa(vdev); - const struct vdpa_config_ops *ops = vdpa->config; - ops->set_config(vdpa, offset, buf, len); + vdpa_set_config(vdpa, offset, buf, len); } static u32 virtio_vdpa_generation(struct virtio_device *vdev) @@ -145,7 +144,8 @@ virtio_vdpa_setup_vq(struct virtio_device *vdev, unsigned int index, /* Assume split virtqueue, switch to packed if necessary */ struct vdpa_vq_state state = {0}; unsigned long flags; - u32 align, num; + u32 align, max_num, min_num = 1; + bool may_reduce_num = true; int err; if (!name) @@ -163,16 +163,21 @@ virtio_vdpa_setup_vq(struct virtio_device *vdev, unsigned int index, if (!info) return ERR_PTR(-ENOMEM); - num = ops->get_vq_num_max(vdpa); - if (num == 0) { + max_num = ops->get_vq_num_max(vdpa); + if (max_num == 0) { err = -ENOENT; goto error_new_virtqueue; } + if (ops->get_vq_num_min) + min_num = ops->get_vq_num_min(vdpa); + + may_reduce_num = (max_num == min_num) ? false : true; + /* Create the vring */ align = ops->get_vq_align(vdpa); - vq = vring_create_virtqueue(index, num, align, vdev, - true, true, ctx, + vq = vring_create_virtqueue(index, max_num, align, vdev, + true, may_reduce_num, ctx, virtio_vdpa_notify, callback, name); if (!vq) { err = -ENOMEM; |